Browse Source

wip BinaryCIF column classifier

David Sehnal 6 years ago
parent
commit
3624d2d04a

+ 4 - 1
src/helpers.d.ts

@@ -9,7 +9,10 @@ declare module Helpers {
     export type Mutable<T> = {
         -readonly [P in keyof T]: T[P]
     }
-    export type TypedArray = Int8Array | Uint8Array | Int16Array | Uint16Array | Int32Array | Uint32Array | Uint8ClampedArray | Float32Array | Float64Array
+    export type TypedIntArray = Int8Array | Int16Array | Int32Array | Uint8Array | Uint16Array | Uint32Array
+    export type TypedFloatArray = Float32Array | Float64Array
+
+    export type TypedArray = TypedIntArray | TypedFloatArray
     export type NumberArray = TypedArray | number[]
     export type UintArray = Uint8Array | Uint16Array | Uint32Array | number[]
     export type ValueOf<T> = T[keyof T]

+ 14 - 14
src/mol-data/db/column.ts

@@ -10,7 +10,7 @@ import { Tensor as Tensors } from 'mol-math/linear-algebra'
 
 interface Column<T> {
     readonly schema: Column.Schema,
-    readonly '@array': ArrayLike<any> | undefined,
+    readonly __array: ArrayLike<any> | undefined,
 
     readonly isDefined: boolean,
     readonly rowCount: number,
@@ -159,14 +159,14 @@ namespace Column {
 
     /** Makes the column backed by an array. Useful for columns that are accessed often. */
     export function asArrayColumn<T>(c: Column<T>, array?: ArrayCtor<T>): Column<T> {
-        if (c['@array']) return c;
+        if (c.__array) return c;
         if (!c.isDefined) return Undefined(c.rowCount, c.schema) as any as Column<T>;
         return arrayColumn({ array: c.toArray({ array }), schema: c.schema, valueKind: c.valueKind });
     }
 
     export function copyToArray<T extends number>(c: Column<T>, array: { [k: number]: T, length: number }, offset = 0) {
         if (!c.isDefined) return;
-        const cArray = c['@array']
+        const cArray = c.__array
         if (cArray) {
             for (let i = 0, _i = cArray.length; i < _i; i++) array[offset + i] = cArray[i];
         } else {
@@ -200,7 +200,7 @@ function constColumn<T extends Column.Schema>(v: T['T'], rowCount: number, schem
     const value: Column<T['T']>['value'] = row => v;
     return {
         schema: schema,
-        '@array': void 0,
+        __array: void 0,
         isDefined: valueKind === Column.ValueKind.Present,
         rowCount,
         value,
@@ -217,7 +217,7 @@ function constColumn<T extends Column.Schema>(v: T['T'], rowCount: number, schem
 function lambdaColumn<T extends Column.Schema>({ value, valueKind, rowCount, schema }: Column.LambdaSpec<T>): Column<T['T']> {
     return {
         schema: schema,
-        '@array': void 0,
+        __array: void 0,
         isDefined: true,
         rowCount,
         value,
@@ -240,7 +240,7 @@ function arrayColumn<T extends Column.Schema>({ array, schema, valueKind }: Colu
     const isTyped = ColumnHelpers.isTypedArray(array);
     return {
         schema: schema,
-        '@array': array,
+        __array: array,
         isDefined: true,
         rowCount,
         value,
@@ -271,12 +271,12 @@ function arrayColumn<T extends Column.Schema>({ array, schema, valueKind }: Colu
 function windowColumn<T>(column: Column<T>, start: number, end: number) {
     if (!column.isDefined) return Column.Undefined(end - start, column.schema);
     if (start === 0 && end === column.rowCount) return column;
-    if (!!column['@array'] && ColumnHelpers.isTypedArray(column['@array'])) return windowTyped(column, start, end);
+    if (!!column.__array && ColumnHelpers.isTypedArray(column.__array)) return windowTyped(column, start, end);
     return windowFull(column, start, end);
 }
 
 function windowTyped<T>(c: Column<T>, start: number, end: number): Column<T> {
-    const array = ColumnHelpers.typedArrayWindow(c['@array'], { start, end });
+    const array = ColumnHelpers.typedArrayWindow(c.__array, { start, end });
     return arrayColumn({ array, schema: c.schema, valueKind: c.valueKind }) as any;
 }
 
@@ -286,7 +286,7 @@ function windowFull<T>(c: Column<T>, start: number, end: number): Column<T> {
     const rowCount = end - start;
     return {
         schema: c.schema,
-        '@array': void 0,
+        __array: void 0,
         isDefined: c.isDefined,
         rowCount,
         value,
@@ -311,12 +311,12 @@ function isIdentity(map: ArrayLike<number>, rowCount: number) {
 function columnView<T>(c: Column<T>, map: ArrayLike<number>, checkIdentity: boolean): Column<T> {
     if (!c.isDefined) return c;
     if (checkIdentity && isIdentity(map, c.rowCount)) return c;
-    if (!!c['@array']) return arrayView(c, map);
+    if (!!c.__array) return arrayView(c, map);
     return viewFull(c, map);
 }
 
 function arrayView<T>(c: Column<T>, map: ArrayLike<number>): Column<T> {
-    const array = c['@array']!;
+    const array = c.__array!;
     const ret = new (array as any).constructor(map.length);
     for (let i = 0, _i = map.length; i < _i; i++) ret[i] = array[map[i]];
     return arrayColumn({ array: ret, schema: c.schema, valueKind: c.valueKind });
@@ -328,7 +328,7 @@ function viewFull<T>(c: Column<T>, map: ArrayLike<number>): Column<T> {
     const rowCount = map.length;
     return {
         schema: c.schema,
-        '@array': void 0,
+        __array: void 0,
         isDefined: c.isDefined,
         rowCount,
         value,
@@ -351,12 +351,12 @@ function mapToArrayImpl<T, S>(c: Column<T>, f: (v: T) => S, ctor: Column.ArrayCt
 function areColumnsEqual(a: Column<any>, b: Column<any>) {
     if (a === b) return true;
     if (a.rowCount !== b.rowCount || a.isDefined !== b.isDefined || a.schema.valueType !== b.schema.valueType) return false;
-    if (!!a['@array'] && !!b['@array']) return areArraysEqual(a, b);
+    if (!!a.__array && !!b.__array) return areArraysEqual(a, b);
     return areValuesEqual(a, b);
 }
 
 function areArraysEqual(a: Column<any>, b: Column<any>) {
-    const xs = a['@array']!, ys = b['@array']!;
+    const xs = a.__array!, ys = b.__array!;
     for (let i = 0, _i = a.rowCount; i < _i; i++) {
         if (xs[i] !== ys[i]) return false;
     }

+ 7 - 7
src/mol-io/common/binary-cif/array-encoder.ts

@@ -100,7 +100,7 @@ export namespace ArrayEncoding {
         [Encoding.FloatDataType.Float64]: 8
     }
 
-    export function byteArray(data: Encoding.FloatArray | Encoding.IntArray) {
+    export function byteArray(data: Encoding.TypedFloatArray | Encoding.TypedIntArray) {
         let type = Encoding.getDataType(data);
 
         if (type === Encoding.IntDataType.Int8) return int8(data as Int8Array);
@@ -118,7 +118,7 @@ export namespace ArrayEncoding {
         };
     }
 
-    function _fixedPoint(data: Encoding.FloatArray, factor: number): Result {
+    function _fixedPoint(data: Encoding.TypedFloatArray, factor: number): Result {
         let srcType = Encoding.getDataType(data) as Encoding.FloatDataType;
         let result = new Int32Array(data.length);
         for (let i = 0, n = data.length; i < n; i++) {
@@ -129,9 +129,9 @@ export namespace ArrayEncoding {
             data: result
         };
     }
-    export function fixedPoint(factor: number): Provider { return data => _fixedPoint(data as Encoding.FloatArray, factor); }
+    export function fixedPoint(factor: number): Provider { return data => _fixedPoint(data as Encoding.TypedFloatArray, factor); }
 
-    function _intervalQuantizaiton(data: Encoding.FloatArray, min: number, max: number, numSteps: number, arrayType: new (size: number) => Encoding.IntArray): Result {
+    function _intervalQuantizaiton(data: Encoding.TypedFloatArray, min: number, max: number, numSteps: number, arrayType: new (size: number) => Encoding.TypedIntArray): Result {
         let srcType = Encoding.getDataType(data) as Encoding.FloatDataType;
         if (!data.length) {
             return {
@@ -161,11 +161,11 @@ export namespace ArrayEncoding {
             data: output
         };
     }
-    export function intervalQuantizaiton(min: number, max: number, numSteps: number, arrayType: new (size: number) => Encoding.IntArray = Int32Array): Provider {
-        return data => _intervalQuantizaiton(data as Encoding.FloatArray, min, max, numSteps, arrayType);
+    export function intervalQuantizaiton(min: number, max: number, numSteps: number, arrayType: new (size: number) => Encoding.TypedIntArray = Int32Array): Provider {
+        return data => _intervalQuantizaiton(data as Encoding.TypedFloatArray, min, max, numSteps, arrayType);
     }
 
-    export function runLength(data: Encoding.IntArray): Result {
+    export function runLength(data: Encoding.TypedIntArray): Result {
         let srcType = Encoding.getDataType(data) as Encoding.IntDataType;
         if (srcType === void 0) {
             data = new Int32Array(data);

+ 173 - 0
src/mol-io/common/binary-cif/classifier.ts

@@ -0,0 +1,173 @@
+
+/**
+ * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import { ArrayEncoder, ArrayEncoding as E } from './array-encoder';
+import { getArrayMantissaMultiplier } from 'mol-util/number';
+
+export function classifyIntArray(xs: ArrayLike<number>) { // picks an encoder chain for integer data by delegating to IntClassifier.classify
+    return IntClassifier.classify(xs as number[]);
+}
+
+export function classifyFloatArray(xs: ArrayLike<number>) { // picks an encoder chain and typed array constructor for float data by delegating to FloatClassifier.classify
+    return FloatClassifier.classify(xs as number[]);
+}
+
+namespace IntClassifier { // estimates the encoded size of an integer column under four strategies and selects an encoder chain from the smallest estimate
+    function packSize(value: number, upperLimit: number) { // number of packed elements counted for one value when a single element can hold at most upperLimit
+        return value >= 0
+            ? Math.ceil((value + 1) / upperLimit)
+            : Math.ceil((value + 1) / (-upperLimit - 1)); // NOTE(review): evaluates to -0 for value === -1, i.e. zero elements — confirm the negative branch matches the integer packing encoder
+    }
+
+    type IntColumnInfo = { signed: boolean, limit8: number, limit16: number }; // signedness of the column plus the per-element limits used for 8-bit and 16-bit packing
+
+    function getInfo(data: number[]): IntColumnInfo { // the column is treated as signed as soon as one negative value is found
+        let signed = false;
+        for (let i = 0, n = data.length; i < n; i++) {
+            if (data[i] < 0) {
+                signed = true;
+                break;
+            }
+        }
+        return signed ? { signed, limit8: 0x7F, limit16: 0x7FFF } : { signed, limit8: 0xFF, limit16: 0xFFFF };
+    }
+
+    type SizeInfo = { pack8: number, pack16: number, count: number } // running totals: 8-bit packed elements, 16-bit packed elements, number of values
+    function SizeInfo(): SizeInfo { return { pack8: 0, pack16: 0, count: 0 } };
+
+    function incSize({ limit8, limit16 }: IntColumnInfo, info: SizeInfo, value: number) { // adds one value to the totals using the column's own limits
+        info.pack8 += packSize(value, limit8);
+        info.pack16 += packSize(value, limit16);
+        info.count += 1;
+    }
+
+    function incSizeSigned(info: SizeInfo, value: number) { // like incSize, but always with the signed limits; used by the delta-based estimates
+        info.pack8 += packSize(value, 0x7F);
+        info.pack16 += packSize(value, 0x7FFF);
+        info.count += 1;
+    }
+
+    function byteSize(info: SizeInfo) { // chooses among the 4-byte unpacked, 2-byte packed and 1-byte packed totals; returns the byte length and the element size
+        if (info.count * 4 < info.pack16 * 2) return { length: info.count * 4, elem: 4 };
+        if (info.pack16 * 2 < info.pack8) return { length: info.pack16 * 2, elem: 2 };
+        return { length: info.pack8, elem: 1 };
+    }
+
+    function packingSize(data: number[], info: IntColumnInfo) { // estimate for integer packing applied directly to the values
+        const size = SizeInfo();
+        for (let i = 0, n = data.length; i < n; i++) {
+            incSize(info, size, data[i]);
+        }
+        return { ...byteSize(size), kind: 'pack' };
+    }
+
+    function deltaSize(data: number[], info: IntColumnInfo) { // estimate for delta encoding; differences are sized with signed limits and info is not used
+        const size = SizeInfo();
+        let prev = data[0]; // the first value only serves as the baseline and is not added to the estimate
+        for (let i = 1, n = data.length; i < n; i++) {
+            incSizeSigned(size, data[i] - prev);
+            prev = data[i];
+        }
+        return { ...byteSize(size), kind: 'delta' };
+    }
+
+    function rleSize(data: number[], info: IntColumnInfo) { // estimate for run-length encoding: every run contributes its value and its length
+        const size = SizeInfo();
+        let run = 1;
+        for (let i = 1, n = data.length; i < n; i++) {
+            if (data[i - 1] !== data[i]) {
+                incSize(info, size, data[i - 1]);
+                incSize(info, size, run);
+                run = 1;
+            } else {
+                run++;
+            }
+        }
+        incSize(info, size, data[data.length - 1]); // flush the final run
+        incSize(info, size, run);
+
+        return { ...byteSize(size), kind: 'rle' };
+    }
+
+    function deltaRleSize(data: number[], info: IntColumnInfo) { // estimate for delta followed by run-length encoding; info is not used
+        const size = SizeInfo();
+        let run = 1, prev = 0, prevValue = 0; // NOTE(review): prev starts at 0 while the loop starts at i = 1, so the first difference is data[1] - 0 and data[0] is never read — confirm intended
+        for (let i = 1, n = data.length; i < n; i++) {
+            const v = data[i] - prev;
+            if (prevValue !== v) {
+                incSizeSigned(size, prevValue);
+                incSizeSigned(size, run);
+                run = 1;
+            } else {
+                run++;
+            }
+            prevValue = v;
+            prev = data[i];
+        }
+        incSizeSigned(size, prevValue); // flush the final run
+        incSizeSigned(size, run);
+
+        return { ...byteSize(size), kind: 'delta-rle' };
+    }
+
+    export function getSize(data: number[]) { // all four estimates, sorted by ascending byte length
+        const info = getInfo(data);
+        const sizes = [packingSize(data, info), rleSize(data, info), deltaSize(data, info), deltaRleSize(data, info)];
+        sizes.sort((a, b) => a.length - b.length);
+        return sizes;
+    }
+
+    export function classify(data: number[]): ArrayEncoder { // maps the smallest estimate to the corresponding encoder chain
+        if (data.length < 2) return E.by(E.byteArray); // fewer than two values: no estimation, plain byte array
+
+        const sizes = getSize(data);
+        const size = sizes[0];
+
+        switch (size.kind) {
+            case 'pack': return E.by(E.integerPacking);
+            case 'rle': return E.by(E.runLength).and(E.integerPacking);
+            case 'delta': return E.by(E.delta).and(E.integerPacking);
+            case 'delta-rle': return E.by(E.delta).and(E.runLength).and(E.integerPacking);
+        }
+
+        throw new Error('should not happen :)');
+    }
+}
+
+namespace FloatClassifier { // chooses between a plain byte array, direct integer encoding, and fixed-point followed by integer encoding for a float column
+    const delta = 1e-6; // tolerance used when testing whether a scaled value is an integer
+    export function classify(data: number[]) { // returns the chosen encoder chain together with a typed array constructor
+        const digitCount = getArrayMantissaMultiplier(data, 4, delta); // despite the name this holds the multiplier (1, 10, 100 or 1000), or -1 when none fits
+        if (digitCount < 0) return { encoder: E.by(E.byteArray), typedArray: Float64Array };
+
+        // TODO: check for overflows here?
+        if (digitCount === 1) return { encoder: IntClassifier.classify(data), typedArray: Int32Array } // every value is an integer within delta
+
+        const intArray = new Int32Array(data.length);
+        for (let i = 0, n = data.length; i < n; i++) {
+            const v = digitCount * data[i];
+            intArray[i] = v; // storing into an Int32Array converts v to a 32-bit integer (fraction dropped, out-of-range values wrap)
+            // check if the value didn't overflow
+            if (Math.abs(Math.round(v) / digitCount - intArray[i] / digitCount) > delta) {
+                return { encoder: E.by(E.byteArray), typedArray: Float64Array };
+            }
+        }
+
+        const sizes = IntClassifier.getSize(intArray as any); // reuse the integer size estimates on the scaled values
+        const size = sizes[0];
+
+        const fp = E.by(E.fixedPoint(digitCount)); // fixed-point step shared by every chain below
+        switch (size.kind) {
+            case 'pack': return { encoder: fp.and(E.integerPacking), typedArray: Float32Array };
+            case 'rle': return { encoder: fp.and(E.runLength).and(E.integerPacking), typedArray: Float32Array };
+            case 'delta': return { encoder: fp.and(E.delta).and(E.integerPacking), typedArray: Float32Array };
+            case 'delta-rle': return { encoder: fp.and(E.delta).and(E.runLength).and(E.integerPacking), typedArray: Float32Array };
+        }
+
+        throw new Error('should not happen :)');
+    }
+}

+ 4 - 4
src/mol-io/common/binary-cif/encoding.ts

@@ -71,10 +71,10 @@ export namespace Encoding {
 
     export type DataType = IntDataType | FloatDataType
 
-    export type IntArray = Int8Array | Int16Array | Int32Array | Uint8Array | Uint16Array | Uint32Array
-    export type FloatArray = Float32Array | Float64Array
+    export type TypedIntArray = Helpers.TypedIntArray
+    export type TypedFloatArray = Helpers.TypedFloatArray
 
-    export function getDataType(data: IntArray | FloatArray): DataType {
+    export function getDataType(data: TypedIntArray | TypedFloatArray): DataType {
         let srcType: DataType;
         if (data instanceof Int8Array) srcType = Encoding.IntDataType.Int8;
         else if (data instanceof Int16Array) srcType = Encoding.IntDataType.Int16;
@@ -88,7 +88,7 @@ export namespace Encoding {
         return srcType;
     }
 
-    export function isSignedIntegerDataType(data: IntArray) {
+    export function isSignedIntegerDataType(data: TypedIntArray) {
         return data instanceof Int8Array || data instanceof Int16Array || data instanceof Int32Array;
     }
 

+ 1 - 1
src/mol-io/reader/cif/binary/field.ts

@@ -38,7 +38,7 @@ export default function Field(column: EncodedColumn): Data.CifField {
     const rowCount = data.length;
 
     return {
-        '@array': data,
+        __array: data,
         isDefined: true,
         rowCount,
         str,

+ 1 - 1
src/mol-io/reader/cif/data-model.ts

@@ -62,7 +62,7 @@ export namespace CifCategory {
  * This is to ensure that the functions can be invoked without having to "bind" them.
  */
 export interface CifField {
-    readonly '@array': ArrayLike<any> | undefined
+    readonly __array: ArrayLike<any> | undefined
     readonly isDefined: boolean,
     readonly rowCount: number,
 

+ 3 - 3
src/mol-io/reader/cif/schema.ts

@@ -41,7 +41,7 @@ function getColumnCtor(t: Column.Schema): ColumnCtor {
 function createColumn<T>(schema: Column.Schema, field: Data.CifField, value: (row: number) => T, toArray: Column<T>['toArray']): Column<T> {
     return {
         schema,
-        '@array': field['@array'],
+        __array: field.__array,
         isDefined: field.isDefined,
         rowCount: field.rowCount,
         value,
@@ -61,7 +61,7 @@ function createListColumn<T extends number|string>(schema: Column.Schema.List<T>
 
     return {
         schema,
-        '@array': void 0,
+        __array: void 0,
         isDefined: !!f,
         rowCount: category.rowCount,
         value,
@@ -89,7 +89,7 @@ function createTensorColumn(schema: Column.Schema.Tensor, category: Data.CifCate
 
     return {
         schema,
-        '@array': void 0,
+        __array: void 0,
         isDefined: first.isDefined,
         rowCount: category.rowCount,
         value,

+ 1 - 1
src/mol-io/reader/cif/text/field.ts

@@ -38,7 +38,7 @@ export default function CifTextField(tokens: Tokens, rowCount: number): Data.Cif
     };
 
     return {
-        '@array': void 0,
+        __array: void 0,
         isDefined: true,
         rowCount,
         str,

+ 1 - 1
src/mol-io/reader/common/text/column/fixed.ts

@@ -35,7 +35,7 @@ export function FixedColumn<T extends Column.Schema>(lines: Tokens, offset: numb
     };
     return {
         schema: schema,
-        '@array': void 0,
+        __array: void 0,
         isDefined: true,
         rowCount,
         value,

+ 1 - 1
src/mol-io/reader/common/text/column/token.ts

@@ -27,7 +27,7 @@ export function TokenColumn<T extends Column.Schema>(tokens: Tokens, schema: T):
 
     return {
         schema: schema,
-        '@array': void 0,
+        __array: void 0,
         isDefined: true,
         rowCount,
         value,

+ 5 - 7
src/mol-script/script/mol-script/symbols.ts

@@ -4,14 +4,12 @@
  * @author David Sehnal <david.sehnal@gmail.com>
  */
 
-import { MSymbol, Arguments, Argument } from '../../language/symbol'
-import { MolScriptBuilder as B } from '../../language/builder'
+import { UniqueArray } from 'mol-data/generic';
+import Expression from '../../language/expression';
+import { Argument, MSymbol } from '../../language/symbol';
 //import * as M from './macro'
-import { MolScriptSymbolTable as MolScript } from '../../language/symbol-table'
-import Type from '../../language/type'
-import * as Struct from '../../language/symbol-table/structure-query'
-import Expression from '../../language/expression'
-import { UniqueArray } from 'mol-data/generic'
+import { MolScriptSymbolTable as MolScript } from '../../language/symbol-table';
+import Type from '../../language/type';
 
 export type MolScriptSymbol =
     | { kind: 'alias', aliases: string[], symbol: MSymbol }

+ 33 - 0
src/mol-util/number.ts

@@ -0,0 +1,33 @@
+/**
+ * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ */
+
+/**
+ * Determine the multiplier that turns a floating point number into an integer.
+ * Find the smallest M = 10^k (0 <= k < maxDigits) such that |round(M * v) - M * v| <= delta.
+ * If no such M exists, return -1.
+ */
+export function getMantissaMultiplier(v: number, maxDigits: number, delta: number) {
+    let m = 1;
+    for (let i = 0; i < maxDigits; i++) {
+        let mv = m * v;
+        if (Math.abs(Math.round(mv) - mv) <= delta) return m; // first power of ten that makes v integral within delta
+        m *= 10;
+    }
+    return -1;
+}
+
+/**
+ * Determine the largest mantissa multiplier needed by any element of a floating point array.
+ * For each v, getMantissaMultiplier(v, maxDigits, delta) is evaluated; the largest result is returned (1 for an empty array).
+ * If no multiplier exists for some element, return -1.
+ */
+export function getArrayMantissaMultiplier(xs: ArrayLike<number>, maxDigits: number, delta: number) {
+    let m = 1;
+    for (let i = 0, _i = xs.length; i < _i; i++) {
+        const t = getMantissaMultiplier(xs[i], maxDigits, delta);
+        if (t < 0) return -1; // one unrepresentable element makes the whole array unrepresentable
+        if (t > m) m = t; // keep the largest multiplier seen so far
+    }
+    return m;
+}