David Sehnal 7 years ago
parent commit bbb3479511

+ 16 - 0
src/apps/cif2bcif.ts

@@ -0,0 +1,16 @@
+/**
+ * Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import * as fs from 'fs'
+import convert from './cif2bcif/converter'
+
+(async function () {
+    const src = process.argv[2];
+    const out = process.argv[3];
+
+    const res = await convert(src);
+    fs.writeFileSync(out, res);
+}());

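The entry point above is a thin CLI around the converter; it can also be driven programmatically. A minimal sketch, assuming the converter signature shown in the next file (the file paths are placeholders):

    import * as fs from 'fs'
    import convert from './cif2bcif/converter'

    async function example() {
        // default: BinaryCIF output
        const bcif = await convert('in.cif');
        fs.writeFileSync('out.bcif', bcif);

        // pass asText = true to get a plain-text CIF instead
        const text = await convert('in.cif', true);
        fs.writeFileSync('out.cif', text);
    }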
+ 54 - 0
src/apps/cif2bcif/converter.ts

@@ -0,0 +1,54 @@
+/**
+ * Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import Iterator from 'mol-base/collections/iterator'
+import CIF, { Category } from 'mol-io/reader/cif'
+import TextCIFEncoder from 'mol-io/writer/cif/encoder/text'
+import BinaryCIFEncoder from 'mol-io/writer/cif/encoder/binary'
+import * as Encoder from 'mol-io/writer/cif/encoder'
+import * as fs from 'fs'
+import classify from './field-classifier'
+
+async function getCIF(path: string) {
+    const str = fs.readFileSync(path, 'utf8');
+    const parsed = await CIF.parseText(str)();
+    if (parsed.isError) {
+        throw new Error(parsed.toString());
+    }
+    return parsed.result;
+}
+
+function createDefinition(cat: Category): Encoder.CategoryDefinition {
+    return {
+        name: cat.name,
+        fields: cat.fieldNames.map(n => classify(n, cat.getField(n)!))
+    }
+}
+
+function getCategoryInstanceProvider(cat: Category): Encoder.CategoryProvider {
+    return function (ctx: any) {
+        return {
+            data: cat,
+            definition: createDefinition(cat),
+            keys: () => Iterator.Range(0, cat.rowCount - 1),
+            rowCount: cat.rowCount
+        };
+    }
+}
+
+export default async function convert(path: string, asText = false) {
+    const cif = await getCIF(path);
+
+    const encoder = asText ? new TextCIFEncoder() : new BinaryCIFEncoder('mol* cif2bcif');
+    for (const b of cif.blocks) {
+        encoder.startDataBlock(b.header);
+        for (const _c of Object.keys(b.categories)) {
+            encoder.writeCategory(getCategoryInstanceProvider(b.categories[_c]));
+        }
+    }
+    return encoder.getData();
+}
+

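The provider built by getCategoryInstanceProvider has the shape every writer client supplies: data, a definition (category name plus field accessors), a key iterator and a row count. A hand-rolled sketch for a small in-memory table; the category and field names here are made up for illustration:

    import Iterator from 'mol-base/collections/iterator'
    import * as Encoder from 'mol-io/writer/cif/encoder'

    const demoData = { id: [1, 2, 3], value: [0.5, 1.25, 2.0] };

    const demoProvider: Encoder.CategoryProvider = () => ({
        data: demoData,
        definition: {
            name: 'demo',
            fields: [
                { name: 'id', type: Encoder.FieldType.Int, value: (i: number) => demoData.id[i] },
                { name: 'value', type: Encoder.FieldType.Float, value: (i: number) => demoData.value[i] }
            ]
        },
        keys: () => Iterator.Range(0, demoData.id.length - 1),
        rowCount: demoData.id.length
    });

Wrapping every parsed category this way is what lets a single converter handle arbitrary CIF files without a schema.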
+ 30 - 0
src/apps/cif2bcif/field-classifier.ts

@@ -0,0 +1,30 @@
+/**
+ * Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import { Column } from 'mol-base/collections/database'
+import { Field } from 'mol-io/reader/cif/data-model'
+import { FieldDefinition, FieldType } from 'mol-io/writer/cif/encoder'
+
+const intRegex = /^-?\d+$/
+const floatRegex = /^-?(([0-9]+)[.]?|([0-9]*[.][0-9]+))([(][0-9]+[)])?([eE][+-]?[0-9]+)?/
+
+function classify(name: string, field: Field): FieldDefinition {
+    let floatCount = 0, hasString = false;
+    for (let i = 0, _i = field.rowCount; i < _i; i++) {
+        const k = field.valueKind(i);
+        if (k !== Column.ValueKind.Present) continue;
+        const v = field.str(i);
+        if (intRegex.test(v)) continue;
+        else if (floatRegex.test(v)) floatCount++;
+        else { hasString = true; break; }
+    }
+
+    if (hasString) return { name, type: FieldType.Str, value: field.str, valueKind: field.valueKind };
+    if (floatCount > 0) return { name, type: FieldType.Float, value: field.float, valueKind: field.valueKind };
+    return { name, type: FieldType.Int, value: field.int, valueKind: field.valueKind };
+}
+
+export default classify;

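The two regular expressions above do all the work: every present value is tested against the integer pattern first and only then falls through to float and finally string. A few illustrative probes (the sample tokens are made up, not taken from a particular file):

    const intRegex = /^-?\d+$/
    const floatRegex = /^-?(([0-9]+)[.]?|([0-9]*[.][0-9]+))([(][0-9]+[)])?([eE][+-]?[0-9]+)?/

    intRegex.test('-42');       // true  -> stays Int
    floatRegex.test('1.25');    // true  -> bumps floatCount
    floatRegex.test('2.50(3)'); // true  -> esd suffix is tolerated
    floatRegex.test('1.0e-3');  // true  -> scientific notation
    intRegex.test('ALA');       // false
    floatRegex.test('ALA');     // false -> column becomes Str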
+ 5 - 4
src/mol-data/structure/export/mmcif.ts

@@ -8,7 +8,8 @@ import { Column, Table } from 'mol-base/collections/database'
 import Iterator from 'mol-base/collections/iterator'
 import * as Encoder from 'mol-io/writer/cif/encoder'
 //import { mmCIF_Schema } from 'mol-io/reader/cif/schema/mmcif'
-import CIFEncoder from 'mol-io/writer/cif/encoder/text'
+import TextCIFEncoder from 'mol-io/writer/cif/encoder/text'
+import BinaryCIFEncoder from 'mol-io/writer/cif/encoder/binary'
 import { Structure, Atom, AtomSet } from '../structure'
 import { Model } from '../model'
 import P from '../query/properties'
@@ -142,13 +143,13 @@ function atomSiteProvider({ structure }: Context): Encoder.CategoryInstance {
     }
 }
 
-function getCifString(name: string, structure: Structure) {
+function to_mmCIF(name: string, structure: Structure, asBinary = false) {
     const models = Structure.getModels(structure);
     if (models.length !== 1) throw 'cannot export structure composed from multiple models.';
     const model = models[0];
 
     const ctx: Context = { structure, model };
-    const w = new CIFEncoder();
+    const w = asBinary ? new BinaryCIFEncoder('mol*') : new TextCIFEncoder();
 
     w.startDataBlock(name);
     w.writeCategory(entityProvider, [ctx]);
@@ -156,4 +157,4 @@ function getCifString(name: string, structure: Structure) {
     return w.getData();
 }
 
-export default getCifString
+export default to_mmCIF

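A usage sketch for the renamed export; `structure` is assumed to come from the usual parsing/model-building path (as in the perf tests further down), and the text encoder should yield a string while the binary one yields a Uint8Array:

    import { Structure } from 'mol-data/structure'
    import to_mmCIF from 'mol-data/structure/export/mmcif'

    declare const structure: Structure; // built elsewhere from a parsed model

    const cifText = to_mmCIF('1abc', structure);       // text mmCIF
    const bcif = to_mmCIF('1abc', structure, true);    // BinaryCIF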
+ 11 - 0
src/mol-io/common/binary-cif.ts

@@ -0,0 +1,11 @@
+/**
+ * Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import decode from './binary-cif/decoder'
+
+export * from './binary-cif/encoding'
+export * from './binary-cif/array-encoder'
+export { decode }

+ 396 - 0
src/mol-io/common/binary-cif/array-encoder.ts

@@ -0,0 +1,396 @@
+/**
+ * Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * Adapted from CIFTools.js (https://github.com/dsehnal/CIFTools.js; MIT) and MMTF (https://github.com/rcsb/mmtf-javascript/; MIT)
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ */
+
+import ChunkedArray from 'mol-base/collections/chunked-array'
+import { Encoding, EncodedData } from './encoding'
+
+export interface ArrayEncoder {
+    and(f: ArrayEncoding.Provider): ArrayEncoder,
+    encode(data: ArrayLike<any>): EncodedData
+}
+
+export class ArrayEncoderImpl implements ArrayEncoder {
+    and(f: ArrayEncoding.Provider) {
+        return new ArrayEncoderImpl(this.providers.concat([f]));
+    }
+
+    encode(data: ArrayLike<any>): EncodedData {
+        let encoding: Encoding[] = [];
+        for (let p of this.providers) {
+            let t = p(data);
+
+            if (!t.encodings.length) {
+                throw new Error('Encodings must be non-empty.');
+            }
+
+            data = t.data;
+            for (let e of t.encodings) {
+                encoding.push(e);
+            }
+        }
+        if (!(data instanceof Uint8Array)) {
+            throw new Error('The encoding must result in a Uint8Array. Fix your encoding chain.');
+        }
+        return {
+            encoding,
+            data
+        }
+    }
+
+    constructor(private providers: ArrayEncoding.Provider[]) {
+
+    }
+}
+
+export namespace ArrayEncoder {
+    export function by(f: ArrayEncoding.Provider): ArrayEncoder {
+        return new ArrayEncoderImpl([f]);
+    }
+}
+
+export namespace ArrayEncoding {
+    export type TypedArrayCtor = { new(size: number): ArrayLike<number> & { buffer: ArrayBuffer, byteLength: number, byteOffset: number, BYTES_PER_ELEMENT: number } }
+
+    export interface Result {
+        encodings: Encoding[],
+        data: any
+    }
+
+    export type Provider = (data: any) => Result
+
+    export function by(f: Provider): ArrayEncoder {
+        return new ArrayEncoderImpl([f]);
+    }
+
+    function uint8(data: Uint8Array): Result {
+        return {
+            encodings: [{ kind: 'ByteArray', type: Encoding.IntDataType.Uint8 }],
+            data
+        };
+    }
+
+    function int8(data: Int8Array): Result {
+        return {
+            encodings: [{ kind: 'ByteArray', type: Encoding.IntDataType.Int8 }],
+            data: new Uint8Array(data.buffer, data.byteOffset)
+        };
+    }
+
+    const writers = {
+        [Encoding.IntDataType.Int16]: function (v: DataView, i: number, a: number) { v.setInt16(2 * i, a, true) },
+        [Encoding.IntDataType.Uint16]: function (v: DataView, i: number, a: number) { v.setUint16(2 * i, a, true) },
+        [Encoding.IntDataType.Int32]: function (v: DataView, i: number, a: number) { v.setInt32(4 * i, a, true) },
+        [Encoding.IntDataType.Uint32]: function (v: DataView, i: number, a: number) { v.setUint32(4 * i, a, true) },
+        [Encoding.FloatDataType.Float32]: function (v: DataView, i: number, a: number) { v.setFloat32(4 * i, a, true) },
+        [Encoding.FloatDataType.Float64]: function (v: DataView, i: number, a: number) { v.setFloat64(8 * i, a, true) }
+    }
+
+    const byteSizes = {
+        [Encoding.IntDataType.Int16]: 2,
+        [Encoding.IntDataType.Uint16]: 2,
+        [Encoding.IntDataType.Int32]: 4,
+        [Encoding.IntDataType.Uint32]: 4,
+        [Encoding.FloatDataType.Float32]: 4,
+        [Encoding.FloatDataType.Float64]: 8
+    }
+
+    export function byteArray(data: Encoding.FloatArray | Encoding.IntArray) {
+        let type = Encoding.getDataType(data);
+
+        if (type === Encoding.IntDataType.Int8) return int8(data as Int8Array);
+        else if (type === Encoding.IntDataType.Uint8) return uint8(data as Uint8Array);
+
+        let result = new Uint8Array(data.length * byteSizes[type]);
+        let w = writers[type];
+        let view = new DataView(result.buffer);
+        for (let i = 0, n = data.length; i < n; i++) {
+            w(view, i, data[i]);
+        }
+        return {
+            encodings: [<Encoding.ByteArray>{ kind: 'ByteArray', type }],
+            data: result
+        };
+    }
+
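+    // Fixed-point encoding: multiply each float by `factor`, round, and store as Int32;
+    // the decoder divides by `factor` to recover (lossy) floating-point values.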
+    function _fixedPoint(data: Encoding.FloatArray, factor: number): Result {
+        let srcType = Encoding.getDataType(data) as Encoding.FloatDataType;
+        let result = new Int32Array(data.length);
+        for (let i = 0, n = data.length; i < n; i++) {
+            result[i] = Math.round(data[i] * factor);
+        }
+        return {
+            encodings: [{ kind: 'FixedPoint', factor, srcType }],
+            data: result
+        };
+    }
+    export function fixedPoint(factor: number): Provider { return data => _fixedPoint(data as Encoding.FloatArray, factor); }
+
+    function _intervalQuantization(data: Encoding.FloatArray, min: number, max: number, numSteps: number, arrayType: new (size: number) => Encoding.IntArray): Result {
+        let srcType = Encoding.getDataType(data) as Encoding.FloatDataType;
+        if (!data.length) {
+            return {
+                encodings: [{ kind: 'IntervalQuantization', min, max, numSteps, srcType }],
+                data: new Int32Array(0)
+            };
+        }
+
+        if (max < min) {
+            let t = min;
+            min = max;
+            max = t;
+        }
+
+        let delta = (max - min) / (numSteps - 1);
+
+        let output = new arrayType(data.length);
+        for (let i = 0, n = data.length; i < n; i++) {
+            let v = data[i];
+            if (v <= min) output[i] = 0;
+            else if (v >= max) output[i] = numSteps;
+            else output[i] = (Math.round((v - min) / delta)) | 0;
+        }
+
+        return {
+            encodings: [{ kind: 'IntervalQuantization', min, max, numSteps, srcType }],
+            data: output
+        };
+    }
+    export function intervalQuantization(min: number, max: number, numSteps: number, arrayType: new (size: number) => Encoding.IntArray = Int32Array): Provider {
+        return data => _intervalQuantization(data as Encoding.FloatArray, min, max, numSteps, arrayType);
+    }
+
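+    // Run-length encoding: collapses runs of equal values into (value, count) pairs stored in an Int32Array.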
+    export function runLength(data: Encoding.IntArray): Result {
+        let srcType = Encoding.getDataType(data) as Encoding.IntDataType;
+        if (srcType === void 0) {
+            data = new Int32Array(data);
+            srcType = Encoding.IntDataType.Int32;
+        }
+
+        if (!data.length) {
+            return {
+                encodings: [{ kind: 'RunLength', srcType, srcSize: 0 }],
+                data: new Int32Array(0)
+            };
+        }
+
+        // calculate output size
+        let fullLength = 2;
+        for (let i = 1, il = data.length; i < il; i++) {
+            if (data[i - 1] !== data[i]) {
+                fullLength += 2;
+            }
+        }
+        let output = new Int32Array(fullLength);
+        let offset = 0;
+        let runLength = 1;
+        for (let i = 1, il = data.length; i < il; i++) {
+            if (data[i - 1] !== data[i]) {
+                output[offset] = data[i - 1];
+                output[offset + 1] = runLength;
+                runLength = 1;
+                offset += 2;
+            } else {
+                ++runLength;
+            }
+        }
+        output[offset] = data[data.length - 1];
+        output[offset + 1] = runLength;
+        return {
+            encodings: [{ kind: 'RunLength', srcType, srcSize: data.length }],
+            data: output
+        };
+    }
+
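+    // Delta encoding: keeps the first value as `origin` and stores successive differences (output[0] becomes 0).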
+    export function delta(data: Int8Array | Int16Array | Int32Array): Result {
+        if (!Encoding.isSignedIntegerDataType(data)) {
+            throw new Error('Only signed integer types can be encoded using delta encoding.');
+        }
+
+        let srcType = Encoding.getDataType(data) as Encoding.IntDataType;
+        if (srcType === void 0) {
+            data = new Int32Array(data);
+            srcType = Encoding.IntDataType.Int32;
+        }
+        if (!data.length) {
+            return {
+                encodings: [{ kind: 'Delta', origin: 0, srcType }],
+                data: new (data as any).constructor(0)
+            };
+        }
+
+        let output = new (data as any).constructor(data.length);
+        let origin = data[0];
+        output[0] = data[0];
+        for (let i = 1, n = data.length; i < n; i++) {
+            output[i] = data[i] - data[i - 1];
+        }
+        output[0] = 0;
+        return {
+            encodings: [{ kind: 'Delta', origin, srcType }],
+            data: output
+        };
+    }
+
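+    // Integer-packing helpers: values that do not fit the chosen 1- or 2-byte limit are split into
+    // repeated limit words followed by a remainder; packingSize computes the packed length for a given limit.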
+    function isSigned(data: Int32Array) {
+        for (let i = 0, n = data.length; i < n; i++) {
+            if (data[i] < 0) return true;
+        }
+        return false;
+    }
+
+    function packingSize(data: Int32Array, upperLimit: number) {
+        let lowerLimit = -upperLimit - 1;
+        let size = 0;
+        for (let i = 0, n = data.length; i < n; i++) {
+            let value = data[i];
+            if (value === 0) {
+                size += 1;
+            } else if (value > 0) {
+                size += Math.ceil(value / upperLimit);
+                if (value % upperLimit === 0) size += 1;
+            } else {
+                size += Math.ceil(value / lowerLimit);
+                if (value % lowerLimit === 0) size += 1;
+            }
+        }
+        return size;
+    }
+
+    function determinePacking(data: Int32Array): { isSigned: boolean, size: number, bytesPerElement: number } {
+        let signed = isSigned(data);
+        let size8 = signed ? packingSize(data, 0x7F) : packingSize(data, 0xFF);
+        let size16 = signed ? packingSize(data, 0x7FFF) : packingSize(data, 0xFFFF);
+
+        if (data.length * 4 < size16 * 2) {
+            // 4 byte packing is the most effective
+            return {
+                isSigned: signed,
+                size: data.length,
+                bytesPerElement: 4
+            };
+        } else if (size16 * 2 < size8) {
+            // 2 byte packing is the most effective
+            return {
+                isSigned: signed,
+                size: size16,
+                bytesPerElement: 2
+            }
+        } else {
+            // 1 byte packing is the most effective
+            return {
+                isSigned: signed,
+                size: size8,
+                bytesPerElement: 1
+            }
+        };
+    }
+
+    function _integerPacking(data: Int32Array, packing: { isSigned: boolean, size: number, bytesPerElement: number }): Result {
+        let upperLimit = packing.isSigned
+            ? (packing.bytesPerElement === 1 ? 0x7F : 0x7FFF)
+            : (packing.bytesPerElement === 1 ? 0xFF : 0xFFFF);
+
+        let lowerLimit = -upperLimit - 1;
+        let n = data.length;
+        let packed = packing.isSigned
+            ? packing.bytesPerElement === 1 ? new Int8Array(packing.size) : new Int16Array(packing.size)
+            : packing.bytesPerElement === 1 ? new Uint8Array(packing.size) : new Uint16Array(packing.size);
+        let j = 0;
+        for (let i = 0; i < n; i++) {
+            let value = data[i];
+            if (value >= 0) {
+                while (value >= upperLimit) {
+                    packed[j] = upperLimit;
+                    ++j;
+                    value -= upperLimit;
+                }
+            } else {
+                while (value <= lowerLimit) {
+                    packed[j] = lowerLimit;
+                    ++j;
+                    value -= lowerLimit;
+                }
+            }
+            packed[j] = value;
+            ++j;
+        }
+
+        let result = byteArray(packed);
+        return {
+            encodings: [{
+                kind: 'IntegerPacking',
+                byteCount: packing.bytesPerElement,
+                isUnsigned: !packing.isSigned,
+                srcSize: n
+            },
+            result.encodings[0]
+            ],
+            data: result.data
+        };
+    }
+
+    /**
+     * Packs Int32 array. The packing level is determined automatically to either 1-, 2-, or 4-byte words.
+     */
+    export function integerPacking(data: Int32Array): Result {
+        if (!(data instanceof Int32Array)) {
+            throw new Error('Integer packing can only be applied to Int32 data.');
+        }
+
+        let packing = determinePacking(data);
+
+        if (packing.bytesPerElement === 4) {
+            // no packing done, Int32 encoding will be used
+            return byteArray(data);
+        }
+
+        return _integerPacking(data, packing);
+    }
+
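+    // String columns: unique strings are concatenated into one string plus an offset array; each row stores
+    // an index into that table (-1 for null), and both offsets and indices are further integer-encoded.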
+    export function stringArray(data: string[]): Result {
+        let map: any = Object.create(null);
+        let strings: string[] = [];
+        let accLength = 0;
+        let offsets = ChunkedArray.create<number>(s => new Int32Array(s), 1, 1024, true)
+        let output = new Int32Array(data.length);
+
+        ChunkedArray.add(offsets, 0);
+        let i = 0;
+        for (let s of data) {
+            // handle null strings.
+            if (s === null || s === void 0) {
+                output[i++] = -1;
+                continue;
+            }
+
+            let index = map[s];
+            if (index === void 0) {
+                // increment the length
+                accLength += s.length;
+
+                // store the string and index
+                index = strings.length;
+                strings[index] = s;
+                map[s] = index;
+
+                // write the offset
+                ChunkedArray.add(offsets, accLength);
+            }
+            output[i++] = index;
+        }
+
+        let encOffsets = ArrayEncoder.by(delta).and(integerPacking).encode(ChunkedArray.compact(offsets));
+        let encOutput = ArrayEncoder.by(delta).and(runLength).and(integerPacking).encode(output);
+
+        return {
+            encodings: [{ kind: 'StringArray', dataEncoding: encOutput.encoding, stringData: strings.join(''), offsetEncoding: encOffsets.encoding, offsets: encOffsets.data }],
+            data: encOutput.data
+        };
+    }
+}

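The providers above are designed to be chained via ArrayEncoder.by(...).and(...), and a chain has to end in something that produces a Uint8Array (byteArray, or integerPacking, which ends in byteArray internally). A small sketch of two typical BinaryCIF-style chains, using made-up sample arrays:

    import { ArrayEncoder, ArrayEncoding as E } from 'mol-io/common/binary-cif'

    // float coordinates: fixed point (3 decimals) -> delta -> integer packing -> bytes
    const coordEncoder = ArrayEncoder.by(E.fixedPoint(1000)).and(E.delta).and(E.integerPacking);
    const coords = coordEncoder.encode(new Float32Array([10.001, 10.003, 10.006]));

    // id-like integer columns: delta -> run length -> integer packing -> bytes
    const idEncoder = ArrayEncoder.by(E.delta).and(E.runLength).and(E.integerPacking);
    const ids = idEncoder.encode(new Int32Array([1, 1, 1, 2, 2, 3]));

    // both results have the shape { encoding: Encoding[], data: Uint8Array }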
+ 0 - 0
src/mol-io/reader/cif/binary/decoder.ts → src/mol-io/common/binary-cif/decoder.ts


+ 0 - 0
src/mol-io/reader/cif/binary/encoding.ts → src/mol-io/common/binary-cif/encoding.ts


+ 0 - 0
src/mol-io/utils/msgpack/decode.ts → src/mol-io/common/msgpack/decode.ts


+ 0 - 0
src/mol-io/utils/msgpack/encode.ts → src/mol-io/common/msgpack/encode.ts


+ 0 - 0
src/mol-io/utils/utf8.ts → src/mol-io/common/utf8.ts


+ 2 - 21
src/mol-io/reader/cif/binary/field.ts

@@ -6,8 +6,7 @@
 
 import { Column, ColumnHelpers } from 'mol-base/collections/database'
 import * as Data from '../data-model'
-import { EncodedColumn } from './encoding'
-import decode from './decoder'
+import { EncodedColumn, decode } from '../../../common/binary-cif'
 import { parseInt as fastParseInt, parseFloat as fastParseFloat } from '../../common/text/number-parser'
 
 function wrap(o: Data.Field) {
@@ -58,22 +57,4 @@ export default function Field(column: EncodedColumn): Data.Field {
             ? params => ColumnHelpers.typedArrayWindow(data, params)
             : params => ColumnHelpers.createAndFillArray(rowCount, float, params)
     });
-}
-
-// return wrap({
-//     '@array': data,
-//     isDefined: true,
-//     rowCount,
-//     str: str as any,
-//     int,
-//     float,
-//     valueKind,
-//     areValuesEqual: (rowA, rowB) => data[rowA] === data[rowB],
-//     toStringArray: params => ColumnHelpers.createAndFillArray(rowCount, str, params),
-//     toIntArray: isNumeric
-//         ? params => ColumnHelpers.typedArrayWindow(data, params)
-//         : params => ColumnHelpers.createAndFillArray(rowCount, int, params),
-//     toFloatArray: isNumeric
-//         ? params => ColumnHelpers.typedArrayWindow(data, params)
-//         : params => ColumnHelpers.createAndFillArray(rowCount, float, params)
-// });
+}

+ 6 - 4
src/mol-io/reader/cif/binary/parser.ts

@@ -5,10 +5,10 @@
  */
 
 import * as Data from '../data-model'
-import * as Encoding from './encoding'
+import { EncodedCategory, EncodedFile } from '../../../common/binary-cif'
 import Field from './field'
 import Result from '../../result'
-import decodeMsgPack from '../../../utils/msgpack/decode'
+import decodeMsgPack from '../../../common/msgpack/decode'
 import Computation from 'mol-base/computation'
 
 function checkVersions(min: number[], current: number[]) {
@@ -18,12 +18,14 @@ function checkVersions(min: number[], current: number[]) {
     return true;
 }
 
-function Category(data: Encoding.EncodedCategory): Data.Category {
+function Category(data: EncodedCategory): Data.Category {
     const map = Object.create(null);
     const cache = Object.create(null);
     for (const col of data.columns) map[col.name] = col;
     return {
         rowCount: data.rowCount,
+        name: data.name,
+        fieldNames: data.columns.map(c => c.name),
         getField(name) {
             const col = map[name];
             if (!col) return void 0;
@@ -39,7 +41,7 @@ export default function parse(data: Uint8Array) {
         const minVersion = [0, 3];
 
         try {
-            const unpacked = decodeMsgPack(data) as Encoding.EncodedFile;
+            const unpacked = decodeMsgPack(data) as EncodedFile;
             if (!checkVersions(minVersion, unpacked.version.match(/(\d)\.(\d)\.\d/)!.slice(1).map(v => +v))) {
                 return Result.error<Data.File>(`Unsupported format version. Current ${unpacked.version}, required ${minVersion.join('.')}.`);
             }

+ 7 - 3
src/mol-io/reader/cif/data-model.ts

@@ -39,15 +39,19 @@ export type Categories = { readonly [name: string]: Category }
 
 export interface Category {
     readonly rowCount: number,
+    readonly name: string,
+    readonly fieldNames: ReadonlyArray<string>,
     getField(name: string): Field | undefined
 }
 
-export function Category(rowCount: number, fields: { [name: string]: Field }): Category {
-    return { rowCount, getField(name) { return fields[name]; } };
+export function Category(name: string, rowCount: number, fieldNames: string[], fields: { [name: string]: Field }): Category {
+    return { rowCount, name, fieldNames: [...fieldNames], getField(name) { return fields[name]; } };
 }
 
 export namespace Category {
-    export const Empty: Category = { rowCount: 0, getField(name: string) { return void 0; } };
+    export function empty(name: string): Category {
+        return { rowCount: 0, name, fieldNames: [], getField(name: string) { return void 0; } };
+    };
 }
 
 /**

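With `name` and `fieldNames` now part of the reader-side Category, a consumer can walk a parsed file generically, which is exactly what the cif2bcif converter above relies on. A minimal sketch, assuming `cif` is the file returned by getCIF in the converter:

    for (const block of cif.blocks) {
        for (const key of Object.keys(block.categories)) {
            const cat = block.categories[key];
            console.log(cat.name, cat.rowCount, cat.fieldNames.join(' '));
        }
    }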
+ 1 - 1
src/mol-io/reader/cif/schema.ts

@@ -88,5 +88,5 @@ function createDatabase(schema: Database.Schema, frame: Data.Frame): Database<an
 
 function createTable(key: string, schema: Table.Schema, frame: Data.Frame) {
     const cat = frame.categories[key[0] === '_' ? key : '_' + key];
-    return new CategoryTable(cat || Data.Category.Empty, schema, !!cat);
+    return new CategoryTable(cat || Data.Category.empty(key), schema, !!cat);
 }

+ 4 - 2
src/mol-io/reader/cif/text/parser.ts

@@ -418,6 +418,7 @@ function handleSingle(tokenizer: TokenizerState, categories: { [name: string]: D
     const nsStart = tokenizer.tokenStart, nsEnd = getNamespaceEnd(tokenizer);
     const name = getNamespace(tokenizer, nsEnd);
     const fields = Object.create(null);
+    const fieldNames: string[] = [];
 
     let readingNames = true;
     while (readingNames) {
@@ -436,10 +437,11 @@ function handleSingle(tokenizer: TokenizerState, categories: { [name: string]: D
             }
         }
         fields[fieldName] = Field({ data: tokenizer.data, indices: [tokenizer.tokenStart, tokenizer.tokenEnd], count: 1 }, 1);
+        fieldNames[fieldNames.length] = fieldName;
         moveNext(tokenizer);
     }
 
-    categories[name] = Data.Category(1, fields);
+    categories[name] = Data.Category(name.substr(1), 1, fieldNames, fields);
 
     return {
         hasError: false,
@@ -517,7 +519,7 @@ async function handleLoop(tokenizer: TokenizerState, categories: { [name: string
         fields[fieldNames[i]] = Field(tokens[i], rowCount);
     }
 
-    categories[name] = Data.Category(rowCount, fields);
+    categories[name] = Data.Category(name.substr(1), rowCount, fieldNames, fields);
 
     return {
         hasError: false,

+ 13 - 3
src/mol-io/writer/cif/encoder.ts

@@ -7,6 +7,7 @@
 import Iterator from 'mol-base/collections/iterator'
 import { Column } from 'mol-base/collections/database'
 import Encoder from '../encoder'
+//import { ArrayEncoder, ArrayEncoding as E } from '../../common/binary-cif'
 
 export const enum FieldType {
     Str, Int, Float
@@ -19,15 +20,24 @@ export interface FieldDefinition<Key = any, Data = any> {
     valueKind?: (key: Key, data: Data) => Column.ValueKind
 
     /** determine whether to include this field base on the context */
-    shouldInclude?: (data: Data) => boolean
+    // TODO:
+    // shouldInclude?: (data: Data) => boolean
 }
 
 export interface FieldFormat {
-    decimalPlaces: number
+    // TODO
+    // textDecimalPlaces: number,
+    // stringEncoder: ArrayEncoder,
+    // numericEncoder: ArrayEncoder,
+    // typedArray?: E.TypedArrayCtor
 }
 
 export namespace FieldFormat {
-    export const Default: FieldFormat = { decimalPlaces: 3 };
+    export const Default: FieldFormat = {
+        // textDecimalPlaces: 3,
+        // stringEncoder: ArrayEncoder.by(E.stringArray),
+        // numericEncoder: ArrayEncoder.by(E.byteArray)
+    };
 }
 
 export interface CategoryDefinition<Key = any, Data = any> {

+ 135 - 0
src/mol-io/writer/cif/encoder/binary.ts

@@ -0,0 +1,135 @@
+/**
+ * Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * Adapted from CIFTools.js (https://github.com/dsehnal/CIFTools.js)
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import Iterator from 'mol-base/collections/iterator'
+import { Column } from 'mol-base/collections/database'
+import encodeMsgPack from '../../../common/msgpack/encode'
+import {
+    EncodedColumn, EncodedData, EncodedFile, EncodedDataBlock, EncodedCategory, ArrayEncoder, ArrayEncoding as E, VERSION
+} from '../../../common/binary-cif'
+import { FieldDefinition, FieldFormat, FieldType, CategoryProvider, CIFEncoder } from '../encoder'
+import Writer from '../../writer'
+
+export default class BinaryCIFWriter<Context> implements CIFEncoder<Uint8Array, Context> {
+    private data: EncodedFile;
+    private dataBlocks: EncodedDataBlock[] = [];
+    private encodedData: Uint8Array;
+
+    startDataBlock(header: string) {
+        this.dataBlocks.push({
+            header: (header || '').replace(/[ \n\t]/g, '').toUpperCase(),
+            categories: []
+        });
+    }
+
+    writeCategory(category: CategoryProvider, contexts?: Context[]) {
+        if (!this.data) {
+            throw new Error('The writer contents have already been encoded, no more writing.');
+        }
+
+        if (!this.dataBlocks.length) {
+            throw new Error('No data block created.');
+        }
+
+        const src = !contexts || !contexts.length ? [category(<any>void 0)] : contexts.map(c => category(c));
+        const categories = src.filter(c => c && c.rowCount > 0);
+        if (!categories.length) return;
+
+        const count = categories.reduce((a, c) => a + c.rowCount, 0);
+        if (!count) return;
+
+        const first = categories[0]!;
+        const cat: EncodedCategory = { name: '_' + first.definition.name, columns: [], rowCount: count };
+        const data = categories.map(c => ({ data: c.data, keys: () => c.keys() }));
+        for (const f of first.definition.fields) {
+            cat.columns.push(encodeField(f, data, count, FieldFormat.Default));
+        }
+        this.dataBlocks[this.dataBlocks.length - 1].categories.push(cat);
+    }
+
+    encode() {
+        if (this.encodedData) return;
+        this.encodedData = encodeMsgPack(this.data);
+        this.data = <any>null;
+        this.dataBlocks = <any>null;
+    }
+
+    writeTo(writer: Writer<Uint8Array>) {
+        writer.write(this.encodedData);
+    }
+
+    getData() {
+        this.encode();
+        return this.encodedData;
+    }
+
+    constructor(encoder: string) {
+        this.data = {
+            encoder,
+            version: VERSION,
+            dataBlocks: this.dataBlocks
+        };
+    }
+}
+
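+// Gathers one column across all category instances into a value array plus a ValueKind mask;
+// the mask is only emitted when some value is absent, run-length encoded when that is smaller.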
+function encodeField(field: FieldDefinition, data: { data: any, keys: () => Iterator<any> }[], totalCount: number, format: FieldFormat): EncodedColumn {
+    const isStr = field.type === FieldType.Str
+    let array: any[], encoder: ArrayEncoder;
+
+    if (isStr) {
+        array = new Array(totalCount);
+        encoder = ArrayEncoder.by(E.stringArray); //format.stringEncoder;
+    } else {
+        //array = format.typedArray ? new format.typedArray(totalCount) as any : field.type === FieldType.Int ? new Int32Array(totalCount) : new Float32Array(totalCount);
+        array = (field.type === FieldType.Int ? new Int32Array(totalCount) : new Float32Array(totalCount)) as any;
+        encoder = ArrayEncoder.by(E.byteArray);
+    }
+
+    const mask = new Uint8Array(totalCount);
+    const valueKind = field.valueKind;
+    const getter = field.value;
+    let allPresent = true;
+
+    let offset = 0;
+    for (let _d = 0; _d < data.length; _d++) {
+        const d = data[_d].data;
+        const keys = data[_d].keys();
+        while (keys.hasNext) {
+            const key = keys.move();
+            const p = valueKind ? valueKind(key, d) : Column.ValueKind.Present;
+            if (p !== Column.ValueKind.Present) {
+                mask[offset] = p;
+                if (isStr) array[offset] = '';
+                allPresent = false;
+            } else {
+                mask[offset] = Column.ValueKind.Present;
+                array[offset] = getter(key, d);
+            }
+            offset++;
+        }
+    }
+
+    const encoded = encoder.encode(array);
+
+    let maskData: EncodedData | undefined = void 0;
+
+    if (!allPresent) {
+        const maskRLE = ArrayEncoder.by(E.runLength).and(E.byteArray).encode(mask);
+        if (maskRLE.data.length < mask.length) {
+            maskData = maskRLE;
+        } else {
+            maskData = ArrayEncoder.by(E.byteArray).encode(mask);
+        }
+    }
+
+    return {
+        name: field.name,
+        data: encoded,
+        mask: maskData
+    };
+}

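Driving the binary writer directly mirrors what the cif2bcif converter does; a short sketch, where `demoProvider` is assumed to be something like the hand-rolled provider sketched in the converter section:

    import * as fs from 'fs'
    import * as Encoder from 'mol-io/writer/cif/encoder'
    import BinaryCIFEncoder from 'mol-io/writer/cif/encoder/binary'

    declare const demoProvider: Encoder.CategoryProvider;

    const w = new BinaryCIFEncoder('mol* example');
    w.startDataBlock('DEMO');
    w.writeCategory(demoProvider);
    fs.writeFileSync('demo.bcif', w.getData());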
+ 10 - 5
src/perf-tests/structure.ts

@@ -13,7 +13,7 @@ import CIF from 'mol-io/reader/cif'
 import { Structure, Model, Queries as Q, Atom, AtomSet, Selection } from 'mol-data/structure'
 import { OrderedSet as OrdSet, Segmentation } from 'mol-base/collections/integer'
 
-import toMmCIFString from 'mol-data/structure/export/mmcif'
+import to_mmCIF from 'mol-data/structure/export/mmcif'
 
 require('util.promisify').shim();
 const readFileAsync = util.promisify(fs.readFile);
@@ -237,10 +237,15 @@ export namespace PropertyAccess {
     // }
 
     export async function run() {
-        //const { structures, models } = await readCIF('./examples/1cbs_full.bcif');
-        const { structures, models } = await readCIF('e:/test/quick/3j3q_full.bcif');
+        const { structures, models } = await readCIF('./examples/1cbs_full.bcif');
+        //const { structures, models } = await readCIF('e:/test/quick/3j3q_full.bcif');
         //const { structures, models } = await readCIF('e:/test/quick/3j3q_updated.cif');
 
+        //const { structures, models } = await readCIF('e:/test/molstar/3j3q.bcif');
+
+        // fs.writeFileSync('e:/test/molstar/3j3q.bcif', to_mmCIF('test', structures[0], true));
+        // return;
+
         // console.log(toMmCIFString('test', structures[0]));
 
         // return;
@@ -280,8 +285,8 @@ export namespace PropertyAccess {
             chainTest: Q.pred.inSet(P.chain.auth_asym_id, ['A', 'B', 'C', 'D']),
             residueTest: Q.pred.eq(P.residue.auth_comp_id, 'ALA')
         });
-        // const q0r = q(structures[0]);
-        // console.log(toMmCIFString('test', Selection.union(q0r)));
+        const q0r = q(structures[0]);
+        console.log(to_mmCIF('test', Selection.union(q0r)));
 
         console.time('q1')
         q1(structures[0]);