Browse Source

working on mmCIF encoder

David Sehnal 6 years ago
parent
commit
d690f6308f

+ 2 - 2
package.json

@@ -18,8 +18,8 @@
     "test": "jest",
     "build-viewer": "webpack build/node_modules/apps/viewer/index.js --mode development -o build/viewer/index.js",
     "watch-viewer": "webpack build/node_modules/apps/viewer/index.js -w --mode development -o build/viewer/index.js",
-    "run-model-server": "node build/node_modules/servers/model/server.js",
-    "run-model-server-watch": "nodemon --watch build/node_modules build/node_modules/servers/model/server.js"
+    "model-server": "node build/node_modules/servers/model/server.js",
+    "model-server-watch": "nodemon --watch build/node_modules build/node_modules/servers/model/server.js"
   },
   "nodemonConfig": {
     "ignoreRoot": ["./node_modules", ".git"],

+ 12 - 10
src/mol-io/writer/cif.ts

@@ -8,21 +8,23 @@
 import TextCIFEncoder from './cif/encoder/text'
 import BinaryCIFEncoder from './cif/encoder/binary'
 import * as _Encoder from './cif/encoder'
-
-// export * from './cif/encoder'
-
-// export function createCIFEncoder(params?: { binary?: boolean, encoderName?: string }): CIFEncoder {
-//     const { binary = false, encoderName = 'mol*' } = params || {};
-//     return binary ? new BinaryCIFEncoder(encoderName) : new TextCIFEncoder();
-//}
+import { ArrayEncoding } from '../common/binary-cif';
 
 export namespace CifWriter {
-    export import Encoder = _Encoder.CIFEncoder
-    export import Category = _Encoder.CIFCategory
-    export import Field = _Encoder.CIFField
+    export import Encoder = _Encoder.Encoder
+    export import Category = _Encoder.Category
+    export import Field = _Encoder.Field
+    export import Encoding = ArrayEncoding
 
     /**
      * Creates a CIF encoder.
      *
      * @param params optional settings; `binary` (default `false`) selects the binary
      *               encoder, `encoderName` (default `'mol*'`) is handed to the binary encoder only.
      * @returns a binary encoder when `binary` is truthy, otherwise a text encoder.
      */
     export function createEncoder(params?: { binary?: boolean, encoderName?: string }): Encoder {
         const options = params || {};
         const useBinary = options.binary === undefined ? false : options.binary;
         if (!useBinary) return new TextCIFEncoder();

         const name = options.encoderName === undefined ? 'mol*' : options.encoderName;
         return new BinaryCIFEncoder(name);
     }
+
+    import E = Encoding
+    export const Encodings = { // Predefined encoder chains assembled from ArrayEncoding steps (aliased as E above).
+        deltaRLE: E.by(E.delta).and(E.runLength).and(E.integerPacking), // chain as written: delta, runLength, integerPacking
+        fixedPoint2: E.by(E.fixedPoint(100)).and(E.delta).and(E.integerPacking), // fixedPoint with factor 100 (presumably 2 decimal digits - confirm), then delta and integerPacking
+        fixedPoint3: E.by(E.fixedPoint(1000)).and(E.delta).and(E.integerPacking), // fixedPoint with factor 1000 (presumably 3 decimal digits - confirm), then delta and integerPacking
+    };
 }

+ 33 - 29
src/mol-io/writer/cif/encoder.ts

@@ -8,7 +8,7 @@
 import Iterator from 'mol-data/iterator'
 import { Column, Table, Database, DatabaseCollection } from 'mol-data/db'
 import { Tensor } from 'mol-math/linear-algebra'
-import Encoder from '../encoder'
+import EncoderBase from '../encoder'
 import { ArrayEncoder, ArrayEncoding } from '../../common/binary-cif';
 
 // TODO: support for "coordinate fields", make "coordinate precision" a parameter of the encoder
@@ -18,15 +18,15 @@ import { ArrayEncoder, ArrayEncoding } from '../../common/binary-cif';
 // TODO: Add "higher level fields"? (i.e. generalization of repeat)
 // TODO: align "data blocks" to 8 byte offsets for fast typed array windows? (probably needs some testing to see whether this is actually faster)
 
-export interface CIFField<Key = any, Data = any> {
+export interface Field<Key = any, Data = any> {
     name: string,
-    type: CIFField.Type,
+    type: Field.Type,
     valueKind?: (key: Key, data: Data) => Column.ValueKind,
-    defaultFormat?: CIFField.Format,
+    defaultFormat?: Field.Format,
     value(key: Key, data: Data): string | number
 }
 
-export namespace CIFField {
+export namespace Field {
     export const enum Type { Str, Int, Float }
 
     export interface Format {
@@ -35,16 +35,16 @@ export namespace CIFField {
         typedArray?: ArrayEncoding.TypedArrayCtor
     }
 
-    export function getDigitCount(field: CIFField) {
-        if (field.defaultFormat && typeof field.defaultFormat.digitCount !== 'undefined') return field.defaultFormat.digitCount;
+    export function getDigitCount(field: Field) {
+        if (field.defaultFormat && typeof field.defaultFormat.digitCount !== 'undefined') return Math.max(0, Math.min(field.defaultFormat.digitCount, 16));
         return 6;
     }
 
-    export function str<K, D = any>(name: string, value: (k: K, d: D) => string, params?: { valueKind?: (k: K, d: D) => Column.ValueKind, encoder?: ArrayEncoder }): CIFField<K, D> {
+    export function str<K, D = any>(name: string, value: (k: K, d: D) => string, params?: { valueKind?: (k: K, d: D) => Column.ValueKind, encoder?: ArrayEncoder }): Field<K, D> {
         return { name, type: Type.Str, value, valueKind: params && params.valueKind, defaultFormat: params && params.encoder ? { encoder: params.encoder } : void 0 };
     }
 
-    export function int<K, D = any>(name: string, value: (k: K, d: D) => number, params?: { valueKind?: (k: K, d: D) => Column.ValueKind, encoder?: ArrayEncoder, typedArray?: ArrayEncoding.TypedArrayCtor }): CIFField<K, D> {
+    export function int<K, D = any>(name: string, value: (k: K, d: D) => number, params?: { valueKind?: (k: K, d: D) => Column.ValueKind, encoder?: ArrayEncoder, typedArray?: ArrayEncoding.TypedArrayCtor }): Field<K, D> {
         return {
             name,
             type: Type.Int,
@@ -53,7 +53,7 @@ export namespace CIFField {
             defaultFormat: params ? { encoder: params.encoder, typedArray: params.typedArray } : void 0 };
     }
 
-    export function float<K, D = any>(name: string, value: (k: K, d: D) => number, params?: { valueKind?: (k: K, d: D) => Column.ValueKind, encoder?: ArrayEncoder, typedArray?: ArrayEncoding.TypedArrayCtor, digitCount?: number }): CIFField<K, D> {
+    export function float<K, D = any>(name: string, value: (k: K, d: D) => number, params?: { valueKind?: (k: K, d: D) => Column.ValueKind, encoder?: ArrayEncoder, typedArray?: ArrayEncoding.TypedArrayCtor, digitCount?: number }): Field<K, D> {
         return {
             name,
             type: Type.Float,
@@ -64,47 +64,51 @@ export namespace CIFField {
     }
 }
 
-export interface CIFCategory<Key = any, Data = any> {
+export interface Category<Key = any, Data = any> {
     name: string,
-    fields: CIFField<Key, Data>[],
+    fields: Field<Key, Data>[],
     data?: Data,
     rowCount: number,
     keys?: () => Iterator<Key>
 }
 
-export namespace CIFCategory {
+export namespace Category {
+    export const Empty: Category = { name: 'empty', rowCount: 0, fields: [] };
+
     export interface Provider<Ctx = any> {
-        (ctx: Ctx): CIFCategory
+        (ctx: Ctx): Category
     }
 
-    export function ofTable(name: string, table: Table<Table.Schema>): CIFCategory<number, Table<Table.Schema>> {
+    export function ofTable(name: string, table: Table<Table.Schema>, indices?: ArrayLike<number>): Category<number, Table<Table.Schema>> {
+        if (indices) {
+            return { name, fields: cifFieldsFromTableSchema(table._schema), data: table, rowCount: indices.length, keys: () => Iterator.Array(indices) };
+        }
         return { name, fields: cifFieldsFromTableSchema(table._schema), data: table, rowCount: table._rowCount };
     }
 }
 
-export interface CIFEncoder<T = string | Uint8Array> extends Encoder {
+export interface Encoder<T = string | Uint8Array> extends EncoderBase {
     // setFormatter(): void,
     startDataBlock(header: string): void,
-    writeCategory<Ctx>(category: CIFCategory.Provider<Ctx>, contexts?: Ctx[]): void,
+    writeCategory<Ctx>(category: Category.Provider<Ctx>, contexts?: Ctx[]): void,
     getData(): T
 }
 
-export namespace CIFEncoder {
-    export function writeDatabase(encoder: CIFEncoder, name: string, database: Database<Database.Schema>) {
+export namespace Encoder {
+    export function writeDatabase(encoder: Encoder, name: string, database: Database<Database.Schema>) {
         encoder.startDataBlock(name);
         for (const table of database._tableNames) {
-            encoder.writeCategory(() => CIFCategory.ofTable(table, database[table]));
+            encoder.writeCategory(() => Category.ofTable(table, database[table]));
         }
     }
 
-    export function writeDatabaseCollection(encoder: CIFEncoder, collection: DatabaseCollection<Database.Schema>) {
+    export function writeDatabaseCollection(encoder: Encoder, collection: DatabaseCollection<Database.Schema>) {
         for (const name of Object.keys(collection)) {
             writeDatabase(encoder, name, collection[name])
         }
     }
 }
 
-
 /**
  * Builds an accessor for the column stored under key `k` of a data object.
  * The returned function looks up `k` on the data object it is given and
  * returns the result of calling that column's `value` with the row index.
  */
 function columnValue(k: string) {
     return function (row: number, source: any) {
         const column = source[k];
         return column.value(row);
     };
 }
@@ -122,8 +126,8 @@ function columnValueKind(k: string) {
 }
 
 function getTensorDefinitions(field: string, space: Tensor.Space) {
-    const fieldDefinitions: CIFField[] = []
-    const type = CIFField.Type.Float
+    const fieldDefinitions: Field[] = []
+    const type = Field.Type.Float
     const valueKind = columnValueKind(field)
     if (space.rank === 1) {
         const rows = space.dimensions[0]
@@ -156,17 +160,17 @@ function getTensorDefinitions(field: string, space: Tensor.Space) {
 }
 
 function cifFieldsFromTableSchema(schema: Table.Schema) {
-    const fields: CIFField[] = [];
+    const fields: Field[] = [];
     for (const k of Object.keys(schema)) {
         const t = schema[k];
         if (t.valueType === 'int') {
-            fields.push({ name: k, type: CIFField.Type.Int, value: columnValue(k), valueKind: columnValueKind(k) });
+            fields.push({ name: k, type: Field.Type.Int, value: columnValue(k), valueKind: columnValueKind(k) });
         } else if (t.valueType === 'float') {
-            fields.push({ name: k, type: CIFField.Type.Float, value: columnValue(k), valueKind: columnValueKind(k) });
+            fields.push({ name: k, type: Field.Type.Float, value: columnValue(k), valueKind: columnValueKind(k) });
         } else if (t.valueType === 'str') {
-            fields.push({ name: k, type: CIFField.Type.Str, value: columnValue(k), valueKind: columnValueKind(k) });
+            fields.push({ name: k, type: Field.Type.Str, value: columnValue(k), valueKind: columnValueKind(k) });
         } else if (t.valueType === 'list') {
-            fields.push({ name: k, type: CIFField.Type.Str, value: columnListValue(k), valueKind: columnValueKind(k) })
+            fields.push({ name: k, type: Field.Type.Str, value: columnListValue(k), valueKind: columnValueKind(k) })
         } else if (t.valueType === 'tensor') {
             fields.push(...getTensorDefinitions(k, t.space))
         } else {

+ 8 - 8
src/mol-io/writer/cif/encoder/binary.ts

@@ -12,10 +12,10 @@ import encodeMsgPack from '../../../common/msgpack/encode'
 import {
     EncodedColumn, EncodedData, EncodedFile, EncodedDataBlock, EncodedCategory, ArrayEncoder, ArrayEncoding as E, VERSION
 } from '../../../common/binary-cif'
-import { CIFField, CIFCategory, CIFEncoder } from '../encoder'
+import { Field, Category, Encoder } from '../encoder'
 import Writer from '../../writer'
 
-export default class BinaryCIFWriter implements CIFEncoder<Uint8Array> {
+export default class BinaryCIFWriter implements Encoder<Uint8Array> {
     private data: EncodedFile;
     private dataBlocks: EncodedDataBlock[] = [];
     private encodedData: Uint8Array;
@@ -27,7 +27,7 @@ export default class BinaryCIFWriter implements CIFEncoder<Uint8Array> {
         });
     }
 
-    writeCategory<Ctx>(category: CIFCategory.Provider<Ctx>, contexts?: Ctx[]) {
+    writeCategory<Ctx>(category: Category.Provider<Ctx>, contexts?: Ctx[]) {
         if (!this.data) {
             throw new Error('The writer contents have already been encoded, no more writing.');
         }
@@ -77,14 +77,14 @@ export default class BinaryCIFWriter implements CIFEncoder<Uint8Array> {
     }
 }
 
-function createArray(field: CIFField, count: number) {
-    if (field.type === CIFField.Type.Str) return new Array(count) as any;
+function createArray(field: Field, count: number) {
+    if (field.type === Field.Type.Str) return new Array(count) as any;
     else if (field.defaultFormat && field.defaultFormat.typedArray) return new field.defaultFormat.typedArray(count) as any;
-    else return (field.type === CIFField.Type.Int ? new Int32Array(count) : new Float32Array(count)) as any;
+    else return (field.type === Field.Type.Int ? new Int32Array(count) : new Float32Array(count)) as any;
 }
 
-function encodeField(field: CIFField, data: { data: any, keys: () => Iterator<any> }[], totalCount: number, format?: CIFField.Format): EncodedColumn {
-    const isStr = field.type === CIFField.Type.Str;
+function encodeField(field: Field, data: { data: any, keys: () => Iterator<any> }[], totalCount: number, format?: Field.Format): EncodedColumn {
+    const isStr = field.type === Field.Type.Str;
     const array = createArray(field, totalCount);
     let encoder: ArrayEncoder;
 

+ 10 - 10
src/mol-io/writer/cif/encoder/text.ts

@@ -9,10 +9,10 @@
 import { Iterator } from 'mol-data'
 import { Column } from 'mol-data/db'
 import StringBuilder from 'mol-util/string-builder'
-import { CIFCategory, CIFField, CIFEncoder } from '../encoder'
+import { Category, Field, Encoder } from '../encoder'
 import Writer from '../../writer'
 
-export default class TextCIFEncoder implements CIFEncoder<string> {
+export default class TextCIFEncoder implements Encoder<string> {
     private builder = StringBuilder.create();
     private encoded = false;
     private dataBlockCreated = false;
@@ -22,7 +22,7 @@ export default class TextCIFEncoder implements CIFEncoder<string> {
         StringBuilder.write(this.builder, `data_${(header || '').replace(/[ \n\t]/g, '').toUpperCase()}\n#\n`);
     }
 
-    writeCategory<Ctx>(category: CIFCategory.Provider<Ctx>, contexts?: Ctx[]) {
+    writeCategory<Ctx>(category: Category.Provider<Ctx>, contexts?: Ctx[]) {
         if (this.encoded) {
             throw new Error('The writer contents have already been encoded, no more writing.');
         }
@@ -61,7 +61,7 @@ export default class TextCIFEncoder implements CIFEncoder<string> {
     }
 }
 
-function writeValue(builder: StringBuilder, data: any, key: any, f: CIFField<any, any>, floatPrecision: number): boolean {
+function writeValue(builder: StringBuilder, data: any, key: any, f: Field<any, any>, floatPrecision: number): boolean {
     const kind = f.valueKind;
     const p = kind ? kind(key, data) : Column.ValueKind.Present;
     if (p !== Column.ValueKind.Present) {
@@ -70,14 +70,14 @@ function writeValue(builder: StringBuilder, data: any, key: any, f: CIFField<any
     } else {
         const val = f.value(key, data);
         const t = f.type;
-        if (t === CIFField.Type.Str) {
+        if (t === Field.Type.Str) {
             if (isMultiline(val as string)) {
                 writeMultiline(builder, val as string);
                 return true;
             } else {
                 return writeChecked(builder, val as string);
             }
-        } else if (t === CIFField.Type.Int) {
+        } else if (t === Field.Type.Int) {
             writeInteger(builder, val as number);
         } else {
             writeFloat(builder, val as number, floatPrecision);
@@ -86,15 +86,15 @@ function writeValue(builder: StringBuilder, data: any, key: any, f: CIFField<any
     return false;
 }
 
-function getFloatPrecisions(cat: CIFCategory) {
+function getFloatPrecisions(cat: Category) {
     const ret: number[] = [];
     for (const f of cat.fields) {
-        ret[ret.length] = f.type === CIFField.Type.Float ? Math.pow(10, CIFField.getDigitCount(f)) : 0;
+        ret[ret.length] = f.type === Field.Type.Float ? Math.pow(10, Field.getDigitCount(f)) : 0;
     }
     return ret;
 }
 
-function writeCifSingleRecord(category: CIFCategory<any>, builder: StringBuilder) {
+function writeCifSingleRecord(category: Category<any>, builder: StringBuilder) {
     const fields = category.fields;
     const data = category.data;
     const width = fields.reduce((w, s) => Math.max(w, s.name.length), 0) + category.name.length + 6;
@@ -113,7 +113,7 @@ function writeCifSingleRecord(category: CIFCategory<any>, builder: StringBuilder
     StringBuilder.write(builder, '#\n');
 }
 
-function writeCifLoop(categories: CIFCategory[], builder: StringBuilder) {
+function writeCifLoop(categories: Category[], builder: StringBuilder) {
     const first = categories[0];
     const fields = first.fields;
     const fieldCount = fields.length;

+ 0 - 0
src/mol-io/writer/cif/filter.ts


+ 39 - 16
src/mol-model/structure/export/mmcif.ts

@@ -6,7 +6,7 @@
  */
 
 import { CifWriter } from 'mol-io/writer/cif'
-// import { mmCIF_Schema } from 'mol-io/reader/cif/schema/mmcif'
+import { mmCIF_Schema } from 'mol-io/reader/cif/schema/mmcif'
 import { Structure, Element } from '../structure'
 import { Model } from '../model'
 import P from '../query/properties'
@@ -19,40 +19,52 @@ interface Context {
 import CifField = CifWriter.Field
 import CifCategory = CifWriter.Category
 
+import E = CifWriter.Encodings
+
 const atom_site_fields: CifField<Element.Location>[] = [
     CifField.str('group_PDB', P.residue.group_PDB),
-    CifField.int('id', P.atom.id),
+    CifField.int('id', P.atom.id, { encoder: E.deltaRLE }),
     CifField.str('type_symbol', P.atom.type_symbol as any),
     CifField.str('label_atom_id', P.atom.label_atom_id),
     CifField.str('label_alt_id', P.atom.label_alt_id),
 
     CifField.str('label_comp_id', P.residue.label_comp_id),
-    CifField.int('label_seq_id', P.residue.label_seq_id),
+    CifField.int('label_seq_id', P.residue.label_seq_id, { encoder: E.deltaRLE }),
     CifField.str('pdbx_PDB_ins_code', P.residue.pdbx_PDB_ins_code),
 
     CifField.str('label_asym_id', P.chain.label_asym_id),
     CifField.str('label_entity_id', P.chain.label_entity_id),
 
-    CifField.float('Cartn_x', P.atom.x),
-    CifField.float('Cartn_y', P.atom.y),
-    CifField.float('Cartn_z', P.atom.z),
-    CifField.float('occupancy', P.atom.occupancy),
-    CifField.int('pdbx_formal_charge', P.atom.pdbx_formal_charge),
+    CifField.float('Cartn_x', P.atom.x, { digitCount: 3, encoder: E.fixedPoint3 }),
+    CifField.float('Cartn_y', P.atom.y, { digitCount: 3, encoder: E.fixedPoint3 }),
+    CifField.float('Cartn_z', P.atom.z, { digitCount: 3, encoder: E.fixedPoint3 }),
+    CifField.float('occupancy', P.atom.occupancy, { digitCount: 2, encoder: E.fixedPoint2 }),
+    CifField.int('pdbx_formal_charge', P.atom.pdbx_formal_charge, { encoder: E.deltaRLE }),
 
     CifField.str('auth_atom_id', P.atom.auth_atom_id),
     CifField.str('auth_comp_id', P.residue.auth_comp_id),
-    CifField.int('auth_seq_id', P.residue.auth_seq_id),
+    CifField.int('auth_seq_id', P.residue.auth_seq_id, { encoder: E.deltaRLE }),
     CifField.str('auth_asym_id', P.chain.auth_asym_id),
 
-    CifField.int('pdbx_PDB_model_num', P.unit.model_num),
+    CifField.int('pdbx_PDB_model_num', P.unit.model_num, { encoder: E.deltaRLE }),
     CifField.str('operator_name', P.unit.operator_name)
 ];
 
-function entityProvider({ model }: Context): CifCategory {
-    return CifCategory.ofTable('entity', model.entities.data);
+function copy_mmCif_cat(name: keyof mmCIF_Schema) { // Returns a category provider that re-emits the table called `name` from the model's source data.
+    return ({ model }: Context) => {
+        if (model.sourceData.kind !== 'mmCIF') return CifCategory.Empty; // only source data of kind 'mmCIF' can be copied
+        const table = model.sourceData.data[name];
+        if (!table || !table._rowCount) return CifCategory.Empty; // a missing table or one with zero rows yields the empty category
+        return CifCategory.ofTable(name, table);
+    };
+}
+
+function _entity({ model, structure }: Context): CifCategory { // Provider for the 'entity' category, limited to the keys returned by Structure.getEntityKeys.
+    const keys = Structure.getEntityKeys(structure);
+    return CifCategory.ofTable('entity', model.entities.data, keys); // keys are passed as the `indices` argument; presumably entity keys are row indices into model.entities.data - verify
+}
 
-function atomSiteProvider({ structure }: Context): CifCategory {
+function _atom_site({ structure }: Context): CifCategory {
     return {
         data: void 0,
         name: 'atom_site',
@@ -62,15 +74,26 @@ function atomSiteProvider({ structure }: Context): CifCategory {
     }
 }
 
+const Categories = [ // Category providers, in the order encode_mmCIF_categories hands them to the encoder.
+    copy_mmCif_cat('entry'),
+    copy_mmCif_cat('exptl'),
+    copy_mmCif_cat('cell'),
+    copy_mmCif_cat('symmetry'),
+    _entity,
+    _atom_site
+]
+
 /** Doesn't start a data block */
 export function encode_mmCIF_categories(encoder: CifWriter.Encoder, structure: Structure) {
     const models = Structure.getModels(structure);
     if (models.length !== 1) throw 'Can\'t export stucture composed from multiple models.';
     const model = models[0];
 
-    const ctx: Context = { structure, model };
-    encoder.writeCategory(entityProvider, [ctx]);
-    encoder.writeCategory(atomSiteProvider, [ctx]);
+    const ctx: Context[] = [{ structure, model }];
+
+    for (const cat of Categories) {
+        encoder.writeCategory(cat, ctx);
+    }
 }
 
 function to_mmCIF(name: string, structure: Structure, asBinary = false) {

+ 27 - 2
src/mol-model/structure/structure/structure.ts

@@ -4,16 +4,17 @@
  * @author David Sehnal <david.sehnal@gmail.com>
  */
 
-import { IntMap, SortedArray, Iterator } from 'mol-data/int'
+import { IntMap, SortedArray, Iterator, Segmentation } from 'mol-data/int'
 import { UniqueArray } from 'mol-data/generic'
 import { SymmetryOperator } from 'mol-math/geometry/symmetry-operator'
 import { Model } from '../model'
-import { sort, arraySwap, hash1 } from 'mol-data/util';
+import { sort, arraySwap, hash1, sortArray } from 'mol-data/util';
 import Element from './element'
 import Unit from './unit'
 import { StructureLookup3D } from './util/lookup3d';
 import { CoarseElements } from '../model/properties/coarse';
 import { StructureSubsetBuilder } from './util/subset-builder';
+import { Queries } from '../query';
 
 class Structure {
     readonly unitMap: IntMap<Unit>;
@@ -221,6 +222,30 @@ namespace Structure {
             }
         }
     }
+
+    export function getEntityKeys(structure: Structure) { // Collects the entity keys referenced by the structure's units and returns them after passing the array through sortArray.
+        const { units } = structure;
+        const l = Element.Location(); // one location object, re-pointed at each unit/element below
+        const keys = UniqueArray.create<number, number>(); // each key is added under itself; presumably a repeated key is stored only once
+
+        for (const unit of units) {
+            const prop = unit.kind === Unit.Kind.Atomic ? Queries.props.entity.key : Queries.props.coarse.entityKey; // atomic and non-atomic units read the entity key through different properties
+
+            l.unit = unit;
+            const elements = unit.elements;
+
+            const chainsIt = Segmentation.transientSegments(unit.model.atomicHierarchy.chainSegments, elements); // NOTE(review): chain segments always come from atomicHierarchy, even when the coarse property was selected above - confirm this is valid for non-atomic units
+            while (chainsIt.hasNext) {
+                const chainSegment = chainsIt.move();
+                l.element = elements[chainSegment.start]; // only the first element of each chain segment is probed
+                const key = prop(l);
+                UniqueArray.add(keys, key, key);
+            }
+        }
+
+        sortArray(keys.array); // return value is ignored; presumably sorts in place - TODO confirm
+        return keys.array;
+    }
 }
 
 export default Structure