浏览代码

basic mmCIF export

David Sehnal 7 年之前
父节点
当前提交
df6cc84e0a

+ 24 - 0
src/mol-base/collections/unique-array.ts

@@ -0,0 +1,24 @@
+/**
+ * Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+/**
+ * An insertion-ordered list of values de-duplicated by key.
+ * `keys` remembers every key added so far; `array` holds the value that was
+ * supplied together with the first occurrence of each key.
+ */
+interface UniqueArray<K, T> {
+    keys: Set<K>,
+    array: T[]
+}
+
+namespace UniqueArray {
+    /** Creates an empty UniqueArray. */
+    export function create<K, T>(): UniqueArray<K, T> {
+        const keys = new Set<K>();
+        const array: T[] = [];
+        return { keys, array };
+    }
+
+    /**
+     * Appends `value` unless `key` has been added before. A repeated key is
+     * ignored, so the value stored with its first occurrence is kept.
+     */
+    export function add<K, T>(uniqueArray: UniqueArray<K, T>, key: K, value: T) {
+        if (!uniqueArray.keys.has(key)) {
+            uniqueArray.keys.add(key);
+            uniqueArray.array.push(value);
+        }
+    }
+}
+
+export default UniqueArray

+ 114 - 0
src/mol-data/structure/export/mmcif.ts

@@ -0,0 +1,114 @@
+/**
+ * Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import { Column } from 'mol-base/collections/database'
+import Iterator from 'mol-base/collections/iterator'
+import * as Encoder from 'mol-io/writer/cif/encoder'
+import { mmCIF_Database } from 'mol-io/reader/cif/schema/mmcif'
+import CIFEncoder from 'mol-io/writer/cif/encoder/text'
+import { Structure, Atom, AtomSet } from '../structure'
+import { Model } from '../model'
+import P from '../query/properties'
+
+interface Context { // Argument bundle destructured by the category providers (entityProvider, atomSiteProvider).
+    structure: Structure, // structure being exported; atomSiteProvider iterates its atom locations
+    model: Model // model whose hierarchy.entities table entityProvider writes
+}
+
+/**
+ * Builds a string-typed field definition named `name`.
+ * `value` produces the cell content for a key (and the category data);
+ * the optional `valueKind` reports the Column.ValueKind for a key.
+ */
+function str<K, D = any>(name: string, value: (k: K, d: D) => string, valueKind?: (k: K) => Column.ValueKind): Encoder.FieldDefinition<K, any> {
+    const type = Encoder.FieldType.Str;
+    return { name, type, value, valueKind };
+}
+
+/**
+ * Builds an integer-typed field definition named `name`.
+ * `value` produces the number for a key (and the category data);
+ * the optional `valueKind` reports the Column.ValueKind for a key.
+ */
+function int<K, D = any>(name: string, value: (k: K, d: D) => number, valueKind?: (k: K) => Column.ValueKind): Encoder.FieldDefinition<K, any> {
+    const type = Encoder.FieldType.Int;
+    return { name, type, value, valueKind };
+}
+
+/**
+ * Builds a float-typed field definition named `name`.
+ * `value` produces the number for a key (and the category data);
+ * the optional `valueKind` reports the Column.ValueKind for a key.
+ */
+function float<K, D = any>(name: string, value: (k: K, d: D) => number, valueKind?: (k: K) => Column.ValueKind): Encoder.FieldDefinition<K, any> {
+    const type = Encoder.FieldType.Float;
+    return { name, type, value, valueKind };
+}
+
+/** Table type of the mmCIF `entity` category; handed to the field accessors below as the category data. */
+type Entity = mmCIF_Database['entity'];
+
+/**
+ * Field layout of the exported `entity` category.
+ * Keys are row indices; every accessor reads the matching column of the
+ * Entity table at that row.
+ */
+const entity: Encoder.CategoryDefinition<number, Entity> = {
+    name: 'entity',
+    fields: [
+        str<number, Entity>('id', (i, e) => e.id.value(i)),
+        str<number, Entity>('type', (i, e) => e.type.value(i)),
+        str<number, Entity>('src_method', (i, e) => e.src_method.value(i)),
+        str<number, Entity>('pdbx_description', (i, e) => e.pdbx_description.value(i)),
+        // The mmCIF dictionary types formula_weight as float and
+        // pdbx_number_of_molecules as int; the field types were swapped before.
+        float<number, Entity>('formula_weight', (i, e) => e.formula_weight.value(i)),
+        int<number, Entity>('pdbx_number_of_molecules', (i, e) => e.pdbx_number_of_molecules.value(i)),
+        str<number, Entity>('details', (i, e) => e.details.value(i)),
+        str<number, Entity>('pdbx_mutation', (i, e) => e.pdbx_mutation.value(i)),
+        str<number, Entity>('pdbx_fragment', (i, e) => e.pdbx_fragment.value(i)),
+        str<number, Entity>('pdbx_ec', (i, e) => e.pdbx_ec.value(i)),
+    ]
+}
+
+const atom_site: Encoder.CategoryDefinition<Atom.Location> = { // Field layout of the exported `atom_site` category; rows are keyed by Atom.Location.
+    name: 'atom_site',
+    fields: [ // grouped below as atom / residue / chain / coordinates / auth_* / unit properties
+        str<Atom.Location>('group_PDB', P.residue.group_PDB),
+        int<Atom.Location>('id', P.atom.id),
+        str<Atom.Location>('type_symbol', P.atom.type_symbol as any), // NOTE(review): `as any` bypasses the str() value signature; presumably type_symbol is not typed as a plain string property - confirm
+        str<Atom.Location>('label_atom_id', P.atom.label_atom_id),
+        str<Atom.Location>('label_alt_id', P.atom.label_alt_id),
+
+        str<Atom.Location>('label_comp_id', P.residue.label_comp_id),
+        int<Atom.Location>('label_seq_id', P.residue.label_seq_id),
+        str<Atom.Location>('pdbx_PDB_ins_code', P.residue.pdbx_PDB_ins_code),
+
+        str<Atom.Location>('label_asym_id', P.chain.label_asym_id),
+        str<Atom.Location>('label_entity_id', P.chain.label_entity_id),
+
+        float<Atom.Location>('Cartn_x', P.atom.x),
+        float<Atom.Location>('Cartn_y', P.atom.y),
+        float<Atom.Location>('Cartn_z', P.atom.z),
+        float<Atom.Location>('occupancy', P.atom.occupancy),
+        str<Atom.Location>('pdbx_formal_charge', P.atom.pdbx_formal_charge), // NOTE(review): written as a string field; the mmCIF dictionary types this item as int - confirm the property's value type
+
+        str<Atom.Location>('auth_atom_id', P.atom.auth_atom_id),
+        str<Atom.Location>('auth_comp_id', P.residue.auth_comp_id),
+        int<Atom.Location>('auth_seq_id', P.residue.auth_seq_id),
+        str<Atom.Location>('auth_asym_id', P.chain.auth_asym_id),
+
+        str<Atom.Location>('pdbx_operator_name', P.unit.operator_name), // reads unit.operator.name of the atom's unit
+    ]
+};
+
+/**
+ * Category provider for `entity`: exposes the model's entities table as the
+ * category data and iterates its rows via Iterator.Range(0, _rowCount - 1).
+ */
+function entityProvider({ model }: Context): Encoder.CategoryInstance {
+    const entities = model.hierarchy.entities;
+    const instance: Encoder.CategoryInstance = {
+        data: entities,
+        definition: entity,
+        keys: () => Iterator.Range(0, entities._rowCount - 1),
+        rowCount: entities._rowCount
+    };
+    return instance;
+}
+
+/**
+ * Category provider for `atom_site`: carries no category data; the keys are the
+ * structure's atom locations and the row count is the structure's atom count.
+ */
+function atomSiteProvider({ structure }: Context): Encoder.CategoryInstance {
+    const keys = () => Structure.atomLocationsTransient(structure);
+    const rowCount = AtomSet.atomCount(structure.atoms);
+    return { data: undefined, definition: atom_site, keys, rowCount };
+}
+
+/**
+ * Writes `structure` into a CIF data block named `name`, containing the
+ * `entity` and `atom_site` categories, and returns the encoder's data.
+ *
+ * @param name name given to the data block
+ * @param structure structure to export; its units must all reference one model
+ * @returns the result of the text encoder's getData()
+ * @throws Error when the structure does not reference exactly one model
+ */
+function getCifString(name: string, structure: Structure) {
+    const models = Structure.getModels(structure);
+    if (models.length !== 1) {
+        // Throw a real Error (keeps the stack trace) and report the actual count,
+        // since the check also rejects structures that reference no model at all.
+        throw new Error(`Cannot export a structure composed of ${models.length} models; exactly one model is required.`);
+    }
+    const model = models[0];
+
+    const ctx: Context = { structure, model };
+    const w = new CIFEncoder();
+
+    w.startDataBlock(name);
+    w.writeCategory(entityProvider, [ctx]);
+    w.writeCategory(atomSiteProvider, [ctx]);
+    return w.getData();
+}
+
+export default getCifString

+ 6 - 1
src/mol-data/structure/query/properties.ts

@@ -67,12 +67,17 @@ const entity = {
     pdbx_ec: Atom.property(l => l.unit.hierarchy.entities.pdbx_ec.value(eK(l)))
 }
 
+/** Properties read from the unit an atom location belongs to. */
+const unit = {
+    /** Name of the operator attached to the location's unit. */
+    operator_name: Atom.property(location => location.unit.operator.name)
+}
+
 const Properties = {
     constant,
     atom,
     residue,
     chain,
-    entity
+    entity,
+    unit
 }
 
 type Properties = typeof Properties

+ 12 - 1
src/mol-data/structure/structure/structure.ts

@@ -4,12 +4,14 @@
  * @author David Sehnal <david.sehnal@gmail.com>
  */
 
+import { OrderedSet, Iterator } from 'mol-base/collections/integer'
+import UniqueArray from 'mol-base/collections/unique-array'
 import { Model, Format } from '../model'
 import Unit from './unit'
 import Operator from './operator'
 import AtomSet from './atom/set'
 import Atom from './atom'
-import { OrderedSet, Iterator } from 'mol-base/collections/integer'
+
 
 interface Structure extends Readonly<{
     units: { readonly [id: number]: Unit },
@@ -67,6 +69,15 @@ namespace Structure {
         return Iterator.map(AtomSet.atoms(s.atoms), a => update(s, l, a));
     }
 
+    /**
+     * Collects the distinct models referenced by the units of `s`.
+     * Models are de-duplicated by their `id` and returned in the order in
+     * which Object.keys enumerates the units.
+     */
+    export function getModels(s: Structure) {
+        const models = UniqueArray.create<Model['id'], Model>();
+        Object.keys(s.units).forEach(unitId => {
+            const unit = s.units[Number(unitId)];
+            UniqueArray.add(models, unit.model.id, unit.model);
+        });
+        return models.array;
+    }
+
     // TODO: "lift" atom set operators?
     // TODO: "diff"
 }

+ 3 - 3
src/perf-tests/cif-encoder.ts

@@ -42,7 +42,7 @@ const category2: Enc.CategoryDefinition<number> = {
     }]
 }
 
-function getInstace(ctx: { cat: Enc.CategoryDefinition<number>, rowCount: number }): Enc.CategoryInstance {
+function getInstance(ctx: { cat: Enc.CategoryDefinition<number>, rowCount: number }): Enc.CategoryInstance {
     return {
         data: void 0,
         definition: ctx.cat,
@@ -54,6 +54,6 @@ function getInstace(ctx: { cat: Enc.CategoryDefinition<number>, rowCount: number
 const w = new CW();
 
 w.startDataBlock('test');
-w.writeCategory(getInstace, [{ rowCount: 5, cat: category1 }]);
-w.writeCategory(getInstace, [{ rowCount: 1, cat: category2 }]);
+w.writeCategory(getInstance, [{ rowCount: 5, cat: category1 }]);
+w.writeCategory(getInstance, [{ rowCount: 1, cat: category2 }]);
 console.log(w.getData());

+ 8 - 2
src/perf-tests/structure.ts

@@ -13,6 +13,8 @@ import CIF from 'mol-io/reader/cif'
 import { Structure, Model, Queries as Q, Atom, AtomSet, Selection } from 'mol-data/structure'
 import { OrderedSet as OrdSet, Segmentation } from 'mol-base/collections/integer'
 
+import toMmCIFString from 'mol-data/structure/export/mmcif'
+
 require('util.promisify').shim();
 const readFileAsync = util.promisify(fs.readFile);
 
@@ -235,10 +237,14 @@ export namespace PropertyAccess {
     // }
 
     export async function run() {
-        //const { structures, models } = await readCIF('./examples/1cbs_full.bcif');
-        const { structures, models } = await readCIF('e:/test/quick/1jj2_full.bcif');
+        const { structures, models } = await readCIF('./examples/1cbs_full.bcif');
+        //const { structures, models } = await readCIF('e:/test/quick/1jj2_full.bcif');
         //const { structures, models } = await readCIF('e:/test/quick/3j3q_updated.cif');
 
+        console.log(toMmCIFString('test', structures[0]));
+
+        return;
+
         console.log('parsed');
 
         console.log(baseline(models[0]));