ソースを参照

wip mol-model-parsers

David Sehnal 6 年 前
コミット
7997dfccee

+ 2 - 2
src/apps/structure-info/model.ts

@@ -14,7 +14,7 @@ import { Model, Structure, StructureElement, Unit, StructureProperties, UnitRing
 import { OrderedSet } from 'mol-data/int';
 import { openCif, downloadCif } from './helpers';
 import { Vec3 } from 'mol-math/linear-algebra';
-import { parse_mmCIF } from 'mol-model-parsers/structure/mmcif';
+import { trajecotryFromMmCIF } from 'mol-model-parsers/structure/mmcif';
 import { ModelFormat } from 'mol-model-parsers/structure/format';
 
 
@@ -200,7 +200,7 @@ export function printModelStats(models: ReadonlyArray<Model>) {
 }
 
 export async function getModelsAndStructure(frame: CifFrame) {
-    const models = await parse_mmCIF(ModelFormat.mmCIF(frame)).run();
+    const models = await trajecotryFromMmCIF(ModelFormat.mmCIF(frame)).run();
     const structure = Structure.ofModel(models[0]);
     return { models, structure };
 }

+ 4 - 296
src/mol-model-parsers/structure/mmcif.ts

@@ -5,304 +5,12 @@
  * @author Alexander Rose <alexander.rose@weirdbyte.de>
  */
 
-import { Column, Table } from 'mol-data/db';
-import { mmCIF_Database, mmCIF_Schema } from 'mol-io/reader/cif/schema/mmcif';
-import { Spacegroup, SpacegroupCell, SymmetryOperator } from 'mol-math/geometry';
-import { Tensor, Vec3 } from 'mol-math/linear-algebra';
-import { Task, RuntimeContext } from 'mol-task';
-import UUID from 'mol-util/uuid';
 import { Model } from 'mol-model/structure/model/model';
-import { Entities } from 'mol-model/structure/model/properties/common';
-import { CustomProperties } from 'mol-model/structure/model/properties/custom';
-import { ModelSymmetry } from 'mol-model/structure/model/properties/symmetry';
-import { createAssemblies } from './mmcif/assembly';
-import { getAtomicHierarchyAndConformation } from './mmcif/atomic';
-import { ComponentBond } from './mmcif/bonds';
-import { getIHMCoarse, EmptyIHMCoarse, IHMData } from './mmcif/ihm';
-import { getSecondaryStructureMmCif } from './mmcif/secondary-structure';
-import { getSequence } from './mmcif/sequence';
-import { sortAtomSite } from './mmcif/sort';
-import { StructConn } from './mmcif/bonds/struct_conn';
-import { ChemicalComponent, ChemicalComponentMap } from 'mol-model/structure/model/properties/chemical-component';
-import { ComponentType, getMoleculeType, MoleculeType } from 'mol-model/structure/model/types';
+import { Task } from 'mol-task';
 import { ModelFormat } from './format';
-import { SaccharideComponentMap, SaccharideComponent, SaccharidesSnfgMap, SaccharideCompIdMap, UnknownSaccharideComponent } from 'mol-model/structure/structure/carbohydrates/constants';
+import { _parse_mmCif } from './mmcif/parser';
 import mmCIF_Format = ModelFormat.mmCIF
 
-export function parse_mmCIF(format: mmCIF_Format): Task<Model.Trajectory> {
-    const formatData = getFormatData(format)
-    return Task.create('Create mmCIF Model', async ctx => {
-        const isIHM = format.data.ihm_model_list._rowCount > 0;
-        return isIHM ? await readIHM(ctx, format, formatData) : await readStandard(ctx, format, formatData);
-    });
-}
-
-type AtomSite = mmCIF_Database['atom_site']
-
-function getSymmetry(format: mmCIF_Format): ModelSymmetry {
-    const assemblies = createAssemblies(format);
-    const spacegroup = getSpacegroup(format);
-    const isNonStandardCrytalFrame = checkNonStandardCrystalFrame(format, spacegroup);
-    return { assemblies, spacegroup, isNonStandardCrytalFrame, ncsOperators: getNcsOperators(format) };
-}
-
-function checkNonStandardCrystalFrame(format: mmCIF_Format, spacegroup: Spacegroup) {
-    const { atom_sites } = format.data;
-    if (atom_sites._rowCount === 0) return false;
-    // TODO: parse atom_sites transform and check if it corresponds to the toFractional matrix
-    return false;
-}
-
-function getSpacegroup(format: mmCIF_Format): Spacegroup {
-    const { symmetry, cell } = format.data;
-    if (symmetry._rowCount === 0 || cell._rowCount === 0) return Spacegroup.ZeroP1;
-    const groupName = symmetry['space_group_name_H-M'].value(0);
-    const spaceCell = SpacegroupCell.create(groupName,
-        Vec3.create(cell.length_a.value(0), cell.length_b.value(0), cell.length_c.value(0)),
-        Vec3.scale(Vec3.zero(), Vec3.create(cell.angle_alpha.value(0), cell.angle_beta.value(0), cell.angle_gamma.value(0)), Math.PI / 180));
-
-    return Spacegroup.create(spaceCell);
-}
-
-function getNcsOperators(format: mmCIF_Format) {
-    const { struct_ncs_oper } = format.data;
-    if (struct_ncs_oper._rowCount === 0) return void 0;
-    const { id, matrix, vector } = struct_ncs_oper;
-
-    const matrixSpace = mmCIF_Schema.struct_ncs_oper.matrix.space, vectorSpace = mmCIF_Schema.struct_ncs_oper.vector.space;
-
-    const opers: SymmetryOperator[] = [];
-    for (let i = 0; i < struct_ncs_oper._rowCount; i++) {
-        const m = Tensor.toMat3(matrixSpace, matrix.value(i));
-        const v = Tensor.toVec3(vectorSpace, vector.value(i));
-        if (!SymmetryOperator.checkIfRotationAndTranslation(m, v)) continue;
-        opers[opers.length] = SymmetryOperator.ofRotationAndOffset(`ncs_${id.value(i)}`, m, v);
-    }
-    return opers;
-}
-function getModifiedResidueNameMap(format: mmCIF_Format): Model['properties']['modifiedResidues'] {
-    const data = format.data.pdbx_struct_mod_residue;
-    const parentId = new Map<string, string>();
-    const details = new Map<string, string>();
-    const comp_id = data.label_comp_id.isDefined ? data.label_comp_id : data.auth_comp_id;
-    const parent_id = data.parent_comp_id, details_data = data.details;
-
-    for (let i = 0; i < data._rowCount; i++) {
-        const id = comp_id.value(i);
-        parentId.set(id, parent_id.value(i));
-        details.set(id, details_data.value(i));
-    }
-
-    return { parentId, details };
-}
-
-function getChemicalComponentMap(format: mmCIF_Format): ChemicalComponentMap {
-    const map = new Map<string, ChemicalComponent>();
-    const { id, type, name, pdbx_synonyms, formula, formula_weight } = format.data.chem_comp
-    for (let i = 0, il = id.rowCount; i < il; ++i) {
-        const _id = id.value(i)
-        const _type = type.value(i)
-        const cc: ChemicalComponent = {
-            id: _id,
-            type: ComponentType[_type],
-            moleculeType: getMoleculeType(_type, _id),
-            name: name.value(i),
-            synonyms: pdbx_synonyms.value(i),
-            formula: formula.value(i),
-            formulaWeight: formula_weight.value(i),
-        }
-        map.set(_id, cc)
-    }
-    return map
-}
-
-function getSaccharideComponentMap(format: mmCIF_Format): SaccharideComponentMap {
-    const map = new Map<string, SaccharideComponent>();
-    const { pdbx_chem_comp_identifier } = format.data
-    if (pdbx_chem_comp_identifier._rowCount > 0) {
-        const { comp_id, type, identifier } = pdbx_chem_comp_identifier
-        for (let i = 0, il = pdbx_chem_comp_identifier._rowCount; i < il; ++i) {
-            if (type.value(i) === 'SNFG CARB SYMBOL') {
-                const snfgName = identifier.value(i)
-                const saccharideComp = SaccharidesSnfgMap.get(snfgName)
-                if (saccharideComp) {
-                    map.set(comp_id.value(i), saccharideComp)
-                } else {
-                    console.warn(`Unknown SNFG name '${snfgName}'`)
-                }
-            }
-        }
-    } else if (format.data.chem_comp._rowCount > 0) {
-        const { id, type  } = format.data.chem_comp
-        for (let i = 0, il = id.rowCount; i < il; ++i) {
-            const _id = id.value(i)
-            const _type = type.value(i)
-            if (SaccharideCompIdMap.has(_id)) {
-                map.set(_id, SaccharideCompIdMap.get(_id)!)
-            } else if (!map.has(_id) && getMoleculeType(_type, _id) === MoleculeType.saccharide) {
-                map.set(_id, UnknownSaccharideComponent)
-            }
-        }
-    } else {
-        // TODO check if present in format.data.atom_site.label_comp_id
-        SaccharideCompIdMap.forEach((v, k) => map.set(k, v))
-    }
-    return map
-}
-
-export interface FormatData {
-    modifiedResidues: Model['properties']['modifiedResidues']
-    chemicalComponentMap: Model['properties']['chemicalComponentMap']
-    saccharideComponentMap: Model['properties']['saccharideComponentMap']
-}
-
-function getFormatData(format: mmCIF_Format): FormatData {
-    return {
-        modifiedResidues: getModifiedResidueNameMap(format),
-        chemicalComponentMap: getChemicalComponentMap(format),
-        saccharideComponentMap: getSaccharideComponentMap(format)
-    }
-}
-
-function createStandardModel(format: mmCIF_Format, atom_site: AtomSite, entities: Entities, formatData: FormatData, previous?: Model): Model {
-    const atomic = getAtomicHierarchyAndConformation(format, atom_site, entities, formatData, previous);
-    if (previous && atomic.sameAsPrevious) {
-        return {
-            ...previous,
-            id: UUID.create22(),
-            modelNum: atom_site.pdbx_PDB_model_num.value(0),
-            atomicConformation: atomic.conformation,
-            _dynamicPropertyData: Object.create(null)
-        };
-    }
-
-    const coarse = EmptyIHMCoarse;
-    const label = format.data.entry.id.valueKind(0) === Column.ValueKind.Present
-        ? format.data.entry.id.value(0)
-        : format.data._name;
-
-    return {
-        id: UUID.create22(),
-        label,
-        sourceData: format,
-        modelNum: atom_site.pdbx_PDB_model_num.value(0),
-        entities,
-        symmetry: getSymmetry(format),
-        sequence: getSequence(format.data, entities, atomic.hierarchy, formatData.modifiedResidues.parentId),
-        atomicHierarchy: atomic.hierarchy,
-        atomicConformation: atomic.conformation,
-        coarseHierarchy: coarse.hierarchy,
-        coarseConformation: coarse.conformation,
-        properties: {
-            secondaryStructure: getSecondaryStructureMmCif(format.data, atomic.hierarchy),
-            ...formatData
-        },
-        customProperties: new CustomProperties(),
-        _staticPropertyData: Object.create(null),
-        _dynamicPropertyData: Object.create(null)
-    };
-}
-
-function createModelIHM(format: mmCIF_Format, data: IHMData, formatData: FormatData): Model {
-    const atomic = getAtomicHierarchyAndConformation(format, data.atom_site, data.entities, formatData);
-    const coarse = getIHMCoarse(data, formatData);
-
-    return {
-        id: UUID.create22(),
-        label: data.model_name,
-        sourceData: format,
-        modelNum: data.model_id,
-        entities: data.entities,
-        symmetry: getSymmetry(format),
-        sequence: getSequence(format.data, data.entities, atomic.hierarchy, formatData.modifiedResidues.parentId),
-        atomicHierarchy: atomic.hierarchy,
-        atomicConformation: atomic.conformation,
-        coarseHierarchy: coarse.hierarchy,
-        coarseConformation: coarse.conformation,
-        properties: {
-            secondaryStructure: getSecondaryStructureMmCif(format.data, atomic.hierarchy),
-            ...formatData
-        },
-        customProperties: new CustomProperties(),
-        _staticPropertyData: Object.create(null),
-        _dynamicPropertyData: Object.create(null)
-    };
-}
-
-function attachProps(model: Model) {
-    ComponentBond.attachFromMmCif(model);
-    StructConn.attachFromMmCif(model);
-}
-
-function findModelEnd(num: Column<number>, startIndex: number) {
-    const rowCount = num.rowCount;
-    if (!num.isDefined) return rowCount;
-    let endIndex = startIndex + 1;
-    while (endIndex < rowCount && num.areValuesEqual(startIndex, endIndex)) endIndex++;
-    return endIndex;
-}
-
-async function readStandard(ctx: RuntimeContext, format: mmCIF_Format, formatData: FormatData) {
-    const atomCount = format.data.atom_site._rowCount;
-    const entities: Entities = { data: format.data.entity, getEntityIndex: Column.createIndexer(format.data.entity.id) };
-
-    const models: Model[] = [];
-    let modelStart = 0;
-    while (modelStart < atomCount) {
-        const modelEnd = findModelEnd(format.data.atom_site.pdbx_PDB_model_num, modelStart);
-        const atom_site = await sortAtomSite(ctx, format.data.atom_site, modelStart, modelEnd);
-        const model = createStandardModel(format, atom_site, entities, formatData, models.length > 0 ? models[models.length - 1] : void 0);
-        attachProps(model);
-        models.push(model);
-        modelStart = modelEnd;
-    }
-    return models;
-}
-
-function splitTable<T extends Table<any>>(table: T, col: Column<number>) {
-    const ret = new Map<number, T>()
-    const rowCount = table._rowCount;
-    let modelStart = 0;
-    while (modelStart < rowCount) {
-        const modelEnd = findModelEnd(col, modelStart);
-        const id = col.value(modelStart);
-        const window = Table.window(table, table._schema, modelStart, modelEnd) as T;
-        ret.set(id, window);
-        modelStart = modelEnd;
-    }
-    return ret;
-}
-
-async function readIHM(ctx: RuntimeContext, format: mmCIF_Format, formatData: FormatData) {
-    const { ihm_model_list } = format.data;
-    const entities: Entities = { data: format.data.entity, getEntityIndex: Column.createIndexer(format.data.entity.id) };
-
-    if (!format.data.atom_site.ihm_model_id.isDefined) {
-        throw new Error('expected _atom_site.ihm_model_id to be defined')
-    }
-
-    // TODO: will IHM require sorting or will we trust it?
-    const atom_sites = splitTable(format.data.atom_site, format.data.atom_site.ihm_model_id);
-    const sphere_sites = splitTable(format.data.ihm_sphere_obj_site, format.data.ihm_sphere_obj_site.model_id);
-    const gauss_sites = splitTable(format.data.ihm_gaussian_obj_site, format.data.ihm_gaussian_obj_site.model_id);
-
-    const models: Model[] = [];
-
-    const { model_id, model_name } = ihm_model_list;
-    for (let i = 0; i < ihm_model_list._rowCount; i++) {
-        const id = model_id.value(i);
-        const data: IHMData = {
-            model_id: id,
-            model_name: model_name.value(i),
-            entities: entities,
-            atom_site: atom_sites.has(id) ? atom_sites.get(id)! : Table.window(format.data.atom_site, format.data.atom_site._schema, 0, 0),
-            ihm_sphere_obj_site: sphere_sites.has(id) ? sphere_sites.get(id)! : Table.window(format.data.ihm_sphere_obj_site, format.data.ihm_sphere_obj_site._schema, 0, 0),
-            ihm_gaussian_obj_site: gauss_sites.has(id) ? gauss_sites.get(id)! : Table.window(format.data.ihm_gaussian_obj_site, format.data.ihm_gaussian_obj_site._schema, 0, 0)
-        };
-        const model = createModelIHM(format, data, formatData);
-        attachProps(model);
-        models.push(model);
-    }
-
-    return models;
+export function trajecotryFromMmCIF(format: mmCIF_Format): Task<Model.Trajectory> { // NOTE(review): name looks like a misspelling of trajectoryFromMmCIF - renaming would require updating every importer
+    return Task.create('Create mmCIF Model', ctx => _parse_mmCif(format, ctx)); // wraps the shared parser from ./mmcif/parser in a Task
 }

+ 1 - 1
src/mol-model-parsers/structure/mmcif/atomic.ts

@@ -15,10 +15,10 @@ import { getAtomicIndex } from 'mol-model/structure/model/properties/utils/atomi
 import { ElementSymbol } from 'mol-model/structure/model/types';
 import { Entities } from 'mol-model/structure/model/properties/common';
 import { getAtomicRanges } from 'mol-model/structure/model/properties/utils/atomic-ranges';
-import { FormatData } from '../mmcif';
 import { getAtomicDerivedData } from 'mol-model/structure/model/properties/utils/atomic-derived';
 import { ModelFormat } from '../format';
 import mmCIF_Format = ModelFormat.mmCIF
+import { FormatData } from './parser';
 
 
 type AtomSite = mmCIF_Database['atom_site']

+ 1 - 1
src/mol-model-parsers/structure/mmcif/ihm.ts

@@ -14,7 +14,7 @@ import { Segmentation, Interval } from 'mol-data/int';
 import { Mat3, Tensor } from 'mol-math/linear-algebra';
 import { ElementIndex, ChainIndex } from 'mol-model/structure/model/indexing';
 import { getCoarseRanges } from 'mol-model/structure/model/properties/utils/coarse-ranges';
-import { FormatData } from '../mmcif';
+import { FormatData } from './parser';
 
 export interface IHMData {
     model_id: number,

+ 306 - 0
src/mol-model-parsers/structure/mmcif/parser.ts

@@ -0,0 +1,306 @@
+/**
+ * Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ */
+
+import { Column, Table } from 'mol-data/db';
+import { mmCIF_Database, mmCIF_Schema } from 'mol-io/reader/cif/schema/mmcif';
+import { Spacegroup, SpacegroupCell, SymmetryOperator } from 'mol-math/geometry';
+import { Tensor, Vec3 } from 'mol-math/linear-algebra';
+import { RuntimeContext } from 'mol-task';
+import UUID from 'mol-util/uuid';
+import { Model } from 'mol-model/structure/model/model';
+import { Entities } from 'mol-model/structure/model/properties/common';
+import { CustomProperties } from 'mol-model/structure/model/properties/custom';
+import { ModelSymmetry } from 'mol-model/structure/model/properties/symmetry';
+import { createAssemblies } from './assembly';
+import { getAtomicHierarchyAndConformation } from './atomic';
+import { ComponentBond } from './bonds';
+import { getIHMCoarse, EmptyIHMCoarse, IHMData } from './ihm';
+import { getSecondaryStructureMmCif } from './secondary-structure';
+import { getSequence } from './sequence';
+import { sortAtomSite } from './sort';
+import { StructConn } from './bonds/struct_conn';
+import { ChemicalComponent, ChemicalComponentMap } from 'mol-model/structure/model/properties/chemical-component';
+import { ComponentType, getMoleculeType, MoleculeType } from 'mol-model/structure/model/types';
+import { ModelFormat } from '../format';
+import { SaccharideComponentMap, SaccharideComponent, SaccharidesSnfgMap, SaccharideCompIdMap, UnknownSaccharideComponent } from 'mol-model/structure/structure/carbohydrates/constants';
+import mmCIF_Format = ModelFormat.mmCIF
+/**
+ * Shared entry point that turns an mmCIF format object into its models.
+ * Dispatches to the IHM reader when the ihm_model_list category has rows,
+ * otherwise to the standard reader.
+ *
+ * @param format mmCIF format wrapper whose data categories are read.
+ * @param ctx runtime context passed through to the selected reader.
+ * @returns the models created by the selected reader.
+ */
+export async function _parse_mmCif(format: mmCIF_Format, ctx: RuntimeContext) {
+    const formatData = getFormatData(format)
+    const isIHM = format.data.ihm_model_list._rowCount > 0;
+    return isIHM ? await readIHM(ctx, format, formatData) : await readStandard(ctx, format, formatData);
+}
+
+type AtomSite = mmCIF_Database['atom_site']
+/** Collects the symmetry data of a model: assemblies, spacegroup, crystal-frame flag and NCS operators. */
+function getSymmetry(format: mmCIF_Format): ModelSymmetry {
+    const assemblies = createAssemblies(format);
+    const spacegroup = getSpacegroup(format);
+    const isNonStandardCrytalFrame = checkNonStandardCrystalFrame(format, spacegroup);
+    return { assemblies, spacegroup, isNonStandardCrytalFrame, ncsOperators: getNcsOperators(format) };
+}
+/**
+ * Placeholder check for a non-standard crystal frame.
+ * Currently returns false in every case: the real comparison is still a TODO
+ * and the spacegroup argument is not used yet.
+ */
+function checkNonStandardCrystalFrame(format: mmCIF_Format, spacegroup: Spacegroup) {
+    const { atom_sites } = format.data;
+    if (atom_sites._rowCount === 0) return false;
+    // TODO: parse atom_sites transform and check if it corresponds to the toFractional matrix
+    return false;
+}
+/**
+ * Creates the spacegroup from the first row of the symmetry and cell categories.
+ * Returns Spacegroup.ZeroP1 when either category has no rows.
+ */
+function getSpacegroup(format: mmCIF_Format): Spacegroup {
+    const { symmetry, cell } = format.data;
+    if (symmetry._rowCount === 0 || cell._rowCount === 0) return Spacegroup.ZeroP1;
+    const groupName = symmetry['space_group_name_H-M'].value(0);
+    const spaceCell = SpacegroupCell.create(groupName,
+        Vec3.create(cell.length_a.value(0), cell.length_b.value(0), cell.length_c.value(0)),
+        Vec3.scale(Vec3.zero(), Vec3.create(cell.angle_alpha.value(0), cell.angle_beta.value(0), cell.angle_gamma.value(0)), Math.PI / 180));
+    // the three cell angles above are scaled by PI / 180 (degree-to-radian factor) before the cell is created
+    return Spacegroup.create(spaceCell);
+}
+/**
+ * Reads the struct_ncs_oper category into symmetry operators.
+ * Returns undefined when the category has no rows. Rows whose matrix and vector
+ * fail SymmetryOperator.checkIfRotationAndTranslation are skipped.
+ * Each operator is named ncs_ followed by the id of its row.
+ */
+function getNcsOperators(format: mmCIF_Format) {
+    const { struct_ncs_oper } = format.data;
+    if (struct_ncs_oper._rowCount === 0) return void 0;
+    const { id, matrix, vector } = struct_ncs_oper;
+    // tensor spaces are taken from the schema and used to unpack every row below
+    const matrixSpace = mmCIF_Schema.struct_ncs_oper.matrix.space, vectorSpace = mmCIF_Schema.struct_ncs_oper.vector.space;
+
+    const opers: SymmetryOperator[] = [];
+    for (let i = 0; i < struct_ncs_oper._rowCount; i++) {
+        const m = Tensor.toMat3(matrixSpace, matrix.value(i));
+        const v = Tensor.toVec3(vectorSpace, vector.value(i));
+        if (!SymmetryOperator.checkIfRotationAndTranslation(m, v)) continue;
+        opers[opers.length] = SymmetryOperator.ofRotationAndOffset(`ncs_${id.value(i)}`, m, v);
+    }
+    return opers;
+}
+function getModifiedResidueNameMap(format: mmCIF_Format): Model['properties']['modifiedResidues'] { // maps each modified residue comp id to its parent comp id and its details text
+    const data = format.data.pdbx_struct_mod_residue;
+    const parentId = new Map<string, string>();
+    const details = new Map<string, string>();
+    const comp_id = data.label_comp_id.isDefined ? data.label_comp_id : data.auth_comp_id; // prefer label_comp_id, fall back to auth_comp_id when it is not defined
+    const parent_id = data.parent_comp_id, details_data = data.details;
+    // one entry per comp id: a later row with the same id overwrites the earlier one
+    for (let i = 0; i < data._rowCount; i++) {
+        const id = comp_id.value(i);
+        parentId.set(id, parent_id.value(i));
+        details.set(id, details_data.value(i));
+    }
+
+    return { parentId, details };
+}
+/**
+ * Builds a map from chem_comp id to a ChemicalComponent record.
+ * Every row of the chem_comp category contributes one entry holding its type,
+ * derived molecule type, name, synonyms, formula and formula weight.
+ */
+function getChemicalComponentMap(format: mmCIF_Format): ChemicalComponentMap {
+    const map = new Map<string, ChemicalComponent>();
+    const { id, type, name, pdbx_synonyms, formula, formula_weight } = format.data.chem_comp
+    for (let i = 0, il = id.rowCount; i < il; ++i) {
+        const _id = id.value(i)
+        const _type = type.value(i)
+        const cc: ChemicalComponent = {
+            id: _id,
+            type: ComponentType[_type],
+            moleculeType: getMoleculeType(_type, _id),
+            name: name.value(i),
+            synonyms: pdbx_synonyms.value(i),
+            formula: formula.value(i),
+            formulaWeight: formula_weight.value(i),
+        }
+        map.set(_id, cc)
+    }
+    return map
+}
+/**
+ * Builds the map from component id to saccharide component, using the first source that has rows:
+ *  1. pdbx_chem_comp_identifier: rows of type SNFG CARB SYMBOL are looked up in SaccharidesSnfgMap
+ *     (names missing from that map are reported with console.warn and skipped);
+ *  2. chem_comp: ids known to SaccharideCompIdMap, plus any other id whose molecule type is
+ *     saccharide (mapped to UnknownSaccharideComponent);
+ *  3. neither category has rows: every entry of SaccharideCompIdMap is copied.
+ */
+function getSaccharideComponentMap(format: mmCIF_Format): SaccharideComponentMap {
+    const map = new Map<string, SaccharideComponent>();
+    const { pdbx_chem_comp_identifier } = format.data
+    if (pdbx_chem_comp_identifier._rowCount > 0) {
+        const { comp_id, type, identifier } = pdbx_chem_comp_identifier
+        for (let i = 0, il = pdbx_chem_comp_identifier._rowCount; i < il; ++i) {
+            if (type.value(i) === 'SNFG CARB SYMBOL') {
+                const snfgName = identifier.value(i)
+                const saccharideComp = SaccharidesSnfgMap.get(snfgName)
+                if (saccharideComp) {
+                    map.set(comp_id.value(i), saccharideComp)
+                } else {
+                    console.warn(`Unknown SNFG name '${snfgName}'`)
+                }
+            }
+        }
+    } else if (format.data.chem_comp._rowCount > 0) {
+        const { id, type  } = format.data.chem_comp
+        for (let i = 0, il = id.rowCount; i < il; ++i) {
+            const _id = id.value(i)
+            const _type = type.value(i)
+            if (SaccharideCompIdMap.has(_id)) {
+                map.set(_id, SaccharideCompIdMap.get(_id)!)
+            } else if (!map.has(_id) && getMoleculeType(_type, _id) === MoleculeType.saccharide) {
+                map.set(_id, UnknownSaccharideComponent)
+            }
+        }
+    } else {
+        // TODO check if present in format.data.atom_site.label_comp_id
+        SaccharideCompIdMap.forEach((v, k) => map.set(k, v))
+    }
+    return map
+}
+/**
+ * Properties derived from the format as a whole rather than from a single model.
+ * They are computed once by getFormatData and spread into the properties of every model.
+ */
+export interface FormatData {
+    modifiedResidues: Model['properties']['modifiedResidues']
+    chemicalComponentMap: Model['properties']['chemicalComponentMap']
+    saccharideComponentMap: Model['properties']['saccharideComponentMap']
+}
+/** Computes the three FormatData maps (modified residues, chemical components, saccharide components) for a format. */
+function getFormatData(format: mmCIF_Format): FormatData {
+    return {
+        modifiedResidues: getModifiedResidueNameMap(format),
+        chemicalComponentMap: getChemicalComponentMap(format),
+        saccharideComponentMap: getSaccharideComponentMap(format)
+    }
+}
+/**
+ * Creates one model of a standard (non-IHM) mmCIF file from an atom_site table.
+ * When a previous model is given and the atomic data reports sameAsPrevious, the previous
+ * model is reused with a new id, model number, atomic conformation and empty dynamic
+ * property data. Otherwise a complete model is built.
+ *
+ * @param format mmCIF format the model is read from (also stored as sourceData).
+ * @param atom_site atom_site rows of this model.
+ * @param entities entity data shared by all models of the file.
+ * @param formatData format-wide maps spread into the model properties.
+ * @param previous optional preceding model that may be reused.
+ */
+function createStandardModel(format: mmCIF_Format, atom_site: AtomSite, entities: Entities, formatData: FormatData, previous?: Model): Model {
+    const atomic = getAtomicHierarchyAndConformation(format, atom_site, entities, formatData, previous);
+    if (previous && atomic.sameAsPrevious) {
+        return {
+            ...previous,
+            id: UUID.create22(),
+            modelNum: atom_site.pdbx_PDB_model_num.value(0),
+            atomicConformation: atomic.conformation,
+            _dynamicPropertyData: Object.create(null)
+        };
+    }
+    // no reuse: build a complete model, with the empty IHM placeholder as coarse data
+    const coarse = EmptyIHMCoarse;
+    const label = format.data.entry.id.valueKind(0) === Column.ValueKind.Present
+        ? format.data.entry.id.value(0)
+        : format.data._name;
+
+    return {
+        id: UUID.create22(),
+        label,
+        sourceData: format,
+        modelNum: atom_site.pdbx_PDB_model_num.value(0),
+        entities,
+        symmetry: getSymmetry(format),
+        sequence: getSequence(format.data, entities, atomic.hierarchy, formatData.modifiedResidues.parentId),
+        atomicHierarchy: atomic.hierarchy,
+        atomicConformation: atomic.conformation,
+        coarseHierarchy: coarse.hierarchy,
+        coarseConformation: coarse.conformation,
+        properties: {
+            secondaryStructure: getSecondaryStructureMmCif(format.data, atomic.hierarchy),
+            ...formatData
+        },
+        customProperties: new CustomProperties(),
+        _staticPropertyData: Object.create(null),
+        _dynamicPropertyData: Object.create(null)
+    };
+}
+/**
+ * Creates one model of an IHM file from the per-model data prepared by readIHM.
+ * Unlike the standard path, the coarse hierarchy and conformation come from getIHMCoarse,
+ * the label is the model name and the model number is the IHM model id.
+ */
+function createModelIHM(format: mmCIF_Format, data: IHMData, formatData: FormatData): Model {
+    const atomic = getAtomicHierarchyAndConformation(format, data.atom_site, data.entities, formatData);
+    const coarse = getIHMCoarse(data, formatData);
+
+    return {
+        id: UUID.create22(),
+        label: data.model_name,
+        sourceData: format,
+        modelNum: data.model_id,
+        entities: data.entities,
+        symmetry: getSymmetry(format),
+        sequence: getSequence(format.data, data.entities, atomic.hierarchy, formatData.modifiedResidues.parentId),
+        atomicHierarchy: atomic.hierarchy,
+        atomicConformation: atomic.conformation,
+        coarseHierarchy: coarse.hierarchy,
+        coarseConformation: coarse.conformation,
+        properties: {
+            secondaryStructure: getSecondaryStructureMmCif(format.data, atomic.hierarchy),
+            ...formatData
+        },
+        customProperties: new CustomProperties(),
+        _staticPropertyData: Object.create(null),
+        _dynamicPropertyData: Object.create(null)
+    };
+}
+/** Attaches the mmCIF-derived bond properties (ComponentBond and StructConn) to a freshly created model. */
+function attachProps(model: Model) {
+    ComponentBond.attachFromMmCif(model);
+    StructConn.attachFromMmCif(model);
+}
+/**
+ * Finds the exclusive end index of the run of rows that starts at startIndex and
+ * shares the same value in the given column.
+ * When the column is not defined, its row count is returned, so all rows form one run.
+ */
+function findModelEnd(num: Column<number>, startIndex: number) {
+    const rowCount = num.rowCount;
+    if (!num.isDefined) return rowCount;
+    let endIndex = startIndex + 1;
+    while (endIndex < rowCount && num.areValuesEqual(startIndex, endIndex)) endIndex++;
+    return endIndex;
+}
+/**
+ * Reads all models of a standard mmCIF file.
+ * The atom_site table is walked in consecutive runs of equal pdbx_PDB_model_num; each run is
+ * passed through sortAtomSite and turned into a model. The most recently created model is
+ * handed to createStandardModel as the reuse candidate.
+ */
+async function readStandard(ctx: RuntimeContext, format: mmCIF_Format, formatData: FormatData) {
+    const atomCount = format.data.atom_site._rowCount;
+    const entities: Entities = { data: format.data.entity, getEntityIndex: Column.createIndexer(format.data.entity.id) };
+
+    const models: Model[] = [];
+    let modelStart = 0;
+    while (modelStart < atomCount) {
+        const modelEnd = findModelEnd(format.data.atom_site.pdbx_PDB_model_num, modelStart);
+        const atom_site = await sortAtomSite(ctx, format.data.atom_site, modelStart, modelEnd);
+        const model = createStandardModel(format, atom_site, entities, formatData, models.length > 0 ? models[models.length - 1] : void 0);
+        attachProps(model);
+        models.push(model);
+        modelStart = modelEnd;
+    }
+    return models;
+}
+/**
+ * Splits a table into windows of consecutive rows that share the same value in col,
+ * keyed by that value.
+ * NOTE(review): if the same value occurs in several non-adjacent runs, the window of a
+ * later run replaces the earlier one in the returned map.
+ */
+function splitTable<T extends Table<any>>(table: T, col: Column<number>) {
+    const ret = new Map<number, T>()
+    const rowCount = table._rowCount;
+    let modelStart = 0;
+    while (modelStart < rowCount) {
+        const modelEnd = findModelEnd(col, modelStart);
+        const id = col.value(modelStart);
+        const window = Table.window(table, table._schema, modelStart, modelEnd) as T;
+        ret.set(id, window);
+        modelStart = modelEnd;
+    }
+    return ret;
+}
+/**
+ * Reads all models of an IHM file, one per row of ihm_model_list.
+ * The atom_site, ihm_sphere_obj_site and ihm_gaussian_obj_site tables are split by model id;
+ * a model without rows in one of them receives an empty (0 to 0) window of that table.
+ *
+ * Throws an Error when atom_site.ihm_model_id is not defined.
+ */
+async function readIHM(ctx: RuntimeContext, format: mmCIF_Format, formatData: FormatData) {
+    const { ihm_model_list } = format.data;
+    const entities: Entities = { data: format.data.entity, getEntityIndex: Column.createIndexer(format.data.entity.id) };
+
+    if (!format.data.atom_site.ihm_model_id.isDefined) {
+        throw new Error('expected _atom_site.ihm_model_id to be defined')
+    }
+
+    // TODO: will IHM require sorting or will we trust it?
+    const atom_sites = splitTable(format.data.atom_site, format.data.atom_site.ihm_model_id);
+    const sphere_sites = splitTable(format.data.ihm_sphere_obj_site, format.data.ihm_sphere_obj_site.model_id);
+    const gauss_sites = splitTable(format.data.ihm_gaussian_obj_site, format.data.ihm_gaussian_obj_site.model_id);
+
+    const models: Model[] = [];
+    // models are created in the row order of ihm_model_list
+    const { model_id, model_name } = ihm_model_list;
+    for (let i = 0; i < ihm_model_list._rowCount; i++) {
+        const id = model_id.value(i);
+        const data: IHMData = {
+            model_id: id,
+            model_name: model_name.value(i),
+            entities: entities,
+            atom_site: atom_sites.has(id) ? atom_sites.get(id)! : Table.window(format.data.atom_site, format.data.atom_site._schema, 0, 0),
+            ihm_sphere_obj_site: sphere_sites.has(id) ? sphere_sites.get(id)! : Table.window(format.data.ihm_sphere_obj_site, format.data.ihm_sphere_obj_site._schema, 0, 0),
+            ihm_gaussian_obj_site: gauss_sites.has(id) ? gauss_sites.get(id)! : Table.window(format.data.ihm_gaussian_obj_site, format.data.ihm_gaussian_obj_site._schema, 0, 0)
+        };
+        const model = createModelIHM(format, data, formatData);
+        attachProps(model);
+        models.push(model);
+    }
+
+    return models;
+}

+ 21 - 0
src/mol-model-parsers/structure/pdb.ts

@@ -0,0 +1,21 @@
+/**
+ * Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import { PdbFile } from 'mol-io/reader/pdb/schema';
+import { pdbToMmCif } from './pdb/to-cif';
+import { Model } from 'mol-model/structure/model';
+import { Task } from 'mol-task';
+import { ModelFormat } from './format';
+import { _parse_mmCif } from './mmcif/parser';
+/**
+ * Creates a task that builds a model trajectory from a parsed PDB file.
+ * The PDB data is first converted to an mmCIF frame (after a progress update on the
+ * task context) and then read by the shared mmCIF parser.
+ *
+ * @param pdb parsed PDB file to convert.
+ * @returns a Task resolving to the trajectory of models.
+ */
+export function trajectoryFromPDB(pdb: PdbFile): Task<Model.Trajectory> {
+    return Task.create('Parse PDB', async ctx => {
+        await ctx.update('Converting to mmCIF');
+        const cif = await pdbToMmCif(pdb);
+        const format = ModelFormat.mmCIF(cif);
+        return _parse_mmCif(format, ctx);
+    })
+}

+ 8 - 21
src/mol-io/reader/pdb/to-cif.ts → src/mol-model-parsers/structure/pdb/to-cif.ts

@@ -4,13 +4,11 @@
  * @author David Sehnal <david.sehnal@gmail.com>
  */
 
-import { CifField, CifCategory } from '../cif';
-import { mmCIF_Schema } from '../cif/schema/mmcif';
-import { TokenBuilder, Tokenizer } from '../common/text/tokenizer';
-import { PdbFile } from './schema';
-import { CifFile } from '../cif/data-model';
 import { substringStartsWith } from 'mol-util/string';
-import { Task } from 'mol-task';
+import { CifField, CifCategory, CifFrame } from 'mol-io/reader/cif';
+import { mmCIF_Schema } from 'mol-io/reader/cif/schema/mmcif';
+import { TokenBuilder, Tokenizer } from 'mol-io/reader/common/text/tokenizer';
+import { PdbFile } from 'mol-io/reader/pdb/schema';
 
 function toCategory(name: string, fields: { [name: string]: CifField | undefined }, rowCount: number): CifCategory {
     return {
@@ -208,7 +206,7 @@ function addAtom(sites: AtomSiteTemplate, model: string, data: Tokenizer, s: num
     sites.index++;
 }
 
-async function pdbToMmCIF(pdb: PdbFile): Promise<CifFile> {
+export async function pdbToMmCif(pdb: PdbFile): Promise<CifFrame> {
     const { lines } = pdb;
     const { data, indices } = lines;
     const tokenizer = Tokenizer(data);
@@ -260,19 +258,8 @@ async function pdbToMmCIF(pdb: PdbFile): Promise<CifFile> {
     }
 
     return {
-        name: pdb.id,
-        blocks: [{
-            saveFrames: [],
-            header: pdb.id || 'PDB',
-            categoryNames: Object.keys(categories),
-            categories
-        }]
+        header: pdb.id || 'PDB',
+        categoryNames: Object.keys(categories),
+        categories
     };
-}
-
-export function convertPDBtoMmCif(pdb: PdbFile): Task<CifFile> {
-    return Task.create('Convert PDB to mmCIF', async ctx => {
-        await ctx.update('Converting to mmCIF...');
-        return pdbToMmCIF(pdb);
-    });
 }

+ 1 - 1
src/mol-plugin/state/actions/basic.ts

@@ -94,7 +94,7 @@ export const OpenStructure = StateAction.build({
 function createModelTree(b: StateTreeBuilder.To<PluginStateObject.Data.Binary | PluginStateObject.Data.String>, format: 'pdb' | 'cif' = 'cif') {
     const parsed = format === 'cif'
         ? b.apply(StateTransforms.Data.ParseCif).apply(StateTransforms.Model.TrajectoryFromMmCif)
-        : b.apply(StateTransforms.Data.ConvertPDBtoMmCif).apply(StateTransforms.Model.TrajectoryFromMmCif);
+        : b.apply(StateTransforms.Model.TrajectoryFromPDB);
 
     return parsed.apply(StateTransforms.Model.ModelFromTrajectory, { modelIndex: 0 });
 }

+ 0 - 20
src/mol-plugin/state/transforms/data.ts

@@ -15,8 +15,6 @@ import { Transformer } from 'mol-state';
 import { readFromFile } from 'mol-util/data-source';
 import * as CCP4 from 'mol-io/reader/ccp4/parser'
 import * as DSN6 from 'mol-io/reader/dsn6/parser'
-import { parsePDB } from 'mol-io/reader/pdb/parser';
-import { convertPDBtoMmCif } from 'mol-io/reader/pdb/to-cif';
 
 export { Download }
 type Download = typeof Download
@@ -97,24 +95,6 @@ const ParseCif = PluginStateTransform.BuiltIn({
     }
 });
 
-export { ConvertPDBtoMmCif }
-type ConvertPDBtoMmCif = typeof ConvertPDBtoMmCif
-const ConvertPDBtoMmCif = PluginStateTransform.BuiltIn({
-    name: 'convert-pdb-to-mmcif',
-    display: { name: 'Convert PDB string to mmCIF' },
-    from: [SO.Data.String],
-    to: SO.Format.Cif
-})({
-    apply({ a }) {
-        return Task.create('Parse CIF', async ctx => {
-            const parsed = await parsePDB(a.data).runInContext(ctx);
-            if (parsed.isError) throw new Error(parsed.message);
-            const cif = await convertPDBtoMmCif(parsed.result).runInContext(ctx);
-            return new SO.Format.Cif(cif);
-        });
-    }
-});
-
 export { ParseCcp4 }
 type ParseCcp4 = typeof ParseCcp4
 const ParseCcp4 = PluginStateTransform.BuiltIn({

+ 25 - 2
src/mol-plugin/state/transforms/model.ts

@@ -19,8 +19,10 @@ import { stringToWords } from 'mol-util/string';
 import { volumeFromCcp4 } from 'mol-model/volume/formats/ccp4';
 import { Vec3 } from 'mol-math/linear-algebra';
 import { volumeFromDsn6 } from 'mol-model/volume/formats/dsn6';
-import { parse_mmCIF } from 'mol-model-parsers/structure/mmcif';
+import { trajecotryFromMmCIF } from 'mol-model-parsers/structure/mmcif';
 import { ModelFormat } from 'mol-model-parsers/structure/format';
+import { parsePDB } from 'mol-io/reader/pdb/parser';
+import { trajectoryFromPDB } from 'mol-model-parsers/structure/pdb';
 
 export { TrajectoryFromMmCif }
 type TrajectoryFromMmCif = typeof TrajectoryFromMmCif
@@ -47,7 +49,7 @@ const TrajectoryFromMmCif = PluginStateTransform.BuiltIn({
             const header = params.blockHeader || a.data.blocks[0].header;
             const block = a.data.blocks.find(b => b.header === header);
             if (!block) throw new Error(`Data block '${[header]}' not found.`);
-            const models = await parse_mmCIF(ModelFormat.mmCIF(block)).runInContext(ctx);
+            const models = await trajecotryFromMmCIF(ModelFormat.mmCIF(block)).runInContext(ctx);
             if (models.length === 0) throw new Error('No models found.');
             const props = { label: models[0].label, description: `${models.length} model${models.length === 1 ? '' : 's'}` };
             return new SO.Molecule.Trajectory(models, props);
@@ -55,6 +57,27 @@ const TrajectoryFromMmCif = PluginStateTransform.BuiltIn({
     }
 });
 
+
+/**
+ * State transform that parses a PDB string and creates a molecule trajectory.
+ *
+ * Input is a `SO.Data.String`; output is a `SO.Molecule.Trajectory` labelled with
+ * the first model's label and described by the number of models.
+ * Throws if PDB parsing reports an error or if no models were produced.
+ */
+export { TrajectoryFromPDB }
+type TrajectoryFromPDB = typeof TrajectoryFromPDB
+const TrajectoryFromPDB = PluginStateTransform.BuiltIn({
+    name: 'trajectory-from-pdb',
+    display: { name: 'Parse PDB string and create trajectory' },
+    from: [SO.Data.String],
+    to: SO.Molecule.Trajectory
+})({
+    apply({ a }) {
+        return Task.create('Parse PDB', async ctx => {
+            const parsed = await parsePDB(a.data).runInContext(ctx);
+            if (parsed.isError) throw new Error(parsed.message);
+            const models = await trajectoryFromPDB(parsed.result).runInContext(ctx);
+            // Same guard as TrajectoryFromMmCif: fail with a clear message instead of
+            // dereferencing models[0] below when the trajectory is empty.
+            if (models.length === 0) throw new Error('No models found.');
+            const props = { label: models[0].label, description: `${models.length} model${models.length === 1 ? '' : 's'}` };
+            return new SO.Molecule.Trajectory(models, props);
+        });
+    }
+});
+
+
 export { ModelFromTrajectory }
 const plus1 = (v: number) => v + 1, minus1 = (v: number) => v - 1;
 type ModelFromTrajectory = typeof ModelFromTrajectory

+ 2 - 2
src/perf-tests/lookup3d.ts

@@ -8,7 +8,7 @@ import { GridLookup3D } from 'mol-math/geometry';
 // import { sortArray } from 'mol-data/util';
 import { OrderedSet } from 'mol-data/int';
 import { ModelFormat } from 'mol-model-parsers/structure/format';
-import { parse_mmCIF } from 'mol-model-parsers/structure/mmcif';
+import { trajecotryFromMmCIF } from 'mol-model-parsers/structure/mmcif';
 
 require('util.promisify').shim();
 const readFileAsync = util.promisify(fs.readFile);
@@ -34,7 +34,7 @@ export async function readCIF(path: string) {
     }
 
     const mmcif = ModelFormat.mmCIF(parsed.result.blocks[0]);
-    const models = await parse_mmCIF(mmcif).run();
+    const models = await trajecotryFromMmCIF(mmcif).run();
     const structures = models.map(Structure.ofModel);
 
     return { mmcif: mmcif.data, models, structures };

+ 2 - 2
src/perf-tests/structure.ts

@@ -17,7 +17,7 @@ import { Structure, Model, Queries as Q, StructureElement, StructureSelection, S
 import to_mmCIF from 'mol-model/structure/export/mmcif'
 import { Vec3 } from 'mol-math/linear-algebra';
 import { ModelFormat } from 'mol-model-parsers/structure/format';
-import { parse_mmCIF } from 'mol-model-parsers/structure/mmcif';
+import { trajecotryFromMmCIF } from 'mol-model-parsers/structure/mmcif';
 // import { printUnits } from 'apps/structure-info/model';
 // import { EquivalenceClasses } from 'mol-data/util';
 
@@ -76,7 +76,7 @@ export async function readCIF(path: string) {
 
     console.timeEnd('schema')
     console.time('buildModels')
-    const models = await parse_mmCIF(mmcif).run();
+    const models = await trajecotryFromMmCIF(mmcif).run();
     console.timeEnd('buildModels')
     const structures = models.map(Structure.ofModel);
 

+ 2 - 2
src/servers/model/server/structure-wrapper.ts

@@ -15,7 +15,7 @@ import * as zlib from 'zlib'
 import { Job } from './jobs';
 import { ConsoleLogger } from 'mol-util/console-logger';
 import { ModelPropertiesProvider } from '../property-provider';
-import { parse_mmCIF } from 'mol-model-parsers/structure/mmcif';
+import { trajecotryFromMmCIF } from 'mol-model-parsers/structure/mmcif';
 import { ModelFormat } from 'mol-model-parsers/structure/format';
 
 require('util.promisify').shim();
@@ -110,7 +110,7 @@ export async function readStructureWrapper(key: string, sourceId: string | '_loc
     const frame = (await parseCif(data)).blocks[0];
     perf.end('parse');
     perf.start('createModel');
-    const models = await parse_mmCIF(ModelFormat.mmCIF(frame)).run();
+    const models = await trajecotryFromMmCIF(ModelFormat.mmCIF(frame)).run();
     perf.end('createModel');
 
     const modelMap = new Map<number, Model>();