Bläddra i källkod

wip added mol-model-parsers module

David Sehnal 6 år sedan
förälder
incheckning
d0b5f448c5

+ 1 - 0
README.md

@@ -17,6 +17,7 @@ The core of Mol* currently consists of these modules:
 - `mol-math` Math related (loosely) algorithms and data structures.
 - `mol-io` Parsing library. Each format is parsed into an interface that corresponds to the data stored by it. Support for common coordinate, experimental/map, and annotation data formats.
 - `mol-model` Data structures and algorithms (such as querying) for representing molecular data (including coordinate, experimental/map, and annotation data).
+- `mol-model-parsers` Data format parsers for `mol-model`.
 - `mol-model-props` Common "custom properties".
 - `mol-script` A scripting language for creating representations/scenes and querying (includes the [MolQL query language](https://molql.github.io)).
 - `mol-geo` Creating (molecular) geometries.

+ 1 - 0
package.json

@@ -54,6 +54,7 @@
       "mol-math($|/.*)": "<rootDir>/src/mol-math$1",
       "mol-model($|/.*)": "<rootDir>/src/mol-model$1",
       "mol-model-props($|/.*)": "<rootDir>/src/mol-model-props$1",
+      "mol-model-parsers($|/.*)": "<rootDir>/src/mol-model-parsers$1",
       "mol-plugin($|/.*)": "<rootDir>/src/mol-plugin$1",
       "mol-ql($|/.*)": "<rootDir>/src/mol-ql$1",
       "mol-repr($|/.*)": "<rootDir>/src/mol-repr$1",

+ 4 - 5
src/mol-io/reader/_spec/cif.spec.ts

@@ -6,17 +6,16 @@
  */
 
 import * as Data from '../cif/data-model'
-import TextField from '../cif/text/field'
 import * as Schema from '../cif/schema'
 import { Column } from 'mol-data/db'
 
 const columnData = `123abc d,e,f '4 5 6'`;
 // 123abc d,e,f '4 5 6'
 
-const intField = TextField({ data: columnData, indices: [0, 1, 1, 2, 2, 3], count: 3 }, 3);
-const strField = TextField({ data: columnData, indices: [3, 4, 4, 5, 5, 6], count: 3 }, 3);
-const strListField = TextField({ data: columnData, indices: [7, 12], count: 1 }, 1);
-const intListField = TextField({ data: columnData, indices: [14, 19], count: 1 }, 1);
+const intField = Data.CifField.ofTokens({ data: columnData, indices: [0, 1, 1, 2, 2, 3], count: 3 });
+const strField = Data.CifField.ofTokens({ data: columnData, indices: [3, 4, 4, 5, 5, 6], count: 3 });
+const strListField = Data.CifField.ofTokens({ data: columnData, indices: [7, 12], count: 1 });
+const intListField = Data.CifField.ofTokens({ data: columnData, indices: [14, 19], count: 1 });
 
 const testBlock = Data.CifBlock(['test'], {
     test: Data.CifCategory('test', 3, ['int', 'str', 'strList', 'intList'], {

+ 2 - 2
src/mol-io/reader/csv/field.ts

@@ -4,6 +4,6 @@
  * @author Alexander Rose <alexander.rose@weirdbyte.de>
  */
 
-import Field from '../cif/text/field'
+import { CifField } from '../cif/data-model';
 
-export default Field
+export default CifField.ofTokens

+ 1 - 1
src/mol-io/reader/csv/parser.ts

@@ -254,7 +254,7 @@ async function handleRecords(state: State): Promise<Data.CsvTable> {
 
     const columns: Data.CsvColumns = Object.create(null);
     for (let i = 0; i < state.columnCount; ++i) {
-        columns[state.columnNames[i]] = Field(state.tokens[i], state.recordCount);
+        columns[state.columnNames[i]] = Field(state.tokens[i]);
     }
 
     return Data.CsvTable(state.recordCount, state.columnNames, columns)

+ 18 - 0
src/mol-model-parsers/structure/format.ts

@@ -0,0 +1,18 @@
+/**
+ * Copyright (c) 2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import { mmCIF_Database } from 'mol-io/reader/cif/schema/mmcif';
+import CIF, { CifFrame } from 'mol-io/reader/cif';
+
+/** Union of the source formats a model can be created from; mmCIF is the only member. */
+type ModelFormat = ModelFormat.mmCIF
+
+namespace ModelFormat {
+    /** An mmCIF source: the CIF frame together with its mmCIF database. */
+    export interface mmCIF {
+        kind: 'mmCIF',
+        data: mmCIF_Database,
+        frame: CifFrame
+    }
+
+    /**
+     * Wraps a CIF frame as an mmCIF model format.
+     * If `data` is not provided (or is falsy), it is created from `frame` with `CIF.schema.mmCIF`.
+     */
+    export function mmCIF(frame: CifFrame, data?: mmCIF_Database): mmCIF {
+        const database = data || CIF.schema.mmCIF(frame);
+        return { kind: 'mmCIF', data: database, frame };
+    }
+}
+
+export { ModelFormat }

+ 311 - 0
src/mol-model-parsers/structure/mmcif.ts

@@ -0,0 +1,311 @@
+/**
+ * Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ */
+
+import { Column, Table } from 'mol-data/db';
+import { mmCIF_Database, mmCIF_Schema } from 'mol-io/reader/cif/schema/mmcif';
+import { Spacegroup, SpacegroupCell, SymmetryOperator } from 'mol-math/geometry';
+import { Tensor, Vec3 } from 'mol-math/linear-algebra';
+import { Task, RuntimeContext } from 'mol-task';
+import UUID from 'mol-util/uuid';
+import { Model } from 'mol-model/structure/model/model';
+import { Entities } from 'mol-model/structure/model/properties/common';
+import { CustomProperties } from 'mol-model/structure/model/properties/custom';
+import { ModelSymmetry } from 'mol-model/structure/model/properties/symmetry';
+import { createAssemblies } from './mmcif/assembly';
+import { getAtomicHierarchyAndConformation } from './mmcif/atomic';
+import { ComponentBond } from './mmcif/bonds';
+import { getIHMCoarse, EmptyIHMCoarse, IHMData } from './mmcif/ihm';
+import { getSecondaryStructureMmCif } from './mmcif/secondary-structure';
+import { getSequence } from './mmcif/sequence';
+import { sortAtomSite } from './mmcif/sort';
+import { StructConn } from './mmcif/bonds/struct_conn';
+import { ChemicalComponent, ChemicalComponentMap } from 'mol-model/structure/model/properties/chemical-component';
+import { ComponentType, getMoleculeType, MoleculeType } from 'mol-model/structure/model/types';
+import { ModelFormat } from './format';
+import { SaccharideComponentMap, SaccharideComponent, SaccharidesSnfgMap, SaccharideCompIdMap, UnknownSaccharideComponent } from 'mol-model/structure/structure/carbohydrates/constants';
+
+import mmCIF_Format = ModelFormat.mmCIF
+
+/** Type of the `atom_site` member of the mmCIF database. */
+type AtomSite = mmCIF_Database['atom_site']
+
+/**
+ * Gathers the symmetry properties of a model: assemblies, spacegroup,
+ * the non-standard crystal frame flag and the NCS operators.
+ */
+function getSymmetry(format: mmCIF_Format): ModelSymmetry {
+    const assemblies = createAssemblies(format);
+    const spacegroup = getSpacegroup(format);
+    const symmetry: ModelSymmetry = {
+        assemblies,
+        spacegroup,
+        isNonStandardCrytalFrame: checkNonStandardCrystalFrame(format, spacegroup),
+        ncsOperators: getNcsOperators(format)
+    };
+    return symmetry;
+}
+
+/**
+ * Reports whether the model uses a non-standard crystal frame.
+ * Not implemented yet: every input yields `false` and `spacegroup` is not read.
+ */
+function checkNonStandardCrystalFrame(format: mmCIF_Format, spacegroup: Spacegroup) {
+    const hasAtomSites = format.data.atom_sites._rowCount !== 0;
+    if (!hasAtomSites) return false;
+    // TODO: parse atom_sites transform and check if it corresponds to the toFractional matrix
+    return false;
+}
+
+/**
+ * Creates the spacegroup from the first row of the `symmetry` and `cell` categories.
+ * Returns `Spacegroup.ZeroP1` when either category has no rows.
+ */
+function getSpacegroup(format: mmCIF_Format): Spacegroup {
+    const { symmetry, cell } = format.data;
+    const hasCrystalInfo = symmetry._rowCount !== 0 && cell._rowCount !== 0;
+    if (!hasCrystalInfo) return Spacegroup.ZeroP1;
+
+    const groupName = symmetry['space_group_name_H-M'].value(0);
+    const lengths = Vec3.create(cell.length_a.value(0), cell.length_b.value(0), cell.length_c.value(0));
+    const angles = Vec3.create(cell.angle_alpha.value(0), cell.angle_beta.value(0), cell.angle_gamma.value(0));
+    // the cell angles are scaled by PI / 180 before being handed to SpacegroupCell
+    const scaledAngles = Vec3.scale(Vec3.zero(), angles, Math.PI / 180);
+
+    return Spacegroup.create(SpacegroupCell.create(groupName, lengths, scaledAngles));
+}
+
+/**
+ * Builds symmetry operators from the rows of `struct_ncs_oper`.
+ * Returns `undefined` when the category is empty; rows that fail
+ * `SymmetryOperator.checkIfRotationAndTranslation` are left out.
+ */
+function getNcsOperators(format: mmCIF_Format) {
+    const { struct_ncs_oper } = format.data;
+    const rowCount = struct_ncs_oper._rowCount;
+    if (rowCount === 0) return void 0;
+
+    const { id, matrix, vector } = struct_ncs_oper;
+    const matrixSpace = mmCIF_Schema.struct_ncs_oper.matrix.space;
+    const vectorSpace = mmCIF_Schema.struct_ncs_oper.vector.space;
+
+    const operators: SymmetryOperator[] = [];
+    for (let row = 0; row < rowCount; row++) {
+        const rotation = Tensor.toMat3(matrixSpace, matrix.value(row));
+        const offset = Tensor.toVec3(vectorSpace, vector.value(row));
+        if (SymmetryOperator.checkIfRotationAndTranslation(rotation, offset)) {
+            operators.push(SymmetryOperator.ofRotationAndOffset(`ncs_${id.value(row)}`, rotation, offset));
+        }
+    }
+    return operators;
+}
+/**
+ * Creates two lookups from `pdbx_struct_mod_residue`, both keyed by component id:
+ * `parentId` (value of `parent_comp_id`) and `details` (value of `details`).
+ * The key is taken from `label_comp_id` when that column is defined, otherwise from `auth_comp_id`.
+ * If a component id occurs in several rows, the last row wins.
+ */
+function getModifiedResidueNameMap(format: mmCIF_Format): Model['properties']['modifiedResidues'] {
+    const category = format.data.pdbx_struct_mod_residue;
+    const compIdColumn = category.label_comp_id.isDefined ? category.label_comp_id : category.auth_comp_id;
+    const parentColumn = category.parent_comp_id;
+    const detailsColumn = category.details;
+
+    const parentId = new Map<string, string>();
+    const details = new Map<string, string>();
+    for (let row = 0; row < category._rowCount; row++) {
+        const compId = compIdColumn.value(row);
+        parentId.set(compId, parentColumn.value(row));
+        details.set(compId, detailsColumn.value(row));
+    }
+
+    return { parentId, details };
+}
+
+/**
+ * Creates a map from component id to its `ChemicalComponent` record,
+ * with one entry per row of the `chem_comp` category.
+ */
+function getChemicalComponentMap(format: mmCIF_Format): ChemicalComponentMap {
+    const { id, type, name, pdbx_synonyms, formula, formula_weight } = format.data.chem_comp;
+    const components = new Map<string, ChemicalComponent>();
+    const rowCount = id.rowCount;
+    for (let row = 0; row < rowCount; ++row) {
+        const compId = id.value(row);
+        const compType = type.value(row);
+        components.set(compId, {
+            id: compId,
+            type: ComponentType[compType],
+            moleculeType: getMoleculeType(compType, compId),
+            name: name.value(row),
+            synonyms: pdbx_synonyms.value(row),
+            formula: formula.value(row),
+            formulaWeight: formula_weight.value(row),
+        });
+    }
+    return components;
+}
+
+/**
+ * Creates a map from component id to saccharide component. The source is chosen in this order:
+ *  1. `pdbx_chem_comp_identifier` rows of type 'SNFG CARB SYMBOL', looked up in `SaccharidesSnfgMap`
+ *     (unknown names are reported with `console.warn` and skipped),
+ *  2. `chem_comp` rows, looked up by id in `SaccharideCompIdMap`; ids that are not found but whose
+ *     molecule type is saccharide are mapped to `UnknownSaccharideComponent`,
+ *  3. otherwise all entries of `SaccharideCompIdMap`.
+ */
+function getSaccharideComponentMap(format: mmCIF_Format): SaccharideComponentMap {
+    const saccharides = new Map<string, SaccharideComponent>();
+    const { pdbx_chem_comp_identifier } = format.data;
+
+    if (pdbx_chem_comp_identifier._rowCount > 0) {
+        const { comp_id, type, identifier } = pdbx_chem_comp_identifier;
+        const rowCount = pdbx_chem_comp_identifier._rowCount;
+        for (let row = 0; row < rowCount; ++row) {
+            if (type.value(row) !== 'SNFG CARB SYMBOL') continue;
+
+            const snfgName = identifier.value(row);
+            const component = SaccharidesSnfgMap.get(snfgName);
+            if (!component) {
+                console.warn(`Unknown SNFG name '${snfgName}'`);
+                continue;
+            }
+            saccharides.set(comp_id.value(row), component);
+        }
+        return saccharides;
+    }
+
+    if (format.data.chem_comp._rowCount > 0) {
+        const { id, type } = format.data.chem_comp;
+        const rowCount = id.rowCount;
+        for (let row = 0; row < rowCount; ++row) {
+            const compId = id.value(row);
+            const compType = type.value(row);
+            if (SaccharideCompIdMap.has(compId)) {
+                saccharides.set(compId, SaccharideCompIdMap.get(compId)!);
+                continue;
+            }
+            const isUnseenSaccharide = !saccharides.has(compId) && getMoleculeType(compType, compId) === MoleculeType.saccharide;
+            if (isUnseenSaccharide) saccharides.set(compId, UnknownSaccharideComponent);
+        }
+        return saccharides;
+    }
+
+    // TODO check if present in format.data.atom_site.label_comp_id
+    SaccharideCompIdMap.forEach((component, compId) => saccharides.set(compId, component));
+    return saccharides;
+}
+
+/** Model properties that are derived from the format data once (see `getFormatData`) and shared by the created models. */
+export interface FormatData {
+    /** Lookups created by `getModifiedResidueNameMap`. */
+    modifiedResidues: Model['properties']['modifiedResidues']
+    /** Map created by `getChemicalComponentMap`. */
+    chemicalComponentMap: Model['properties']['chemicalComponentMap']
+    /** Map created by `getSaccharideComponentMap`. */
+    saccharideComponentMap: Model['properties']['saccharideComponentMap']
+}
+
+/** Derives the modified residue, chemical component and saccharide component lookups from the format. */
+function getFormatData(format: mmCIF_Format): FormatData {
+    const modifiedResidues = getModifiedResidueNameMap(format);
+    const chemicalComponentMap = getChemicalComponentMap(format);
+    const saccharideComponentMap = getSaccharideComponentMap(format);
+    return { modifiedResidues, chemicalComponentMap, saccharideComponentMap };
+}
+
+/**
+ * Creates a model from one window of `atom_site`.
+ *
+ * If `previous` is given and the atomic hierarchy is reported as the same, the previous model is
+ * copied and only `id`, `modelNum`, `atomicConformation` and `_dynamicPropertyData` are replaced.
+ * Otherwise a complete model with an empty coarse part is created. Its label is the entry id
+ * when that value is present, else `format.data._name`.
+ */
+function createStandardModel(format: mmCIF_Format, atom_site: AtomSite, entities: Entities, formatData: FormatData, previous?: Model): Model {
+    const atomic = getAtomicHierarchyAndConformation(format, atom_site, entities, formatData, previous);
+    const modelNum = atom_site.pdbx_PDB_model_num.value(0);
+
+    if (previous && atomic.sameAsPrevious) {
+        const reused: Model = {
+            ...previous,
+            id: UUID.create22(),
+            modelNum,
+            atomicConformation: atomic.conformation,
+            _dynamicPropertyData: Object.create(null)
+        };
+        return reused;
+    }
+
+    const { entry } = format.data;
+    const hasEntryId = entry.id.valueKind(0) === Column.ValueKind.Present;
+    const label = hasEntryId ? entry.id.value(0) : format.data._name;
+    const { hierarchy: atomicHierarchy, conformation: atomicConformation } = atomic;
+    const { hierarchy: coarseHierarchy, conformation: coarseConformation } = EmptyIHMCoarse;
+
+    return {
+        id: UUID.create22(),
+        label,
+        sourceData: format,
+        modelNum,
+        entities,
+        symmetry: getSymmetry(format),
+        sequence: getSequence(format.data, entities, atomicHierarchy, formatData.modifiedResidues.parentId),
+        atomicHierarchy,
+        atomicConformation,
+        coarseHierarchy,
+        coarseConformation,
+        properties: {
+            secondaryStructure: getSecondaryStructureMmCif(format.data, atomicHierarchy),
+            ...formatData
+        },
+        customProperties: new CustomProperties(),
+        _staticPropertyData: Object.create(null),
+        _dynamicPropertyData: Object.create(null)
+    };
+}
+
+/**
+ * Creates a model for one IHM model entry: the atomic part comes from `data.atom_site`,
+ * the coarse part from `getIHMCoarse`. Label and model number are `data.model_name` and `data.model_id`.
+ */
+function createModelIHM(format: mmCIF_Format, data: IHMData, formatData: FormatData): Model {
+    const { entities } = data;
+    const { hierarchy: atomicHierarchy, conformation: atomicConformation } =
+        getAtomicHierarchyAndConformation(format, data.atom_site, entities, formatData);
+    const { hierarchy: coarseHierarchy, conformation: coarseConformation } = getIHMCoarse(data, formatData);
+
+    return {
+        id: UUID.create22(),
+        label: data.model_name,
+        sourceData: format,
+        modelNum: data.model_id,
+        entities,
+        symmetry: getSymmetry(format),
+        sequence: getSequence(format.data, entities, atomicHierarchy, formatData.modifiedResidues.parentId),
+        atomicHierarchy,
+        atomicConformation,
+        coarseHierarchy,
+        coarseConformation,
+        properties: {
+            secondaryStructure: getSecondaryStructureMmCif(format.data, atomicHierarchy),
+            ...formatData
+        },
+        customProperties: new CustomProperties(),
+        _staticPropertyData: Object.create(null),
+        _dynamicPropertyData: Object.create(null)
+    };
+}
+
+/** Calls `ComponentBond.attachFromMmCif` and then `StructConn.attachFromMmCif` on the model. */
+function attachProps(model: Model) {
+    ComponentBond.attachFromMmCif(model);
+    StructConn.attachFromMmCif(model);
+}
+
+/**
+ * Finds the exclusive end of the run of rows that have the same value as the row at `startIndex`.
+ * If the column is not defined, the row count is returned (all rows form a single run).
+ */
+function findModelEnd(num: Column<number>, startIndex: number) {
+    const rowCount = num.rowCount;
+    if (!num.isDefined) return rowCount;
+
+    for (let i = startIndex + 1; i < rowCount; i++) {
+        if (!num.areValuesEqual(startIndex, i)) return i;
+    }
+    // no differing row found: the run extends to the end, but the result is never below startIndex + 1
+    return Math.max(startIndex + 1, rowCount);
+}
+
+/**
+ * Creates one model per run of equal `pdbx_PDB_model_num` values in `atom_site`.
+ * Each run is passed through `sortAtomSite` first; the previously created model
+ * is handed to `createStandardModel` so that it can be reused.
+ */
+async function readStandard(ctx: RuntimeContext, format: mmCIF_Format, formatData: FormatData) {
+    const { atom_site: allAtoms, entity } = format.data;
+    const atomCount = allAtoms._rowCount;
+    const entities: Entities = { data: entity, getEntityIndex: Column.createIndexer(entity.id) };
+
+    const models: Model[] = [];
+    let previous: Model | undefined = void 0;
+    let start = 0;
+    while (start < atomCount) {
+        const end = findModelEnd(allAtoms.pdbx_PDB_model_num, start);
+        const atom_site = await sortAtomSite(ctx, allAtoms, start, end);
+        const model = createStandardModel(format, atom_site, entities, formatData, previous);
+        attachProps(model);
+        models.push(model);
+        previous = model;
+        start = end;
+    }
+    return models;
+}
+
+/**
+ * Splits a table into windows of consecutive rows that share the same value in `col`.
+ * The windows are returned in a map keyed by that value; if a value starts
+ * more than one run, the later window replaces the earlier one.
+ */
+function splitTable<T extends Table<any>>(table: T, col: Column<number>) {
+    const windows = new Map<number, T>();
+    const rowCount = table._rowCount;
+    for (let start = 0, end = 0; start < rowCount; start = end) {
+        end = findModelEnd(col, start);
+        windows.set(col.value(start), Table.window(table, table._schema, start, end) as T);
+    }
+    return windows;
+}
+
+/**
+ * Creates one model per row of `ihm_model_list`. The atom, sphere and gaussian sites
+ * are split by model id; a model without rows in one of them gets an empty window of that table.
+ * Throws if `_atom_site.ihm_model_id` is not defined.
+ */
+async function readIHM(ctx: RuntimeContext, format: mmCIF_Format, formatData: FormatData) {
+    const { ihm_model_list, entity, atom_site, ihm_sphere_obj_site, ihm_gaussian_obj_site } = format.data;
+    const entities: Entities = { data: entity, getEntityIndex: Column.createIndexer(entity.id) };
+
+    if (!atom_site.ihm_model_id.isDefined) {
+        throw new Error('expected _atom_site.ihm_model_id to be defined')
+    }
+
+    // TODO: will IHM require sorting or will we trust it?
+    const atomsByModel = splitTable(atom_site, atom_site.ihm_model_id);
+    const spheresByModel = splitTable(ihm_sphere_obj_site, ihm_sphere_obj_site.model_id);
+    const gaussiansByModel = splitTable(ihm_gaussian_obj_site, ihm_gaussian_obj_site.model_id);
+
+    const models: Model[] = [];
+    const modelCount = ihm_model_list._rowCount;
+    for (let row = 0; row < modelCount; row++) {
+        const id = ihm_model_list.model_id.value(row);
+        const data: IHMData = {
+            model_id: id,
+            model_name: ihm_model_list.model_name.value(row),
+            entities,
+            atom_site: atomsByModel.get(id) || Table.window(atom_site, atom_site._schema, 0, 0),
+            ihm_sphere_obj_site: spheresByModel.get(id) || Table.window(ihm_sphere_obj_site, ihm_sphere_obj_site._schema, 0, 0),
+            ihm_gaussian_obj_site: gaussiansByModel.get(id) || Table.window(ihm_gaussian_obj_site, ihm_gaussian_obj_site._schema, 0, 0)
+        };
+        const model = createModelIHM(format, data, formatData);
+        attachProps(model);
+        models.push(model);
+    }
+
+    return models;
+}
+
+/**
+ * Creates the task that builds all models of an mmCIF format.
+ * The IHM reader is used when `ihm_model_list` has rows, the standard reader otherwise.
+ * The format data is computed right away, not when the task runs.
+ */
+function buildModels(format: mmCIF_Format): Task<ReadonlyArray<Model>> {
+    const formatData = getFormatData(format);
+    return Task.create('Create mmCIF Model', async ctx => {
+        if (format.data.ihm_model_list._rowCount > 0) {
+            return await readIHM(ctx, format, formatData);
+        }
+        return await readStandard(ctx, format, formatData);
+    });
+}
+
+export default buildModels;

+ 150 - 0
src/mol-model-parsers/structure/mmcif/assembly.ts

@@ -0,0 +1,150 @@
+/**
+ * Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import { Mat4, Tensor } from 'mol-math/linear-algebra'
+import { SymmetryOperator } from 'mol-math/geometry/symmetry-operator'
+import { Assembly, OperatorGroup, OperatorGroups } from 'mol-model/structure/model/properties/symmetry'
+import { Queries as Q } from 'mol-model/structure'
+import { StructureProperties } from 'mol-model/structure';
+import { ModelFormat } from '../format';
+import mmCIF_Format = ModelFormat.mmCIF
+
+/**
+ * Creates one assembly per row of `pdbx_struct_assembly`.
+ * Returns an empty array when the category has no rows.
+ */
+export function createAssemblies(format: mmCIF_Format): ReadonlyArray<Assembly> {
+    const assemblyCount = format.data.pdbx_struct_assembly._rowCount;
+    if (!assemblyCount) return [];
+
+    const matrices = getMatrices(format);
+    const assemblies: Assembly[] = [];
+    for (let row = 0; row < assemblyCount; row++) {
+        assemblies.push(createAssembly(format, row, matrices));
+    }
+    return assemblies;
+}
+
+/** Transformation matrices keyed by the `id` value of `pdbx_struct_oper_list` (see `getMatrices`). */
+type Matrices = Map<string, Mat4>
+/** One row of `pdbx_struct_assembly_gen` that belongs to an assembly (see `createAssembly`). */
+type Generator = { assemblyId: string, expression: string, asymIds: string[] }
+
+/**
+ * Creates the assembly for the row `index` of `pdbx_struct_assembly`. All rows of
+ * `pdbx_struct_assembly_gen` that refer to the assembly id are collected as generators
+ * and handed to the operator groups provider.
+ */
+function createAssembly(format: mmCIF_Format, index: number, matrices: Matrices): Assembly {
+    const { pdbx_struct_assembly, pdbx_struct_assembly_gen } = format.data;
+    const { assembly_id, oper_expression, asym_id_list } = pdbx_struct_assembly_gen;
+
+    const assemblyId = pdbx_struct_assembly.id.value(index);
+    const details = pdbx_struct_assembly.details.value(index);
+
+    const generators: Generator[] = [];
+    const generatorCount = pdbx_struct_assembly_gen._rowCount;
+    for (let row = 0; row < generatorCount; row++) {
+        if (assembly_id.value(row) === assemblyId) {
+            generators.push({
+                assemblyId,
+                expression: oper_expression.value(row),
+                asymIds: asym_id_list.value(row)
+            });
+        }
+    }
+
+    return Assembly.create(assemblyId, details, operatorGroupsProvider(generators, matrices));
+}
+
+/**
+ * Returns a function that builds the operator groups on demand, one group per generator.
+ * The operators of a group come from the generator's expanded operator expression; the selector
+ * picks the chains with the default operator name whose label_asym_id is in the generator's list.
+ * Operator numbering continues across the groups.
+ */
+function operatorGroupsProvider(generators: Generator[], matrices: Matrices): () => OperatorGroups {
+    return () => {
+        const groups: OperatorGroup[] = [];
+
+        let offset = 0;
+        for (const { expression, assemblyId, asymIds } of generators) {
+            const operatorNames = expandOperators(parseOperatorList(expression));
+            const operators = getAssemblyOperators(matrices, operatorNames, offset, assemblyId);
+            const chainTest = Q.pred.and(
+                Q.pred.eq(ctx => StructureProperties.unit.operator_name(ctx.element), SymmetryOperator.DefaultName),
+                Q.pred.inSet(ctx => StructureProperties.chain.label_asym_id(ctx.element), asymIds)
+            );
+            const selector = Q.generators.atoms({ chainTest });
+            groups.push({ selector, operators });
+            offset += operators.length;
+        }
+
+        return groups;
+    }
+}
+
+/**
+ * Creates a 4x4 matrix for every row of `pdbx_struct_oper_list`, keyed by the row's `id`.
+ * The `matrix` column is converted with `Tensor.toMat4`, the `vector` column is set as
+ * the translation and the value at (3, 3) is set to 1.
+ */
+function getMatrices({ data }: mmCIF_Format): Matrices {
+    const { id, matrix, vector, _schema: schema, _rowCount: rowCount } = data.pdbx_struct_oper_list;
+    const matrices: Matrices = new Map<string, Mat4>();
+
+    for (let row = 0; row < rowCount; row++) {
+        const transform = Tensor.toMat4(schema.matrix.space, matrix.value(row));
+        const translation = Tensor.toVec3(schema.vector.space, vector.value(row));
+        Mat4.setTranslation(transform, translation);
+        Mat4.setValue(transform, 3, 3, 1);
+        matrices.set(id.value(row), transform);
+    }
+
+    return matrices;
+}
+
+/**
+ * Expands a list of operator groups into all combinations that take one operator from each group.
+ * An empty list results in a single empty combination.
+ */
+function expandOperators(operatorList: string[][]) {
+    const combinations: string[][] = [];
+    // working buffer with one slot per group
+    const scratch: string[] = operatorList.map(() => '');
+    expandOperators1(operatorList, combinations, operatorList.length - 1, scratch);
+    return combinations;
+}
+
+/**
+ * Recursive step of the operator expansion. Assigns every operator of the group `i` to `current[i]`
+ * and recurses into the group `i - 1`; when `i` drops below zero, a copy of `current` is appended to `list`.
+ */
+function expandOperators1(operatorNames: string[][], list: string[][], i: number, current: string[]) {
+    if (i >= 0) {
+        for (const name of operatorNames[i]) {
+            current[i] = name;
+            expandOperators1(operatorNames, list, i - 1, current);
+        }
+    } else {
+        list.push(current.slice());
+    }
+}
+
+/**
+ * Creates one symmetry operator per operator combination. The matrices of a combination are
+ * multiplied, in list order, onto an identity matrix. The operators are named `A-<n>`,
+ * where n starts at `startIndex + 1`.
+ */
+function getAssemblyOperators(matrices: Matrices, operatorNames: string[][], startIndex: number, assemblyId: string) {
+    const operators: SymmetryOperator[] = [];
+
+    operatorNames.forEach((operList, k) => {
+        const transform = Mat4.identity();
+        for (const operId of operList) {
+            Mat4.mul(transform, transform, matrices.get(operId)!);
+        }
+        const name = `A-${startIndex + k + 1}`;
+        operators.push(SymmetryOperator.create(name, transform, { id: assemblyId, operList }));
+    });
+
+    return operators;
+}
+
+/**
+ * Parses an operator expression into its groups of operator ids.
+ *
+ * Each parenthesized part (or the whole value, if there are no parentheses) is one group.
+ * The elements of a group are separated by commas; an element of the form `from-to` with
+ * integer bounds is expanded to all integers of that inclusive range. Any other element,
+ * including one with a dash whose bounds are not both numbers (e.g. `A-B`), is kept as a
+ * trimmed literal id instead of being silently dropped.
+ *
+ * @param value the operator expression, e.g. '(X0)(1-5)'
+ * @returns one array of ids per group; an empty array for an empty expression
+ */
+function parseOperatorList(value: string): string[][] {
+    // '(X0)(1-5)' becomes [['X0'], ['1', '2', '3', '4', '5']]
+    // kudos to Glen van Ginkel.
+
+    const oeRegex = /\(?([^\(\)]+)\)?]*/g;
+    const ret: string[][] = [];
+
+    let match: RegExpExecArray | null;
+    while ((match = oeRegex.exec(value)) !== null) {
+        const group: string[] = [];
+        for (const e of match[1].split(',')) {
+            const dashIndex = e.indexOf('-');
+            // a dash at position 0 is not a range separator (same as before)
+            const from = dashIndex > 0 ? parseInt(e.substring(0, dashIndex), 10) : NaN;
+            const to = dashIndex > 0 ? parseInt(e.substring(dashIndex + 1), 10) : NaN;
+
+            if (isNaN(from) || isNaN(to)) {
+                // not a numeric range: keep the element as it is
+                group.push(e.trim());
+            } else {
+                for (let i = from; i <= to; i++) group.push(i.toString());
+            }
+        }
+        ret.push(group);
+    }
+
+    return ret;
+}

+ 107 - 0
src/mol-model-parsers/structure/mmcif/atomic.ts

@@ -0,0 +1,107 @@
+/**
+ * Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import { Column, Table } from 'mol-data/db';
+import { Interval, Segmentation } from 'mol-data/int';
+import { mmCIF_Database } from 'mol-io/reader/cif/schema/mmcif';
+import UUID from 'mol-util/uuid';
+import { ElementIndex } from 'mol-model/structure';
+import { Model } from 'mol-model/structure/model/model';
+import { AtomicConformation, AtomicData, AtomicHierarchy, AtomicSegments, AtomsSchema, ChainsSchema, ResiduesSchema } from 'mol-model/structure/model/properties/atomic';
+import { getAtomicIndex } from 'mol-model/structure/model/properties/utils/atomic-index';
+import { ElementSymbol } from 'mol-model/structure/model/types';
+import { Entities } from 'mol-model/structure/model/properties/common';
+import { getAtomicRanges } from 'mol-model/structure/model/properties/utils/atomic-ranges';
+import { FormatData } from '../mmcif';
+import { getAtomicDerivedData } from 'mol-model/structure/model/properties/utils/atomic-derived';
+import { ModelFormat } from '../format';
+import mmCIF_Format = ModelFormat.mmCIF
+
+
+/** Type of the `atom_site` member of the mmCIF database. */
+type AtomSite = mmCIF_Database['atom_site']
+
+/**
+ * Finds the row indices at which a new residue or a new chain starts.
+ * A chain starts where label_entity_id or label_asym_id differs from the previous row.
+ * A residue starts where a chain starts, or where label_seq_id, auth_seq_id,
+ * pdbx_PDB_ins_code or label_comp_id differs from the previous row.
+ * Row 0 always starts both; an empty table gives two empty lists.
+ */
+function findHierarchyOffsets(atom_site: AtomSite) {
+    const atomCount = atom_site._rowCount;
+    if (atomCount === 0) return { residues: [], chains: [] };
+
+    const residues = [0 as ElementIndex], chains = [0 as ElementIndex];
+
+    const { label_entity_id, label_asym_id, label_seq_id, auth_seq_id, pdbx_PDB_ins_code, label_comp_id } = atom_site;
+
+    for (let i = 1; i < atomCount; i++) {
+        const prev = i - 1;
+        const sameChain = label_entity_id.areValuesEqual(prev, i) && label_asym_id.areValuesEqual(prev, i);
+        const sameResidue = sameChain
+            && label_seq_id.areValuesEqual(prev, i)
+            && auth_seq_id.areValuesEqual(prev, i)
+            && pdbx_PDB_ins_code.areValuesEqual(prev, i)
+            && label_comp_id.areValuesEqual(prev, i);
+
+        if (!sameResidue) residues.push(i as ElementIndex);
+        if (!sameChain) chains.push(i as ElementIndex);
+    }
+    return { residues, chains };
+}
+
+/**
+ * Creates the atoms, residues and chains tables of the atomic hierarchy.
+ * The atoms table uses the columns of `atom_site` (with `type_symbol` mapped through `ElementSymbol`);
+ * residues and chains are views of `atom_site` at the given offsets.
+ */
+function createHierarchyData(atom_site: AtomSite, offsets: { residues: ArrayLike<number>, chains: ArrayLike<number> }): AtomicData {
+    const { type_symbol, label_atom_id, auth_atom_id, label_alt_id, pdbx_formal_charge } = atom_site;
+    const elementSymbols = Column.ofArray({
+        array: Column.mapToArray(type_symbol, ElementSymbol),
+        schema: Column.Schema.Aliased<ElementSymbol>(Column.Schema.str)
+    });
+    const atoms = Table.ofColumns(AtomsSchema, {
+        type_symbol: elementSymbols,
+        label_atom_id,
+        auth_atom_id,
+        label_alt_id,
+        pdbx_formal_charge
+    });
+
+    const residues = Table.view(atom_site, ResiduesSchema, offsets.residues);
+    // Optimize the numeric columns
+    Table.columnToArray(residues, 'label_seq_id', Int32Array);
+    Table.columnToArray(residues, 'auth_seq_id', Int32Array);
+
+    const chains = Table.view(atom_site, ChainsSchema, offsets.chains);
+    return { atoms, residues, chains };
+}
+
+/**
+ * Creates the atomic conformation with a new id. The atom id, occupancy and B-factor columns
+ * are used as they are; the Cartesian coordinates are copied into Float32Array instances.
+ */
+function getConformation(atom_site: AtomSite): AtomicConformation {
+    const id = UUID.create22();
+    const { id: atomId, occupancy, B_iso_or_equiv, Cartn_x, Cartn_y, Cartn_z } = atom_site;
+    const x = Cartn_x.toArray({ array: Float32Array });
+    const y = Cartn_y.toArray({ array: Float32Array });
+    const z = Cartn_z.toArray({ array: Float32Array });
+    return { id, atomId, occupancy, B_iso_or_equiv, x, y, z };
+}
+
+/** Compares the chains, residues and atoms tables (in this order) of two hierarchies with `Table.areEqual`. */
+function isHierarchyDataEqual(a: AtomicData, b: AtomicData) {
+    // TODO need to cast because of how TS handles type resolution for interfaces https://github.com/Microsoft/TypeScript/issues/15300
+    if (!Table.areEqual(a.chains as Table<ChainsSchema>, b.chains as Table<ChainsSchema>)) return false;
+    if (!Table.areEqual(a.residues as Table<ResiduesSchema>, b.residues as Table<ResiduesSchema>)) return false;
+    return Table.areEqual(a.atoms as Table<AtomsSchema>, b.atoms as Table<AtomsSchema>);
+}
+
+/**
+ * Creates the atomic hierarchy and conformation for one `atom_site` table.
+ *
+ * If `previous` is given and its hierarchy data equals the new one, the previous hierarchy
+ * is returned together with a new conformation and `sameAsPrevious` is true. Otherwise
+ * the segments, index, derived data and ranges are computed and `sameAsPrevious` is false.
+ */
+export function getAtomicHierarchyAndConformation(format: mmCIF_Format, atom_site: AtomSite, entities: Entities, formatData: FormatData, previous?: Model) {
+    const offsets = findHierarchyOffsets(atom_site);
+    const data = createHierarchyData(atom_site, offsets);
+    const conformation = getConformation(atom_site);
+
+    if (previous && isHierarchyDataEqual(previous.atomicHierarchy, data)) {
+        return { sameAsPrevious: true, hierarchy: previous.atomicHierarchy, conformation };
+    }
+
+    const allAtoms = Interval.ofBounds(0, atom_site._rowCount);
+    const segments: AtomicSegments = {
+        residueAtomSegments: Segmentation.ofOffsets(offsets.residues, allAtoms),
+        chainAtomSegments: Segmentation.ofOffsets(offsets.chains, Interval.ofBounds(0, atom_site._rowCount)),
+    }
+
+    const { chemicalComponentMap } = formatData;
+    const index = getAtomicIndex(data, entities, segments);
+    const derived = getAtomicDerivedData(data, index, chemicalComponentMap);
+    const ranges = getAtomicRanges(data, segments, conformation, chemicalComponentMap);
+    const hierarchy: AtomicHierarchy = { ...data, ...segments, ...ranges, index, derived };
+    return { sameAsPrevious: false, hierarchy, conformation };
+}

+ 9 - 0
src/mol-model-parsers/structure/mmcif/bonds.ts

@@ -0,0 +1,9 @@
+/**
+ * Copyright (c) 2017-2018 Mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ */
+
+export * from './bonds/comp'
+export * from './bonds/struct_conn'

+ 164 - 0
src/mol-model-parsers/structure/mmcif/bonds/comp.ts

@@ -0,0 +1,164 @@
+/**
+ * Copyright (c) 2017-2018 Mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ */
+
+import { Model } from 'mol-model/structure/model/model'
+import { LinkType } from 'mol-model/structure/model/types'
+import { ModelPropertyDescriptor } from 'mol-model/structure/model/properties/custom';
+import { mmCIF_Database } from 'mol-io/reader/cif/schema/mmcif';
+import { Structure, Unit, StructureProperties, StructureElement } from 'mol-model/structure';
+import { Segmentation } from 'mol-data/int';
+import { CifWriter } from 'mol-io/writer/cif'
+
+/**
+ * Bond definitions grouped per chemical component.
+ * `entries` is keyed by the id given to each `ComponentBond.Entry`
+ * (the `comp_id` value when built by `parseChemCompBond`).
+ */
+export interface ComponentBond {
+    entries: Map<string, ComponentBond.Entry>
+}
+
+export namespace ComponentBond {
+    /**
+     * Static model property descriptor named 'chem_comp_bond'.
+     *
+     * Its CIF export writes only those `chem_comp_bond` rows whose `comp_id`
+     * is the name of a residue found in the first exported structure.
+     */
+    export const Descriptor: ModelPropertyDescriptor = {
+        isStatic: true,
+        name: 'chem_comp_bond',
+        cifExport: {
+            prefix: '',
+            categories: [{
+                name: 'chem_comp_bond',
+                instance(ctx) {
+                    const structure = ctx.structures[0];
+                    const table = getChemCompBond(structure.model);
+                    if (!table) return CifWriter.Category.Empty;
+
+                    const usedNames = getUniqueResidueNames(structure);
+                    const rowCount = table._rowCount;
+                    const keptRows: number[] = [];
+                    for (let row = 0; row < rowCount; row++) {
+                        if (!usedNames.has(table.comp_id.value(row))) continue;
+                        keptRows.push(row);
+                    }
+
+                    return CifWriter.Category.ofTable(table, keptRows)
+                }
+            }]
+        }
+    }
+
+    /**
+     * Registers `Descriptor` on the model and stores the raw `chem_comp_bond`
+     * table from the model's mmCIF source data.
+     *
+     * @returns true when the property is attached (already or by this call);
+     * false when the source is not mmCIF or the table has no rows.
+     */
+    export function attachFromMmCif(model: Model): boolean {
+        if (model.customProperties.has(Descriptor)) return true;
+
+        const source = model.sourceData;
+        if (source.kind !== 'mmCIF') return false;
+
+        const table = source.data.chem_comp_bond;
+        if (table._rowCount === 0) return false;
+
+        model.customProperties.add(Descriptor);
+        model._staticPropertyData.__ComponentBondData__ = table;
+        return true;
+    }
+
+    /**
+     * Attaches already-parsed bond data to the model.
+     *
+     * Unless `force` is set, a model that already has `Descriptor` is left untouched.
+     * Otherwise any stored raw `chem_comp_bond` table is dropped and `bonds` is stored
+     * under `PropName`.
+     *
+     * @returns always true.
+     */
+    export function attachFromExternalData(model: Model, bonds: ComponentBond, force = false) {
+        const alreadyAttached = !force && model.customProperties.has(Descriptor);
+        if (alreadyAttached) return true;
+
+        const staticData = model._staticPropertyData;
+        if (staticData.__ComponentBondData__) {
+            delete staticData.__ComponentBondData__;
+        }
+        model.customProperties.add(Descriptor);
+        staticData[PropName] = bonds;
+        return true;
+    }
+
+    /** Mutable `ComponentBond` that is filled one component at a time. */
+    export class ComponentBondImpl implements ComponentBond {
+        entries: Map<string, ComponentBond.Entry> = new Map();
+
+        /** Creates a new entry for `id`, stores it under that id (replacing any existing one) and returns it. */
+        addEntry(id: string) {
+            const created = new Entry(id);
+            this.entries.set(id, created);
+            return created;
+        }
+    }
+
+    /** Bonds of one component: atom name -> partner atom name -> bond order and flags. */
+    export class Entry {
+        constructor(public id: string) {
+        }
+
+        map: Map<string, Map<string, { order: number, flags: number }>> = new Map();
+
+        /**
+         * Records the bond `a` -> `b`. An already recorded `a` -> `b` bond is kept unchanged.
+         * With `swap` (the default) the reverse direction `b` -> `a` is recorded as well.
+         */
+        add(a: string, b: string, order: number, flags: number, swap = true) {
+            let partners = this.map.get(a);
+            if (partners === void 0) {
+                partners = new Map<string, { order: number, flags: number }>();
+                this.map.set(a, partners);
+            }
+            // the first definition of a bond wins
+            if (!partners.has(b)) partners.set(b, { order, flags });
+
+            if (swap) this.add(b, a, order, flags, false);
+        }
+    }
+
+    /**
+     * Converts a `chem_comp_bond` table into per-component bond entries.
+     *
+     * Every row becomes a covalent bond (plus the aromatic flag when
+     * `pdbx_aromatic_flag` is 'Y') between `atom_id_1` and `atom_id_2` of the row's
+     * `comp_id`. The bond order is derived from `value_order`: 'doub' and 'delo' give 2,
+     * 'trip' gives 3, 'quad' gives 4, anything else gives 1.
+     *
+     * An empty table yields an empty result, and rows of one component that are not
+     * stored contiguously are collected into the same entry.
+     */
+    export function parseChemCompBond(data: mmCIF_Database['chem_comp_bond']): ComponentBond {
+        const { comp_id, atom_id_1, atom_id_2, value_order, pdbx_aromatic_flag, _rowCount: rowCount } = data;
+
+        const compBond = new ComponentBondImpl();
+        // entry of the component seen in the previous row; rows of one component usually follow each other
+        let entry: Entry | undefined = void 0;
+        for (let i = 0; i < rowCount; i++) {
+            const id = comp_id.value(i)!;
+            const nameA = atom_id_1.value(i)!;
+            const nameB = atom_id_2.value(i)!;
+            const order = value_order.value(i)!;
+            const aromatic = pdbx_aromatic_flag.value(i) === 'Y';
+
+            if (entry === void 0 || entry.id !== id) {
+                // reuse an existing entry so that interleaved rows do not overwrite earlier bonds
+                entry = compBond.entries.get(id) || compBond.addEntry(id);
+            }
+
+            let flags: number = LinkType.Flag.Covalent;
+            let ord = 1;
+            if (aromatic) flags |= LinkType.Flag.Aromatic;
+            switch (order.toLowerCase()) {
+                case 'doub':
+                case 'delo':
+                    ord = 2;
+                    break;
+                case 'trip': ord = 3; break;
+                case 'quad': ord = 4; break;
+            }
+
+            entry.add(nameA, nameB, ord, flags);
+        }
+
+        return compBond;
+    }
+
+    /**
+     * Reads the raw `chem_comp_bond` table stored on the model under `__ComponentBondData__`.
+     * The value is cast, not checked: it is undefined when nothing was stored there.
+     */
+    function getChemCompBond(model: Model) {
+        return model._staticPropertyData.__ComponentBondData__ as mmCIF_Database['chem_comp_bond'];
+    }
+
+    /** Key under which the parsed `ComponentBond` is kept in `model._staticPropertyData`. */
+    export const PropName = '__ComponentBond__';
+
+    /**
+     * Returns the component bonds of a model, parsing the stored `chem_comp_bond`
+     * table on first use and caching the result under `PropName`.
+     *
+     * @returns undefined when the property is not attached or no table is stored.
+     */
+    export function get(model: Model): ComponentBond | undefined {
+        const cached = model._staticPropertyData[PropName];
+        if (cached) return cached;
+        if (!model.customProperties.has(Descriptor)) return void 0;
+
+        const table = getChemCompBond(model);
+        if (!table) return void 0;
+
+        const parsed = parseChemCompBond(table);
+        model._staticPropertyData[PropName] = parsed;
+        return parsed;
+    }
+
+    /** Collects the distinct `label_comp_id` values of all residues in the atomic units of `s`. */
+    function getUniqueResidueNames(s: Structure) {
+        const compIdOf = StructureProperties.residue.label_comp_id;
+        const result = new Set<string>();
+        const location = StructureElement.create();
+        for (const unit of s.units) {
+            if (Unit.isAtomic(unit)) {
+                const segments = Segmentation.transientSegments(unit.model.atomicHierarchy.residueAtomSegments, unit.elements);
+                location.unit = unit;
+                while (segments.hasNext) {
+                    // the first element of the segment stands in for the whole residue
+                    const { start } = segments.move();
+                    location.element = unit.elements[start];
+                    result.add(compIdOf(location));
+                }
+            }
+        }
+        return result;
+    }
+}

+ 249 - 0
src/mol-model-parsers/structure/mmcif/bonds/struct_conn.ts

@@ -0,0 +1,249 @@
+/**
+ * Copyright (c) 2017-2018 Mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ */
+
+import { Model } from 'mol-model/structure/model/model'
+import { Structure } from 'mol-model/structure'
+import { LinkType } from 'mol-model/structure/model/types'
+import { findEntityIdByAsymId, findAtomIndexByLabelName } from '../util'
+import { Column } from 'mol-data/db'
+import { ModelPropertyDescriptor } from 'mol-model/structure/model/properties/custom';
+import { mmCIF_Database, mmCIF_Schema } from 'mol-io/reader/cif/schema/mmcif';
+import { SortedArray } from 'mol-data/int';
+import { CifWriter } from 'mol-io/writer/cif'
+import { ElementIndex, ResidueIndex } from 'mol-model/structure/model/indexing';
+
+/** Lookup over the `struct_conn` entries that could be resolved against a model. */
+export interface StructConn {
+    /** Entries indexed under the given pair of residue indices; the argument order does not matter in the implementation in this file. */
+    getResidueEntries(residueAIndex: ResidueIndex, residueBIndex: ResidueIndex): ReadonlyArray<StructConn.Entry>,
+    /** Entries that have the given atom as one of their partners. */
+    getAtomEntries(atomIndex: ElementIndex): ReadonlyArray<StructConn.Entry>,
+    /** All resolved entries. */
+    readonly entries: ReadonlyArray<StructConn.Entry>
+}
+
+export namespace StructConn {
+    /**
+     * Static model property descriptor named 'struct_conn'.
+     *
+     * Its CIF export writes only the `struct_conn` rows whose every resolved partner
+     * atom is an element of some unit of the first exported structure.
+     */
+    export const Descriptor: ModelPropertyDescriptor = {
+        isStatic: true,
+        name: 'struct_conn',
+        cifExport: {
+            prefix: '',
+            categories: [{
+                name: 'struct_conn',
+                instance(ctx) {
+                    const structure = ctx.structures[0];
+                    const model = structure.model;
+                    const table = getStructConn(model);
+                    if (!table) return CifWriter.Category.Empty;
+
+                    const parsed = get(model);
+                    if (!parsed || parsed.entries.length === 0) return CifWriter.Category.Empty;
+
+                    // atoms already confirmed present, so a shared partner is searched for only once
+                    const knownAtoms = new Set<ElementIndex>();
+                    const rows: number[] = [];
+                    for (const entry of parsed.entries) {
+                        const complete = entry.partners.every(({ atomIndex }) => {
+                            if (knownAtoms.has(atomIndex)) return true;
+                            if (!hasAtom(structure, atomIndex)) return false;
+                            knownAtoms.add(atomIndex);
+                            return true;
+                        });
+                        if (complete) rows.push(entry.rowIndex);
+                    }
+
+                    return CifWriter.Category.ofTable(table, rows);
+                }
+            }]
+        }
+    }
+
+    /** Tells whether `element` is among the elements of any unit of the structure. */
+    function hasAtom({ units }: Structure, element: ElementIndex) {
+        for (const unit of units) {
+            const position = SortedArray.indexOf(unit.elements, element);
+            if (position >= 0) return true;
+        }
+        return false;
+    }
+
+    /** Order-independent map key for a pair of residue indices (smaller index first). */
+    function _resKey(rA: number, rB: number) {
+        if (rA < rB) return `${rA}-${rB}`;
+        return `${rB}-${rA}`;
+    }
+    /** Shared result for lookups that match nothing; only handed out as a ReadonlyArray. */
+    const _emptyEntry: Entry[] = [];
+
+    /** `StructConn` over a fixed entry list; the lookup maps are built on first use. */
+    class StructConnImpl implements StructConn {
+        private _residuePairIndex: Map<string, StructConn.Entry[]> | undefined = void 0;
+        private _atomIndex: Map<number, StructConn.Entry[]> | undefined = void 0;
+
+        /** Builds (once) the map from residue-pair key to the entries connecting that pair. */
+        private getResiduePairIndex() {
+            if (this._residuePairIndex) return this._residuePairIndex;
+            this._residuePairIndex = new Map();
+            for (const e of this.entries) {
+                const ps = e.partners;
+                const l = ps.length;
+                for (let i = 0; i < l - 1; i++) {
+                    // start at i + 1: every unordered partner pair is visited exactly once
+                    // and a partner is never paired with itself
+                    for (let j = i + 1; j < l; j++) {
+                        const key = _resKey(ps[i].residueIndex, ps[j].residueIndex);
+                        if (this._residuePairIndex.has(key)) {
+                            this._residuePairIndex.get(key)!.push(e);
+                        } else {
+                            this._residuePairIndex.set(key, [e]);
+                        }
+                    }
+                }
+            }
+            return this._residuePairIndex;
+        }
+
+        /** Builds (once) the map from partner atom index to the entries that contain it. */
+        private getAtomIndex() {
+            if (this._atomIndex) return this._atomIndex;
+            this._atomIndex = new Map();
+            for (const e of this.entries) {
+                for (const p of e.partners) {
+                    const key = p.atomIndex;
+                    if (this._atomIndex.has(key)) {
+                        this._atomIndex.get(key)!.push(e);
+                    } else {
+                        this._atomIndex.set(key, [e]);
+                    }
+                }
+            }
+            return this._atomIndex;
+        }
+
+
+        /** Entries connecting the two residues, in either argument order; an empty array when there are none. */
+        getResidueEntries(residueAIndex: ResidueIndex, residueBIndex: ResidueIndex): ReadonlyArray<StructConn.Entry> {
+            return this.getResiduePairIndex().get(_resKey(residueAIndex, residueBIndex)) || _emptyEntry;
+        }
+
+        /** Entries that have `atomIndex` as a partner; an empty array when there are none. */
+        getAtomEntries(atomIndex: ElementIndex): ReadonlyArray<StructConn.Entry> {
+            return this.getAtomIndex().get(atomIndex) || _emptyEntry;
+        }
+
+        constructor(public entries: StructConn.Entry[]) {
+        }
+    }
+
+    /** One `struct_conn` row whose partners were both resolved against the model. */
+    export interface Entry {
+        /** Index of the row in the `struct_conn` table. */
+        rowIndex: number,
+        /** Value of `pdbx_dist_value` for the row. */
+        distance: number,
+        /** Bond order derived from `pdbx_value_order` (1 when not recognized). */
+        order: number,
+        /** `LinkType.Flag` bits derived from `conn_type_id`. */
+        flags: number,
+        /** Resolved partners; `symmetry` falls back to '1_555' when the column value is empty. */
+        partners: { residueIndex: ResidueIndex, atomIndex: ElementIndex, symmetry: string }[]
+    }
+
+    /** Value type of the `struct_conn.conn_type_id` column as declared in the mmCIF schema. */
+    type StructConnType = typeof mmCIF_Schema.struct_conn.conn_type_id.T
+
+    /**
+     * Registers `Descriptor` on the model and stores the raw `struct_conn`
+     * table from the model's mmCIF source data.
+     *
+     * @returns true when the property is attached (already or by this call);
+     * false when the source is not mmCIF or the table has no rows.
+     */
+    export function attachFromMmCif(model: Model): boolean {
+        if (model.customProperties.has(Descriptor)) return true;
+
+        const source = model.sourceData;
+        if (source.kind !== 'mmCIF') return false;
+
+        const table = source.data.struct_conn;
+        if (table._rowCount === 0) return false;
+
+        model.customProperties.add(Descriptor);
+        model._staticPropertyData.__StructConnData__ = table;
+        return true;
+    }
+
+    /**
+     * Reads the raw `struct_conn` table stored on the model under `__StructConnData__`.
+     * The value is cast, not checked: it is undefined when nothing was stored there.
+     */
+    function getStructConn(model: Model) {
+        return model._staticPropertyData.__StructConnData__ as mmCIF_Database['struct_conn'];
+    }
+
+    /** Key under which the parsed `StructConn` is kept in `model._staticPropertyData`. */
+    export const PropName = '__StructConn__';
+
+    /**
+     * Returns the resolved `struct_conn` entries of a model, building them from the
+     * stored table on first use and caching the result under `PropName`.
+     *
+     * A row is kept only when both partners resolve to a residue and an atom of the model.
+     *
+     * @returns undefined when the property is not attached to the model.
+     */
+    export function get(model: Model): StructConn | undefined {
+        const cached = model._staticPropertyData[PropName];
+        if (cached) return cached;
+        if (!model.customProperties.has(Descriptor)) return void 0;
+
+        const struct_conn = getStructConn(model);
+        const { conn_type_id, pdbx_dist_value, pdbx_value_order } = struct_conn;
+
+        // columns describing the first and the second partner of a row
+        const firstColumns = {
+            label_asym_id: struct_conn.ptnr1_label_asym_id,
+            label_seq_id: struct_conn.ptnr1_label_seq_id,
+            auth_seq_id: struct_conn.ptnr1_auth_seq_id,
+            label_atom_id: struct_conn.ptnr1_label_atom_id,
+            label_alt_id: struct_conn.pdbx_ptnr1_label_alt_id,
+            ins_code: struct_conn.pdbx_ptnr1_PDB_ins_code,
+            symmetry: struct_conn.ptnr1_symmetry
+        };
+        const secondColumns: typeof firstColumns = {
+            label_asym_id: struct_conn.ptnr2_label_asym_id,
+            label_seq_id: struct_conn.ptnr2_label_seq_id,
+            auth_seq_id: struct_conn.ptnr2_auth_seq_id,
+            label_atom_id: struct_conn.ptnr2_label_atom_id,
+            label_alt_id: struct_conn.pdbx_ptnr2_label_alt_id,
+            ins_code: struct_conn.pdbx_ptnr2_PDB_ins_code,
+            symmetry: struct_conn.ptnr2_symmetry
+        };
+
+        /** Resolves one partner of a row; undefined when its residue or atom cannot be found. */
+        const resolvePartner = (row: number, columns: typeof firstColumns) => {
+            if (columns.label_asym_id.valueKind(row) !== Column.ValueKind.Present) return void 0;
+            const asymId = columns.label_asym_id.value(row);
+            const residueIndex = model.atomicHierarchy.index.findResidue(
+                findEntityIdByAsymId(model, asymId),
+                asymId,
+                columns.auth_seq_id.value(row),
+                columns.ins_code.value(row)
+            );
+            if (residueIndex < 0) return void 0;
+            // turns out "mismat" records might not have atom name value
+            const atomName = columns.label_atom_id.value(row);
+            if (!atomName) return void 0;
+            const atomIndex = findAtomIndexByLabelName(model, residueIndex, atomName, columns.label_alt_id.value(row));
+            if (atomIndex < 0) return void 0;
+            return { residueIndex, atomIndex, symmetry: columns.symmetry.value(row) || '1_555' };
+        };
+
+        const entries: StructConn.Entry[] = [];
+        const rowCount = struct_conn._rowCount;
+        for (let row = 0; row < rowCount; row++) {
+            const first = resolvePartner(row, firstColumns);
+            const second = resolvePartner(row, secondColumns);
+            // rows with an unresolved partner are dropped
+            if (!first || !second) continue;
+            const partners = [first, second];
+
+            const type = conn_type_id.value(row)! as StructConnType;
+            const orderType = (pdbx_value_order.value(row) || '').toLowerCase();
+
+            let order = 1;
+            switch (orderType) {
+                case 'sing': order = 1; break;
+                case 'doub': order = 2; break;
+                case 'trip': order = 3; break;
+                case 'quad': order = 4; break;
+            }
+
+            let flags = LinkType.Flag.None;
+            switch (type) {
+                case 'covale':
+                case 'covale_base':
+                case 'covale_phosphate':
+                case 'covale_sugar':
+                case 'modres':
+                    flags = LinkType.Flag.Covalent;
+                    break;
+                case 'disulf': flags = LinkType.Flag.Covalent | LinkType.Flag.Sulfide; break;
+                case 'hydrog': flags = LinkType.Flag.Hydrogen; break;
+                case 'metalc': flags = LinkType.Flag.MetallicCoordination; break;
+                case 'saltbr': flags = LinkType.Flag.Ionic; break;
+            }
+
+            entries.push({ rowIndex: row, flags, order, distance: pdbx_dist_value.value(row), partners });
+        }
+
+        const result = new StructConnImpl(entries);
+        model._staticPropertyData[PropName] = result;
+        return result;
+    }
+}

+ 102 - 0
src/mol-model-parsers/structure/mmcif/ihm.ts

@@ -0,0 +1,102 @@
+/**
+ * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import { mmCIF_Database as mmCIF, mmCIF_Schema } from 'mol-io/reader/cif/schema/mmcif'
+import { CoarseHierarchy, CoarseConformation, CoarseElementData, CoarseSphereConformation, CoarseGaussianConformation } from 'mol-model/structure/model/properties/coarse'
+import { Entities } from 'mol-model/structure/model/properties/common';
+import { Column } from 'mol-data/db';
+import { getCoarseKeys } from 'mol-model/structure/model/properties/utils/coarse-keys';
+import { UUID } from 'mol-util';
+import { Segmentation, Interval } from 'mol-data/int';
+import { Mat3, Tensor } from 'mol-math/linear-algebra';
+import { ElementIndex, ChainIndex } from 'mol-model/structure/model/indexing';
+import { getCoarseRanges } from 'mol-model/structure/model/properties/utils/coarse-ranges';
+import { FormatData } from '../mmcif';
+
+/**
+ * Per-model input for building the coarse-grained part of a model.
+ * `getIHMCoarse` reads `entities` and the two `ihm_*_obj_site` tables from it.
+ */
+export interface IHMData {
+    model_id: number,
+    model_name: string,
+    entities: Entities,
+    atom_site: mmCIF['atom_site'],
+    ihm_sphere_obj_site: mmCIF['ihm_sphere_obj_site'],
+    ihm_gaussian_obj_site: mmCIF['ihm_gaussian_obj_site']
+}
+
+/** Result used when a model has no coarse elements: the empty hierarchy and no conformation. */
+export const EmptyIHMCoarse = { hierarchy: CoarseHierarchy.Empty, conformation: void 0 as any }
+
+/**
+ * Builds the coarse hierarchy and conformation from the sphere and gaussian
+ * object-site tables of `data`.
+ *
+ * @returns `EmptyIHMCoarse` when both tables have no rows.
+ */
+export function getIHMCoarse(data: IHMData, formatData: FormatData): { hierarchy: CoarseHierarchy, conformation: CoarseConformation } {
+    const sphereSite = data.ihm_sphere_obj_site;
+    const gaussianSite = data.ihm_gaussian_obj_site;
+
+    const hasNoCoarseRows = sphereSite._rowCount === 0 && gaussianSite._rowCount === 0;
+    if (hasNoCoarseRows) return EmptyIHMCoarse;
+
+    const componentMap = formatData.chemicalComponentMap;
+
+    const sphereData = getData(sphereSite);
+    const spheres = getSphereConformation(sphereSite);
+    const sphereKeys = getCoarseKeys(sphereData, data.entities);
+    const sphereRanges = getCoarseRanges(sphereData, componentMap);
+
+    const gaussianData = getData(gaussianSite);
+    const gaussians = getGaussianConformation(gaussianSite);
+    const gaussianKeys = getCoarseKeys(gaussianData, data.entities);
+    const gaussianRanges = getCoarseRanges(gaussianData, componentMap);
+
+    const hierarchy: CoarseHierarchy = {
+        isDefined: true,
+        spheres: { ...sphereData, ...sphereKeys, ...sphereRanges },
+        gaussians: { ...gaussianData, ...gaussianKeys, ...gaussianRanges },
+    };
+    const conformation: CoarseConformation = { id: UUID.create22(), spheres, gaussians };
+    return { hierarchy, conformation };
+}
+
+/** Copies the position, radius and rmsf columns of the sphere table into Float32Array-backed arrays. */
+function getSphereConformation(data: mmCIF['ihm_sphere_obj_site']): CoarseSphereConformation {
+    const x = data.Cartn_x.toArray({ array: Float32Array });
+    const y = data.Cartn_y.toArray({ array: Float32Array });
+    const z = data.Cartn_z.toArray({ array: Float32Array });
+    const radius = data.object_radius.toArray({ array: Float32Array });
+    const rmsf = data.rmsf.toArray({ array: Float32Array });
+    return { x, y, z, radius, rmsf };
+}
+
+/**
+ * Copies the mean position and weight columns of the gaussian table into
+ * Float32Array-backed arrays and converts each covariance tensor to a `Mat3`.
+ */
+function getGaussianConformation(data: mmCIF['ihm_gaussian_obj_site']): CoarseGaussianConformation {
+    const space = mmCIF_Schema.ihm_gaussian_obj_site.covariance_matrix.space;
+    const tensors = data.covariance_matrix;
+
+    const covariance_matrix: Mat3[] = [];
+    const rowCount = tensors.rowCount;
+    for (let row = 0; row < rowCount; row++) {
+        covariance_matrix.push(Tensor.toMat3(space, tensors.value(row)));
+    }
+
+    const x = data.mean_Cartn_x.toArray({ array: Float32Array });
+    const y = data.mean_Cartn_y.toArray({ array: Float32Array });
+    const z = data.mean_Cartn_z.toArray({ array: Float32Array });
+    const weight = data.weight.toArray({ array: Float32Array });
+    return { x, y, z, weight, covariance_matrix };
+}
+
+/**
+ * Segments the rows into chains: a new chain starts at every row whose `asym_id`
+ * differs from the previous row's. The two `seq_id` columns are not read.
+ */
+function getSegments(asym_id: Column<string>, seq_id_begin: Column<number>, seq_id_end: Column<number>) {
+    const rowCount = asym_id.rowCount;
+    const chainOffsets = [0 as ElementIndex];
+    for (let row = 1; row < rowCount; row++) {
+        if (asym_id.areValuesEqual(row - 1, row)) continue;
+        chainOffsets.push(row as ElementIndex);
+    }
+
+    const chainElementSegments = Segmentation.ofOffsets<ElementIndex, ChainIndex>(chainOffsets, Interval.ofBounds(0, rowCount));
+    return { chainElementSegments };
+}
+
+/** Bundles the identifying columns of a sphere or gaussian table with its chain segmentation. */
+function getData(data: mmCIF['ihm_sphere_obj_site'] | mmCIF['ihm_gaussian_obj_site']): CoarseElementData {
+    const { entity_id, seq_id_begin, seq_id_end, asym_id } = data;
+    const count = entity_id.rowCount;
+    const segments = getSegments(asym_id, seq_id_begin, seq_id_end);
+    return { count, entity_id, asym_id, seq_id_begin, seq_id_end, ...segments };
+}

+ 8 - 0
src/mol-model-parsers/structure/mmcif/pair-restraint.ts

@@ -0,0 +1,8 @@
+/**
+ * Copyright (c) 2018 Mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ */
+
+export * from './pair-restraints/cross-links'
+// export * from './pair-restraints/predicted-contacts'

+ 107 - 0
src/mol-model-parsers/structure/mmcif/pair-restraints/cross-links.ts

@@ -0,0 +1,107 @@
+/**
+ * Copyright (c) 2018 Mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ */
+
+import { Model } from 'mol-model/structure/model/model'
+import { Table } from 'mol-data/db'
+import { mmCIF_Schema } from 'mol-io/reader/cif/schema/mmcif';
+import { findAtomIndexByLabelName } from '../util';
+import { Unit } from 'mol-model/structure';
+import { ElementIndex } from 'mol-model/structure/model/indexing';
+
+/**
+ * Looks up an atom by entity, asym id, sequence id and atom name (with an empty alt id).
+ *
+ * @returns -1 when the model's `auth_atom_id` column is not defined or the residue is
+ * not found; otherwise whatever `findAtomIndexByLabelName` returns.
+ */
+function findAtomIndex(model: Model, entityId: string, asymId: string, seqId: number, atomId: string) {
+    const hierarchy = model.atomicHierarchy;
+    if (!hierarchy.atoms.auth_atom_id.isDefined) return -1
+
+    const residueIndex = hierarchy.index.findResidue(entityId, asymId, seqId)
+    return residueIndex < 0 ? -1 : findAtomIndexByLabelName(model, residueIndex, atomId, '') as ElementIndex
+}
+
+/** Cross-link restraints of a model together with a lookup from element to restraint rows. */
+export interface IHMCrossLinkRestraint {
+    /** Row indices into `data` of the restraints that involve `element` of the given unit kind; empty when none. */
+    getIndicesByElement: (element: ElementIndex, kind: Unit.Kind) => number[]
+    /** The `ihm_cross_link_restraint` table the indices refer to. */
+    data: Table<mmCIF_Schema['ihm_cross_link_restraint']>
+}
+
+export namespace IHMCrossLinkRestraint {
+    /** Key under which the built restraint lookup is kept in `model._staticPropertyData`. */
+    export const PropName = '__CrossLinkRestraint__';
+
+    /**
+     * Returns the cross-link restraint lookup of a model, building and caching it on first use.
+     *
+     * Each restraint row is registered for both of its partners: 'by-atom' rows against the
+     * atomic element, all other rows against a sphere element or, failing that, a gaussian
+     * element (only when the coarse hierarchy is defined).
+     *
+     * @returns undefined when the source is not mmCIF or the restraint table has no rows.
+     */
+    export function fromModel(model: Model): IHMCrossLinkRestraint | undefined {
+        const cached = model._staticPropertyData[PropName]
+        if (cached) return cached
+
+        if (model.sourceData.kind !== 'mmCIF') return
+        const restraints = model.sourceData.data.ihm_cross_link_restraint;
+        if (!restraints._rowCount) return
+
+        /** map from atomic element to cross link indices */
+        const atomicElementMap: Map<ElementIndex, number[]> = new Map()
+        /** map from sphere element to cross link indices */
+        const sphereElementMap: Map<ElementIndex, number[]> = new Map()
+        /** map from gaussian element to cross link indices */
+        const gaussianElementMap: Map<ElementIndex, number[]> = new Map()
+
+        const noIndices: number[] = [];
+
+        const firstColumns = {
+            entity_id: restraints.entity_id_1,
+            asym_id: restraints.asym_id_1,
+            seq_id: restraints.seq_id_1,
+            atom_id: restraints.atom_id_1,
+        }
+        const secondColumns: typeof firstColumns = {
+            entity_id: restraints.entity_id_2,
+            asym_id: restraints.asym_id_2,
+            seq_id: restraints.seq_id_2,
+            atom_id: restraints.atom_id_2,
+        }
+
+        /** Appends `row` to the list kept for `element`, creating the list when needed. */
+        const register = (map: Map<ElementIndex, number[]>, element: ElementIndex, row: number) => {
+            const rows = map.get(element)
+            if (rows === undefined) map.set(element, [ row ])
+            else rows.push(row)
+        }
+
+        /** Registers `row` for the partner described by `columns`. */
+        const addPartner = (row: number, columns: typeof firstColumns) => {
+            const entityId = columns.entity_id.value(row)
+            const asymId = columns.asym_id.value(row)
+            const seqId = columns.seq_id.value(row)
+
+            if (restraints.model_granularity.value(row) === 'by-atom') {
+                const atomicElement = findAtomIndex(model, entityId, asymId, seqId, columns.atom_id.value(row))
+                if (atomicElement >= 0) register(atomicElementMap, atomicElement as ElementIndex, row)
+                return
+            }
+            if (!model.coarseHierarchy.isDefined) return
+
+            // spheres take precedence; gaussians are consulted only when no sphere matches
+            const sphereElement = model.coarseHierarchy.spheres.findSequenceKey(entityId, asymId, seqId)
+            if (sphereElement >= 0) {
+                register(sphereElementMap, sphereElement, row)
+                return
+            }
+            const gaussianElement = model.coarseHierarchy.gaussians.findSequenceKey(entityId, asymId, seqId)
+            if (gaussianElement >= 0) register(gaussianElementMap, gaussianElement, row)
+        }
+
+        function getMapByKind(kind: Unit.Kind) {
+            switch (kind) {
+                case Unit.Kind.Atomic: return atomicElementMap;
+                case Unit.Kind.Spheres: return sphereElementMap;
+                case Unit.Kind.Gaussians: return gaussianElementMap;
+            }
+        }
+
+        const rowCount = restraints._rowCount
+        for (let row = 0; row < rowCount; ++row) {
+            addPartner(row, firstColumns)
+            addPartner(row, secondColumns)
+        }
+
+        const crossLinkRestraint = {
+            getIndicesByElement: (element: ElementIndex, kind: Unit.Kind) => {
+                return getMapByKind(kind).get(element) || noIndices
+            },
+            data: restraints
+        }
+        model._staticPropertyData[PropName] = crossLinkRestraint
+        return crossLinkRestraint
+    }
+}

+ 26 - 0
src/mol-model-parsers/structure/mmcif/pair-restraints/predicted-contacts.ts

@@ -0,0 +1,26 @@
+/**
+ * Copyright (c) 2018 Mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ */
+
+// TODO
+// ihm_predicted_contact_restraint: {
+//     id: int,
+//     entity_id_1: str,
+//     entity_id_2: str,
+//     asym_id_1: str,
+//     asym_id_2: str,
+//     comp_id_1: str,
+//     comp_id_2: str,
+//     seq_id_1: int,
+//     seq_id_2: int,
+//     atom_id_1: str,
+//     atom_id_2: str,
+//     distance_upper_limit: float,
+//     probability: float,
+//     restraint_type: Aliased<'lower bound' | 'upper bound' | 'lower and upper bound'>(str),
+//     model_granularity: Aliased<'by-residue' | 'by-feature' | 'by-atom'>(str),
+//     dataset_list_id: int,
+//     software_id: int,
+// },

+ 175 - 0
src/mol-model-parsers/structure/mmcif/secondary-structure.ts

@@ -0,0 +1,175 @@
+
+/**
+ * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import { mmCIF_Database as mmCIF, mmCIF_Database } from 'mol-io/reader/cif/schema/mmcif'
+import { SecondaryStructureType } from 'mol-model/structure/model/types';
+import { AtomicHierarchy } from 'mol-model/structure/model/properties/atomic';
+import { SecondaryStructure } from 'mol-model/structure/model/properties/seconday-structure';
+import { Column } from 'mol-data/db';
+import { ChainIndex, ResidueIndex } from 'mol-model/structure/model/indexing';
+
+/**
+ * Builds the per-residue secondary structure assignment of a hierarchy from the
+ * `struct_conf` (helices) and `struct_sheet_range` (sheets) categories.
+ *
+ * `elements[0]` is the 'none' element; the `type` and `key` arrays hold one
+ * slot per residue of the hierarchy.
+ */
+export function getSecondaryStructureMmCif(data: mmCIF_Database, hierarchy: AtomicHierarchy): SecondaryStructure {
+    const elements: SecondaryStructure.Element[] = [{ kind: 'none' }];
+    const entriesByChain: SecondaryStructureMap = new Map();
+
+    // must add Helices 1st because of 'key' value assignment.
+    addHelices(data.struct_conf, entriesByChain, elements);
+    addSheets(data.struct_sheet_range, entriesByChain, data.struct_conf._rowCount, elements);
+
+    const residueCount = hierarchy.residues._rowCount;
+    const secStruct: SecondaryStructureData = {
+        type: new Int32Array(residueCount) as any,
+        key: new Int32Array(residueCount) as any,
+        elements
+    };
+
+    if (entriesByChain.size > 0) assignSecondaryStructureRanges(hierarchy, entriesByChain, secStruct);
+    return secStruct;
+}
+
+/** One helix or sheet range: where it starts and ends, its type flags and its element key. */
+type SecondaryStructureEntry = {
+    startSeqNumber: number,
+    startInsCode: string | null,
+    endSeqNumber: number,
+    endInsCode: string | null,
+    type: SecondaryStructureType,
+    // position of the matching element in the `elements` array
+    key: number
+}
+/** asym id -> start sequence number -> entry starting there. */
+type SecondaryStructureMap = Map<string, Map<number, SecondaryStructureEntry>>
+/** Per-residue `type` and `key` arrays plus the list of elements the keys refer to. */
+type SecondaryStructureData = { type: SecondaryStructureType[], key: number[], elements: SecondaryStructure.Element[] }
+
+/**
+ * Adds one helix element and one range entry per `struct_conf` row.
+ *
+ * The type flags come from `pdbx_PDB_helix_class` when present, else from
+ * `conf_type_id` when present, else they are `Flag.NA`. Entries are stored in `map`
+ * under the begin asym id and begin sequence number; a later row with the same
+ * start replaces the earlier one.
+ */
+function addHelices(cat: mmCIF['struct_conf'], map: SecondaryStructureMap, elements: SecondaryStructure.Element[]) {
+    const rowCount = cat._rowCount;
+    if (!rowCount) return;
+
+    const {
+        beg_label_asym_id, beg_label_seq_id, pdbx_beg_PDB_ins_code,
+        end_label_seq_id, pdbx_end_PDB_ins_code,
+        pdbx_PDB_helix_class, conf_type_id, details
+    } = cat;
+
+    for (let row = 0; row < rowCount; row++) {
+        const hasHelixClass = pdbx_PDB_helix_class.valueKind(row) === Column.ValueKind.Present;
+        const hasTypeId = conf_type_id.valueKind(row) === Column.ValueKind.Present;
+        const hasDetails = details.valueKind(row) === Column.ValueKind.Present;
+
+        const type = SecondaryStructureType.create(hasHelixClass
+            ? SecondaryStructureType.SecondaryStructurePdb[pdbx_PDB_helix_class.value(row)]
+            : hasTypeId
+                ? SecondaryStructureType.SecondaryStructureMmcif[conf_type_id.value(row)]
+                : SecondaryStructureType.Flag.NA);
+
+        const element: SecondaryStructure.Helix = {
+            kind: 'helix',
+            flags: type,
+            type_id: hasTypeId ? conf_type_id.value(row) : 'HELIX_P',
+            helix_class: pdbx_PDB_helix_class.value(row),
+            details: hasDetails ? details.value(row) : void 0
+        };
+        const entry: SecondaryStructureEntry = {
+            startSeqNumber: beg_label_seq_id.value(row),
+            startInsCode: pdbx_beg_PDB_ins_code.value(row),
+            endSeqNumber: end_label_seq_id.value(row),
+            endInsCode: pdbx_end_PDB_ins_code.value(row),
+            type,
+            // the element is appended right below, so its index is the current length
+            key: elements.length
+        };
+        elements.push(element);
+
+        const asymId = beg_label_asym_id.value(row)!;
+        let chainEntries = map.get(asymId);
+        if (!chainEntries) {
+            chainEntries = new Map<number, SecondaryStructureEntry>();
+            map.set(asymId, chainEntries);
+        }
+        chainEntries.set(entry.startSeqNumber, entry);
+    }
+}
+
+/**
+ * Adds one sheet element and one range entry per `struct_sheet_range` row.
+ *
+ * Every entry gets the Beta | BetaSheet flags and, as key, the index of its element
+ * in `elements`. Entries are stored in `map` under the begin asym id and begin
+ * sequence number; a later row with the same start replaces the earlier one.
+ *
+ * @param sheetCount Not used by the implementation; kept so existing callers stay valid.
+ */
+function addSheets(cat: mmCIF['struct_sheet_range'], map: SecondaryStructureMap, sheetCount: number, elements: SecondaryStructure.Element[]) {
+    if (!cat._rowCount) return;
+
+    const { beg_label_asym_id, beg_label_seq_id, pdbx_beg_PDB_ins_code } = cat;
+    const { end_label_seq_id, pdbx_end_PDB_ins_code } = cat;
+    const { sheet_id } = cat;
+
+    for (let i = 0, _i = cat._rowCount; i < _i; i++) {
+        const type = SecondaryStructureType.create(SecondaryStructureType.Flag.Beta | SecondaryStructureType.Flag.BetaSheet);
+        const element: SecondaryStructure.Sheet = {
+            kind: 'sheet',
+            flags: type,
+            sheet_id: sheet_id.value(i),
+            symmetry: void 0
+        }
+        const entry: SecondaryStructureEntry = {
+            startSeqNumber: beg_label_seq_id.value(i),
+            startInsCode: pdbx_beg_PDB_ins_code.value(i),
+            endSeqNumber: end_label_seq_id.value(i),
+            endInsCode: pdbx_end_PDB_ins_code.value(i),
+            type,
+            // the element is appended right below, so its index is the current length
+            key: elements.length
+        };
+
+        elements[elements.length] = element;
+
+        const asymId = beg_label_asym_id.value(i)!;
+        if (map.has(asymId)) {
+            map.get(asymId)!.set(entry.startSeqNumber, entry);
+        } else {
+            map.set(asymId, new Map([[entry.startSeqNumber, entry]]));
+        }
+    }
+}
+
+/**
+ * Tags residues with the entry's type and key, starting at `resStart` and stopping
+ * after the residue that matches the entry's end (sequence number and insertion code),
+ * after the first residue whose sequence number exceeds the end, or before `resEnd`.
+ */
+function assignSecondaryStructureEntry(hierarchy: AtomicHierarchy, entry: SecondaryStructureEntry, resStart: ResidueIndex, resEnd: ResidueIndex, data: SecondaryStructureData) {
+    const { label_seq_id, pdbx_PDB_ins_code } = hierarchy.residues;
+    const { endSeqNumber, endInsCode, key, type } = entry;
+
+    for (let rI = resStart; rI < resEnd; rI++) {
+        // NOTE(review): the residue is tagged before the end test, so a residue whose
+        // sequence number is already past the end is still tagged — confirm this is intended.
+        data.type[rI] = type;
+        data.key[rI] = key;
+
+        const seqNumber = label_seq_id.value(rI);
+        const pastEnd = seqNumber > endSeqNumber;
+        const atEnd = seqNumber === endSeqNumber && pdbx_PDB_ins_code.value(rI) === endInsCode;
+        if (pastEnd || atEnd) break;
+    }
+}
+
+/**
+ * Walks every chain that has entries in `map` and, for each residue whose sequence
+ * number and insertion code match the start of an entry, tags the entry's residue range.
+ */
+function assignSecondaryStructureRanges(hierarchy: AtomicHierarchy, map: SecondaryStructureMap, data: SecondaryStructureData) {
+    const chainCount = hierarchy.chainAtomSegments.count;
+    const asymIds = hierarchy.chains.label_asym_id;
+    const { label_seq_id, pdbx_PDB_ins_code } = hierarchy.residues;
+
+    for (let cI = 0 as ChainIndex; cI < chainCount; cI++) {
+        const resStart = AtomicHierarchy.chainStartResidueIndex(hierarchy, cI);
+        const resEnd = AtomicHierarchy.chainEndResidueIndexExcl(hierarchy, cI);
+
+        const chainEntries = map.get(asymIds.value(cI));
+        if (!chainEntries) continue;
+
+        for (let rI = resStart; rI < resEnd; rI++) {
+            const entry = chainEntries.get(label_seq_id.value(rI));
+            if (!entry) continue;
+            // the insertion code has to match the entry's start as well
+            if (entry.startInsCode !== pdbx_PDB_ins_code.value(rI)) continue;
+            assignSecondaryStructureEntry(hierarchy, entry, rI, resEnd, data);
+        }
+    }
+}

+ 55 - 0
src/mol-model-parsers/structure/mmcif/sequence.ts

@@ -0,0 +1,55 @@
+/**
+ * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import { mmCIF_Database as mmCIF } from 'mol-io/reader/cif/schema/mmcif'
+import StructureSequence from 'mol-model/structure/model/properties/sequence'
+import { Column } from 'mol-data/db';
+import { AtomicHierarchy } from 'mol-model/structure/model/properties/atomic';
+import { Entities } from 'mol-model/structure/model/properties/common';
+import { Sequence } from 'mol-model/sequence';
+
+// TODO how to handle microheterogeneity
+//    see http://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Categories/entity_poly_seq.html
+//
+// Data items in the ENTITY_POLY_SEQ category specify the sequence
+// of monomers in a polymer. Allowance is made for the possibility
+// of microheterogeneity in a sample by allowing a given sequence
+// number to be correlated with more than one monomer ID. The
+// corresponding ATOM_SITE entries should reflect this
+// heterogeneity.
+
+/**
+ * Builds the per-entity sequences of a structure.
+ *
+ * When `entity_poly_seq` has no rows the sequence is derived from the atomic
+ * hierarchy instead. Otherwise consecutive rows sharing the same `entity_id`
+ * are grouped into one sequence entry, registered both in `byEntityKey`
+ * (under the index returned by `entities.getEntityIndex`) and in `sequences`.
+ */
+export function getSequence(cif: mmCIF, entities: Entities, hierarchy: AtomicHierarchy, modResMap: ReadonlyMap<string, string>): StructureSequence {
+    const polySeq = cif.entity_poly_seq;
+    if (!polySeq._rowCount) return StructureSequence.fromAtomicHierarchy(entities, hierarchy, modResMap);
+
+    const { entity_id, num, mon_id } = polySeq;
+    const rowCount = entity_id.rowCount;
+
+    const byEntityKey: StructureSequence['byEntityKey'] = {};
+    const sequences: StructureSequence.Entity[] = [];
+
+    // Each pass handles one run [start, end) of rows that share an entity id.
+    for (let start = 0, end = 0; start < rowCount; start = end) {
+        end = start + 1;
+        while (end < rowCount && entity_id.areValuesEqual(end - 1, end)) end++;
+
+        const entityId = entity_id.value(start);
+        const compIds = Column.window(mon_id, start, end);
+        const seqNums = Column.window(num, start, end);
+        const entityKey = entities.getEntityIndex(entityId);
+
+        const entitySequence: StructureSequence.Entity = {
+            entityId,
+            compId: compIds,
+            num: seqNums,
+            sequence: Sequence.ofResidueNames(compIds, seqNums, modResMap)
+        };
+
+        byEntityKey[entityKey] = entitySequence;
+        sequences.push(entitySequence);
+    }
+
+    return { byEntityKey, sequences };
+}

+ 43 - 0
src/mol-model-parsers/structure/mmcif/sort.ts

@@ -0,0 +1,43 @@
+/**
+ * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import { mmCIF_Database } from 'mol-io/reader/cif/schema/mmcif';
+import { createRangeArray, makeBuckets } from 'mol-data/util';
+import { Column, Table } from 'mol-data/db';
+import { RuntimeContext } from 'mol-task';
+
+/** Returns true when every element equals its own index (vacuously true for an empty input). */
+function isIdentity(xs: ArrayLike<number>) {
+    const n = xs.length;
+    let pos = 0;
+    // Advance while the element matches its position; stop at the first mismatch.
+    while (pos < n && xs[pos] === pos) pos++;
+    return pos === n;
+}
+
+/**
+ * Produces an `atom_site` table for rows `start` .. `end - 1`, grouped by entity,
+ * then by chain, with polymer chains ordered by `label_seq_id`.
+ *
+ * Returns the input table itself when the computed order is the identity and
+ * spans the whole table; otherwise returns a `Table.view` over the reordered rows.
+ * Yields to `ctx` between bucketing steps whenever `ctx.shouldUpdate` is set.
+ *
+ * NOTE(review): assumes `makeBuckets` rearranges `order` in place and returns
+ * bucket boundary offsets - confirm against mol-data/util.
+ */
+export async function sortAtomSite(ctx: RuntimeContext, atom_site: mmCIF_Database['atom_site'], start: number, end: number) {
+    const { label_entity_id, label_asym_id, label_seq_id } = atom_site;
+    const order = createRangeArray(start, end - 1);
+
+    const entityOffsets = makeBuckets(order, label_entity_id.value);
+    if (ctx.shouldUpdate) await ctx.update();
+
+    const lastEntity = entityOffsets.length - 1;
+    for (let eI = 0; eI < lastEntity; eI++) {
+        const chainOffsets = makeBuckets(order, label_asym_id.value, { start: entityOffsets[eI], end: entityOffsets[eI + 1] });
+
+        const lastChain = chainOffsets.length - 1;
+        for (let cI = 0; cI < lastChain; cI++) {
+            const chainStart = chainOffsets[cI];
+            // Only chains whose first element has a sequence id get sorted
+            // (otherwise we are in HETATM territory).
+            if (label_seq_id.valueKind(chainStart) === Column.ValueKind.Present) {
+                makeBuckets(order, label_seq_id.value, { sort: true, start: chainStart, end: chainOffsets[cI + 1] });
+                if (ctx.shouldUpdate) await ctx.update();
+            }
+        }
+        if (ctx.shouldUpdate) await ctx.update();
+    }
+
+    const alreadySorted = isIdentity(order) && order.length === atom_site._rowCount;
+    return alreadySorted
+        ? atom_site
+        : Table.view(atom_site, atom_site._schema, order) as mmCIF_Database['atom_site'];
+}

+ 26 - 0
src/mol-model-parsers/structure/mmcif/util.ts

@@ -0,0 +1,26 @@
+/**
+ * Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ */
+
+import { Model } from 'mol-model/structure/model'
+import { ElementIndex } from 'mol-model/structure/model/indexing';
+
+/**
+ * Looks up the `entity_id` of the first `struct_asym` row whose `id` equals `asymId`.
+ * Returns an empty string when the model is not mmCIF-backed or no row matches.
+ */
+export function findEntityIdByAsymId(model: Model, asymId: string) {
+    const { sourceData } = model
+    if (sourceData.kind !== 'mmCIF') return ''
+
+    const { id, entity_id, _rowCount } = sourceData.data.struct_asym
+    for (let row = 0; row < _rowCount; row++) {
+        if (id.value(row) === asymId) return entity_id.value(row)
+    }
+    return ''
+}
+
+/**
+ * Finds the first atom of residue `residueIndex` whose `label_atom_id` equals `atomName`.
+ * When `altLoc` is non-empty the atom's `label_alt_id` must match it as well;
+ * a null or empty `altLoc` accepts any alternate location.
+ * Returns -1 (typed as ElementIndex) when no atom qualifies.
+ */
+export function findAtomIndexByLabelName(model: Model, residueIndex: number, atomName: string, altLoc: string | null): ElementIndex {
+    const { atoms, residueAtomSegments } = model.atomicHierarchy;
+    const firstAtom = residueAtomSegments.offsets[residueIndex];
+    const endAtomExcl = residueAtomSegments.offsets[residueIndex + 1];
+
+    for (let aI = firstAtom; aI < endAtomExcl; aI++) {
+        if (atoms.label_atom_id.value(aI) !== atomName) continue;
+        if (!altLoc || atoms.label_alt_id.value(aI) === altLoc) return aI as ElementIndex;
+    }
+    return -1 as ElementIndex;
+}

+ 0 - 269
src/mol-model/structure/model/formats/pdb.ts

@@ -1,269 +0,0 @@
-/**
- * Copyright (c) 2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
- *
- * @author David Sehnal <david.sehnal@gmail.com>
- */
-
-import Format from '../format';
-import { Model } from '../model';
-import { Task } from 'mol-task';
-import { PdbFile } from 'mol-io/reader/pdb/schema';
-import from_mmCIF from './mmcif';
-import { mmCIF_Schema } from 'mol-io/reader/cif/schema/mmcif';
-import { substringStartsWith } from 'mol-util/string';
-import { TokenBuilder, Tokenizer } from 'mol-io/reader/common/text/tokenizer';
-import { CifField, CifCategory } from 'mol-io/reader/cif';
-import CifTextField, { CifTextValueField } from 'mol-io/reader/cif/text/field';
-
-/**
- * Wraps a plain name-to-field dictionary as a `CifCategory`.
- * `getField` returns `undefined` for names that are not in `fields`.
- */
-function toCategory(name: string, fields: { [name: string]: CifField | undefined }, rowCount: number): CifCategory {
-    const fieldNames = Object.keys(fields);
-    const getField = (f: string) => fields[f];
-    return { name, fieldNames, rowCount, getField };
-}
-
-/** Fixed three-row `entity` category: ids '1'..'3' typed polymer, non-polymer and water. */
-function _entity(): { [K in keyof mmCIF_Schema['entity']]?: CifField } {
-    const id = CifTextValueField(['1', '2', '3']);
-    const type = CifTextValueField(['polymer', 'non-polymer', 'water']);
-    return { id, type };
-}
-
-/**
- * Creates the empty column builders that `addAtom` fills for `count` atoms.
- *
- * Token-backed columns are `TokenBuilder`s over `data` created with a size of
- * `2 * count`; `id`, `pdbx_PDB_model_num` and `label_entity_id` are plain string
- * arrays that grow by appending.
- */
-function atom_site_template(data: string, count: number) {
-    // Start empty: addAtom only ever appends to these arrays (`arr[arr.length] = ...`
-    // and `push`), so pre-sizing them with `new Array(count)` would leave `count`
-    // holes in front of the real values and double the resulting length.
-    const str = () => [] as string[];
-    const ts = () => TokenBuilder.create(data, 2 * count);
-    return {
-        count,
-        group_PDB: ts(),
-        id: str(),
-        auth_atom_id: ts(),
-        label_alt_id: ts(),
-        auth_comp_id: ts(),
-        auth_asym_id: ts(),
-        auth_seq_id: ts(),
-        pdbx_PDB_ins_code: ts(),
-        Cartn_x: ts(),
-        Cartn_y: ts(),
-        Cartn_z: ts(),
-        occupancy: ts(),
-        B_iso_or_equiv: ts(),
-        type_symbol: ts(),
-        pdbx_PDB_model_num: str(),
-        label_entity_id: str()
-    };
-}
-
-/**
- * Turns the collected PDB tokens and strings into mmCIF `atom_site` fields.
- *
- * The `label_asym_id`, `label_atom_id`, `label_comp_id` and `label_seq_id`
- * fields reuse the very same field instances as their `auth_*` counterparts.
- */
-function _atom_site(sites: AtomSiteTemplate): { [K in keyof mmCIF_Schema['atom_site']]?: CifField } {
-    const tokenField = (tokens: AtomSiteTemplate['group_PDB']) => CifTextField(tokens, sites.count);
-
-    const asymId = tokenField(sites.auth_asym_id);
-    const atomId = tokenField(sites.auth_atom_id);
-    const compId = tokenField(sites.auth_comp_id);
-    const seqId = tokenField(sites.auth_seq_id);
-
-    return {
-        auth_asym_id: asymId,
-        auth_atom_id: atomId,
-        auth_comp_id: compId,
-        auth_seq_id: seqId,
-        B_iso_or_equiv: tokenField(sites.B_iso_or_equiv),
-        Cartn_x: tokenField(sites.Cartn_x),
-        Cartn_y: tokenField(sites.Cartn_y),
-        Cartn_z: tokenField(sites.Cartn_z),
-        group_PDB: tokenField(sites.group_PDB),
-        id: CifTextValueField(sites.id),
-
-        label_alt_id: tokenField(sites.label_alt_id),
-
-        // label_* identifiers share the auth_* field instances.
-        label_asym_id: asymId,
-        label_atom_id: atomId,
-        label_comp_id: compId,
-        label_seq_id: seqId,
-        label_entity_id: CifTextValueField(sites.label_entity_id),
-
-        occupancy: tokenField(sites.occupancy),
-        type_symbol: tokenField(sites.type_symbol),
-
-        pdbx_PDB_ins_code: tokenField(sites.pdbx_PDB_ins_code),
-        pdbx_PDB_model_num: CifTextValueField(sites.pdbx_PDB_model_num)
-    };
-}
-
-/**
- * Appends one ATOM/HETATM record to the `atom_site` column builders.
- *
- * Exactly one value is added to every column of `sites` per call, so all
- * columns stay aligned row by row.
- *
- * @param sites column builders created by `atom_site_template`
- * @param model model number (as a string) stored in `pdbx_PDB_model_num`
- * @param data tokenizer over the PDB text; its token range is overwritten by each trim
- * @param s offset of the first character of the record line
- * @param e offset one past the last character of the record line
- */
-function addAtom(sites: AtomSiteTemplate, model: string, data: Tokenizer, s: number, e: number) {
-    const { data: str } = data;
-    const startPos = s;
-    const length = e - s;
-    let start = s;
-
-    // TODO: filter invalid atoms
-
-    // COLUMNS        DATA TYPE       CONTENTS
-    // --------------------------------------------------------------------------------
-    // 1 -  6        Record name     "ATOM  "
-    Tokenizer.trim(data, start, start + 6);
-    TokenBuilder.add(sites.group_PDB, data.tokenStart, data.tokenEnd);
-
-    // 7 - 11        Integer         Atom serial number.
-    // TODO: support HEX
-    start = startPos + 6;
-    Tokenizer.trim(data, start, start + 5);
-    sites.id[sites.id.length] = str.substring(data.tokenStart, data.tokenEnd);
-
-    // 13 - 16        Atom            Atom name.
-    start = startPos + 12;
-    Tokenizer.trim(data, start, start + 4);
-    TokenBuilder.add(sites.auth_atom_id, data.tokenStart, data.tokenEnd);
-
-    // 17             Character       Alternate location indicator.
-    if (str.charCodeAt(startPos + 16) === 32) { // ' '
-        TokenBuilder.add(sites.label_alt_id, 0, 0);
-    } else {
-        TokenBuilder.add(sites.label_alt_id, startPos + 16, startPos + 17);
-    }
-
-    // 18 - 20        Residue name    Residue name.
-    start = startPos + 17;
-    Tokenizer.trim(data, start, start + 3);
-    TokenBuilder.add(sites.auth_comp_id, data.tokenStart, data.tokenEnd);
-
-    // 22             Character       Chain identifier.
-    TokenBuilder.add(sites.auth_asym_id, startPos + 21, startPos + 22);
-
-    // 23 - 26        Integer         Residue sequence number.
-    // TODO: support HEX
-    start = startPos + 22;
-    Tokenizer.trim(data, start, start + 4);
-    TokenBuilder.add(sites.auth_seq_id, data.tokenStart, data.tokenEnd);
-
-    // 27             AChar           Code for insertion of residues.
-    // Goes into pdbx_PDB_ins_code; writing it to label_alt_id would leave this
-    // column empty and put two tokens per atom into label_alt_id.
-    if (str.charCodeAt(startPos + 26) === 32) { // ' '
-        TokenBuilder.add(sites.pdbx_PDB_ins_code, 0, 0);
-    } else {
-        TokenBuilder.add(sites.pdbx_PDB_ins_code, startPos + 26, startPos + 27);
-    }
-
-    // 31 - 38        Real(8.3)       Orthogonal coordinates for X in Angstroms.
-    start = startPos + 30;
-    Tokenizer.trim(data, start, start + 8);
-    TokenBuilder.add(sites.Cartn_x, data.tokenStart, data.tokenEnd);
-
-    // 39 - 46        Real(8.3)       Orthogonal coordinates for Y in Angstroms.
-    start = startPos + 38;
-    Tokenizer.trim(data, start, start + 8);
-    TokenBuilder.add(sites.Cartn_y, data.tokenStart, data.tokenEnd);
-
-    // 47 - 54        Real(8.3)       Orthogonal coordinates for Z in Angstroms.
-    start = startPos + 46;
-    Tokenizer.trim(data, start, start + 8);
-    TokenBuilder.add(sites.Cartn_z, data.tokenStart, data.tokenEnd);
-
-    // 55 - 60        Real(6.2)       Occupancy.
-    start = startPos + 54;
-    Tokenizer.trim(data, start, start + 6);
-    TokenBuilder.add(sites.occupancy, data.tokenStart, data.tokenEnd);
-
-    // 61 - 66        Real(6.2)       Temperature factor (Default = 0.0).
-    if (length >= 66) {
-        start = startPos + 60;
-        Tokenizer.trim(data, start, start + 6);
-        TokenBuilder.add(sites.B_iso_or_equiv, data.tokenStart, data.tokenEnd);
-    } else {
-        // Line too short to hold a B-factor: add an empty token to the B-factor
-        // column itself so it keeps one entry per atom.
-        TokenBuilder.add(sites.B_iso_or_equiv, 0, 0);
-    }
-
-    // 73 - 76        LString(4)      Segment identifier, left-justified.
-    // ignored
-
-    // 77 - 78        LString(2)      Element symbol, right-justified.
-    if (length >= 78) {
-        start = startPos + 76;
-        Tokenizer.trim(data, start, start + 2);
-
-        if (data.tokenStart < data.tokenEnd) {
-            TokenBuilder.add(sites.type_symbol, data.tokenStart, data.tokenEnd);
-        } else {
-            // "guess" the symbol
-            TokenBuilder.add(sites.type_symbol, startPos + 12, startPos + 13);
-        }
-    } else {
-        // No element columns at all: same guess from the first atom-name character.
-        TokenBuilder.add(sites.type_symbol, startPos + 12, startPos + 13);
-    }
-
-    // TODO
-    sites.label_entity_id.push('1');
-    sites.pdbx_PDB_model_num.push(model);
-
-}
-
-/** Shape of the column builders returned by `atom_site_template`. */
-type AtomSiteTemplate = ReturnType<typeof atom_site_template>
-
-/**
- * Converts a parsed PDB file into an mmCIF-shaped format object containing
- * an `entity` category and an `atom_site` category.
- *
- * Runs two passes over the lines: the first counts ATOM and HETATM records so
- * the column builders can be sized, the second fills them. Atoms seen before
- * any MODEL record are assigned to model 1; each MODEL record increments the
- * model number.
- */
-async function pdbToMmCIF(pdb: PdbFile): Promise<Format.mmCIF> {
-    const { lines } = pdb;
-    const { data, indices } = lines;
-    const tokenizer = Tokenizer(data);
-
-    // Count the atoms. This must accept exactly the records the second pass
-    // hands to addAtom (ATOM and HETATM), otherwise `atomCount` disagrees with
-    // the number of rows actually added.
-    let atomCount = 0;
-    for (let i = 0, _i = lines.count; i < _i; i++) {
-        const s = indices[2 * i], e = indices[2 * i + 1];
-        switch (data[s]) {
-            case 'A':
-                if (substringStartsWith(data, s, e, 'ATOM  ')) atomCount++;
-                break;
-            case 'H':
-                if (substringStartsWith(data, s, e, 'HETATM')) atomCount++;
-                break;
-        }
-    }
-
-    const atom_site = atom_site_template(data, atomCount);
-
-    let modelNum = 0, modelStr = '';
-
-    for (let i = 0, _i = lines.count; i < _i; i++) {
-        const s = indices[2 * i], e = indices[2 * i + 1];
-        switch (data[s]) {
-            case 'A':
-                if (!substringStartsWith(data, s, e, 'ATOM  ')) continue;
-                // No MODEL record seen yet: implicitly start model 1.
-                if (!modelNum) { modelNum++; modelStr = '' + modelNum; }
-                addAtom(atom_site, modelStr, tokenizer, s, e);
-                break;
-            case 'H':
-                if (!substringStartsWith(data, s, e, 'HETATM')) continue;
-                if (!modelNum) { modelNum++; modelStr = '' + modelNum; }
-                addAtom(atom_site, modelStr, tokenizer, s, e);
-                break;
-            case 'M':
-                if (substringStartsWith(data, s, e, 'MODEL ')) {
-                    modelNum++;
-                    modelStr = '' + modelNum;
-                }
-                break;
-
-        }
-    }
-
-    const categories = {
-        entity: toCategory('entity', _entity(), 3),
-        atom_site: toCategory('atom_site', _atom_site(atom_site), atomCount)
-    }
-
-    return Format.mmCIF({
-        header: pdb.id || 'PDB',
-        categoryNames: Object.keys(categories),
-        categories
-    });
-}
-
-/**
- * Creates the task that builds models from a PDB file: the file is first
- * converted to an mmCIF-shaped format, then passed to the mmCIF model builder,
- * which runs inside the same task context.
- */
-function buildModels(format: Format.PDB): Task<ReadonlyArray<Model>> {
-    return Task.create('Create PDB Model', async ctx => {
-        await ctx.update('Converting to mmCIF...');
-        const converted = await pdbToMmCIF(format.data);
-        const modelTask = from_mmCIF(converted);
-        return modelTask.runInContext(ctx);
-    });
-}
-
-export default buildModels;

+ 1 - 0
tsconfig.json

@@ -23,6 +23,7 @@
             "mol-math": ["./mol-math"],
             "mol-model": ["./mol-model"],
             "mol-model-props": ["./mol-model-props", "./mol-model-props/index.ts"],
+            "mol-model-parsers": ["./mol-model-parsers"],
             "mol-ql": ["./mol-ql"],
             "mol-repr": ["./mol-repr"],
             "mol-script": ["./mol-script"],