Browse Source

basic support for models from cif-core files

Alexander Rose 5 years ago
parent
commit
e90ccfdd20

+ 3 - 1
data/cif-field-names/cif-core-field-names.csv

@@ -59,5 +59,7 @@ atom_site_aniso.U_12
 geom_bond.atom_site_label_1
 geom_bond.atom_site_label_2
 geom_bond.distance
+geom_bond.site_symmetry_1
 geom_bond.site_symmetry_2
-geom_bond.publ_flag
+geom_bond.publ_flag
+geom_bond.valence

+ 27 - 0
src/mol-io/reader/cif/schema/cif-core.ts

@@ -280,6 +280,29 @@ export const CifCore_Schema = {
          * publication or should be placed in a table of significant angles.
          */
         publ_flag: str,
+        /**
+         * The set of data items which specify the symmetry operation codes
+         * which must be applied to the atom sites involved in the geometry angle.
+         *
+         * The symmetry code of each atom site as the symmetry-equivalent position
+         * number 'n' and the cell translation number 'pqr'. These numbers are
+         * combined to form the code 'n pqr' or n_pqr.
+         *
+         * The character string n_pqr is composed as follows:
+         *
+         * n refers to the symmetry operation that is applied to the
+         * coordinates stored in _atom_site.fract_xyz. It must match a
+         * number given in _symmetry_equiv.pos_site_id.
+         *
+         * p, q and r refer to the translations that are subsequently
+         * applied to the symmetry transformed coordinates to generate
+         * the atom used in calculating the angle. These translations
+         * (x,y,z) are related to (p,q,r) by the relations
+         * p = 5 + x
+         * q = 5 + y
+         * r = 5 + z
+         */
+        site_symmetry_1: str,
         /**
          * The set of data items which specify the symmetry operation codes
          * which must be applied to the atom sites involved in the geometry angle.
@@ -303,6 +326,10 @@ export const CifCore_Schema = {
          * r = 5 + z
          */
         site_symmetry_2: str,
+        /**
+         * Bond valence calculated from the bond distance.
+         */
+        valence: float,
     },
     /**
      * The CATEGORY of data items used to record details about the

+ 165 - 0
src/mol-model-formats/structure/cif-core.ts

@@ -0,0 +1,165 @@
+/**
+ * Copyright (c) 2020 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ */
+
+import { Column, Table } from '../../mol-data/db';
+import { Model, Symmetry } from '../../mol-model/structure/model';
+import { MoleculeType } from '../../mol-model/structure/model/types';
+import { RuntimeContext, Task } from '../../mol-task';
+import { createModels } from './basic/parser';
+import { BasicSchema, createBasic } from './basic/schema';
+import { ComponentBuilder } from './common/component';
+import { EntityBuilder } from './common/entity';
+import { ModelFormat } from './format';
+import { CifCore_Database } from '../../mol-io/reader/cif/schema/cif-core';
+import { CifFrame, CIF } from '../../mol-io/reader/cif';
+import { Spacegroup, SpacegroupCell } from '../../mol-math/geometry';
+import { Vec3 } from '../../mol-math/linear-algebra';
+import { ModelSymmetry } from './property/symmetry';
+import { IndexPairBonds } from './property/bonds/index-pair';
+
+/**
+ * Picks the identifier used to look up the space group: the `IT_number`
+ * value when that column is defined, otherwise the `name_H-M_full` value
+ * (row 0 of each column).
+ */
+function getSpacegroupNameOrNumber(space_group: CifCore_Database['space_group']) {
+    const itNumber = space_group.IT_number.value(0)
+    const fullName = space_group['name_H-M_full'].value(0)
+    // the number wins whenever it is present, regardless of the name column
+    return space_group.IT_number.isDefined ? itNumber : fullName
+}
+
+/**
+ * Builds the `Symmetry` record for a CIF core database from row 0 of the
+ * `cell` and `space_group` categories. Assemblies and NCS operators are
+ * left empty.
+ */
+function getSymmetry(db: CifCore_Database): Symmetry {
+    const { cell, space_group } = db
+    const lengths = Vec3.create(cell.length_a.value(0), cell.length_b.value(0), cell.length_c.value(0))
+    const rawAngles = Vec3.create(cell.angle_alpha.value(0), cell.angle_beta.value(0), cell.angle_gamma.value(0))
+    // angles are scaled by PI / 180 (presumably degrees -> radians) before the cell is created
+    const angles = Vec3.scale(Vec3.zero(), rawAngles, Math.PI / 180)
+    const spacegroupCell = SpacegroupCell.create(getSpacegroupNameOrNumber(space_group), lengths, angles)
+
+    return {
+        spacegroup: Spacegroup.create(spacegroupCell),
+        assemblies: [],
+        isNonStandardCrytalFrame: false,
+        ncsOperators: []
+    }
+}
+
+/**
+ * Creates models from a CIF core database.
+ *
+ * Fractional `atom_site` coordinates are converted with the cell's
+ * `fromFractional` matrix, and all atoms are placed in one component 'MOL'
+ * in chain 'A' of entity '1'. On the first model the symmetry is attached
+ * and, if `geom_bond` has rows, index-pair bonds are set.
+ *
+ * @param db parsed CIF core database
+ * @param format format object handed on to `createModels`
+ * @param ctx runtime context handed on to `createModels`
+ * @returns the models produced by `createModels`
+ */
+async function getModels(db: CifCore_Database, format: CifCoreFormat, ctx: RuntimeContext): Promise<Model[]> {
+
+    const atomCount = db.atom_site._rowCount
+    const MOL = Column.ofConst('MOL', atomCount, Column.Schema.str);
+    const A = Column.ofConst('A', atomCount, Column.Schema.str);
+    const seq_id = Column.ofConst(1, atomCount, Column.Schema.int);
+
+    const symmetry = getSymmetry(db)
+    const m = symmetry.spacegroup.cell.fromFractional
+
+    // transform every fractional position with the cell matrix
+    const { fract_x, fract_y, fract_z } = db.atom_site
+    const x = new Float32Array(atomCount)
+    const y = new Float32Array(atomCount)
+    const z = new Float32Array(atomCount)
+    const v = Vec3()
+    for (let i = 0; i < atomCount; ++i) {
+        Vec3.set(v, fract_x.value(i), fract_y.value(i), fract_z.value(i))
+        Vec3.transformMat4(v, v, m)
+        x[i] = v[0], y[i] = v[1], z[i] = v[2]
+    }
+
+    const atom_site = Table.ofPartialColumns(BasicSchema.atom_site, {
+        auth_asym_id: A,
+        auth_atom_id: db.atom_site.label,
+        auth_comp_id: MOL,
+        auth_seq_id: seq_id,
+        Cartn_x: Column.ofFloatArray(x),
+        Cartn_y: Column.ofFloatArray(y),
+        Cartn_z: Column.ofFloatArray(z),
+        id: Column.range(0, atomCount - 1),
+
+        label_asym_id: A,
+        label_atom_id: db.atom_site.label,
+        label_comp_id: MOL,
+        label_seq_id: seq_id,
+        label_entity_id: Column.ofConst('1', atomCount, Column.Schema.str),
+
+        occupancy: db.atom_site.occupancy,
+        type_symbol: db.atom_site.type_symbol,
+
+        pdbx_PDB_model_num: Column.ofConst(1, atomCount, Column.Schema.int),
+    }, atomCount);
+
+    // prefer the common name, fall back to the systematic one
+    const name = db.chemical.name_common.value(0) || db.chemical.name_systematic.value(0)
+
+    const entityBuilder = new EntityBuilder()
+    entityBuilder.setNames([['MOL', name || 'Unknown Entity']])
+    entityBuilder.getEntityId('MOL', MoleculeType.Unknown, 'A');
+
+    const componentBuilder = new ComponentBuilder(seq_id, db.atom_site.type_symbol);
+    componentBuilder.setNames([['MOL', name || 'Unknown Molecule']])
+    componentBuilder.add('MOL', 0);
+
+    const basics = createBasic({
+        entity: entityBuilder.getEntityTable(),
+        chem_comp: componentBuilder.getChemCompTable(),
+        atom_site
+    });
+
+    const models = await createModels(basics, format, ctx);
+
+    if (models.length > 0) {
+        ModelSymmetry.Provider.set(models[0], symmetry)
+
+        const bondCount = db.geom_bond._rowCount
+        if (bondCount > 0) {
+            // atom label -> row index in atom_site (a later duplicate label overrides an earlier one)
+            const labelIndexMap = new Map<string, number>()
+            const { label } = db.atom_site
+            for (let i = 0, il = label.rowCount; i < il; ++i) {
+                labelIndexMap.set(label.value(i), i)
+            }
+
+            const indexA: number[] = []
+            const indexB: number[] = []
+            const order: number[] = []
+
+            const { atom_site_label_1, atom_site_label_2, valence, site_symmetry_1, site_symmetry_2 } = db.geom_bond
+            for (let i = 0; i < bondCount; ++i) {
+                // TODO support symmetry: only bonds whose two sites carry the same symmetry code are kept
+                if (site_symmetry_1.value(i) !== site_symmetry_2.value(i)) continue
+
+                const a = labelIndexMap.get(atom_site_label_1.value(i))
+                const b = labelIndexMap.get(atom_site_label_2.value(i))
+                // a bond that references a label missing from atom_site cannot be indexed
+                if (a === undefined || b === undefined) continue
+
+                // append rather than write at `i`, so skipped rows leave no holes in the arrays
+                indexA.push(a)
+                indexB.push(b)
+                // TODO derive from bond length if undefined
+                order.push(valence.isDefined ? valence.value(i) : 1)
+            }
+
+            IndexPairBonds.Provider.set(models[0], IndexPairBonds.fromData({ pairs: {
+                indexA: Column.ofIntArray(indexA),
+                indexB: Column.ofIntArray(indexB),
+                order: Column.ofIntArray(order)
+            }, count: indexA.length }));
+        }
+    }
+
+    return models;
+}
+
+//
+
+export { CifCoreFormat };
+
+/** Model format whose data is a CIF core database together with the frame it came from. */
+type CifCoreFormat = ModelFormat<CifCoreFormat.Data>
+
+namespace CifCoreFormat {
+    export type Data = { db: CifCore_Database, frame: CifFrame }
+
+    /** Type guard: true when the format's `kind` is 'cifCore'. */
+    export function is(x: ModelFormat): x is CifCoreFormat {
+        return x.kind === 'cifCore'
+    }
+
+    /** Wraps a frame as a `CifCoreFormat`; the database comes from `CIF.schema.cifCore` unless one is passed in. */
+    export function fromFrame(frame: CifFrame, db?: CifCore_Database): CifCoreFormat {
+        const database = db || CIF.schema.cifCore(frame)
+        return { kind: 'cifCore', name: database._name, data: { db: database, frame } };
+    }
+}
+
+/** Creates the task that builds the model trajectory for a CIF core frame. */
+export function trajectoryFromCifCore(frame: CifFrame): Task<Model.Trajectory> {
+    const cifCoreFormat = CifCoreFormat.fromFrame(frame)
+    const { db } = cifCoreFormat.data
+    return Task.create('Parse CIF Core', runtimeCtx => getModels(db, cifCoreFormat, runtimeCtx))
+}

+ 6 - 3
src/mol-plugin-state/actions/data-format.ts

@@ -12,7 +12,7 @@ import { FileInfo, getFileInfo } from '../../mol-util/file-info';
 import { ParamDefinition as PD } from '../../mol-util/param-definition';
 import { PluginStateObject } from '../objects';
 import { PlyProvider } from './shape';
-import { DcdProvider, GroProvider, MmcifProvider, PdbProvider, Provider3dg, PsfProvider, MolProvider } from './structure';
+import { DcdProvider, GroProvider, MmcifProvider, PdbProvider, Provider3dg, PsfProvider, MolProvider, CifCoreProvider } from './structure';
 import { Ccp4Provider, DscifProvider, Dsn6Provider } from './volume';
 
 export class DataFormatRegistry<D extends PluginStateObject.Data.Binary | PluginStateObject.Data.String> {
@@ -56,6 +56,7 @@ export class DataFormatRegistry<D extends PluginStateObject.Data.Binary | Plugin
     constructor() {
         this.add('3dg', Provider3dg)
         this.add('ccp4', Ccp4Provider)
+        this.add('cifCore', CifCoreProvider)
         this.add('dcd', DcdProvider)
         this.add('dscif', DscifProvider)
         this.add('dsn6', Dsn6Provider)
@@ -157,7 +158,7 @@ export const OpenFiles = StateAction.build({
 
 //
 
-type cifVariants = 'dscif' | -1
+type cifVariants = 'dscif' | 'coreCif' | -1
 export function guessCifVariant(info: FileInfo, data: Uint8Array | string): cifVariants {
     if (info.ext === 'bcif') {
         try {
@@ -166,7 +167,9 @@ export function guessCifVariant(info: FileInfo, data: Uint8Array | string): cifV
             if (msgpackDecode(data as Uint8Array).encoder.startsWith('VolumeServer')) return 'dscif'
         } catch { }
     } else if (info.ext === 'cif') {
-        if ((data as string).startsWith('data_SERVER\n#\n_density_server_result')) return 'dscif'
+        const str = data as string
+        if (str.startsWith('data_SERVER\n#\n_density_server_result')) return 'dscif'
+        if (str.includes('atom_site_fract_x') || str.includes('atom_site.fract_x')) return 'coreCif'
     }
     return -1
 }

+ 20 - 2
src/mol-plugin-state/actions/structure.ts

@@ -28,8 +28,8 @@ export const MmcifProvider: DataFormatProvider<PluginStateObject.Data.String | P
     binaryExtensions: ['bcif'],
     isApplicable: (info: FileInfo, data: Uint8Array | string) => {
         if (info.ext === 'mmcif' || info.ext === 'mcif') return true
-        // assume cif/bcif files that are not DensityServer CIF are mmCIF
-        if (info.ext === 'cif' || info.ext === 'bcif') return guessCifVariant(info, data) !== 'dscif'
+        // assume undetermined cif/bcif files are mmCIF
+        if (info.ext === 'cif' || info.ext === 'bcif') return guessCifVariant(info, data) === -1
         return false
     },
     getDefaultBuilder: (ctx: PluginContext, data, options) => {
@@ -41,6 +41,24 @@ export const MmcifProvider: DataFormatProvider<PluginStateObject.Data.String | P
     }
 }
 
+/**
+ * Data format provider for CIF core files. It applies to `.cif` files that
+ * `guessCifVariant` classifies as 'coreCif'; there are no binary extensions.
+ */
+export const CifCoreProvider: DataFormatProvider<any> = {
+    label: 'cifCore',
+    description: 'CIF Core',
+    stringExtensions: ['cif'],
+    binaryExtensions: [],
+    isApplicable: (info: FileInfo, data: Uint8Array | string) => {
+        if (info.ext === 'cif') return guessCifVariant(info, data) === 'coreCif'
+        return false
+    },
+    getDefaultBuilder: (ctx: PluginContext, data, options) => {
+        // label the task after this format, not mmCIF
+        return Task.create('CIF Core default builder', async () => {
+            const trajectory = await ctx.builders.structure.parseTrajectory(data, 'cifCore');
+            // without visuals only the hierarchy is built ('empty' representation preset)
+            const representationPreset = options.visuals ? 'auto' : 'empty';
+            await ctx.builders.structure.hierarchy.applyPreset(trajectory, 'default', { showUnitcell: options.visuals, representationPreset });
+        })
+    }
+}
+
 export const PdbProvider: DataFormatProvider<any> = {
     label: 'PDB',
     description: 'PDB',

+ 4 - 2
src/mol-plugin-state/formats/registry.ts

@@ -101,7 +101,7 @@ export interface DataFormatProvider<P = any, R = any> {
     parse(plugin: PluginContext, data: StateObjectRef<PluginStateObject.Data.Binary | PluginStateObject.Data.String>, params?: P): Promise<R>
 }
 
-type cifVariants = 'dscif' | -1
+type cifVariants = 'dscif' | 'coreCif' | -1
 export function guessCifVariant(info: FileInfo, data: Uint8Array | string): cifVariants {
     if (info.ext === 'bcif') {
         try {
@@ -110,7 +110,9 @@ export function guessCifVariant(info: FileInfo, data: Uint8Array | string): cifV
             if (msgpackDecode(data as Uint8Array).encoder.startsWith('VolumeServer')) return 'dscif'
         } catch { }
     } else if (info.ext === 'cif') {
-        if ((data as string).startsWith('data_SERVER\n#\n_density_server_result')) return 'dscif'
+        const str = data as string
+        if (str.startsWith('data_SERVER\n#\n_density_server_result')) return 'dscif'
+        if (str.includes('atom_site_fract_x') || str.includes('atom_site.fract_x')) return 'coreCif'
     }
     return -1
 }

+ 25 - 2
src/mol-plugin-state/formats/trajectory.ts

@@ -26,8 +26,8 @@ export const MmcifProvider: TrajectoryFormatProvider = {
     binaryExtensions: ['bcif'],
     isApplicable: (info: FileInfo, data: Uint8Array | string) => {
         if (info.ext === 'mmcif' || info.ext === 'mcif') return true
-        // assume cif/bcif files that are not DensityServer CIF are mmCIF
-        if (info.ext === 'cif' || info.ext === 'bcif') return guessCifVariant(info, data) !== 'dscif'
+        // assume undetermined cif/bcif files are mmCIF
+        if (info.ext === 'cif' || info.ext === 'bcif') return guessCifVariant(info, data) === -1
         return false
     },
     parse: async (plugin, data, params) => {
@@ -43,6 +43,28 @@ export const MmcifProvider: TrajectoryFormatProvider = {
     }
 }
 
+/**
+ * Trajectory format provider for CIF core files. It applies to `.cif` files
+ * that `guessCifVariant` classifies as 'coreCif'; there are no binary
+ * extensions.
+ */
+export const CifCoreProvider: TrajectoryFormatProvider = {
+    label: 'cifCore',
+    description: 'CIF Core',
+    stringExtensions: ['cif'],
+    binaryExtensions: [],
+    isApplicable: (info: FileInfo, data: Uint8Array | string) => {
+        if (info.ext === 'cif') return guessCifVariant(info, data) === 'coreCif'
+        return false
+    },
+    parse: async (plugin, data, params) => {
+        const state = plugin.state.data;
+        // parse the raw data as CIF; the resulting cell starts out as a ghost
+        const cif = state.build().to(data)
+            .apply(StateTransforms.Data.ParseCif, void 0, { state: { isGhost: true } })
+        // derive the trajectory from the parsed CIF, tagging it with the caller-supplied tags
+        const trajectory = cif.apply(StateTransforms.Model.TrajectoryFromCifCore, void 0, { tags: params?.trajectoryTags })
+        await plugin.updateDataState(trajectory, { revertOnError: true });
+        // when the file holds more than one block, clear the ghost flag on the CIF cell
+        if ((cif.selector.cell?.obj?.data.blocks.length || 0) > 1) {
+            plugin.state.data.updateCellState(cif.ref, { isGhost: false });
+        }
+        return { trajectory: trajectory.selector };
+    }
+}
+
 function directTrajectory(transformer: StateTransformer<PluginStateObject.Data.String | PluginStateObject.Data.Binary, PluginStateObject.Molecule.Trajectory>): TrajectoryFormatProvider['parse'] {
     return async (plugin, data, params) => {
         const state = plugin.state.data;
@@ -100,6 +122,7 @@ export const MolProvider: TrajectoryFormatProvider = {
 
 export const BuildInTrajectoryFormats = [
     ['mmcif', MmcifProvider] as const,
+    ['cifCore', CifCoreProvider] as const,
     ['pdb', PdbProvider] as const,
     ['gro', GroProvider] as const,
     ['3dg', Provider3dg] as const,

+ 33 - 0
src/mol-plugin-state/transforms/model.ts

@@ -34,6 +34,7 @@ import { StructureSelectionQueries } from '../helpers/structure-selection-query'
 import { PluginStateObject as SO, PluginStateTransform } from '../objects';
 import { parseMol } from '../../mol-io/reader/mol/parser';
 import { trajectoryFromMol } from '../../mol-model-formats/structure/mol';
+import { trajectoryFromCifCore } from '../../mol-model-formats/structure/cif-core';
 
 export { CoordinatesFromDcd };
 export { TopologyFromPsf };
@@ -43,6 +44,7 @@ export { TrajectoryFromMmCif };
 export { TrajectoryFromPDB };
 export { TrajectoryFromGRO };
 export { TrajectoryFromMOL };
+export { TrajectoryFromCifCore };
 export { TrajectoryFrom3DG };
 export { ModelFromTrajectory };
 export { StructureFromTrajectory };
@@ -233,6 +235,37 @@ const TrajectoryFromMOL = PluginStateTransform.BuiltIn({
     }
 });
 
+type TrajectoryFromCifCore = typeof TrajectoryFromCifCore
+/**
+ * State transform that turns one data block of a parsed CIF file into a
+ * molecule trajectory via `trajectoryFromCifCore`.
+ *
+ * The optional `blockHeader` parameter selects the block; when it is not
+ * given, the first block of the file is used. The transform fails with an
+ * `Error` when the file has no blocks, when the requested block does not
+ * exist, or when no models are produced.
+ */
+const TrajectoryFromCifCore = PluginStateTransform.BuiltIn({
+    name: 'trajectory-from-cif-core',
+    display: { name: 'Parse CIF Core', description: 'Identify and create all separate models in the specified CIF data block' },
+    from: SO.Format.Cif,
+    to: SO.Molecule.Trajectory,
+    params(a) {
+        if (!a) {
+            // no parent object yet: offer a free-text header field
+            return {
+                blockHeader: PD.Optional(PD.Text(void 0, { description: 'Header of the block to parse. If none is specified, the 1st data block in the file is used.' }))
+            };
+        }
+        const { blocks } = a.data;
+        return {
+            blockHeader: PD.Optional(PD.Select(blocks[0] && blocks[0].header, blocks.map(b => [b.header, b.header] as [string, string]), { description: 'Header of the block to parse' }))
+        };
+    }
+})({
+    apply({ a, params }) {
+        return Task.create('Parse CIF Core', async ctx => {
+            const { blocks } = a.data;
+            // fall back to the first block, guarding against files without any block
+            const header = params.blockHeader || (blocks.length > 0 ? blocks[0].header : void 0);
+            if (header === void 0) throw new Error('No data blocks found.');
+            const block = blocks.find(b => b.header === header);
+            if (!block) throw new Error(`Data block '${header}' not found.`);
+            const models = await trajectoryFromCifCore(block).runInContext(ctx);
+            if (models.length === 0) throw new Error('No models found.');
+            const props = { label: `${models[0].entry}`, description: `${models.length} model${models.length === 1 ? '' : 's'}` };
+            return new SO.Molecule.Trajectory(models, props);
+        });
+    }
+});
+
 type TrajectoryFrom3DG = typeof TrajectoryFrom3DG
 const TrajectoryFrom3DG = PluginStateTransform.BuiltIn({
     name: 'trajectory-from-3dg',

+ 1 - 0
src/mol-plugin/index.ts

@@ -39,6 +39,7 @@ export const DefaultPluginSpec: PluginSpec = {
         PluginSpec.Action(StateTransforms.Data.ParseDsn6),
 
         PluginSpec.Action(StateTransforms.Model.TrajectoryFromMmCif),
+        PluginSpec.Action(StateTransforms.Model.TrajectoryFromCifCore),
         PluginSpec.Action(StateTransforms.Model.TrajectoryFromPDB),
         PluginSpec.Action(StateTransforms.Model.TransformStructureConformation),
         PluginSpec.Action(StateTransforms.Model.StructureCoordinateSystem),