Browse Source

normalize mmcif data

Alexander Rose 3 years ago
parent
commit
bf4e5ed7c2

+ 3 - 1
src/extensions/cellpack/model.ts

@@ -32,6 +32,7 @@ import { Color } from '../../mol-util/color';
 import { objectForEach } from '../../mol-util/object';
 import { readFromFile } from '../../mol-util/data-source';
 import { ColorNames } from '../../mol-util/color/names';
+import { createBasic } from '../../mol-model-formats/structure/basic/schema';
 
 function getCellPackModelUrl(fileName: string, baseUrl: string) {
     return `${baseUrl}/results/${fileName}`;
@@ -310,7 +311,8 @@ async function getCurve(plugin: PluginContext, name: string, ingredient: Ingredi
     const cif = getCifCurve(name, transforms, model);
     const curveModelTask = Task.create('Curve Model', async ctx => {
         const format = MmcifFormat.fromFrame(cif);
-        const models = await createModels(format.data.db, format, ctx);
+        const basic = createBasic(format.data.db, true);
+        const models = await createModels(basic, format, ctx);
         return models.representative;
     });
 

+ 5 - 29
src/mol-model-formats/structure/basic/parser.ts

@@ -150,40 +150,17 @@ function findModelEnd(num: Column<number>, startIndex: number) {
     return endIndex;
 }
 
-function hasPresentValues(column: Column<any>) {
-    for (let i = 0, il = column.rowCount; i < il; i++) {
-        if (column.valueKind(i) === Column.ValueKind.Present) return true;
-    }
-    return false;
-}
-
-function substUndefinedColumn<T extends Table<any>>(table: T, a: keyof T, b: keyof T) {
-    if (!table[a].isDefined || !hasPresentValues(table[a])) table[a] = table[b];
-    if (!table[b].isDefined || !hasPresentValues(table[b])) table[b] = table[a];
-}
-
-/** Fix possibly missing auth_/label_ columns */
-function getNormalizeAtomSite(atom_site: AtomSite) {
-    const normalized = Table.ofColumns(atom_site._schema, atom_site);
-    substUndefinedColumn(normalized, 'label_atom_id', 'auth_atom_id');
-    substUndefinedColumn(normalized, 'label_comp_id', 'auth_comp_id');
-    substUndefinedColumn(normalized, 'label_seq_id', 'auth_seq_id');
-    substUndefinedColumn(normalized, 'label_asym_id', 'auth_asym_id');
-    return normalized;
-}
-
 async function readStandard(ctx: RuntimeContext, data: BasicData, properties: CommonProperties, format: ModelFormat) {
     const models: Model[] = [];
 
     if (data.atom_site) {
-        const normalizedAtomSite = getNormalizeAtomSite(data.atom_site);
-        const atomCount = normalizedAtomSite.id.rowCount;
+        const atomCount = data.atom_site.id.rowCount;
         const entities = getEntityData(data);
 
         let modelStart = 0;
         while (modelStart < atomCount) {
-            const modelEnd = findModelEnd(normalizedAtomSite.pdbx_PDB_model_num, modelStart);
-            const { atom_site, sourceIndex } = await sortAtomSite(ctx, normalizedAtomSite, modelStart, modelEnd);
+            const modelEnd = findModelEnd(data.atom_site.pdbx_PDB_model_num, modelStart);
+            const { atom_site, sourceIndex } = await sortAtomSite(ctx, data.atom_site, modelStart, modelEnd);
             const model = createStandardModel(data, atom_site, sourceIndex, entities, properties, format, models.length > 0 ? models[models.length - 1] : void 0);
             models.push(model);
             modelStart = modelEnd;
@@ -229,16 +206,15 @@ async function readIntegrative(ctx: RuntimeContext, data: BasicData, properties:
         for (let i = 0; i < data.ihm_model_list._rowCount; i++) {
             const id = model_id.value(i);
 
-            const normalizedAtomSite = getNormalizeAtomSite(data.atom_site);
             let atom_site, atom_site_sourceIndex;
             if (atom_sites.has(id)) {
                 const e = atom_sites.get(id)!;
                 // need to sort `data.atom_site` as `e.start` and `e.end` are indices into that
-                const { atom_site: sorted, sourceIndex } = await sortAtomSite(ctx, normalizedAtomSite, e.start, e.end);
+                const { atom_site: sorted, sourceIndex } = await sortAtomSite(ctx, data.atom_site, e.start, e.end);
                 atom_site = sorted;
                 atom_site_sourceIndex = sourceIndex;
             } else {
-                atom_site = Table.window(normalizedAtomSite, normalizedAtomSite._schema, 0, 0);
+                atom_site = Table.window(data.atom_site, data.atom_site._schema, 0, 0);
                 atom_site_sourceIndex = Column.ofIntArray([]);
             }
 

+ 6 - 2
src/mol-model-formats/structure/basic/schema.ts

@@ -1,5 +1,5 @@
 /**
- * Copyright (c) 2020 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ * Copyright (c) 2020-2022 mol* contributors, licensed under MIT, See LICENSE file for more info.
  *
  * @author Alexander Rose <alexander.rose@weirdbyte.de>
  */
@@ -7,6 +7,7 @@
 import { mmCIF_Schema } from '../../../mol-io/reader/cif/schema/mmcif';
 import { Table } from '../../../mol-data/db';
 import { mmCIF_chemComp_schema } from '../../../mol-io/reader/cif/schema/mmcif-extras';
+import { getNormalizedAtomSite } from './util';
 
 // TODO split into conformation and hierarchy parts
 
@@ -68,7 +69,7 @@ export interface BasicData {
     pdbx_molecule: Molecule
 }
 
-export function createBasic(data: Partial<BasicData>): BasicData {
+export function createBasic(data: Partial<BasicData>, normalize = false): BasicData {
     const basic = Object.create(null);
     for (const name of Object.keys(BasicSchema)) {
         if (name in data) {
@@ -77,5 +78,8 @@ export function createBasic(data: Partial<BasicData>): BasicData {
             basic[name] = Table.ofUndefinedColumns(BasicSchema[name as keyof typeof BasicSchema], 0);
         }
     }
+    if (normalize) {
+        basic.atom_site = getNormalizedAtomSite(basic.atom_site);
+    }
     return basic;
 }

+ 27 - 3
src/mol-model-formats/structure/basic/util.ts

@@ -1,12 +1,12 @@
 /**
- * Copyright (c) 2017-2020 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ * Copyright (c) 2017-2022 mol* contributors, licensed under MIT, See LICENSE file for more info.
  *
  * @author David Sehnal <david.sehnal@gmail.com>
  * @author Alexander Rose <alexander.rose@weirdbyte.de>
  */
 
-import { BasicData } from './schema';
-import { Table } from '../../../mol-data/db';
+import { AtomSite, BasicData } from './schema';
+import { Column, Table } from '../../../mol-data/db';
 
 export function getModelGroupName(model_id: number, data: BasicData) {
     const { ihm_model_group, ihm_model_group_link } = data;
@@ -17,4 +17,28 @@ export function getModelGroupName(model_id: number, data: BasicData) {
         if (group) return group.name;
     }
     return '';
+}
+
+//
+
+function hasPresentValues(column: Column<any>) { // true if at least one row of `column` has value kind `Present`
+    for (let i = 0, il = column.rowCount; i < il; i++) {
+        if (column.valueKind(i) === Column.ValueKind.Present) return true; // stop scanning at the first present value
+    }
+    return false; // no present row found (also the result when rowCount is 0)
+}
+
+function substUndefinedColumn<T extends Table<any>>(table: T, a: keyof T, b: keyof T) { // mutates `table`: whichever of columns `a`/`b` is unusable is replaced by the other
+    if (!table[a].isDefined || !hasPresentValues(table[a])) table[a] = table[b]; // `a` is undefined or has no present values -> reuse `b`
+    if (!table[b].isDefined || !hasPresentValues(table[b])) table[b] = table[a]; // same check for `b`, run after the line above: if both were unusable, both now reference the original `b`
+}
+
+/** Fix possibly missing auth_/label_ columns by substituting each undefined or value-less column with its label_/auth_ counterpart (atom, comp, seq and asym ids) */
+export function getNormalizedAtomSite(atom_site: AtomSite) {
+    const normalized = Table.ofColumns(atom_site._schema, atom_site); // the substitutions below are applied to this table; presumably a separate object so `atom_site` itself stays untouched — TODO confirm against Table.ofColumns
+    substUndefinedColumn(normalized, 'label_atom_id', 'auth_atom_id');
+    substUndefinedColumn(normalized, 'label_comp_id', 'auth_comp_id');
+    substUndefinedColumn(normalized, 'label_seq_id', 'auth_seq_id');
+    substUndefinedColumn(normalized, 'label_asym_id', 'auth_asym_id');
+    return normalized;
 }

+ 2 - 2
src/mol-model-formats/structure/cif-core.ts

@@ -146,13 +146,13 @@ async function getModels(db: CifCore_Database, format: CifCoreFormat, ctx: Runti
     componentBuilder.setNames([['MOL', name || 'Unknown Molecule']]);
     componentBuilder.add('MOL', 0);
 
-    const basics = createBasic({
+    const basic = createBasic({
         entity: entityBuilder.getEntityTable(),
         chem_comp: componentBuilder.getChemCompTable(),
         atom_site
     });
 
-    const models = await createModels(basics, format, ctx);
+    const models = await createModels(basic, format, ctx);
 
     if (models.frameCount > 0) {
         const first = models.representative;

+ 2 - 2
src/mol-model-formats/structure/cube.ts

@@ -53,13 +53,13 @@ async function getModels(cube: CubeFile, ctx: RuntimeContext) {
     componentBuilder.setNames([['MOL', 'Unknown Molecule']]);
     componentBuilder.add('MOL', 0);
 
-    const basics = createBasic({
+    const basic = createBasic({
         entity: entityBuilder.getEntityTable(),
         chem_comp: componentBuilder.getChemCompTable(),
         atom_site
     });
 
-    return await createModels(basics, MolFormat.create(cube), ctx);
+    return await createModels(basic, MolFormat.create(cube), ctx);
 }
 
 //

+ 4 - 2
src/mol-model-formats/structure/mmcif.ts

@@ -1,5 +1,5 @@
 /**
- * Copyright (c) 2017-2021 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ * Copyright (c) 2017-2022 mol* contributors, licensed under MIT, See LICENSE file for more info.
  *
  * @author David Sehnal <david.sehnal@gmail.com>
  * @author Alexander Rose <alexander.rose@weirdbyte.de>
@@ -19,6 +19,7 @@ import { ComponentBond } from './property/bonds/chem_comp';
 import { StructConn } from './property/bonds/struct_conn';
 import { Trajectory } from '../../mol-model/structure';
 import { GlobalModelTransformInfo } from '../../mol-model/structure/model/properties/global-transform';
+import { createBasic } from './basic/schema';
 
 function modelSymmetryFromMmcif(model: Model) {
     if (!MmcifFormat.is(model.sourceData)) return;
@@ -100,5 +101,6 @@ namespace MmcifFormat {
 
 export function trajectoryFromMmCIF(frame: CifFrame): Task<Trajectory> {
     const format = MmcifFormat.fromFrame(frame);
-    return Task.create('Create mmCIF Model', ctx => createModels(format.data.db, format, ctx));
+    const basic = createBasic(format.data.db, true); // normalize = true: atom_site is passed through getNormalizedAtomSite; this runs when the function is called, before the task is created
+    return Task.create('Create mmCIF Model', ctx => createModels(basic, format, ctx)); // the task callback builds the models from the pre-built `basic`
 }

+ 2 - 2
src/mol-model-formats/structure/mol.ts

@@ -68,13 +68,13 @@ export async function getMolModels(mol: MolFile, format: ModelFormat<any> | unde
     componentBuilder.setNames([['MOL', 'Unknown Molecule']]);
     componentBuilder.add('MOL', 0);
 
-    const basics = createBasic({
+    const basic = createBasic({
         entity: entityBuilder.getEntityTable(),
         chem_comp: componentBuilder.getChemCompTable(),
         atom_site
     });
 
-    const models = await createModels(basics, format ?? MolFormat.create(mol), ctx);
+    const models = await createModels(basic, format ?? MolFormat.create(mol), ctx);
 
     if (models.frameCount > 0) {
         const indexA = Column.ofIntArray(Column.mapToArray(bonds.atomIdxA, x => x - 1, Int32Array));

+ 2 - 2
src/mol-model-formats/structure/mol2.ts

@@ -75,13 +75,13 @@ async function getModels(mol2: Mol2File, ctx: RuntimeContext) {
             componentBuilder.add(atoms.subst_name.value(i), i);
         }
 
-        const basics = createBasic({
+        const basic = createBasic({
             entity: entityBuilder.getEntityTable(),
             chem_comp: componentBuilder.getChemCompTable(),
             atom_site
         });
 
-        const _models = await createModels(basics, Mol2Format.create(mol2), ctx);
+        const _models = await createModels(basic, Mol2Format.create(mol2), ctx);
 
         if (_models.frameCount > 0) {
             const indexA = Column.ofIntArray(Column.mapToArray(bonds.origin_atom_id, x => x - 1, Int32Array));

+ 4 - 2
src/mol-model-formats/structure/pdb.ts

@@ -1,5 +1,5 @@
 /**
- * Copyright (c) 2019-2020 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ * Copyright (c) 2019-2022 mol* contributors, licensed under MIT, See LICENSE file for more info.
  *
  * @author David Sehnal <david.sehnal@gmail.com>
  * @author Alexander Rose <alexander.rose@weirdbyte.de>
@@ -14,6 +14,7 @@ import { Column } from '../../mol-data/db';
 import { AtomPartialCharge } from './property/partial-charge';
 import { Trajectory } from '../../mol-model/structure';
 import { ModelFormat } from '../format';
+import { createBasic } from './basic/schema';
 
 export { PdbFormat };
 
@@ -34,7 +35,8 @@ export function trajectoryFromPDB(pdb: PdbFile): Task<Trajectory> {
         await ctx.update('Converting to mmCIF');
         const cif = await pdbToMmCif(pdb);
         const format = MmcifFormat.fromFrame(cif, undefined, PdbFormat.create(pdb));
-        const models = await createModels(format.data.db, format, ctx);
+        const basic = createBasic(format.data.db, true);
+        const models = await createModels(basic, format, ctx);
         const partial_charge = cif.categories['atom_site']?.getField('partial_charge');
         if (partial_charge) {
             // TODO works only for single, unsorted model, to work generally

+ 2 - 2
src/mol-model-formats/structure/xyz.ts

@@ -78,13 +78,13 @@ function getModels(mol: XyzFile, ctx: RuntimeContext) {
     componentBuilder.setNames([['MOL', 'Unknown Molecule']]);
     componentBuilder.add('MOL', 0);
 
-    const basics = createBasic({
+    const basic = createBasic({
         entity: entityBuilder.getEntityTable(),
         chem_comp: componentBuilder.getChemCompTable(),
         atom_site
     });
 
-    return createModels(basics, XyzFormat.create(mol), ctx);
+    return createModels(basic, XyzFormat.create(mol), ctx);
 }
 
 //