Browse Source

mol-model: molstar_atom_site_operator_mapping parsing

David Sehnal 5 years ago
parent
commit
28edfd810c

+ 68 - 6
src/mol-model-formats/structure/basic/atomic.ts

@@ -8,7 +8,7 @@
 import { Column, Table } from '../../../mol-data/db';
 import { Interval, Segmentation } from '../../../mol-data/int';
 import UUID from '../../../mol-util/uuid';
-import { ElementIndex } from '../../../mol-model/structure';
+import { ElementIndex, ChainIndex } from '../../../mol-model/structure';
 import { Model } from '../../../mol-model/structure/model/model';
 import { AtomicConformation, AtomicData, AtomicHierarchy, AtomicSegments, AtomsSchema, ChainsSchema, ResiduesSchema } from '../../../mol-model/structure/model/properties/atomic';
 import { getAtomicIndex } from '../../../mol-model/structure/model/properties/utils/atomic-index';
@@ -16,6 +16,12 @@ import { ElementSymbol } from '../../../mol-model/structure/model/types';
 import { Entities } from '../../../mol-model/structure/model/properties/common';
 import { getAtomicDerivedData } from '../../../mol-model/structure/model/properties/utils/atomic-derived';
 import { AtomSite } from './schema';
+import { ModelFormat } from '../format';
+import { SymmetryOperator } from '../../../mol-math/geometry';
+import { MmcifFormat } from '../mmcif';
+import { AtomSiteOperatorMappingSchema } from '../../../mol-model/structure/export/categories/atom_site_operator_mapping';
+import { toDatabase } from '../../../mol-io/reader/cif/schema';
+import { Mat4, Vec3 } from '../../../mol-math/linear-algebra';
 
 function findHierarchyOffsets(atom_site: AtomSite) {
     if (atom_site._rowCount === 0) return { residues: [], chains: [] };
@@ -95,14 +101,70 @@ function isHierarchyDataEqual(a: AtomicData, b: AtomicData) {
         && Table.areEqual(a.atoms, b.atoms)
 }
 
-function getAtomicHierarchy(atom_site: AtomSite, sourceIndex: Column<number>, entities: Entities, chemicalComponentMap: Model['properties']['chemicalComponentMap'], previous?: Model) {
+function createChainOperatorMappingAndSubstituteNames(hierarchy: AtomicData, format: ModelFormat) { // Returns a chain index -> operator map built from molstar_atom_site_operator_mapping and rewrites the chain asym ids on `hierarchy` in place.
+    const mapping = new Map<ChainIndex, SymmetryOperator>();
+    if (!MmcifFormat.is(format)) return mapping;
+    // Only mmCIF input is inspected: read the mapping category from the frame; zero rows means there is nothing to map.
+    const { molstar_atom_site_operator_mapping: entries } = toDatabase(AtomSiteOperatorMappingSchema, format.data.frame);
+    if (entries._rowCount === 0) return mapping;
+    // Lookup tables keyed by "<asym id><suffix>": labelMap -> stored label id + operator, authMap -> stored auth id.
+    const labelMap = new Map<string, { name: string, operator: SymmetryOperator }>();
+    const authMap = new Map<string, string>();
+    // Build one operator per mapping row.
+    for (let i = 0; i < entries._rowCount; i++) {
+        const assembly: SymmetryOperator['assembly'] = entries.assembly_operator_id.valueKind(i) === Column.ValueKind.Present
+            ? { id: entries.assembly_id.value(i), operList: [], operId: entries.assembly_operator_id.value(i) }
+            : void 0;
+        // The matrix is always identity; only metadata comes from the row. NOTE(review): presumably the coordinates in the file are already transformed - confirm.
+        const operator = SymmetryOperator.create(entries.operator_name.value(i), Mat4.identity(), {
+            assembly,
+            spgrOp: entries.symmetry_operator_index.valueKind(i) === Column.ValueKind.Present ? entries.symmetry_operator_index.value(i) : void 0,
+            hkl: Vec3.ofArray(entries.symmetry_hkl.value(i)),
+            ncsId: entries.ncs_id.value(i)
+        });
+        // Register the suffixed ids so that chains named "<id><suffix>" can be resolved in the loop below.
+        const suffix = entries.suffix.value(i);
+        const label = entries.label_asym_id.value(i);
+        labelMap.set(`${label}${suffix}`, { name: label, operator });
+        const auth = entries.auth_asym_id.value(i);
+        authMap.set(`${auth}${suffix}`, auth);
+    }
+    // Plain arrays (one slot per chain) that will back the replacement asym id columns.
+    const { label_asym_id, auth_asym_id } = hierarchy.chains;
+    const mappedLabel: string[] = new Array(label_asym_id.rowCount);
+    const mappedAuth: string[] = new Array(label_asym_id.rowCount);
+    // Chains whose label id has no labelMap entry keep both ids and get no operator.
+    for (let i = 0 as ChainIndex; i < label_asym_id.rowCount; i++) {
+        const label = label_asym_id.value(i), auth = auth_asym_id.value(i);
+        if (!labelMap.has(label)) {
+            mappedLabel[i] = label;
+            mappedAuth[i] = auth;
+            continue;
+        }
+        // Matched chain: record its operator under the chain index.
+        const { name, operator } = labelMap.get(label)!;
+        mapping.set(i, operator);
+        // Substitute the stored label id; the auth id falls back to the current value when authMap has no (or an empty) entry.
+        mappedLabel[i] = name;
+        mappedAuth[i] = authMap.get(auth) || auth;
+    }
+    // Overwrite the chain id columns on the passed-in hierarchy (in-place mutation), reusing each old column's valueKind and schema.
+    hierarchy.chains.label_asym_id = Column.ofArray({ array: mappedLabel, valueKind: hierarchy.chains.label_asym_id.valueKind, schema: hierarchy.chains.label_asym_id.schema });
+    hierarchy.chains.auth_asym_id = Column.ofArray({ array: mappedAuth, valueKind: hierarchy.chains.auth_asym_id.valueKind, schema: hierarchy.chains.auth_asym_id.schema });
+    // Only chains that matched a mapping row have an entry in the returned map.
+    return mapping;
+}
+
+function getAtomicHierarchy(atom_site: AtomSite, sourceIndex: Column<number>, entities: Entities, chemicalComponentMap: Model['properties']['chemicalComponentMap'], format: ModelFormat, previous?: Model) {
     const hierarchyOffsets = findHierarchyOffsets(atom_site);
     const hierarchyData = createHierarchyData(atom_site, sourceIndex, hierarchyOffsets);
+    const chainOperatorMapping = createChainOperatorMappingAndSubstituteNames(hierarchyData, format);
 
     if (previous && isHierarchyDataEqual(previous.atomicHierarchy, hierarchyData)) {
         return {
             sameAsPrevious: true,
             hierarchy: previous.atomicHierarchy,
+            chainOperatorMapping
         };
     }
 
@@ -114,11 +176,11 @@ function getAtomicHierarchy(atom_site: AtomSite, sourceIndex: Column<number>, en
     const index = getAtomicIndex(hierarchyData, entities, hierarchySegments);
     const derived = getAtomicDerivedData(hierarchyData, index, chemicalComponentMap);
     const hierarchy: AtomicHierarchy = { ...hierarchyData, ...hierarchySegments, index, derived };
-    return { sameAsPrevious: false, hierarchy };
+    return { sameAsPrevious: false, hierarchy, chainOperatorMapping };
 }
 
-export function getAtomicHierarchyAndConformation(atom_site: AtomSite, sourceIndex: Column<number>, entities: Entities, chemicalComponentMap: Model['properties']['chemicalComponentMap'], previous?: Model) {
-    const { sameAsPrevious, hierarchy } = getAtomicHierarchy(atom_site, sourceIndex, entities, chemicalComponentMap, previous)
+export function getAtomicHierarchyAndConformation(atom_site: AtomSite, sourceIndex: Column<number>, entities: Entities, chemicalComponentMap: Model['properties']['chemicalComponentMap'], format: ModelFormat, previous?: Model) { // `format` is a new required parameter placed before the optional `previous`; it is forwarded to getAtomicHierarchy.
+    const { sameAsPrevious, hierarchy, chainOperatorMapping } = getAtomicHierarchy(atom_site, sourceIndex, entities, chemicalComponentMap, format, previous) // chainOperatorMapping is the per-chain operator map produced while building the hierarchy
     const conformation = getConformation(atom_site)
-    return { sameAsPrevious, hierarchy, conformation };
+    return { sameAsPrevious, hierarchy, conformation, chainOperatorMapping }; // the mapping is passed through unchanged to the caller
 }

+ 4 - 2
src/mol-model-formats/structure/basic/parser.ts

@@ -39,7 +39,7 @@ export async function createModels(data: BasicData, format: ModelFormat, ctx: Ru
 /** Standard atomic model */
 function createStandardModel(data: BasicData, atom_site: AtomSite, sourceIndex: Column<number>, entities: Entities, properties: Model['properties'], format: ModelFormat, previous?: Model): Model {
 
-    const atomic = getAtomicHierarchyAndConformation(atom_site, sourceIndex, entities, properties.chemicalComponentMap, previous);
+    const atomic = getAtomicHierarchyAndConformation(atom_site, sourceIndex, entities, properties.chemicalComponentMap, format, previous);
     const modelNum = atom_site.pdbx_PDB_model_num.value(0)
     if (previous && atomic.sameAsPrevious) {
         return {
@@ -75,6 +75,7 @@ function createStandardModel(data: BasicData, atom_site: AtomSite, sourceIndex:
         atomicHierarchy: atomic.hierarchy,
         atomicConformation: atomic.conformation,
         atomicRanges,
+        atomicChainOperatorMappinng: atomic.chainOperatorMapping,
         coarseHierarchy: coarse.hierarchy,
         coarseConformation: coarse.conformation,
         properties,
@@ -86,7 +87,7 @@ function createStandardModel(data: BasicData, atom_site: AtomSite, sourceIndex:
 
 /** Integrative model with atomic/coarse parts */
 function createIntegrativeModel(data: BasicData, ihm: CoarseData, properties: Model['properties'], format: ModelFormat): Model {
-    const atomic = getAtomicHierarchyAndConformation(ihm.atom_site, ihm.atom_site_sourceIndex, ihm.entities, properties.chemicalComponentMap);
+    const atomic = getAtomicHierarchyAndConformation(ihm.atom_site, ihm.atom_site_sourceIndex, ihm.entities, properties.chemicalComponentMap, format);
     const coarse = getCoarse(ihm, properties);
     const sequence = getSequence(data, ihm.entities, atomic.hierarchy, coarse.hierarchy)
     const atomicRanges = getAtomicRanges(atomic.hierarchy, ihm.entities, atomic.conformation, sequence)
@@ -113,6 +114,7 @@ function createIntegrativeModel(data: BasicData, ihm: CoarseData, properties: Mo
         atomicHierarchy: atomic.hierarchy,
         atomicConformation: atomic.conformation,
         atomicRanges,
+        atomicChainOperatorMappinng: atomic.chainOperatorMapping,
         coarseHierarchy: coarse.hierarchy,
         coarseConformation: coarse.conformation,
         properties,

+ 4 - 4
src/mol-model/structure/export/categories/atom_site_operator_mapping.ts

@@ -26,8 +26,8 @@ export const AtomSiteOperatorMappingSchema = {
         suffix: Column.Schema.Str(),
 
         // assembly
-        assembly_operator_id: Column.Schema.Str(),
-        assembly_operator_index: Column.Schema.Int(),
+        assembly_id: Column.Schema.Str(),
+        assembly_operator_id: Column.Schema.Int(),
 
         // symmetry
         symmetry_operator_index: Column.Schema.Int(),
@@ -48,8 +48,8 @@ const Fields = CifWriter.fields<number, Entry[], keyof (typeof AtomSiteOperatorM
     .str('suffix', (i, xs) => xs[i].operator.suffix)
     // assembly
     // TODO: include oper list as well?
-    .str('assembly_operator_id', (i, xs) => xs[i].operator.assembly?.id || '', { valueKind: asmValueKind })
-    .int('assembly_operator_index', (i, xs) => xs[i].operator.assembly?.operId || 0, { valueKind: asmValueKind })
+    .str('assembly_id', (i, xs) => xs[i].operator.assembly?.id || '', { valueKind: asmValueKind })
+    .int('assembly_operator_id', (i, xs) => xs[i].operator.assembly?.operId || 0, { valueKind: asmValueKind })
     // symmetry
     .int('symmetry_operator_index', (i, xs) => xs[i].operator.spgrOp, { valueKind: symmetryValueKind })
     .vec('symmetry_hkl', [(i, xs) => xs[i].operator.hkl[0], (i, xs) => xs[i].operator.hkl[1], (i, xs) => xs[i].operator.hkl[2]], { valueKind: symmetryValueKind })

+ 3 - 0
src/mol-model/structure/model/model.ts

@@ -22,6 +22,8 @@ import { Task } from '../../../mol-task';
 import { IndexPairBonds } from '../../../mol-model-formats/structure/property/bonds/index-pair';
 import { createModels } from '../../../mol-model-formats/structure/basic/parser';
 import { MmcifFormat } from '../../../mol-model-formats/structure/mmcif';
+import { ChainIndex } from './indexing';
+import { SymmetryOperator } from '../../../mol-math/geometry';
 
 /**
  * Interface to the "source data" of the molecule.
@@ -60,6 +62,7 @@ export interface Model extends Readonly<{
     atomicHierarchy: AtomicHierarchy,
     atomicConformation: AtomicConformation,
     atomicRanges: AtomicRanges,
+    atomicChainOperatorMappinng: Map<ChainIndex, SymmetryOperator>,
 
     properties: {
         /** map that holds details about unobserved or zero occurrence residues */

+ 17 - 11
src/mol-model/structure/structure/structure.ts

@@ -641,11 +641,13 @@ namespace Structure {
      */
     export function ofModel(model: Model): Structure {
         const chains = model.atomicHierarchy.chainAtomSegments;
-        const { index } = model.atomicHierarchy
-        const { auth_asym_id } = model.atomicHierarchy.chains
+        const { index } = model.atomicHierarchy;
+        const { auth_asym_id } = model.atomicHierarchy.chains;
+        const { atomicChainOperatorMappinng } = model;
         const builder = new StructureBuilder({ label: model.label });
 
         for (let c = 0 as ChainIndex; c < chains.count; c++) {
+            const operator = atomicChainOperatorMappinng.get(c) || SymmetryOperator.Default;
             const start = chains.offsets[c];
 
             // set to true for chains that consist of "single atom residues",
@@ -661,11 +663,15 @@ namespace Structure {
                 singleAtomResidues = true
                 const e1 = index.getEntityFromChain(c);
                 const e2 = index.getEntityFromChain(c + 1 as ChainIndex);
-                if (e1 !== e2) break
+                if (e1 !== e2) break;
 
                 const a1 = auth_asym_id.value(c);
                 const a2 = auth_asym_id.value(c + 1);
-                if (a1 !== a2) break
+                if (a1 !== a2) break;
+
+                const op1 = atomicChainOperatorMappinng.get(c);
+                const op2 = atomicChainOperatorMappinng.get(c + 1 as ChainIndex);
+                if (op1 !== op2) break;
 
                 multiChain = true
                 c++;
@@ -674,12 +680,12 @@ namespace Structure {
             const elements = SortedArray.ofBounds(start as ElementIndex, chains.offsets[c + 1] as ElementIndex);
 
             if (singleAtomResidues) {
-                partitionAtomicUnitByAtom(model, elements, builder, multiChain);
+                partitionAtomicUnitByAtom(model, elements, builder, multiChain, operator);
             } else if (elements.length > 200000 || isWaterChain(model, c)) {
                 // split up very large chains e.g. lipid bilayers, micelles or water with explicit H
-                partitionAtomicUnitByResidue(model, elements, builder, multiChain);
+                partitionAtomicUnitByResidue(model, elements, builder, multiChain, operator);
             } else {
-                builder.addUnit(Unit.Kind.Atomic, model, SymmetryOperator.Default, elements, multiChain ? Unit.Trait.MultiChain : Unit.Trait.None);
+                builder.addUnit(Unit.Kind.Atomic, model, operator, elements, multiChain ? Unit.Trait.MultiChain : Unit.Trait.None);
             }
         }
 
@@ -701,7 +707,7 @@ namespace Structure {
         return model.entities.data.type.value(e) === 'water';
     }
 
-    function partitionAtomicUnitByAtom(model: Model, indices: SortedArray, builder: StructureBuilder, multiChain: boolean) {
+    function partitionAtomicUnitByAtom(model: Model, indices: SortedArray, builder: StructureBuilder, multiChain: boolean, operator: SymmetryOperator) {
         const { x, y, z } = model.atomicConformation;
         const position = { x, y, z, indices }
         const lookup = GridLookup3D(position, getBoundary(position), 8192);
@@ -716,13 +722,13 @@ namespace Structure {
             for (let j = 0, _j = count[i]; j < _j; j++) {
                 set[j] = indices[array[start + j]];
             }
-            builder.addUnit(Unit.Kind.Atomic, model, SymmetryOperator.Default, SortedArray.ofSortedArray(set), traits);
+            builder.addUnit(Unit.Kind.Atomic, model, operator, SortedArray.ofSortedArray(set), traits);
         }
         builder.endChainGroup();
     }
 
     // keeps atoms of residues together
-    function partitionAtomicUnitByResidue(model: Model, indices: SortedArray, builder: StructureBuilder, multiChain: boolean) {
+    function partitionAtomicUnitByResidue(model: Model, indices: SortedArray, builder: StructureBuilder, multiChain: boolean, operator: SymmetryOperator) {
         const { residueAtomSegments } = model.atomicHierarchy
 
         const startIndices: number[] = []
@@ -755,7 +761,7 @@ namespace Structure {
                     set[set.length] = l;
                 }
             }
-            builder.addUnit(Unit.Kind.Atomic, model, SymmetryOperator.Default, SortedArray.ofSortedArray(new Int32Array(set)), traits);
+            builder.addUnit(Unit.Kind.Atomic, model, operator, SortedArray.ofSortedArray(new Int32Array(set)), traits);
         }
         builder.endChainGroup();
     }