Explorar el Código

compute polymer ranges when building model from mmcif

Alexander Rose hace 6 años
padre
commit
863db31429

+ 30 - 25
src/mol-model/structure/model/formats/mmcif.ts

@@ -70,6 +70,7 @@ function getNcsOperators(format: mmCIF_Format) {
     }
     return opers;
 }
+
 function getModifiedResidueNameMap(format: mmCIF_Format) {
     const data = format.data.pdbx_struct_mod_residue;
     const map = new Map<string, string>();
@@ -121,8 +122,22 @@ function getChemicalComponentMap(format: mmCIF_Format) {
     return map
 }
 
-function createStandardModel(format: mmCIF_Format, atom_site: AtomSite, entities: Entities, previous?: Model): Model {
-    const atomic = getAtomicHierarchyAndConformation(format, atom_site, entities, previous);
+export interface FormatData {
+    modifiedResidueNameMap: Map<string, string>
+    asymIdSerialMap: Map<string, number>
+    chemicalComponentMap: Map<string, ChemicalComponent>
+}
+
+function getFormatData(format: mmCIF_Format): FormatData {
+    return {
+        modifiedResidueNameMap: getModifiedResidueNameMap(format),
+        asymIdSerialMap: getAsymIdSerialMap(format),
+        chemicalComponentMap: getChemicalComponentMap(format)
+    }
+}
+
+function createStandardModel(format: mmCIF_Format, atom_site: AtomSite, entities: Entities, formatData: FormatData, previous?: Model): Model {
+    const atomic = getAtomicHierarchyAndConformation(format, atom_site, entities, formatData, previous);
     if (previous && atomic.sameAsPrevious) {
         return { ...previous, atomicConformation: atomic.conformation };
     }
@@ -132,10 +147,6 @@ function createStandardModel(format: mmCIF_Format, atom_site: AtomSite, entities
         ? format.data.entry.id.value(0)
         : format.data._name;
 
-    const modifiedResidueNameMap = getModifiedResidueNameMap(format);
-    const asymIdSerialMap = getAsymIdSerialMap(format)
-    const chemicalComponentMap = getChemicalComponentMap(format)
-
     return {
         id: UUID.create(),
         label,
@@ -143,16 +154,14 @@ function createStandardModel(format: mmCIF_Format, atom_site: AtomSite, entities
         modelNum: atom_site.pdbx_PDB_model_num.value(0),
         entities,
         symmetry: getSymmetry(format),
-        sequence: getSequence(format.data, entities, atomic.hierarchy, modifiedResidueNameMap),
+        sequence: getSequence(format.data, entities, atomic.hierarchy, formatData.modifiedResidueNameMap),
         atomicHierarchy: atomic.hierarchy,
         atomicConformation: atomic.conformation,
         coarseHierarchy: coarse.hierarchy,
         coarseConformation: coarse.conformation,
         properties: {
             secondaryStructure: getSecondaryStructureMmCif(format.data, atomic.hierarchy),
-            modifiedResidueNameMap,
-            asymIdSerialMap,
-            chemicalComponentMap
+            ...formatData
         },
         customProperties: new CustomProperties(),
         _staticPropertyData: Object.create(null),
@@ -160,12 +169,9 @@ function createStandardModel(format: mmCIF_Format, atom_site: AtomSite, entities
     };
 }
 
-function createModelIHM(format: mmCIF_Format, data: IHMData): Model {
-    const atomic = getAtomicHierarchyAndConformation(format, data.atom_site, data.entities);
-    const coarse = getIHMCoarse(data);
-    const modifiedResidueNameMap = getModifiedResidueNameMap(format);
-    const asymIdSerialMap = getAsymIdSerialMap(format)
-    const chemicalComponentMap = getChemicalComponentMap(format)
+function createModelIHM(format: mmCIF_Format, data: IHMData, formatData: FormatData): Model {
+    const atomic = getAtomicHierarchyAndConformation(format, data.atom_site, data.entities, formatData);
+    const coarse = getIHMCoarse(data, formatData);
 
     return {
         id: UUID.create(),
@@ -174,16 +180,14 @@ function createModelIHM(format: mmCIF_Format, data: IHMData): Model {
         modelNum: data.model_id,
         entities: data.entities,
         symmetry: getSymmetry(format),
-        sequence: getSequence(format.data, data.entities, atomic.hierarchy, modifiedResidueNameMap),
+        sequence: getSequence(format.data, data.entities, atomic.hierarchy, formatData.modifiedResidueNameMap),
         atomicHierarchy: atomic.hierarchy,
         atomicConformation: atomic.conformation,
         coarseHierarchy: coarse.hierarchy,
         coarseConformation: coarse.conformation,
         properties: {
             secondaryStructure: getSecondaryStructureMmCif(format.data, atomic.hierarchy),
-            modifiedResidueNameMap,
-            asymIdSerialMap,
-            chemicalComponentMap
+            ...formatData
         },
         customProperties: new CustomProperties(),
         _staticPropertyData: Object.create(null),
@@ -204,7 +208,7 @@ function findModelEnd(num: Column<number>, startIndex: number) {
     return endIndex;
 }
 
-async function readStandard(ctx: RuntimeContext, format: mmCIF_Format) {
+async function readStandard(ctx: RuntimeContext, format: mmCIF_Format, formatData: FormatData) {
     const atomCount = format.data.atom_site._rowCount;
     const entities: Entities = { data: format.data.entity, getEntityIndex: Column.createIndexer(format.data.entity.id) };
 
@@ -213,7 +217,7 @@ async function readStandard(ctx: RuntimeContext, format: mmCIF_Format) {
     while (modelStart < atomCount) {
         const modelEnd = findModelEnd(format.data.atom_site.pdbx_PDB_model_num, modelStart);
         const atom_site = await sortAtomSite(ctx, format.data.atom_site, modelStart, modelEnd);
-        const model = createStandardModel(format, atom_site, entities, models.length > 0 ? models[models.length - 1] : void 0);
+        const model = createStandardModel(format, atom_site, entities, formatData, models.length > 0 ? models[models.length - 1] : void 0);
         attachProps(model);
         models.push(model);
         modelStart = modelEnd;
@@ -235,7 +239,7 @@ function splitTable<T extends Table<any>>(table: T, col: Column<number>) {
     return ret;
 }
 
-async function readIHM(ctx: RuntimeContext, format: mmCIF_Format) {
+async function readIHM(ctx: RuntimeContext, format: mmCIF_Format, formatData: FormatData) {
     const { ihm_model_list } = format.data;
     const entities: Entities = { data: format.data.entity, getEntityIndex: Column.createIndexer(format.data.entity.id) };
 
@@ -257,7 +261,7 @@ async function readIHM(ctx: RuntimeContext, format: mmCIF_Format) {
             ihm_sphere_obj_site: sphere_sites.has(id) ? sphere_sites.get(id)! : Table.window(format.data.ihm_sphere_obj_site, format.data.ihm_sphere_obj_site._schema, 0, 0),
             ihm_gaussian_obj_site: gauss_sites.has(id) ? gauss_sites.get(id)! : Table.window(format.data.ihm_gaussian_obj_site, format.data.ihm_gaussian_obj_site._schema, 0, 0)
         };
-        const model = createModelIHM(format, data);
+        const model = createModelIHM(format, data, formatData);
         attachProps(model);
         models.push(model);
     }
@@ -266,9 +270,10 @@ async function readIHM(ctx: RuntimeContext, format: mmCIF_Format) {
 }
 
 function buildModels(format: mmCIF_Format): Task<ReadonlyArray<Model>> {
+    const formatData = getFormatData(format)
     return Task.create('Create mmCIF Model', async ctx => {
         const isIHM = format.data.ihm_model_list._rowCount > 0;
-        return isIHM ? await readIHM(ctx, format) : await readStandard(ctx, format);
+        return isIHM ? await readIHM(ctx, format, formatData) : await readStandard(ctx, format, formatData);
     });
 }
 

+ 9 - 8
src/mol-model/structure/model/formats/mmcif/atomic.ts

@@ -17,13 +17,16 @@ import { ElementSymbol } from '../../types';
 import { Entities } from '../../properties/common';
 
 import mmCIF_Format = Format.mmCIF
+import { getAtomicRanges } from '../../properties/utils/atomic-ranges';
+import { FormatData } from '../mmcif';
+
 type AtomSite = mmCIF_Database['atom_site']
 
 function findHierarchyOffsets(atom_site: AtomSite) {
-    if (atom_site._rowCount === 0) return { residues: [], polymers: [], chains: [] };
+    if (atom_site._rowCount === 0) return { residues: [], chains: [] };
 
     const start = 0, end = atom_site._rowCount;
-    const residues = [start as Element], chains = [start as Element], polymers = [start as Element];
+    const residues = [start as Element], chains = [start as Element];
 
     const { label_entity_id, label_asym_id, label_seq_id, auth_seq_id, pdbx_PDB_ins_code, label_comp_id } = atom_site;
 
@@ -34,13 +37,11 @@ function findHierarchyOffsets(atom_site: AtomSite) {
             || !auth_seq_id.areValuesEqual(i - 1, i)
             || !pdbx_PDB_ins_code.areValuesEqual(i - 1, i)
             || !label_comp_id.areValuesEqual(i - 1, i);
-        const newPolymer = newResidue && label_seq_id.value(i - 1) !== label_seq_id.value(i) - 1;
 
         if (newResidue) residues[residues.length] = i as Element;
-        if (newPolymer) polymers[polymers.length] = i as Element;
         if (newChain) chains[chains.length] = i as Element;
     }
-    return { residues, polymers, chains };
+    return { residues, chains };
 }
 
 function createHierarchyData(atom_site: AtomSite, offsets: { residues: ArrayLike<number>, chains: ArrayLike<number> }): AtomicData {
@@ -78,7 +79,7 @@ function isHierarchyDataEqual(a: AtomicData, b: AtomicData) {
         && Table.areEqual(a.atoms as Table<AtomsSchema>, b.atoms as Table<AtomsSchema>)
 }
 
-export function getAtomicHierarchyAndConformation(format: mmCIF_Format, atom_site: AtomSite, entities: Entities, previous?: Model) {
+export function getAtomicHierarchyAndConformation(format: mmCIF_Format, atom_site: AtomSite, entities: Entities, formatData: FormatData, previous?: Model) {
     const hierarchyOffsets = findHierarchyOffsets(atom_site);
     const hierarchyData = createHierarchyData(atom_site, hierarchyOffsets);
 
@@ -93,11 +94,11 @@ export function getAtomicHierarchyAndConformation(format: mmCIF_Format, atom_sit
     const hierarchySegments: AtomicSegments = {
         residueSegments: Segmentation.ofOffsets(hierarchyOffsets.residues, Interval.ofBounds(0, atom_site._rowCount)),
         chainSegments: Segmentation.ofOffsets(hierarchyOffsets.chains, Interval.ofBounds(0, atom_site._rowCount)),
-        polymerSegments: Segmentation.ofOffsets(hierarchyOffsets.polymers, Interval.ofBounds(0, atom_site._rowCount)),
     }
 
     const hierarchyKeys = getAtomicKeys(hierarchyData, entities, hierarchySegments);
-    const hierarchy: AtomicHierarchy = { ...hierarchyData, ...hierarchyKeys, ...hierarchySegments };
+    const hierarchyRanges = getAtomicRanges(hierarchyData, hierarchySegments, formatData.chemicalComponentMap);
+    const hierarchy: AtomicHierarchy = { ...hierarchyData, ...hierarchyKeys, ...hierarchySegments, ...hierarchyRanges };
     return {
         sameAsPrevious: false,
         hierarchy,

+ 9 - 10
src/mol-model/structure/model/formats/mmcif/ihm.ts

@@ -13,6 +13,8 @@ import { UUID } from 'mol-util';
 import { Segmentation, Interval } from 'mol-data/int';
 import { Mat3, Tensor } from 'mol-math/linear-algebra';
 import { Element } from '../../../structure'
+import { getCoarseRanges } from '../../properties/utils/coarse-ranges';
+import { FormatData } from '../mmcif';
 
 export interface IHMData {
     model_id: number,
@@ -25,7 +27,7 @@ export interface IHMData {
 
 export const EmptyIHMCoarse = { hierarchy: CoarseHierarchy.Empty, conformation: void 0 as any }
 
-export function getIHMCoarse(data: IHMData): { hierarchy: CoarseHierarchy, conformation: CoarseConformation } {
+export function getIHMCoarse(data: IHMData, formatData: FormatData): { hierarchy: CoarseHierarchy, conformation: CoarseConformation } {
     const { ihm_sphere_obj_site, ihm_gaussian_obj_site } = data;
 
     if (ihm_sphere_obj_site._rowCount === 0 && ihm_gaussian_obj_site._rowCount === 0) return EmptyIHMCoarse;
@@ -33,16 +35,18 @@ export function getIHMCoarse(data: IHMData): { hierarchy: CoarseHierarchy, confo
     const sphereData = getData(ihm_sphere_obj_site);
     const sphereConformation = getSphereConformation(ihm_sphere_obj_site);
     const sphereKeys = getCoarseKeys(sphereData, data.entities);
+    const sphereRanges = getCoarseRanges(sphereData, formatData.chemicalComponentMap);
 
     const gaussianData = getData(ihm_gaussian_obj_site);
     const gaussianConformation = getGaussianConformation(ihm_gaussian_obj_site);
     const gaussianKeys = getCoarseKeys(gaussianData, data.entities);
+    const gaussianRanges = getCoarseRanges(gaussianData, formatData.chemicalComponentMap);
 
     return {
         hierarchy: {
             isDefined: true,
-            spheres: { ...sphereData, ...sphereKeys },
-            gaussians: { ...gaussianData, ...gaussianKeys },
+            spheres: { ...sphereData, ...sphereKeys, ...sphereRanges },
+            gaussians: { ...gaussianData, ...gaussianKeys, ...gaussianRanges },
         },
         conformation: {
             id: UUID.create(),
@@ -81,19 +85,14 @@ function getGaussianConformation(data: mmCIF['ihm_gaussian_obj_site']): CoarseGa
 }
 
 function getSegments(asym_id: Column<string>, seq_id_begin: Column<number>, seq_id_end: Column<number>) {
-    const polymerOffsets = [0 as Element], chainOffsets = [0 as Element];
+    const chainOffsets = [0 as Element];
     for (let i = 1, _i = asym_id.rowCount; i < _i; i++) {
         const newChain = !asym_id.areValuesEqual(i - 1, i);
-        const newPolymer = newChain
-            || seq_id_end.value(i - 1) !== seq_id_begin.value(i) - 1;
-
-        if (newPolymer) polymerOffsets[polymerOffsets.length] = i as Element;
         if (newChain) chainOffsets[chainOffsets.length] = i as Element;
     }
 
     return {
-        chainSegments: Segmentation.ofOffsets(chainOffsets, Interval.ofBounds(0, asym_id.rowCount)),
-        polymerSegments: Segmentation.ofOffsets(polymerOffsets, Interval.ofBounds(0, asym_id.rowCount))
+        chainSegments: Segmentation.ofOffsets(chainOffsets, Interval.ofBounds(0, asym_id.rowCount))
     }
 }
 

+ 7 - 6
src/mol-model/structure/model/properties/atomic/hierarchy.ts

@@ -9,6 +9,7 @@ import { Segmentation } from 'mol-data/int'
 import { mmCIF_Schema as mmCIF } from 'mol-io/reader/cif/schema/mmcif'
 import { ElementSymbol } from '../../types'
 import { Element } from '../../../structure'
+import SortedRanges from 'mol-data/int/sorted-ranges';
 
 export const AtomsSchema = {
     type_symbol: Column.Schema.Aliased<ElementSymbol>(mmCIF.atom_site.type_symbol),
@@ -52,11 +53,6 @@ export interface AtomicSegments {
     residueSegments: Segmentation<Element>,
     /** Maps chainIndex to a range of atoms [segments[cI], segments[cI + 1]) */
     chainSegments: Segmentation<Element>,
-    /**
-     * bonded/connected stretches of polymer chains, i.e. a chain will be
-     * broken into multiple polymer segments if there are missing residues
-     */
-    polymerSegments: Segmentation<Element>
     // TODO: include entity segments?
 }
 
@@ -78,5 +74,10 @@ export interface AtomicKeys {
     findResidueKey(entityId: string, label_asym_id: string, label_comp_id: string, auth_seq_id: number, pdbx_PDB_ins_code: string): number
 }
 
-type _Hierarchy = AtomicData & AtomicSegments & AtomicKeys
+export interface AtomicRanges {
+    polymerRanges: SortedRanges<Element>
+    gapRanges: SortedRanges<Element>
+}
+
+type _Hierarchy = AtomicData & AtomicSegments & AtomicKeys & AtomicRanges
 export interface AtomicHierarchy extends _Hierarchy { }

+ 7 - 6
src/mol-model/structure/model/properties/coarse/hierarchy.ts

@@ -8,6 +8,7 @@
 import { Column } from 'mol-data/db'
 import { Segmentation } from 'mol-data/int';
 import { Element } from '../../../structure'
+import SortedRanges from 'mol-data/int/sorted-ranges';
 
 export interface CoarsedElementKeys {
     // assign a key to each element
@@ -28,14 +29,14 @@ export interface CoarseElementData {
     seq_id_end: Column<number>,
 
     chainSegments: Segmentation<Element>,
-    /**
-     * bonded/connected stretches of polymer chains, i.e. a chain will be
-     * broken into multiple polymer segments if there are missing residues
-     */
-    polymerSegments: Segmentation<Element>
 }
 
-export type CoarseElements = CoarsedElementKeys & CoarseElementData
+export interface CoarseRanges {
+    polymerRanges: SortedRanges<Element>
+    gapRanges: SortedRanges<Element>
+}
+
+export type CoarseElements = CoarsedElementKeys & CoarseElementData & CoarseRanges
 
 export interface CoarseHierarchy {
     isDefined: boolean,

+ 69 - 0
src/mol-model/structure/model/properties/utils/atomic-ranges.ts

@@ -0,0 +1,69 @@
+/**
+ * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ */
+
+import { AtomicSegments } from '../atomic';
+import { AtomicData, AtomicRanges } from '../atomic/hierarchy';
+import { Segmentation, Interval } from 'mol-data/int';
+import SortedRanges from 'mol-data/int/sorted-ranges';
+import { Element } from '../../../../structure';
+import { ChemicalComponent } from '../chemical-component';
+import { MoleculeType, isPolymer } from '../../types';
+
+export function getAtomicRanges(data: AtomicData, segments: AtomicSegments, chemicalComponentMap: Map<string, ChemicalComponent>): AtomicRanges {
+    const polymerRanges: number[] = []
+    const gapRanges: number[] = []
+    const chainIt = Segmentation.transientSegments(segments.chainSegments, Interval.ofBounds(0, data.atoms._rowCount))
+    const residueIt = Segmentation.transientSegments(segments.residueSegments, Interval.ofBounds(0, data.atoms._rowCount))
+    const { label_seq_id, label_comp_id } = data.residues
+
+    let prevSeqId: number
+    let prevEnd: number
+    let startIndex: number
+
+    while (chainIt.hasNext) {
+        const chainSegment = chainIt.move();
+        residueIt.setSegment(chainSegment);
+        prevSeqId = -1
+        prevEnd = -1
+        startIndex = -1
+
+        while (residueIt.hasNext) {
+            const residueSegment = residueIt.move();
+            const residueIndex = residueSegment.index
+            const cc = chemicalComponentMap.get(label_comp_id.value(residueIndex))
+            const moleculeType = cc ? cc.moleculeType : MoleculeType.unknown
+            const seqId = label_seq_id.value(residueIndex)
+            if (isPolymer(moleculeType)) {
+                if (startIndex !== -1) {
+                    if (seqId !== prevSeqId + 1) {
+                        polymerRanges.push(startIndex, prevEnd - 1)
+                        gapRanges.push(prevEnd - 1, residueSegment.start)
+                        startIndex = residueSegment.start
+                    } else if (!residueIt.hasNext) {
+                        polymerRanges.push(startIndex, residueSegment.end - 1)
+                    }
+                } else {
+                    startIndex = residueSegment.start // start polymer
+                }
+            } else {
+                if (startIndex !== -1) {
+                    polymerRanges.push(startIndex, prevEnd - 1)
+                    startIndex = -1
+                }
+            }
+            
+            prevEnd = residueSegment.end
+            prevSeqId = seqId
+        }
+    }
+
+    console.log(polymerRanges, gapRanges)
+
+    return {
+        polymerRanges: SortedRanges.ofSortedRanges(polymerRanges as Element[]),
+        gapRanges: SortedRanges.ofSortedRanges(gapRanges as Element[])
+    }
+}

+ 52 - 0
src/mol-model/structure/model/properties/utils/coarse-ranges.ts

@@ -0,0 +1,52 @@
+/**
+ * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ */
+
+import { CoarseRanges, CoarseElementData } from '../coarse/hierarchy';
+import { Segmentation, Interval } from 'mol-data/int';
+import SortedRanges from 'mol-data/int/sorted-ranges';
+import { Element } from '../../../../structure';
+import { ChemicalComponent } from '../chemical-component';
+
+// TODO assumes all coarse elements are part of a polymer
+
+export function getCoarseRanges(data: CoarseElementData, chemicalComponentMap: Map<string, ChemicalComponent>): CoarseRanges {
+    const polymerRanges: number[] = []
+    const gapRanges: number[] = []
+    const chainIt = Segmentation.transientSegments(data.chainSegments, Interval.ofBounds(0, data.count))
+
+    const { seq_id_begin, seq_id_end } = data
+
+    while (chainIt.hasNext) {
+        const { start, end } = chainIt.move();
+        console.log('chain', start, end)
+
+        let startIndex = -1
+        let prevSeqEnd = -1
+        for (let i = start; i < end; ++i) {
+            const seqEnd = seq_id_end.value(i)
+            if (i === start) {
+                startIndex = i
+                prevSeqEnd = seq_id_end.value(i)
+            } else {
+                if (prevSeqEnd !== seq_id_begin.value(i) - 1) {
+                    polymerRanges.push(startIndex, i - 1)
+                    startIndex = i
+                }
+            }
+            if (i === end - 1) {
+                polymerRanges.push(startIndex, i)
+            }
+            prevSeqEnd = seqEnd
+        }
+    }
+
+    console.log(polymerRanges, gapRanges)
+
+    return {
+        polymerRanges: SortedRanges.ofSortedRanges(polymerRanges as Element[]),
+        gapRanges: SortedRanges.ofSortedRanges(gapRanges as Element[])
+    }
+}