Browse Source

implemented Sequence.fromAtomicHierarchy

Alexander Rose 6 years ago
parent
commit
99cb7a95cd

+ 11 - 1
src/mol-model/structure/model/formats/mmcif/sequence.ts

@@ -10,8 +10,18 @@ import { Column } from 'mol-data/db';
 import { AtomicHierarchy } from '../../properties/atomic';
 import { Entities } from '../../properties/common';
 
+// TODO how to handle microheterogeneity
+//    see http://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Categories/entity_poly_seq.html
+//
+// Data items in the ENTITY_POLY_SEQ category specify the sequence
+// of monomers in a polymer. Allowance is made for the possibility
+// of microheterogeneity in a sample by allowing a given sequence
+// number to be correlated with more than one monomer ID. The
+// corresponding ATOM_SITE entries should reflect this
+// heterogeneity.
+
 export function getSequence(cif: mmCIF, entities: Entities, hierarchy: AtomicHierarchy): Sequence {
-    if (!cif.entity_poly_seq._rowCount) return Sequence.fromAtomicHierarchy(hierarchy);
+    if (!cif.entity_poly_seq._rowCount) return Sequence.fromAtomicHierarchy(entities, hierarchy);
 
     const { entity_id, num, mon_id } = cif.entity_poly_seq;
 

+ 30 - 3
src/mol-model/structure/model/properties/sequence.ts

@@ -6,6 +6,7 @@
 
 import { Column } from 'mol-data/db'
 import { AtomicHierarchy } from './atomic/hierarchy';
+import { Entities } from './common';
 
 interface Sequence {
     readonly byEntityKey: { [key: number]: Sequence.Entity }
@@ -19,10 +20,36 @@ namespace Sequence {
         readonly compId: Column<string>
     }
 
-    export function fromAtomicHierarchy(hierarchy: AtomicHierarchy): Sequence {
-        // const { label_comp_id } = hierarchy.residues;
+    export function fromAtomicHierarchy(entities: Entities, hierarchy: AtomicHierarchy): Sequence {
+        const { label_entity_id } = hierarchy.chains
+        const { label_comp_id, label_seq_id } = hierarchy.residues
+        const { chainSegments, residueSegments } = hierarchy
 
-        throw 'not implemented';
+        const byEntityKey: Sequence['byEntityKey'] = {};
+
+        const chainCount = hierarchy.chains._rowCount
+        for (let i = 0; i < chainCount; ++i) {
+            const entityId = label_entity_id.value(i)
+            const entityIndex = entities.getEntityIndex(entityId)
+            // entity type is looked up by entityIndex (not chain index i); TODO only for polymers, mirroring _entity_poly_seq, ok???
+            if (entities.data.type.value(entityIndex) !== 'polymer') continue
+
+            const entityKey = hierarchy.entityKey[entityIndex]
+            if (byEntityKey[entityKey] !== undefined) continue
+
+            const start = residueSegments.segmentMap[chainSegments.segments[i]]
+            let end = residueSegments.segmentMap[chainSegments.segments[i + 1]]
+            // TODO better way to handle end???
+            if (end === undefined) end = hierarchy.residues._rowCount
+
+            byEntityKey[entityKey] = {
+                entityId,
+                compId: Column.window(label_comp_id, start, end),
+                num: Column.window(label_seq_id, start, end)
+            }
+        }
+
+        return { byEntityKey }
     }
 }