Browse Source

added Model.sequence

David Sehnal 7 years ago
parent
commit
22d208ff33

+ 5 - 0
data/mmcif-field-names.csv

@@ -58,6 +58,11 @@ entity.pdbx_mutation
 entity.pdbx_fragment
 entity.pdbx_ec
 
+entity_poly_seq.entity_id
+entity_poly_seq.num
+entity_poly_seq.mon_id
+entity_poly_seq.hetero
+
 entry.id
 
 exptl.entry_id

+ 15 - 3
src/apps/structure-info/index.ts

@@ -39,7 +39,7 @@ export function atomLabel(model: Model, aI: number) {
 }
 
 
-function printBonds(structure: Structure) {
+export function printBonds(structure: Structure) {
     const { units, elements } = structure;
     const unitIds = ElementSet.unitIndices(elements);
 
@@ -67,12 +67,24 @@ function printBonds(structure: Structure) {
     }
 }
 
+/** Logs one line per entity: its id, then the (first, last) sequence numbers and (first, last) component ids. */
+export function printSequence(model: Model) {
+    const sequences = model.sequence.byEntityKey;
+    Object.keys(sequences).forEach(entityKey => {
+        const { entityId, num, compId } = sequences[+entityKey];
+        const numRange = `(${num.value(0)}, ${num.value(num.rowCount - 1)})`;
+        const compRange = `(${compId.value(0)}, ${compId.value(compId.rowCount - 1)})`;
+        console.log(`${entityId} ${numRange} ${compRange}`);
+    });
+}
+
 async function run(pdb: string) {
     const mmcif = await getPdb(pdb)
     const models = Model.create({ kind: 'mmCIF', data: mmcif });
-    const structure = Structure.ofModel(models[0])
+    //const structure = Structure.ofModel(models[0])
     // console.log(structure)
-    printBonds(structure)
+    // printBonds(structure)
+    printSequence(models[0]);
 }
 
 const parser = new argparse.ArgumentParser({

+ 1 - 1
src/mol-data/db/column.ts

@@ -169,7 +169,7 @@ function createFirstIndexMapOfColumn<T>(c: Column<T>): Map<T, number> {
     const map = new Map<T, number>();
     for (let i = 0, _i = c.rowCount; i < _i; i++) {
         const v = c.value(i);
-        if (!map.has(v)) return map.set(c.value(i), i);
+        if (!map.has(v)) map.set(c.value(i), i);
     }
     return map;
 }

+ 6 - 0
src/mol-io/reader/cif/schema/mmcif.ts

@@ -86,6 +86,12 @@ export const mmCIF_Schema = {
         pdbx_fragment: str,
         pdbx_ec: List(',', x => x),
     },
+    entity_poly_seq: {
+        entity_id: str,
+        num: int,
+        mon_id: str,
+        hetero: Aliased<'no' | 'n' | 'yes' | 'y'>(str)
+    },
     entry: {
         id: str,
     },

+ 4 - 1
src/mol-model/structure/model/formats/gro.ts

@@ -18,6 +18,7 @@ import { guessElement } from '../utils/guess-element'
 import { ElementSymbol} from '../types'
 
 import gro_Format = Format.gro
+import Sequence from '../properties/sequence';
 
 type HierarchyOffsets = { residues: ArrayLike<number>, chains: ArrayLike<number> }
 
@@ -112,11 +113,13 @@ function createModel(format: gro_Format, modelNum: number, previous?: Model): Mo
         chainSegments: Segmentation.ofOffsets(hierarchyOffsets.chains, bounds),
     }
     const hierarchyKeys = findHierarchyKeys(hierarchyData, hierarchySegments);
+    const hierarchy = { ...hierarchyData, ...hierarchyKeys, ...hierarchySegments };
     return {
         id: UUID.create(),
         sourceData: format,
         modelNum,
-        hierarchy: { ...hierarchyData, ...hierarchyKeys, ...hierarchySegments },
+        hierarchy,
+        sequence: Sequence.fromHierarchy(hierarchy),
         conformation: getConformation(structure.atoms),
         coarseGrained: CoarseGrained.Empty,
         symmetry: { assemblies: [] },

+ 6 - 1
src/mol-model/structure/model/formats/mmcif.ts

@@ -18,6 +18,7 @@ import { ElementSymbol} from '../types'
 import createAssemblies from './mmcif/assembly'
 
 import mmCIF_Format = Format.mmCIF
+import { getSequence } from './mmcif/sequence';
 
 function findModelBounds({ data }: mmCIF_Format, startIndex: number) {
     const num = data.atom_site.pdbx_PDB_model_num;
@@ -106,11 +107,15 @@ function createModel(format: mmCIF_Format, bounds: Interval, previous?: Model):
         chainSegments: Segmentation.ofOffsets(hierarchyOffsets.chains, bounds),
     }
     const hierarchyKeys = findHierarchyKeys(hierarchyData, hierarchySegments);
+
+    const hierarchy = { ...hierarchyData, ...hierarchyKeys, ...hierarchySegments };
+
     return {
         id: UUID.create(),
         sourceData: format,
         modelNum: format.data.atom_site.pdbx_PDB_model_num.value(Interval.start(bounds)),
-        hierarchy: { ...hierarchyData, ...hierarchyKeys, ...hierarchySegments },
+        hierarchy,
+        sequence: getSequence(format.data, hierarchy),
         conformation: getConformation(format, bounds),
         coarseGrained: CoarseGrained.Empty,
         symmetry: getSymmetry(format),

+ 31 - 0
src/mol-model/structure/model/formats/mmcif/sequence.ts

@@ -0,0 +1,31 @@
+/**
+ * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import { mmCIF_Database as mmCIF } from 'mol-io/reader/cif/schema/mmcif'
+import Sequence from '../../properties/sequence'
+import { Column } from 'mol-data/db';
+import { Hierarchy } from '../../properties/hierarchy';
+
+/** Builds per-entity sequences from runs of equal entity_id in entity_poly_seq; defers to Sequence.fromHierarchy if it has no rows. */
+export function getSequence(cif: mmCIF, hierarchy: Hierarchy): Sequence {
+    const category = cif.entity_poly_seq;
+    if (!category._rowCount) return Sequence.fromHierarchy(hierarchy);
+
+    const { entity_id, num, mon_id } = category;
+    const rowCount = entity_id.rowCount;
+    const byEntityKey: Sequence['byEntityKey'] = {};
+
+    for (let start = 0, end = 0; start < rowCount; start = end) {
+        // Move `end` to one past the last row of the run that begins at `start`.
+        end = start + 1;
+        while (end < rowCount && entity_id.areValuesEqual(end - 1, end)) end++;
+        const entityId = entity_id.value(start);
+        const key = hierarchy.findEntityKey(entityId);
+        const compId = Column.window(mon_id, start, end);
+        byEntityKey[key] = { entityId, compId, num: Column.window(num, start, end) };
+    }
+    return { byEntityKey };
+}

+ 2 - 0
src/mol-model/structure/model/model.ts

@@ -6,6 +6,7 @@
 
 import UUID from 'mol-util/uuid'
 import Format from './format'
+import Sequence from './properties/sequence'
 import Hierarchy from './properties/hierarchy'
 import Conformation from './properties/conformation'
 import Symmetry from './properties/symmetry'
@@ -26,6 +27,7 @@ interface Model extends Readonly<{
 
     sourceData: Format,
 
+    sequence: Sequence,
     hierarchy: Hierarchy,
     conformation: Conformation,
     symmetry: Symmetry,

+ 29 - 0
src/mol-model/structure/model/properties/sequence.ts

@@ -0,0 +1,29 @@
+/**
+ * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import { Column } from 'mol-data/db'
+import { Hierarchy } from './hierarchy';
+
+/** Sequence information of a model, one entry per entity, looked up by entity key. */
+interface Sequence {
+    readonly byEntityKey: { [key: number]: Sequence.Entity }
+}
+
+namespace Sequence {
+    /** Sequence of a single entity; `num` and `compId` are columns over the same rows. */
+    export interface Entity {
+        readonly entityId: string,
+        readonly num: Column<number>,
+        // _entity_poly_seq.mon_id
+        readonly compId: Column<string>
+    }
+    /** Placeholder that always throws; presumably meant to derive the sequence from hierarchy.residues (label_comp_id). */
+    export function fromHierarchy(hierarchy: Hierarchy): Sequence {
+        throw new Error('not implemented'); // an Error (not a bare string) keeps the stack trace
+    }
+}
+
+export default Sequence