před 7 roky · e3f49b61a1
--- a/src/apps/structure-info/model.ts
+++ b/src/apps/structure-info/model.ts
@@ -50,7 +50,7 @@ export function residueLabel(model: Model, rI: number) {
 
				 }
			
 
				 
			
 
				 export function printSecStructure(model: Model) {
			
 
				-    console.log('Secondary Structure\n=============');
			
 
				+    console.log('\nSecondary Structure\n=============');
			
 
				     const { residues } = model.atomicHierarchy;
			
 
				     const { type, key } = model.properties.secondaryStructure;
			
 
				 
			
@@ -90,20 +90,18 @@ export function printBonds(structure: Structure) {
 
				 }
			
 
				 
			
 
				 export function printSequence(model: Model) {
			
 
				-    console.log('Sequence\n=============');
			
 
				+    console.log('\nSequence\n=============');
			
 
				     const { byEntityKey } = model.sequence;
			
 
				     for (const key of Object.keys(byEntityKey)) {
			
 
				         const seq = byEntityKey[+key];
			
 
				-        console.log(`${seq.entityId} (${seq.num.value(0)}, ${seq.num.value(seq.num.rowCount - 1)}) (${seq.compId.value(0)}, ${seq.compId.value(seq.compId.rowCount - 1)})`);
			
 
				-        // for (let i = 0; i < seq.compId.rowCount; i++) {
			
 
				-        //     console.log(`${seq.entityId} ${seq.num.value(i)} ${seq.compId.value(i)}`);
			
 
				-        // }
			
 
				+        console.log(`${seq.entityId} (${seq.sequence.kind} ${seq.num.value(0)} (offset ${seq.sequence.offset}), ${seq.num.value(seq.num.rowCount - 1)}) (${seq.compId.value(0)}, ${seq.compId.value(seq.compId.rowCount - 1)})`);
			
 
				+        console.log(`${seq.sequence.sequence}`);
			
 
				     }
			
 
				     console.log();
			
 
				 }
			
 
				 
			
 
				 export function printUnits(structure: Structure) {
			
 
				-    console.log('Units\n=============');
			
 
				+    console.log('\nUnits\n=============');
			
 
				     const l = Element.Location();
			
 
				 
			
 
				     for (const unit of structure.units) {
			
@@ -133,10 +131,9 @@ export function printUnits(structure: Structure) {
 
				     }
			
 
				 }
			
 
				 
			
 
				-
			
 
				 export function printIHMModels(model: Model) {
			
 
				     if (!model.coarseHierarchy.isDefined) return false;
			
 
				-    console.log('IHM Models\n=============');
			
 
				+    console.log('\nIHM Models\n=============');
			
 
				     console.log(Table.formatToString(model.coarseHierarchy.models));
			
 
				 }
			
 
				 
			
@@ -144,10 +141,10 @@ async function run(mmcif: mmCIF_Database) {
 
				     const models = await Model.create({ kind: 'mmCIF', data: mmcif }).run();
			
 
				     const structure = Structure.ofModel(models[0]);
			
 
				     printSequence(models[0]);
			
 
				-    printIHMModels(models[0]);
			
 
				+    //printIHMModels(models[0]);
			
 
				     printUnits(structure);
			
 
				-    printBonds(structure);
			
 
				-    printSecStructure(models[0]);
			
 
				+    //printBonds(structure);
			
 
				+    //printSecStructure(models[0]);
			
 
				 }
			
 
				 
			
 
				 async function runDL(pdb: string) {
			
--- a/src/mol-data/db/column.ts
+++ b/src/mol-data/db/column.ts
@@ -103,6 +103,15 @@ namespace Column {
 
				         return lambdaColumn(spec);
			
 
				     }
			
 
				 
			
 
				+    /** values [min, max] (i.e. include both values) */
			
 
				+    export function range(min: number, max: number): Column<number> {
			
 
				+        return ofLambda({
			
 
				+            value: i => i + min,
			
 
				+            rowCount: Math.max(max - min + 1, 0),
			
 
				+            schema: Schema.int
			
 
				+        });
			
 
				+    }
			
 
				+
			
 
				     export function ofArray<T extends Column.Schema>(spec: Column.ArraySpec<T>): Column<T['T']> {
			
 
				         return arrayColumn(spec);
			
 
				     }
			
--- a/src/mol-model/sequence.ts
+++ b/src/mol-model/sequence.ts
@@ -0,0 +1,7 @@
 
				+/**
			
 
				+ * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
			
 
				+ *
			
 
				+ * @author David Sehnal <david.sehnal@gmail.com>
			
 
				+ */
			
 
				+
			
 
				+export * from './sequence/sequence'
			
--- a/src/mol-model/sequence/constants.ts
+++ b/src/mol-model/sequence/constants.ts
@@ -1 +1,71 @@
 
				-// TODO
			
 
				+/**
			
 
				+ * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
			
 
				+ *
			
 
				+ * @author David Sehnal <david.sehnal@gmail.com>
			
 
				+ */
			
 
				+
			
 
				+export type AminoAlphabet =
			
 
				+    | 'H' | 'R' | 'K' | 'I' | 'F' | 'L' | 'W' | 'A' | 'M' | 'P' | 'C' | 'N' | 'V' | 'G' | 'S' | 'Q' | 'Y' | 'D' | 'E' | 'T' | 'U' | 'O'
			
 
				+    | 'X' /** = Unknown */
			
 
				+
			
 
				+export type NuclecicAlphabet =
			
 
				+    | 'A' | 'C' | 'G' | 'T' | 'U'
			
 
				+    | 'X' /** = Unknown */
			
 
				+
			
 
				+// from NGL
			
 
				+const ProteinOneLetterCodes: { [name: string]: AminoAlphabet }  = {
			
 
				+    'HIS': 'H',
			
 
				+    'ARG': 'R',
			
 
				+    'LYS': 'K',
			
 
				+    'ILE': 'I',
			
 
				+    'PHE': 'F',
			
 
				+    'LEU': 'L',
			
 
				+    'TRP': 'W',
			
 
				+    'ALA': 'A',
			
 
				+    'MET': 'M',
			
 
				+    'PRO': 'P',
			
 
				+    'CYS': 'C',
			
 
				+    'ASN': 'N',
			
 
				+    'VAL': 'V',
			
 
				+    'GLY': 'G',
			
 
				+    'SER': 'S',
			
 
				+    'GLN': 'Q',
			
 
				+    'TYR': 'Y',
			
 
				+    'ASP': 'D',
			
 
				+    'GLU': 'E',
			
 
				+    'THR': 'T',
			
 
				+
			
 
				+    'SEC': 'U',  // as per IUPAC definition
			
 
				+    'PYL': 'O',  // as per IUPAC definition
			
 
				+}
			
 
				+
			
 
				+const DnaOneLetterCodes: { [name: string]: NuclecicAlphabet } = {
			
 
				+    'DA': 'A',
			
 
				+    'DC': 'C',
			
 
				+    'DG': 'G',
			
 
				+    'DT': 'T',
			
 
				+    'DU': 'U'
			
 
				+}
			
 
				+
			
 
				+const RnaOneLetterCodes: { [name: string]: NuclecicAlphabet } = {
			
 
				+    'A': 'A',
			
 
				+    'C': 'C',
			
 
				+    'G': 'G',
			
 
				+    'T': 'T',
			
 
				+    'U': 'U'
			
 
				+}
			
 
				+
			
 
				+export function getProteinOneLetterCode(residueName: string): AminoAlphabet {
			
 
				+    const code = ProteinOneLetterCodes[residueName];
			
 
				+    return code || 'X';
			
 
				+}
			
 
				+
			
 
				+export function getRnaOneLetterCode(residueName: string): NuclecicAlphabet {
			
 
				+    const code = RnaOneLetterCodes[residueName];
			
 
				+    return code || 'X';
			
 
				+}
			
 
				+
			
 
				+export function getDnaOneLetterCode(residueName: string): NuclecicAlphabet {
			
 
				+    const code = DnaOneLetterCodes[residueName];
			
 
				+    return code || 'X';
			
 
				+}
			
--- a/src/mol-model/sequence/sequence.ts
+++ b/src/mol-model/sequence/sequence.ts
@@ -4,12 +4,13 @@
 
				  * @author David Sehnal <david.sehnal@gmail.com>
			
 
				  */
			
 
				 
			
 
				-// import { Column } from 'mol-data/db'
			
 
				-// TODO
			
 
				+import { AminoAlphabet, NuclecicAlphabet, getProteinOneLetterCode, getRnaOneLetterCode, getDnaOneLetterCode } from './constants';
			
 
				+import { Column } from 'mol-data/db'
			
 
				 
			
 
				-interface Sequence {
			
 
				+// TODO add mapping support to other sequence spaces, e.g. uniprot
			
 
				+// TODO sequence alignment (take NGL code as starting point)
			
 
				 
			
 
				-}
			
 
				+type Sequence = Sequence.Protein | Sequence.DNA | Sequence.RNA | Sequence.Generic
			
 
				 
			
 
				 namespace Sequence {
			
 
				     export const enum Kind {
			
@@ -19,14 +20,83 @@ namespace Sequence {
 
				         Generic = 'generic'
			
 
				     }
			
 
				 
			
 
				-    export type ProteinAlphabet = 'X'
			
 
				-    export type RnaAlphabet = 'X'
			
 
				-    export type DnaAlphabet = 'C'
			
 
				-    export type GenericAlphabet = 'X'
			
 
				-
			
 
				     export interface Base<K extends Kind, Alphabet extends string> {
			
 
				         readonly kind: K,
			
 
				-        readonly sequence: ReadonlyArray<Alphabet>
			
 
				+        readonly offset: number,
			
 
				+        readonly sequence: ArrayLike<Alphabet>
			
 
				+    }
			
 
				+
			
 
				+    export interface Protein extends Base<Kind.Protein, AminoAlphabet> { }
			
 
				+    export interface RNA extends Base<Kind.RNA, NuclecicAlphabet> { }
			
 
				+    export interface DNA extends Base<Kind.DNA, NuclecicAlphabet> { }
			
 
				+    export interface Generic extends Base<Kind.Generic, 'X'> { }
			
 
				+
			
 
				+    export function create(kind: Kind, sequence: string, offset: number = 0): Sequence {
			
 
				+        return { kind: kind as any, sequence: sequence as any, offset };
			
 
				+    }
			
 
				+
			
 
				+    export function getSequenceString(seq: Sequence) {
			
 
				+        return seq.sequence as string;
			
 
				+    }
			
 
				+
			
 
				+    function determineKind(names: Column<string>) {
			
 
				+        for (let i = 0, _i = Math.min(names.rowCount, 10); i < _i; i++) {
			
 
				+            const name = names.value(i) || '';
			
 
				+            if (getProteinOneLetterCode(name) !== 'X') return { kind: Kind.Protein, code: getProteinOneLetterCode };
			
 
				+            if (getRnaOneLetterCode(name) !== 'X') return { kind: Kind.RNA, code: getRnaOneLetterCode };
			
 
				+            if (getDnaOneLetterCode(name) !== 'X') return { kind: Kind.DNA, code: getDnaOneLetterCode };
			
 
				+        }
			
 
				+        return { kind: Kind.Generic, code: (v: string) => 'X' };
			
 
				+    }
			
 
				+
			
 
				+    export function ofResidueNames(residueName: Column<string>, seqId: Column<number>): Sequence {
			
 
				+        if (seqId.rowCount === 0) throw new Error('cannot be empty');
			
 
				+
			
 
				+        const { kind, code } = determineKind(residueName);
			
 
				+        return new Impl(kind, residueName, seqId, code) as Sequence;
			
 
				+    }
			
 
				+
			
 
				+    class Impl implements Base<any, any> {
			
 
				+        private _offset = 0;
			
 
				+        private _seq: string | undefined = void 0;
			
 
				+
			
 
				+        get offset() {
			
 
				+            if (this._seq !== void 0) return this._offset;
			
 
				+            this.create();
			
 
				+            return this._offset;
			
 
				+        }
			
 
				+
			
 
				+        get sequence(): any {
			
 
				+            if (this._seq !== void 0) return this._seq;
			
 
				+            this.create();
			
 
				+            return this._seq;
			
 
				+        }
			
 
				+
			
 
				+        private create() {
			
 
				+            let maxSeqId = 0, minSeqId = Number.MAX_SAFE_INTEGER;
			
 
				+            for (let i = 0, _i = this.seqId.rowCount; i < _i; i++) {
			
 
				+                const id = this.seqId.value(i);
			
 
				+                if (maxSeqId < id) maxSeqId = id;
			
 
				+                if (id < minSeqId) minSeqId = id;
			
 
				+            }
			
 
				+
			
 
				+            const count = maxSeqId - minSeqId + 1;
			
 
				+            const sequenceArray = new Array(maxSeqId + 1);
			
 
				+            for (let i = 0; i < count; i++) {
			
 
				+                sequenceArray[i] = 'X';
			
 
				+            }
			
 
				+
			
 
				+            for (let i = 0, _i = this.seqId.rowCount; i < _i; i++) {
			
 
				+                sequenceArray[this.seqId.value(i) - minSeqId] = this.code(this.residueName.value(i) || '');
			
 
				+            }
			
 
				+
			
 
				+            this._seq = sequenceArray.join('');
			
 
				+            this._offset = minSeqId - 1;
			
 
				+        }
			
 
				+
			
 
				+        constructor(public kind: Kind, private residueName: Column<string>, private seqId: Column<number>, private code: (name: string) => string) {
			
 
				+
			
 
				+        }
			
 
				     }
			
 
				 }
			
 
				 
			
--- a/src/mol-model/structure/model/formats/mmcif/sequence.ts
+++ b/src/mol-model/structure/model/formats/mmcif/sequence.ts
@@ -5,10 +5,11 @@
 
				  */
			
 
				 
			
 
				 import { mmCIF_Database as mmCIF } from 'mol-io/reader/cif/schema/mmcif'
			
 
				-import Sequence from '../../properties/sequence'
			
 
				+import StructureSequence from '../../properties/sequence'
			
 
				 import { Column } from 'mol-data/db';
			
 
				 import { AtomicHierarchy } from '../../properties/atomic';
			
 
				 import { Entities } from '../../properties/common';
			
 
				+import { Sequence } from '../../../../sequence';
			
 
				 
			
 
				 // TODO how to handle microheterogeneity
			
 
				 //    see http://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Categories/entity_poly_seq.html
			
@@ -20,12 +21,12 @@ import { Entities } from '../../properties/common';
 
				 // corresponding ATOM_SITE entries should reflect this
			
 
				 // heterogeneity.
			
 
				 
			
 
				-export function getSequence(cif: mmCIF, entities: Entities, hierarchy: AtomicHierarchy): Sequence {
			
 
				-    if (!cif.entity_poly_seq._rowCount) return Sequence.fromAtomicHierarchy(entities, hierarchy);
			
 
				+export function getSequence(cif: mmCIF, entities: Entities, hierarchy: AtomicHierarchy): StructureSequence {
			
 
				+    if (!cif.entity_poly_seq._rowCount) return StructureSequence.fromAtomicHierarchy(entities, hierarchy);
			
 
				 
			
 
				     const { entity_id, num, mon_id } = cif.entity_poly_seq;
			
 
				 
			
 
				-    const byEntityKey: Sequence['byEntityKey'] = {};
			
 
				+    const byEntityKey: StructureSequence['byEntityKey'] = {};
			
 
				     const count = entity_id.rowCount;
			
 
				 
			
 
				     let i = 0;
			
@@ -35,7 +36,15 @@ export function getSequence(cif: mmCIF, entities: Entities, hierarchy: AtomicHie
 
				         i++;
			
 
				 
			
 
				         const id = entity_id.value(start);
			
 
				-        byEntityKey[entities.getEntityIndex(id)] = { entityId: id, compId: Column.window(mon_id, start, i), num: Column.window(num, start, i)  }
			
 
				+        const _compId = Column.window(mon_id, start, i);
			
 
				+        const _num = Column.window(num, start, i);
			
 
				+
			
 
				+        byEntityKey[entities.getEntityIndex(id)] = {
			
 
				+            entityId: id,
			
 
				+            compId: _compId,
			
 
				+            num: _num,
			
 
				+            sequence: Sequence.ofResidueNames(_compId, _num)
			
 
				+        };
			
 
				     }
			
 
				 
			
 
				     return { byEntityKey };
			
--- a/src/mol-model/structure/model/model.ts
+++ b/src/mol-model/structure/model/model.ts
@@ -6,7 +6,7 @@
 
				 
			
 
				 import UUID from 'mol-util/uuid'
			
 
				 import Format from './format'
			
 
				-import Sequence from './properties/sequence'
			
 
				+import StructureSequence from './properties/sequence'
			
 
				 import { AtomicHierarchy, AtomicConformation } from './properties/atomic'
			
 
				 import { ModelSymmetry } from './properties/symmetry'
			
 
				 import { CoarseHierarchy, CoarseConformation } from './properties/coarse'
			
@@ -31,7 +31,7 @@ interface Model extends Readonly<{
 
				 
			
 
				     symmetry: ModelSymmetry,
			
 
				     entities: Entities,
			
 
				-    sequence: Sequence,
			
 
				+    sequence: StructureSequence,
			
 
				 
			
 
				     atomicHierarchy: AtomicHierarchy,
			
 
				     atomicConformation: AtomicConformation,
			
--- a/src/mol-model/structure/model/properties/sequence.ts
+++ b/src/mol-model/structure/model/properties/sequence.ts
@@ -7,59 +7,55 @@
 
				 import { Column } from 'mol-data/db'
			
 
				 import { AtomicHierarchy } from './atomic/hierarchy';
			
 
				 import { Entities } from './common';
			
 
				+import { Sequence } from '../../../sequence';
			
 
				 
			
 
				-interface Sequence {
			
 
				-    readonly byEntityKey: { [key: number]: Sequence.Entity }
			
 
				+interface StructureSequence {
			
 
				+    readonly byEntityKey: { [key: number]: StructureSequence.Entity }
			
 
				 }
			
 
				 
			
 
				-// TODO lift to model/sequence/ folder
			
 
				-// TODO add one letter code sequence string
			
 
				-// TODO add mapping support to other sequence spaces, e.g. uniprot
			
 
				-// TODO add sequence kind, e.g. protein, dna, rna (alphabets?)
			
 
				-// TODO sequence alignment (take NGL code as starting point)
			
 
				-
			
 
				-namespace Sequence {
			
 
				+namespace StructureSequence {
			
 
				     export interface Entity {
			
 
				         readonly entityId: string,
			
 
				-        readonly num: Column<number>
			
 
				-        // _entity_poly_seq.mon_id
			
 
				-        readonly compId: Column<string>
			
 
				+        readonly num: Column<number>,
			
 
				+        // Corresponds to _entity_poly_seq.mon_id
			
 
				+        readonly compId: Column<string>,
			
 
				+        readonly sequence: Sequence
			
 
				     }
			
 
				 
			
 
				-    export function fromAtomicHierarchy(entities: Entities, hierarchy: AtomicHierarchy): Sequence {
			
 
				-        const { label_entity_id } = hierarchy.chains
			
 
				+    export function fromAtomicHierarchy(entities: Entities, hierarchy: AtomicHierarchy): StructureSequence {
			
 
				         const { label_comp_id, label_seq_id } = hierarchy.residues
			
 
				         const { chainSegments, residueSegments } = hierarchy
			
 
				 
			
 
				-        const byEntityKey: Sequence['byEntityKey'] = {};
			
 
				+        const byEntityKey: StructureSequence['byEntityKey'] = { };
			
 
				 
			
 
				-        // TODO get min/max of label_seq_id to handle missing residues at start and in between
			
 
				-        //   note that this assumes label_seq_id is monotonically increasing
			
 
				+        for (let cI = 0, _cI = hierarchy.chains._rowCount; cI < _cI; cI++) {
			
 
				+            const entityKey = hierarchy.entityKey[cI];
			
 
				+            // Only for polymers, trying to mirror _entity_poly_seq
			
 
				+            if (byEntityKey[entityKey] !== void 0 || entities.data.type.value(entityKey) !== 'polymer') continue;
			
 
				 
			
 
				-        const chainCount = hierarchy.chains._rowCount
			
 
				-        for (let i = 0; i < chainCount; ++i) {
			
 
				-            const entityId = label_entity_id.value(i)
			
 
				-            const entityIndex = entities.getEntityIndex(entityId)
			
 
				-            // TODO only for polymers, mirroring _entity_poly_seq, ok???
			
 
				-            if (entities.data.type.value(i) !== 'polymer') continue
			
 
				+            let start = cI;
			
 
				+            cI++;
			
 
				+            while (cI < _cI && entityKey === hierarchy.entityKey[cI] && entities.data.type.value(entityKey) !== 'polymer') {
			
 
				+                cI++;
			
 
				+            }
			
 
				+            cI--;
			
 
				 
			
 
				-            const entityKey = hierarchy.entityKey[entityIndex]
			
 
				-            if (byEntityKey[entityKey] !== undefined) continue
			
 
				+            const rStart = residueSegments.segmentMap[chainSegments.segments[start]];
			
 
				+            const rEnd = residueSegments.segmentMap[chainSegments.segments[cI + 1]];
			
 
				 
			
 
				-            const start = residueSegments.segmentMap[chainSegments.segments[i]]
			
 
				-            let end = residueSegments.segmentMap[chainSegments.segments[i + 1]]
			
 
				-            // TODO better way to handle end???
			
 
				-            if (end === undefined) end = hierarchy.residues._rowCount
			
 
				+            const compId = Column.window(label_comp_id, rStart, rEnd);
			
 
				+            const num = Column.window(label_seq_id, rStart, rEnd);
			
 
				 
			
 
				             byEntityKey[entityKey] = {
			
 
				-                entityId,
			
 
				-                compId: Column.window(label_comp_id, start, end),
			
 
				-                num: Column.window(label_seq_id, start, end)
			
 
				-            }
			
 
				+                entityId: entities.data.id.value(entityKey),
			
 
				+                compId,
			
 
				+                num,
			
 
				+                sequence: Sequence.ofResidueNames(compId, num)
			
 
				+            };
			
 
				         }
			
 
				 
			
 
				         return { byEntityKey }
			
 
				     }
			
 
				 }
			
 
				 
			
 
				-export default Sequence
			
 
				+export default StructureSequence