Bladeren bron

basic microheterogeneity support

Alexander Rose 5 jaren geleden
bovenliggende
commit
72b1c36111

+ 5 - 4
src/mol-model-formats/structure/mmcif/atomic.ts

@@ -1,7 +1,8 @@
 /**
- * Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ * Copyright (c) 2017-2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
  *
  * @author David Sehnal <david.sehnal@gmail.com>
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
  */
 
 import { Column, Table } from '../../../mol-data/db';
@@ -26,15 +27,15 @@ function findHierarchyOffsets(atom_site: AtomSite) {
     const start = 0, end = atom_site._rowCount;
     const residues = [start as ElementIndex], chains = [start as ElementIndex];
 
-    const { label_entity_id, label_asym_id, label_seq_id, auth_seq_id, pdbx_PDB_ins_code, label_comp_id } = atom_site;
+    const { label_entity_id, label_asym_id, label_seq_id, auth_seq_id, pdbx_PDB_ins_code } = atom_site;
 
     for (let i = start + 1 as ElementIndex; i < end; i++) {
         const newChain = !label_entity_id.areValuesEqual(i - 1, i) || !label_asym_id.areValuesEqual(i - 1, i);
         const newResidue = newChain
             || !label_seq_id.areValuesEqual(i - 1, i)
             || !auth_seq_id.areValuesEqual(i - 1, i)
-            || !pdbx_PDB_ins_code.areValuesEqual(i - 1, i)
-            || !label_comp_id.areValuesEqual(i - 1, i);
+            || !pdbx_PDB_ins_code.areValuesEqual(i - 1, i);
+        // label_comp_id is deliberately not compared, so atoms of alternative components at the same position (MICROHETEROGENEITY) stay within one residue
 
         if (newResidue) residues[residues.length] = i as ElementIndex;
         if (newChain) chains[chains.length] = i as ElementIndex;

+ 55 - 12
src/mol-model/sequence/sequence.ts

@@ -1,7 +1,8 @@
 /**
- * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ * Copyright (c) 2018-2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
  *
  * @author David Sehnal <david.sehnal@gmail.com>
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
  */
 
 import { AminoAlphabet, NuclecicAlphabet, getProteinOneLetterCode, getRnaOneLetterCode, getDnaOneLetterCode } from './constants';
@@ -24,6 +25,9 @@ namespace Sequence {
         readonly kind: K,
         readonly offset: number,
         readonly sequence: ArrayLike<Alphabet>
+        readonly labels: ArrayLike<string>
+        /** maps seqId to its list of compIds; only seqIds with more than one compId are present */
+        readonly microHet: ReadonlyMap<number, string[]>
     }
 
     export interface Protein extends Base<Kind.Protein, AminoAlphabet> { }
@@ -31,8 +35,8 @@ namespace Sequence {
     export interface DNA extends Base<Kind.DNA, NuclecicAlphabet> { }
     export interface Generic extends Base<Kind.Generic, 'X' | '-'> { }
 
-    export function create(kind: Kind, sequence: string, offset: number = 0): Sequence {
-        return { kind: kind as any, sequence: sequence as any, offset };
+    export function create<K extends Kind, Alphabet extends string>(kind: K, sequence: Alphabet[], labels: string[], microHet: Map<number, string[]>, offset: number = 0): Base<K, Alphabet> {
+        return { kind: kind, sequence: sequence, labels, microHet, offset };
     }
 
     export function getSequenceString(seq: Sequence) {
@@ -62,13 +66,17 @@ namespace Sequence {
 
         const { kind, code } = determineKind(residueName);
 
-        if (!modifiedMap || modifiedMap.size === 0) return new Impl(kind, residueName, seqId, code) as Sequence;
+        if (!modifiedMap || modifiedMap.size === 0) {
+            return new Impl(kind, residueName, seqId, code) as Sequence;
+        }
         return new Impl(kind, residueName, seqId, modCode(code, modifiedMap)) as Sequence;
     }
 
-    class Impl implements Base<any, any> {
+    class Impl<K extends Kind, Alphabet extends string> implements Base<K, Alphabet> {
         private _offset = 0;
-        private _seq: string | undefined = void 0;
+        private _seq: ArrayLike<Alphabet> | undefined = void 0;
+        private _labels: ArrayLike<string> | undefined = void 0;
+        private _microHet: ReadonlyMap<number, string[]> | undefined = void 0;
 
         get offset() {
             if (this._seq !== void 0) return this._offset;
@@ -76,10 +84,22 @@ namespace Sequence {
             return this._offset;
         }
 
-        get sequence(): any {
+        get sequence(): ArrayLike<Alphabet> {
             if (this._seq !== void 0) return this._seq;
             this.create();
-            return this._seq;
+            return this._seq!;
+        }
+
+        get labels(): ArrayLike<string> {
+            if (this._labels !== void 0) return this._labels;
+            this.create();
+            return this._labels!;
+        }
+
+        get microHet(): ReadonlyMap<number, string[]> {
+            if (this._microHet !== void 0) return this._microHet;
+            this.create();
+            return this._microHet!;
         }
 
         private create() {
@@ -91,20 +111,43 @@ namespace Sequence {
             }
 
             const count = maxSeqId - minSeqId + 1;
-            const sequenceArray = new Array(maxSeqId + 1);
+            const sequenceArray = new Array<string>(maxSeqId + 1);
+            const labels = new Array<string[]>(maxSeqId + 1);
             for (let i = 0; i < count; i++) {
                 sequenceArray[i] = '-';
+                labels[i] = [];
+            }
+
+            const compIds = new Array<string[]>(maxSeqId + 1);
+            for (let i = minSeqId; i <= maxSeqId; ++i) {
+                compIds[i] = [];
             }
 
             for (let i = 0, _i = this.seqId.rowCount; i < _i; i++) {
-                sequenceArray[this.seqId.value(i) - minSeqId] = this.code(this.residueName.value(i) || '');
+                const seqId = this.seqId.value(i)
+                const idx = seqId - minSeqId;
+                const name = this.residueName.value(i);
+                const code = this.code(name);
+                // in case of MICROHETEROGENEITY `sequenceArray[idx]` may already be set
+                if (!sequenceArray[idx] || sequenceArray[idx] === '-') {
+                    sequenceArray[idx] = code;
+                }
+                labels[idx].push(code === 'X' ? name : code);
+                compIds[seqId].push(name);
+            }
+
+            const microHet = new Map()
+            for (let i = minSeqId; i <= maxSeqId; ++i) {
+                if (compIds[i].length > 1) microHet.set(i, compIds[i])
             }
 
-            this._seq = sequenceArray.join('');
+            this._seq = sequenceArray.join('') as unknown as ArrayLike<Alphabet>;
+            this._labels = labels.map(l => l.length > 1 ? `(${l.join('|')})` : l.join(''));
+            this._microHet = microHet
             this._offset = minSeqId - 1;
         }
 
-        constructor(public kind: Kind, private residueName: Column<string>, private seqId: Column<number>, private code: (name: string) => string) {
+        constructor(public kind: K, private residueName: Column<string>, private seqId: Column<number>, private code: (name: string) => string) {
 
         }
     }

+ 1 - 1
src/mol-model/structure/model/properties/sequence.ts

@@ -19,7 +19,7 @@ namespace StructureSequence {
     export interface Entity {
         readonly entityId: string,
         readonly num: Column<number>,
-        // Corresponds to _entity_poly_seq.mon_id
+        /** Corresponds to _entity_poly_seq.mon_id */
         readonly compId: Column<string>,
         readonly sequence: Sequence
     }

+ 23 - 1
src/mol-model/structure/structure/properties.ts

@@ -57,13 +57,33 @@ function compId(l: StructureElement.Location) {
     return !Unit.isAtomic(l.unit) ? notAtomic() : l.unit.model.atomicHierarchy.residues.label_comp_id.value(l.unit.residueIndex[l.element])
 }
 
+/**
+ * Looks up `label_seq_id` of the residue that contains the location's element.
+ * Non-atomic units are delegated to `notAtomic()`.
+ */
+function seqId(l: StructureElement.Location) {
+    if (!Unit.isAtomic(l.unit)) return notAtomic()
+    const { residues } = l.unit.model.atomicHierarchy
+    const residueIndex = l.unit.residueIndex[l.element]
+    return residues.label_seq_id.value(residueIndex)
+}
+
+/**
+ * Tells whether the residue position of the location is listed in the
+ * `microHet` map of its entity's sequence.
+ *
+ * Calls `notAtomic()` for non-atomic units. Always returns a boolean: `false`
+ * when no sequence is stored for the entity key, instead of handing the
+ * missing lookup result back to the caller.
+ */
+function hasMicroheterogeneity(l: StructureElement.Location) {
+    if (!Unit.isAtomic(l.unit)) notAtomic()
+    const entitySeq = l.unit.model.sequence.byEntityKey[eK(l)]
+    // `microHet` is keyed by seqId
+    return entitySeq ? entitySeq.sequence.microHet.has(seqId(l)) : false
+}
+
+/**
+ * Lists the compIds for the residue position of the location: the entry of the
+ * entity sequence's `microHet` map for the seqId when there is one, otherwise
+ * a single-element list holding the residue's own compId.
+ *
+ * Calls `notAtomic()` for non-atomic units.
+ */
+function microheterogeneityCompIds(l: StructureElement.Location) {
+    if (!Unit.isAtomic(l.unit)) notAtomic()
+    const entitySeq = l.unit.model.sequence.byEntityKey[eK(l)]
+    const hetCompIds = entitySeq ? entitySeq.sequence.microHet.get(seqId(l)) : undefined
+    return hetCompIds || [compId(l)]
+}
+
 const residue = {
     key: p(l => !Unit.isAtomic(l.unit) ? notAtomic() : l.unit.residueIndex[l.element]),
 
     group_PDB: p(l => !Unit.isAtomic(l.unit) ? notAtomic() : l.unit.model.atomicHierarchy.residues.group_PDB.value(l.unit.residueIndex[l.element])),
     label_comp_id: p(compId),
     auth_comp_id: p(l => !Unit.isAtomic(l.unit) ? notAtomic() : l.unit.model.atomicHierarchy.residues.auth_comp_id.value(l.unit.residueIndex[l.element])),
-    label_seq_id: p(l => !Unit.isAtomic(l.unit) ? notAtomic() : l.unit.model.atomicHierarchy.residues.label_seq_id.value(l.unit.residueIndex[l.element])),
+    label_seq_id: p(seqId),
     auth_seq_id: p(l => !Unit.isAtomic(l.unit) ? notAtomic() : l.unit.model.atomicHierarchy.residues.auth_seq_id.value(l.unit.residueIndex[l.element])),
     pdbx_PDB_ins_code: p(l => !Unit.isAtomic(l.unit) ? notAtomic() : l.unit.model.atomicHierarchy.residues.pdbx_PDB_ins_code.value(l.unit.residueIndex[l.element])),
 
@@ -74,6 +94,8 @@ const residue = {
         const id = compId(l)
         return l.unit.model.properties.modifiedResidues.parentId.get(id) || id
     }),
+    hasMicroheterogeneity: p(hasMicroheterogeneity),
+    microheterogeneityCompIds: p(microheterogeneityCompIds),
     secondary_structure_type: p(l => !Unit.isAtomic(l.unit) ? notAtomic() : l.unit.model.properties.secondaryStructure.type[l.unit.residueIndex[l.element]]),
     secondary_structure_key: p(l => !Unit.isAtomic(l.unit) ? notAtomic() : l.unit.model.properties.secondaryStructure.key[l.unit.residueIndex[l.element]]),
     chem_comp_type: p(l => !Unit.isAtomic(l.unit) ? notAtomic() : l.unit.model.properties.chemicalComponentMap.get(compId(l))!.type),

+ 1 - 1
src/mol-plugin/ui/sequence/polymer.ts

@@ -25,7 +25,7 @@ export class PolymerSequenceWrapper extends SequenceWrapper<StructureUnit> {
     }
 
     residueLabel(seqIdx: number) {
-        return this.sequence.sequence[seqIdx]
+        return this.sequence.labels[seqIdx]
     }
     residueColor(seqIdx: number) {
         return this.missing.has(this.modelNum, this.asymId, this.seqId(seqIdx))

+ 2 - 2
src/mol-plugin/ui/sequence/residue.tsx

@@ -38,8 +38,8 @@ export class Residue extends PurePluginUIComponent<{ seqIdx: number, label: stri
     }
 
     get margin() {
-        return this.props.label.length > 1 && this.props.seqIdx
-            ? `0px 0px 0px 4px`
+        return this.props.label.length > 1
+            ? (this.props.seqIdx === 0 ? `0px 2px 0px 0px` : `0px 2px 0px 2px`)
             : undefined
     }
 

+ 6 - 1
src/mol-theme/label.ts

@@ -108,13 +108,18 @@ export function atomicElementLabel(location: StructureElement.Location<Unit.Atom
     const atom_id = Props.atom.label_atom_id(location)
     const alt_id = Props.atom.label_alt_id(location)
 
+    const microHetCompIds = Props.residue.microheterogeneityCompIds(location)
+    const compId = granularity === 'residue' && microHetCompIds.length > 1 ?
+        `(${microHetCompIds.join('|')})` : comp_id
+
+
     const label: string[] = []
 
     switch (granularity) {
         case 'element':
             label.push(`${atom_id}${alt_id ? `%${alt_id}` : ''}`)
         case 'residue':
-            label.push(`${comp_id} ${seq_id}`)
+            label.push(`${compId} ${seq_id}`)
         case 'chain':
             label.push(`Chain ${label_asym_id}:${auth_asym_id}`)
     }