Explorar el Código

sequence improvements, create sequence from coarse elements

Alexander Rose hace 5 años
padre
commit
3eec30aa42

+ 7 - 5
src/apps/structure-info/model.ts

@@ -15,6 +15,7 @@ import { OrderedSet } from '../../mol-data/int';
 import { openCif, downloadCif } from './helpers';
 import { Vec3 } from '../../mol-math/linear-algebra';
 import { trajectoryFromMmCIF } from '../../mol-model-formats/structure/mmcif';
+import { Sequence } from '../../mol-model/sequence';
 
 
 async function downloadFromPdb(pdb: string) {
@@ -110,9 +111,10 @@ export function printSequence(model: Model) {
     console.log('\nSequence\n=============');
     const { byEntityKey } = model.sequence;
     for (const key of Object.keys(byEntityKey)) {
-        const seq = byEntityKey[+key];
-        console.log(`${seq.entityId} (${seq.sequence.kind} ${seq.num.value(0)} (offset ${seq.sequence.offset}), ${seq.num.value(seq.num.rowCount - 1)}) (${seq.compId.value(0)}, ${seq.compId.value(seq.compId.rowCount - 1)})`);
-        console.log(`${seq.sequence.sequence}`);
+        const { sequence, entityId } = byEntityKey[+key];
+        const { seqId, compId } = sequence
+        console.log(`${entityId} (${sequence.kind} ${seqId.value(0)} (offset ${sequence.offset}), ${seqId.value(seqId.rowCount - 1)}) (${compId.value(0)}, ${compId.value(compId.rowCount - 1)})`);
+        console.log(`${Sequence.getSequenceString(sequence)}`);
     }
     console.log();
 }
@@ -159,14 +161,14 @@ export function printUnits(structure: Structure) {
             console.log(`Coarse unit ${unit.id} ${unit.conformation.operator.name} (${Unit.isSpheres(l.unit) ? 'spheres' : 'gaussians'}): ${size} elements.`);
 
             const props = StructureProperties.coarse;
-            const seq = l.unit.model.sequence;
+            const modelSeq = l.unit.model.sequence;
 
             for (let j = 0, _j = Math.min(size, 3); j < _j; j++) {
                 l.element = OrderedSet.getAt(elements, j);
 
                 const residues: string[] = [];
                 const start = props.seq_id_begin(l), end = props.seq_id_end(l);
-                const compId = seq.byEntityKey[props.entityKey(l)].compId.value;
+                const compId = modelSeq.byEntityKey[props.entityKey(l)].sequence.compId.value;
                 for (let e = start; e <= end; e++) residues.push(compId(e));
                 console.log(`${props.asym_id(l)}:${start}-${end} (${residues.join('-')}) ${props.asym_id(l)} [${props.x(l).toFixed(2)}, ${props.y(l).toFixed(2)}, ${props.z(l).toFixed(2)}]`);
             }

+ 2 - 2
src/mol-model-formats/structure/mmcif/parser.ts

@@ -225,7 +225,7 @@ function createStandardModel(format: mmCIF_Format, atom_site: AtomSite, sourceIn
         modelNum,
         entities,
         symmetry: getSymmetry(format),
-        sequence: getSequence(format.data, entities, atomic.hierarchy, formatData.modifiedResidues.parentId),
+        sequence: getSequence(format.data, entities, atomic.hierarchy, coarse.hierarchy, formatData.modifiedResidues.parentId),
         atomicHierarchy: atomic.hierarchy,
         atomicConformation: atomic.conformation,
         coarseHierarchy: coarse.hierarchy,
@@ -262,7 +262,7 @@ function createModelIHM(format: mmCIF_Format, data: IHMData, formatData: FormatD
         modelNum: data.model_id,
         entities: data.entities,
         symmetry: getSymmetry(format),
-        sequence: getSequence(format.data, data.entities, atomic.hierarchy, formatData.modifiedResidues.parentId),
+        sequence: getSequence(format.data, data.entities, atomic.hierarchy, coarse.hierarchy, formatData.modifiedResidues.parentId),
         atomicHierarchy: atomic.hierarchy,
         atomicConformation: atomic.conformation,
         coarseHierarchy: coarse.hierarchy,

+ 10 - 18
src/mol-model-formats/structure/mmcif/sequence.ts

@@ -1,7 +1,8 @@
 /**
- * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ * Copyright (c) 2018-2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
  *
  * @author David Sehnal <david.sehnal@gmail.com>
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
  */
 
 import { mmCIF_Database as mmCIF } from '../../../mol-io/reader/cif/schema/mmcif'
@@ -10,19 +11,12 @@ import { Column } from '../../../mol-data/db';
 import { AtomicHierarchy } from '../../../mol-model/structure/model/properties/atomic';
 import { Entities } from '../../../mol-model/structure/model/properties/common';
 import { Sequence } from '../../../mol-model/sequence';
+import { CoarseHierarchy } from '../../../mol-model/structure/model/properties/coarse';
 
-// TODO how to handle microheterogeneity
-//    see http://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Categories/entity_poly_seq.html
-//
-// Data items in the ENTITY_POLY_SEQ category specify the sequence
-// of monomers in a polymer. Allowance is made for the possibility
-// of microheterogeneity in a sample by allowing a given sequence
-// number to be correlated with more than one monomer ID. The
-// corresponding ATOM_SITE entries should reflect this
-// heterogeneity.
-
-export function getSequence(cif: mmCIF, entities: Entities, hierarchy: AtomicHierarchy, modResMap: ReadonlyMap<string, string>): StructureSequence {
-    if (!cif.entity_poly_seq._rowCount) return StructureSequence.fromAtomicHierarchy(entities, hierarchy, modResMap);
+export function getSequence(cif: mmCIF, entities: Entities, atomicHierarchy: AtomicHierarchy, coarseHierarchy: CoarseHierarchy, modResMap: ReadonlyMap<string, string>): StructureSequence {
+    if (!cif.entity_poly_seq._rowCount) {
+        return StructureSequence.fromHierarchy(entities, atomicHierarchy, coarseHierarchy, modResMap);
+    }
 
     const { entity_id, num, mon_id } = cif.entity_poly_seq;
 
@@ -37,15 +31,13 @@ export function getSequence(cif: mmCIF, entities: Entities, hierarchy: AtomicHie
         i++;
 
         const id = entity_id.value(start);
-        const _compId = Column.window(mon_id, start, i);
-        const _num = Column.window(num, start, i);
+        const compId = Column.window(mon_id, start, i);
+        const seqId = Column.window(num, start, i);
         const entityKey = entities.getEntityIndex(id);
 
         byEntityKey[entityKey] = {
             entityId: id,
-            compId: _compId,
-            num: _num,
-            sequence: Sequence.ofResidueNames(_compId, _num, modResMap)
+            sequence: Sequence.ofResidueNames(compId, seqId, modResMap)
         };
 
         sequences.push(byEntityKey[entityKey]);

+ 89 - 26
src/mol-model/sequence/sequence.ts

@@ -23,9 +23,15 @@ namespace Sequence {
 
     export interface Base<K extends Kind, Alphabet extends string> {
         readonly kind: K,
+        readonly length: number,
         readonly offset: number,
-        readonly sequence: ArrayLike<Alphabet>
-        readonly labels: ArrayLike<string>
+
+        readonly code: Column<Alphabet>
+        readonly label: Column<string>
+
+        readonly seqId: Column<number>
+        readonly compId: Column<string>
+
         /** maps seqId to list of compIds */
         readonly microHet: ReadonlyMap<number, string[]>
     }
@@ -35,12 +41,14 @@ namespace Sequence {
     export interface DNA extends Base<Kind.DNA, NuclecicAlphabet> { }
     export interface Generic extends Base<Kind.Generic, 'X' | '-'> { }
 
-    export function create<K extends Kind, Alphabet extends string>(kind: K, sequence: Alphabet[], labels: string[], microHet: Map<number, string[]>, offset: number = 0): Base<K, Alphabet> {
-        return { kind: kind, sequence: sequence, labels, microHet, offset };
+    export function create<K extends Kind, Alphabet extends string>(kind: K, code: Column<Alphabet>, label: Column<string>, seqId: Column<number>, compId: Column<string>, microHet: Map<number, string[]>, offset: number = 0): Base<K, Alphabet> {
+        const length = code.rowCount
+        return { kind, code, label, seqId, compId, microHet, offset, length };
     }
 
     export function getSequenceString(seq: Sequence) {
-        return seq.sequence as string;
+        const array = seq.code.toArray() // materialize the code column
+        return (array instanceof Array ? array : Array.from(array)).join('') // normalize a non-Array array-like first, then concatenate the codes into one string
     }
 
     function determineKind(names: Column<string>) {
@@ -61,39 +69,46 @@ namespace Sequence {
         }
     }
 
-    export function ofResidueNames(residueName: Column<string>, seqId: Column<number>, modifiedMap?: ReadonlyMap<string, string>): Sequence {
+    export function ofResidueNames(compId: Column<string>, seqId: Column<number>, modifiedMap?: ReadonlyMap<string, string>): Sequence { // throws when seqId has no rows
         if (seqId.rowCount === 0) throw new Error('cannot be empty');
 
-        const { kind, code } = determineKind(residueName);
+        const { kind, code } = determineKind(compId); // sequence kind plus the name -> code lookup handed to the implementation
 
         if (!modifiedMap || modifiedMap.size === 0) {
-            return new Impl(kind, residueName, seqId, code) as Sequence;
+            return new ResidueNamesImpl(kind, compId, seqId, code) as Sequence; // no modified-residue map: use the lookup as is
         }
-        return new Impl(kind, residueName, seqId, modCode(code, modifiedMap)) as Sequence;
+        return new ResidueNamesImpl(kind, compId, seqId, modCode(code, modifiedMap)) as Sequence; // presumably lets modified residues resolve through modifiedMap (modCode is not shown here)
     }
 
-    class Impl<K extends Kind, Alphabet extends string> implements Base<K, Alphabet> {
+    class ResidueNamesImpl<K extends Kind, Alphabet extends string> implements Base<K, Alphabet> {
         private _offset = 0;
-        private _seq: ArrayLike<Alphabet> | undefined = void 0;
-        private _labels: ArrayLike<string> | undefined = void 0;
+        private _length = 0;
         private _microHet: ReadonlyMap<number, string[]> | undefined = void 0;
+        private _code: Column<Alphabet> | undefined = undefined
+        private _label: Column<string> | undefined = undefined
 
-        get offset() {
-            if (this._seq !== void 0) return this._offset;
+        get code(): Column<Alphabet> {
+            if (this._code !== void 0) return this._code;
             this.create();
-            return this._offset;
+            return this._code!;
         }
 
-        get sequence(): ArrayLike<Alphabet> {
-            if (this._seq !== void 0) return this._seq;
+        get label(): Column<string> {
+            if (this._label !== void 0) return this._label;
             this.create();
-            return this._seq!;
+            return this._label!;
         }
 
-        get labels(): ArrayLike<string> {
-            if (this._labels !== void 0) return this._labels;
+        get offset() {
+            if (this._code !== void 0) return this._offset;
             this.create();
-            return this._labels!;
+            return this._offset;
+        }
+
+        get length() {
+            if (this._code !== void 0) return this._length;
+            this.create();
+            return this._length;
         }
 
         get microHet(): ReadonlyMap<number, string[]> {
@@ -126,8 +141,8 @@ namespace Sequence {
             for (let i = 0, _i = this.seqId.rowCount; i < _i; i++) {
                 const seqId = this.seqId.value(i)
                 const idx = seqId - minSeqId;
-                const name = this.residueName.value(i);
-                const code = this.code(name);
+                const name = this.compId.value(i);
+                const code = this.getCode(name);
                 // in case of MICROHETEROGENEITY `sequenceArray[idx]` may already be set
                 if (!sequenceArray[idx] || sequenceArray[idx] === '-') {
                     sequenceArray[idx] = code;
@@ -141,14 +156,62 @@ namespace Sequence {
                 if (compIds[i].length > 1) microHet.set(i, compIds[i])
             }
 
-            this._seq = sequenceArray.join('') as unknown as ArrayLike<Alphabet>;
-            this._labels = labels.map(l => l.length > 1 ? `(${l.join('|')})` : l.join(''));
+            this._code = Column.ofStringArray(sequenceArray) as Column<Alphabet>
+            this._label = Column.ofLambda({
+                value: i => {
+                    const l = labels[i]
+                    return l.length > 1 ? `(${l.join('|')})` : l.join('')
+                },
+                rowCount: labels.length,
+                schema: Column.Schema.str
+            })
             this._microHet = microHet
             this._offset = minSeqId - 1;
+            this._length = count
+        }
+
+        constructor(public kind: K, public compId: Column<string>, public seqId: Column<number>, private getCode: (name: string) => string) {
+
         }
+    }
+
+    /**
+     * Creates a sequence of kind `Generic` from seq id ranges given as two
+     * parallel columns: `seqIdBegin` holds the first and `seqIdEnd` the last
+     * seq id of each range.
+     */
+    export function ofSequenceRanges(seqIdBegin: Column<number>, seqIdEnd: Column<number>): Sequence {
+        const impl = new SequenceRangesImpl(Kind.Generic, seqIdBegin, seqIdEnd);
+        return impl as Sequence;
+    }
 
-        constructor(public kind: K, private residueName: Column<string>, private seqId: Column<number>, private code: (name: string) => string) {
+    /**
+     * Sequence implementation built from seq id ranges only, without residue
+     * names: every position gets the code 'X' and an empty label and compId.
+     * The sequence covers the contiguous span from the smallest range start to
+     * the largest range end.
+     */
+    class SequenceRangesImpl<K extends Kind, Alphabet extends string> implements Base<K, Alphabet> {
+        public offset: number
+        public length: number
+        public code: Column<Alphabet>
+        public label: Column<string>
+        public seqId: Column<number>
+        public compId: Column<string>
+        /** Always empty; initialized so that readers of `Base.microHet` never get `undefined` */
+        public microHet: ReadonlyMap<number, string[]> = new Map()
+
+        /**
+         * @param kind sequence kind to report
+         * @param seqIdStart first seq id of each range
+         * @param seqIdEnd last seq id of each range, parallel to `seqIdStart`
+         */
+        constructor(public kind: K, private seqIdStart: Column<number>, private seqIdEnd: Column<number>) {
+            // overall extent of all ranges
+            let maxSeqId = 0, minSeqId = Number.MAX_SAFE_INTEGER;
+            for (let i = 0, _i = this.seqIdStart.rowCount; i < _i; i++) {
+                const idStart = this.seqIdStart.value(i);
+                const idEnd = this.seqIdEnd.value(i);
+                if (idStart < minSeqId) minSeqId = idStart;
+                if (maxSeqId < idEnd) maxSeqId = idEnd;
+            }
+
+            const count = maxSeqId - minSeqId + 1;
 
+            this.code = Column.ofConst('X', count, Column.Schema.str) as Column<Alphabet>
+            this.label = Column.ofConst('', count, Column.Schema.str)
+            this.seqId = Column.ofLambda({
+                // row 0 is the first covered position, i.e. minSeqId (seqId === offset + row + 1)
+                value: row => row + minSeqId,
+                rowCount: count,
+                schema: Column.Schema.int
+            })
+            this.compId = Column.ofConst('', count, Column.Schema.str)
+
+            this.offset = minSeqId - 1;
+            this.length = count
         }
     }
 }

+ 6 - 3
src/mol-model/structure/model/properties/coarse/hierarchy.ts

@@ -11,14 +11,17 @@ import { ElementIndex, ChainIndex, EntityIndex } from '../../indexing';
 import SortedRanges from '../../../../../mol-data/int/sorted-ranges';
 
 export interface CoarsedElementKeys {
-    // assign a key to each element
+    /** Assign a key to each element */
     chainKey: ArrayLike<ChainIndex>,
-    // assign a key to each element, index to the Model.entities.data table
+    /** Assign a key to each element, index to the Model.entities.data table */
     entityKey: ArrayLike<EntityIndex>,
 
-    /** find index of the residue/feature element where seq_id is included */
+    /** Find index of the residue/feature element where seq_id is included */
     findSequenceKey(entityId: string, asym_id: string, seq_id: number): ElementIndex
     findChainKey(entityId: string, asym_id: string): ChainIndex
+
+    /** Returns index or -1 if not present. */
+    getEntityFromChain(cI: ChainIndex): EntityIndex
 }
 
 export interface CoarseElementData {

+ 75 - 8
src/mol-model/structure/model/properties/sequence.ts

@@ -1,7 +1,8 @@
 /**
- * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ * Copyright (c) 2018-2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
  *
  * @author David Sehnal <david.sehnal@gmail.com>
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
  */
 
 import { Column } from '../../../../mol-data/db'
@@ -9,6 +10,8 @@ import { AtomicHierarchy } from './atomic/hierarchy';
 import { Entities } from './common';
 import { Sequence } from '../../../sequence';
 import { ChainIndex } from '../indexing';
+import { CoarseHierarchy } from './coarse';
+import { CoarseElements } from './coarse/hierarchy';
 
 interface StructureSequence {
     readonly sequences: ReadonlyArray<StructureSequence.Entity>,
@@ -18,19 +21,39 @@ interface StructureSequence {
 namespace StructureSequence {
     export interface Entity {
         readonly entityId: string,
-        readonly num: Column<number>,
-        /** Corresponds to _entity_poly_seq.mon_id */
-        readonly compId: Column<string>,
         readonly sequence: Sequence
     }
 
+    /**
+     * Combines several structure sequences into one. The `sequences` lists are
+     * concatenated in argument order and the `byEntityKey` maps are overlaid,
+     * so for an entity key present in more than one input the last input wins.
+     */
+    function merge(...entitySeqs: StructureSequence[]): StructureSequence {
+        const sequences: StructureSequence.Entity[] = []
+        const byEntityKey: { [key: number]: StructureSequence.Entity } = {}
+
+        for (const part of entitySeqs) {
+            for (const entity of part.sequences) sequences.push(entity)
+            Object.assign(byEntityKey, part.byEntityKey)
+        }
+        return { sequences, byEntityKey }
+    }
+
+    /**
+     * Derives sequences from the atomic and from the coarse hierarchy and
+     * merges both results, coarse last. `modResMap` is only passed on to the
+     * atomic pass.
+     */
+    export function fromHierarchy(entities: Entities, atomicHierarchy: AtomicHierarchy, coarseHierarchy: CoarseHierarchy, modResMap?: ReadonlyMap<string, string>): StructureSequence {
+        return merge(
+            fromAtomicHierarchy(entities, atomicHierarchy, modResMap),
+            fromCoarseHierarchy(entities, coarseHierarchy)
+        )
+    }
+
     export function fromAtomicHierarchy(entities: Entities, hierarchy: AtomicHierarchy, modResMap?: ReadonlyMap<string, string>): StructureSequence {
         const { label_comp_id, label_seq_id } = hierarchy.residues
         const { chainAtomSegments, residueAtomSegments } = hierarchy
+        const { count, offsets } = chainAtomSegments
 
         const byEntityKey: StructureSequence['byEntityKey'] = { };
         const sequences: StructureSequence.Entity[] = [];
 
+        // check if chain segments are empty
+        if (count === 1 && offsets[0] === 0 && offsets[1] === 0) {
+            return { byEntityKey, sequences };
+        }
+
         for (let cI = 0 as ChainIndex, _cI = hierarchy.chains._rowCount; cI < _cI; cI++) {
             const entityKey = hierarchy.index.getEntityFromChain(cI);
             // Only for polymers, trying to mirror _entity_poly_seq
@@ -43,16 +66,14 @@ namespace StructureSequence {
             }
             cI--;
 
-            const rStart = residueAtomSegments.index[chainAtomSegments.offsets[start]];
-            const rEnd = residueAtomSegments.index[chainAtomSegments.offsets[cI + 1]];
+            const rStart = residueAtomSegments.index[offsets[start]];
+            const rEnd = residueAtomSegments.index[offsets[cI + 1]];
 
             const compId = Column.window(label_comp_id, rStart, rEnd);
             const num = Column.window(label_seq_id, rStart, rEnd);
 
             byEntityKey[entityKey] = {
                 entityId: entities.data.id.value(entityKey),
-                compId,
-                num,
                 sequence: Sequence.ofResidueNames(compId, num, modResMap)
             };
 
@@ -61,6 +82,52 @@ namespace StructureSequence {
 
         return { byEntityKey, sequences };
     }
+
+    /**
+     * Derives sequences from the spheres and from the gaussians of a coarse
+     * hierarchy and merges both results, gaussians last.
+     */
+    export function fromCoarseHierarchy(entities: Entities, hierarchy: CoarseHierarchy): StructureSequence {
+        const { spheres, gaussians } = hierarchy
+        return merge(
+            fromCoarseElements(entities, spheres),
+            fromCoarseElements(entities, gaussians)
+        )
+    }
+
+    /**
+     * Builds one sequence per entity from a set of coarse elements. Directly
+     * consecutive chains of the same entity are grouped and the
+     * `seq_id_begin`/`seq_id_end` values of their elements are passed to
+     * `Sequence.ofSequenceRanges`. A later run of chains for an entity that
+     * already has a sequence is skipped.
+     */
+    export function fromCoarseElements(entities: Entities, elements: CoarseElements): StructureSequence {
+        const { chainElementSegments, seq_id_begin, seq_id_end } = elements
+        const { count, offsets } = chainElementSegments
+
+        const sequences: StructureSequence.Entity[] = [];
+        const byEntityKey: StructureSequence['byEntityKey'] = { };
+
+        // a single segment spanning [0, 0) is treated as "no chain segments"
+        const isEmpty = count === 1 && offsets[0] === 0 && offsets[1] === 0
+        if (isEmpty) return { byEntityKey, sequences };
+
+        for (let cI = 0 as ChainIndex, _cI = count; cI < _cI; cI++) {
+            const entityKey = elements.getEntityFromChain(cI);
+            if (byEntityKey[entityKey] !== void 0) continue;
+
+            // advance cI to the last chain of the run that shares this entity
+            const firstChain = cI;
+            do {
+                cI++;
+            } while (cI < _cI && entityKey === elements.getEntityFromChain(cI));
+            cI--;
+
+            // element range covered by the run of chains
+            const elementStart = offsets[firstChain];
+            const elementEnd = offsets[cI + 1];
+
+            const rangeBegins = Column.window(seq_id_begin, elementStart, elementEnd);
+            const rangeEnds = Column.window(seq_id_end, elementStart, elementEnd);
+
+            const entity: StructureSequence.Entity = {
+                entityId: entities.data.id.value(entityKey),
+                sequence: Sequence.ofSequenceRanges(rangeBegins, rangeEnds)
+            };
+
+            byEntityKey[entityKey] = entity;
+            sequences.push(entity);
+        }
+
+        return { byEntityKey, sequences };
+    }
 }
 
 export default StructureSequence

+ 29 - 13
src/mol-model/structure/model/properties/utils/coarse-keys.ts

@@ -8,6 +8,8 @@
 import { Entities } from '../common';
 import { CoarseElementData, CoarsedElementKeys } from '../coarse';
 import { ChainIndex, ElementIndex, EntityIndex } from '../../indexing';
+import SortedRanges from '../../../../../mol-data/int/sorted-ranges';
+import { OrderedSet } from '../../../../../mol-data/int';
 
 function getElementKey(map: Map<string, number>, key: string, counter: { index: number }) {
     if (map.has(key)) return map.get(key)!;
@@ -23,7 +25,7 @@ function getElementSubstructureKeyMap(map: Map<number, Map<string, number>>, key
     return ret;
 }
 
-function createLookUp(entities: Entities, chain: Map<number, Map<string, number>>, seq: Map<number, Map<number, number>>) {
+function createLookUp(entities: Entities, chain: Map<number, Map<string, number>>, seq: Map<number, SeqMap>) {
     const getEntKey = entities.getEntityIndex;
     const findChainKey: CoarsedElementKeys['findChainKey'] = (e, c) => {
         const eKey = getEntKey(e);
@@ -32,7 +34,6 @@ function createLookUp(entities: Entities, chain: Map<number, Map<string, number>
         if (!cm.has(c)) return -1 as ChainIndex;
         return cm.get(c)! as ChainIndex;
     }
-    // TODO consider implementing as binary search
     const findSequenceKey: CoarsedElementKeys['findSequenceKey'] = (e, c, s) => {
         const eKey = getEntKey(e);
         if (eKey < 0) return -1 as ElementIndex;
@@ -41,8 +42,9 @@ function createLookUp(entities: Entities, chain: Map<number, Map<string, number>
         const cKey = cm.get(c)
         if (cKey === undefined) return -1 as ElementIndex
         const sm = seq.get(cKey)!
-        if (!sm.has(s)) return -1 as ElementIndex;
-        return sm.get(s)! as ElementIndex
+        const { elementIndices, seqRanges } = sm
+        const idx = SortedRanges.firstIntersectionIndex(seqRanges, OrderedSet.ofSingleton(s))
+        return (idx !== -1 ? elementIndices[idx] : -1)  as ElementIndex
     }
     return { findChainKey, findSequenceKey };
 }
@@ -51,39 +53,53 @@ function missingEntity(k: string) {
     throw new Error(`Missing entity entry for entity id '${k}'.`);
 }
 
+type SeqMap = { elementIndices: number[], seqRanges: SortedRanges }
+
 export function getCoarseKeys(data: CoarseElementData, entities: Entities): CoarsedElementKeys {
     const { entity_id, asym_id, seq_id_begin, seq_id_end, count, chainElementSegments } = data;
 
-    const seqMaps = new Map<number, Map<number, number>>();
+    const seqMaps = new Map<number, SeqMap>(); // chain key -> seq id ranges with their element indices
     const chainMaps = new Map<number, Map<string, number>>(), chainCounter = { index: 0 };
 
     const chainKey = new Int32Array(count) as any as ChainIndex[];
     const entityKey = new Int32Array(count) as any as EntityIndex[];
 
+    const chainToEntity = new Int32Array(chainElementSegments.count) as any as EntityIndex[]; // entity index per chain segment, filled in the chain loop below
+
     for (let i = 0; i < count; i++) {
         entityKey[i] = entities.getEntityIndex(entity_id.value(i));
         if (entityKey[i] < 0) missingEntity(entity_id.value(i));
     }
 
     for (let cI = 0; cI < chainElementSegments.count; cI++) {
-        const start = chainElementSegments.offsets[cI], end = chainElementSegments.offsets[cI + 1];
-        const map = getElementSubstructureKeyMap(chainMaps, entityKey[start]);
+        const start = chainElementSegments.offsets[cI]
+        const end = chainElementSegments.offsets[cI + 1];
+        const eK = entityKey[start] // the entity of the segment's first element stands in for the whole chain segment
+
+        chainToEntity[cI] = eK
+
+        const map = getElementSubstructureKeyMap(chainMaps, eK);
         const key = getElementKey(map, asym_id.value(start), chainCounter) as ChainIndex;
         for (let i = start; i < end; i++) chainKey[i] = key;
 
         // create seq_id map for the ranges defined by seq_id_begin and seq_id_end
-        const seqMap: Map<number, number> = new Map()
-        seqMaps.set(key, seqMap)
+        const elementIndices: number[] = [] // element index of each range, parallel to the (begin, end) pairs in seqRanges
+        const seqRanges: number[] = [] // flat list of (begin, end) pairs, one pair per element
         for (let i = start; i < end; i++) {
             const seqStart = seq_id_begin.value(i)
             const seqEnd = seq_id_end.value(i)
-            for (let j = seqStart; j <= seqEnd; j++) {
-                seqMap.set(j, i)
-            }
+            elementIndices.push(i)
+            seqRanges.push(seqStart, seqEnd)
         }
+        const seqMap = { elementIndices, seqRanges: SortedRanges.ofSortedRanges(seqRanges) } // NOTE(review): presumably requires the ranges in ascending seq id order - confirm elements are ordered that way within a chain
+        seqMaps.set(key, seqMap)
     }
 
     const { findChainKey, findSequenceKey } = createLookUp(entities, chainMaps, seqMaps);
 
-    return { chainKey, entityKey, findSequenceKey, findChainKey };
+    const getEntityFromChain: CoarsedElementKeys['getEntityFromChain'] = c => {
+        return chainToEntity[c] // NOTE(review): an out-of-range chain index yields undefined here, not the -1 the interface comment promises
+    }
+
+    return { chainKey, entityKey, findSequenceKey, findChainKey, getEntityFromChain };
 }

+ 1 - 1
src/mol-model/structure/util.ts

@@ -12,7 +12,7 @@ import Matrix from '../../mol-math/linear-algebra/matrix/matrix';
 
 export function getCoarseBegCompId(unit: Unit.Spheres | Unit.Gaussians, element: ElementIndex) {
     const entityKey = unit.coarseElements.entityKey[element]
-    const seq = unit.model.sequence.byEntityKey[entityKey]
+    const seq = unit.model.sequence.byEntityKey[entityKey].sequence
     const seq_id_begin = unit.coarseElements.seq_id_begin.value(element)
     return seq.compId.value(seq_id_begin - 1) // 1-indexed
 }

+ 16 - 7
src/mol-plugin/ui/sequence.tsx

@@ -19,6 +19,8 @@ import { ParamDefinition as PD } from '../../mol-util/param-definition';
 import { HeteroSequenceWrapper } from './sequence/hetero';
 import { State, StateSelection } from '../../mol-state';
 
+const MaxDisplaySequenceLength = 10000
+
 function opKey(l: StructureElement.Location) {
     const ids = SP.unit.pdbx_struct_oper_list_ids(l)
     const ncs = SP.unit.struct_ncs_oper_id(l)
@@ -32,7 +34,7 @@ function splitModelEntityId(modelEntityId: string) {
     return [ parseInt(modelIdx), entityId ]
 }
 
-function getSequenceWrapper(state: SequenceViewState, structureSelection: StructureElementSelectionManager): SequenceWrapper.Any | undefined {
+function getSequenceWrapper(state: SequenceViewState, structureSelection: StructureElementSelectionManager): SequenceWrapper.Any | string {
     const { structure, modelEntityId, invariantUnitId, operatorKey } = state
     const l = StructureElement.Location.create()
     const [ modelIdx, entityId ] = splitModelEntityId(modelEntityId)
@@ -43,11 +45,21 @@ function getSequenceWrapper(state: SequenceViewState, structureSelection: Struct
         if (unit.invariantId !== invariantUnitId) continue
         if (opKey(l) !== operatorKey) continue
 
+        if (unit.polymerElements.length) {
+            const l = StructureElement.Location.create(unit, unit.elements[0])
+            const entitySeq = unit.model.sequence.byEntityKey[SP.entity.key(l)]
+            // check if entity sequence is available
+            if (!entitySeq) return 'No sequence available'
+            // check if sequence is too long
+            if (entitySeq.sequence.length > MaxDisplaySequenceLength) return 'Sequence too long'
+        }
+
         const Wrapper = unit.polymerElements.length ? PolymerSequenceWrapper : HeteroSequenceWrapper
         const sw = new Wrapper({ structure, unit })
         sw.markResidue(structureSelection.get(structure), MarkerAction.Select)
         return sw
     }
+    return 'No sequence available'
 }
 
 function getModelEntityOptions(structure: Structure) {
@@ -266,12 +278,9 @@ export class SequenceView extends PluginUIComponent<{ }, SequenceViewState> {
                 <ParameterControls params={this.params} values={this.values} onChange={this.setParamProps} />
             </div>
 
-            {sequenceWrapper !== undefined
-                ? (sequenceWrapper.length <= 10000
-                    ? <Sequence sequenceWrapper={sequenceWrapper} />
-                    : <div className='msp-sequence-wrapper'>Sequence too long</div>
-                )
-                : <div className='msp-sequence-wrapper'>No sequence available</div>}
+            {typeof sequenceWrapper === 'string'
+                ? <div className='msp-sequence-wrapper'>{sequenceWrapper}</div>
+                : <Sequence sequenceWrapper={sequenceWrapper} />}
         </div>;
     }
 }

+ 7 - 6
src/mol-plugin/ui/sequence/polymer.ts

@@ -20,12 +20,12 @@ export class PolymerSequenceWrapper extends SequenceWrapper<StructureUnit> {
     private readonly modelNum: number
     private readonly asymId: string
 
-    seqId(seqIdx: number) {
-        return this.sequence.offset + seqIdx + 1
+    private seqId(seqIdx: number) {
+        return this.sequence.seqId.value(seqIdx)
     }
 
     residueLabel(seqIdx: number) {
-        return this.sequence.labels[seqIdx]
+        return this.sequence.label.value(seqIdx)
     }
     residueColor(seqIdx: number) {
         return this.missing.has(this.modelNum, this.asymId, this.seqId(seqIdx))
@@ -63,13 +63,14 @@ export class PolymerSequenceWrapper extends SequenceWrapper<StructureUnit> {
 
     constructor(data: StructureUnit) {
         const l = StructureElement.Location.create(data.unit, data.unit.elements[0])
-        const sequence = data.unit.model.sequence.byEntityKey[SP.entity.key(l)].sequence
-        const length = sequence.sequence.length
+        const entitySeq = data.unit.model.sequence.byEntityKey[SP.entity.key(l)]
+
+        const length = entitySeq.sequence.length
         const markerArray = new Uint8Array(length)
 
         super(data, markerArray, length)
 
-        this.sequence = sequence
+        this.sequence = entitySeq.sequence
         this.missing = data.unit.model.properties.missingResidues
 
         this.modelNum = data.unit.model.modelNum

+ 1 - 1
src/mol-theme/color/entity-source.ts

@@ -58,7 +58,7 @@ function addSrc(seqToSrcByModelEntity: Map<string, Int16Array>, srcKeySerialMap:
         if (!seqToSrcByModelEntity.has(mK)) {
             const entityIndex = model.entities.getEntityIndex(entityId)
             const seq = model.sequence.sequences[entityIndex].sequence
-            seqToSrc = new Int16Array(seq.sequence.length)
+            seqToSrc = new Int16Array(seq.length)
             seqToSrcByModelEntity.set(mK, seqToSrc)
         } else {
             seqToSrc = seqToSrcByModelEntity.get(mK)!

+ 1 - 1
src/mol-theme/color/hydrophobicity.ts

@@ -44,7 +44,7 @@ function getCoarseCompId(unit: Unit.Spheres | Unit.Gaussians, element: ElementIn
     if (seqIdBegin === seqIdEnd) {
         const { modifiedResidues } = unit.model.properties
         const entityKey = unit.coarseElements.entityKey[element]
-        const seq = unit.model.sequence.byEntityKey[entityKey]
+        const seq = unit.model.sequence.byEntityKey[entityKey].sequence
         let compId = seq.compId.value(seqIdBegin - 1) // 1-indexed
         const parentId = modifiedResidues.parentId.get(compId)
         return parentId === undefined ? compId : parentId

+ 1 - 1
src/mol-theme/color/residue-name.ts

@@ -86,7 +86,7 @@ function getCoarseCompId(unit: Unit.Spheres | Unit.Gaussians, element: ElementIn
     if (seqIdBegin === seqIdEnd) {
         const { modifiedResidues } = unit.model.properties
         const entityKey = unit.coarseElements.entityKey[element]
-        const seq = unit.model.sequence.byEntityKey[entityKey]
+        const seq = unit.model.sequence.byEntityKey[entityKey].sequence
         let compId = seq.compId.value(seqIdBegin - 1) // 1-indexed
         const parentId = modifiedResidues.parentId.get(compId)
         return parentId === undefined ? compId : parentId

+ 1 - 1
src/mol-theme/color/sequence-id.ts

@@ -61,7 +61,7 @@ function getSequenceLength(unit: Unit, element: ElementIndex) {
     if (entityId === '') return 0
     const entityIndex = model.entities.getEntityIndex(entityId)
     if (entityIndex === -1) return 0
-    return model.sequence.byEntityKey[entityIndex].sequence.sequence.length
+    return model.sequence.byEntityKey[entityIndex].sequence.length
 }
 
 export function SequenceIdColorTheme(ctx: ThemeDataContext, props: PD.Values<SequenceIdColorThemeParams>): ColorTheme<SequenceIdColorThemeParams> {

+ 18 - 8
src/mol-theme/label.ts

@@ -129,16 +129,26 @@ export function atomicElementLabel(location: StructureElement.Location<Unit.Atom
 }
 
 export function coarseElementLabel(location: StructureElement.Location<Unit.Spheres | Unit.Gaussians>, granularity: LabelGranularity) {
-    // TODO handle granularity
     const asym_id = Props.coarse.asym_id(location)
     const seq_id_begin = Props.coarse.seq_id_begin(location)
     const seq_id_end = Props.coarse.seq_id_end(location)
-    if (seq_id_begin === seq_id_end) {
-        const entityIndex = Props.coarse.entityKey(location)
-        const seq = location.unit.model.sequence.byEntityKey[entityIndex]
-        const comp_id = seq.compId.value(seq_id_begin - 1) // 1-indexed
-        return `${comp_id} ${seq_id_begin}:${asym_id}`
-    } else {
-        return `${seq_id_begin}-${seq_id_end}:${asym_id}`
+    // label parts are collected finest-first and reversed before joining
+    const label: string[] = []
+
+    switch (granularity) {
+        case 'element':
+        case 'residue':
+            if (seq_id_begin === seq_id_end) {
+                const entityIndex = Props.coarse.entityKey(location)
+                const seq = location.unit.model.sequence.byEntityKey[entityIndex]
+                const comp_id = seq.sequence.compId.value(seq_id_begin - 1) // 1-indexed
+                label.push(`${comp_id} ${seq_id_begin}-${seq_id_end}`)
+            } else {
+                label.push(`${seq_id_begin}-${seq_id_end}`)
+            } // no break: falls through so element/residue labels also get the chain part
+        case 'chain':
+            label.push(`Chain ${asym_id}`)
+    } // any other granularity adds no part, which results in an empty label
+
+    return label.reverse().join(' | ') // e.g. "Chain A | 1-10"
 }