Browse Source

Parse sec struct from mmCIF

David Sehnal 7 years ago
parent
commit
59f5f0caec

+ 33 - 1
src/apps/structure-info/model.ts

@@ -16,6 +16,8 @@ import { OrderedSet } from 'mol-data/int';
 import { Table } from 'mol-data/db';
 import { mmCIF_Database } from 'mol-io/reader/cif/schema/mmcif';
 import { openCif, downloadCif } from './helpers';
+import { BitFlags } from 'mol-util';
+import { SecondaryStructureType } from 'mol-model/structure/model/types';
 
 
 async function downloadFromPdb(pdb: string) {
@@ -39,6 +41,35 @@ export function atomLabel(model: Model, aI: number) {
     return `${label_asym_id.value(cI)} ${label_comp_id.value(rI)} ${label_seq_id.value(rI)} ${label_atom_id.value(aI)}`
 }
 
+export function residueLabel(model: Model, rI: number) {
+    const { residues, chains, residueSegments, chainSegments } = model.atomicHierarchy
+    const { label_comp_id, label_seq_id } = residues
+    const { label_asym_id } = chains
+    const cI = chainSegments.segmentMap[residueSegments.segments[rI]]
+    return `${label_asym_id.value(cI)} ${label_comp_id.value(rI)} ${label_seq_id.value(rI)}`
+}
+
+export function printSecStructure(model: Model) {
+    console.log('Secondary Structure\n=============');
+    const { residues } = model.atomicHierarchy;
+    const { type, key } = model.properties.secondaryStructure;
+
+    const count = residues._rowCount;
+    let rI = 0;
+    while (rI < count) {
+        let start = rI;
+        while (rI < count && key[start] === key[rI]) rI++;
+        rI--;
+
+        if (BitFlags.has(type[start], SecondaryStructureType.Flag.Beta)) {
+            console.log(`Sheet: ${residueLabel(model, start)} - ${residueLabel(model, rI)} (key ${key[start]})`);
+        } else if (BitFlags.has(type[start], SecondaryStructureType.Flag.Helix)) {
+            console.log(`Helix: ${residueLabel(model, start)} - ${residueLabel(model, rI)} (key ${key[start]})`);
+        }
+
+        rI++;
+    }
+}
 
 export function printBonds(structure: Structure) {
     for (const unit of structure.units) {
@@ -122,7 +153,8 @@ async function run(mmcif: mmCIF_Database) {
     printSequence(models[0]);
     printIHMModels(models[0]);
     printUnits(structure);
-    printBonds(structure);
+    // printBonds(structure);
+    printSecStructure(models[0]);
 }
 
 async function runDL(pdb: string) {

+ 4 - 0
src/mol-model/structure/model/formats/mmcif.ts

@@ -22,6 +22,7 @@ import { getSequence } from './mmcif/sequence';
 
 import mmCIF_Format = Format.mmCIF
 import { Task } from 'mol-task';
+import { getSecondaryStructureMmCif } from './mmcif/secondary-structure';
 
 function findModelBounds({ data }: mmCIF_Format, startIndex: number) {
     const num = data.atom_site.pdbx_PDB_model_num;
@@ -151,6 +152,9 @@ function createModel(format: mmCIF_Format, bounds: Interval, previous?: Model):
         atomicConformation: getConformation(format, bounds),
         coarseHierarchy: coarse.hierarchy,
         coarseConformation: coarse.conformation,
+        properties: {
+            secondaryStructure: getSecondaryStructureMmCif(format.data, atomicHierarchy)
+        },
         symmetry: getSymmetry(format)
     };
 }

+ 154 - 0
src/mol-model/structure/model/formats/mmcif/secondary-structure.ts

@@ -0,0 +1,154 @@
+
+/**
+ * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import { mmCIF_Database as mmCIF, mmCIF_Database } from 'mol-io/reader/cif/schema/mmcif'
+import { SecondaryStructureType } from '../../types';
+import { AtomicHierarchy } from '../../properties/atomic';
+import { SecondaryStructure } from '../../properties/seconday-structure';
+import { Column } from 'mol-data/db';
+
+export function getSecondaryStructureMmCif(data: mmCIF_Database, hierarchy: AtomicHierarchy): SecondaryStructure {
+    const map: SecondaryStructureMap = new Map();
+    addHelices(data.struct_conf, map);
+    // must add Helices 1st because of 'key' value assignment.
+    addSheets(data.struct_sheet_range, map, data.struct_conf._rowCount);
+
+    const secStruct: SecondaryStructureData = {
+        type: new Int32Array(hierarchy.residues._rowCount) as any,
+        key: new Int32Array(hierarchy.residues._rowCount) as any
+    };
+
+    if (map.size > 0) assignSecondaryStructureRanges(hierarchy, map, secStruct);
+    return secStruct;
+}
+
+type SecondaryStructureEntry = {
+    startSeqNumber: number,
+    startInsCode: string | null,
+    endSeqNumber: number,
+    endInsCode: string | null,
+    type: SecondaryStructureType,
+    key: number
+}
+type SecondaryStructureMap = Map<string, Map<number, SecondaryStructureEntry>>
+type SecondaryStructureData = { type: SecondaryStructureType[], key: number[] }
+
+function addHelices(cat: mmCIF['struct_conf'], map: SecondaryStructureMap) {
+    if (!cat._rowCount) return;
+
+    const { beg_label_asym_id, beg_label_seq_id, pdbx_beg_PDB_ins_code } = cat;
+    const { end_label_seq_id, pdbx_end_PDB_ins_code } = cat;
+    const { pdbx_PDB_helix_class, conf_type_id } = cat;
+
+    for (let i = 0, _i = cat._rowCount; i < _i; i++) {
+        const type =  pdbx_PDB_helix_class.valueKind(i) === Column.ValueKind.Present
+            ? SecondaryStructureType.SecondaryStructurePdb[pdbx_PDB_helix_class.value(i)]
+            : conf_type_id.valueKind(i) === Column.ValueKind.Present
+            ? SecondaryStructureType.SecondaryStructureMmcif[conf_type_id.value(i)]
+            : SecondaryStructureType.Flag.NA
+
+        const entry: SecondaryStructureEntry = {
+            startSeqNumber: beg_label_seq_id.value(i),
+            startInsCode: pdbx_beg_PDB_ins_code.value(i),
+            endSeqNumber: end_label_seq_id.value(i),
+            endInsCode: pdbx_end_PDB_ins_code.value(i),
+            type: SecondaryStructureType.create(type),
+            key: i + 1
+        };
+
+        const asymId = beg_label_asym_id.value(i)!;
+        if (map.has(asymId)) {
+            map.get(asymId)!.set(entry.startSeqNumber, entry);
+        } else {
+            map.set(asymId, new Map([[entry.startSeqNumber, entry]]));
+        }
+    }
+}
+
+function addSheets(cat: mmCIF['struct_sheet_range'], map: SecondaryStructureMap, sheetCount: number) {
+    if (!cat._rowCount) return;
+
+    const { beg_label_asym_id, beg_label_seq_id, pdbx_beg_PDB_ins_code } = cat;
+    const { end_label_seq_id, pdbx_end_PDB_ins_code } = cat;
+    const { sheet_id } = cat;
+
+    const sheet_id_key = new Map<string, number>();
+    let currentKey = sheetCount + 1;
+
+    for (let i = 0, _i = cat._rowCount; i < _i; i++) {
+        const id = sheet_id.value(i);
+        let key: number;
+        if (sheet_id_key.has(id)) key = sheet_id_key.get(id)!;
+        else {
+            key = currentKey++;
+            sheet_id_key.set(id, key);
+        }
+
+        const entry: SecondaryStructureEntry = {
+            startSeqNumber: beg_label_seq_id.value(i),
+            startInsCode: pdbx_beg_PDB_ins_code.value(i),
+            endSeqNumber: end_label_seq_id.value(i),
+            endInsCode: pdbx_end_PDB_ins_code.value(i),
+            type: SecondaryStructureType.create(SecondaryStructureType.Flag.Beta | SecondaryStructureType.Flag.BetaSheet),
+            key
+        };
+
+        const asymId = beg_label_asym_id.value(i)!;
+        if (map.has(asymId)) {
+            map.get(asymId)!.set(entry.startSeqNumber, entry);
+        } else {
+            map.set(asymId, new Map([[entry.startSeqNumber, entry]]));
+        }
+    }
+
+    return;
+}
+
+function assignSecondaryStructureEntry(hierarchy: AtomicHierarchy, entry: SecondaryStructureEntry, resStart: number, resEnd: number, data: SecondaryStructureData) {
+    const { label_seq_id, pdbx_PDB_ins_code } = hierarchy.residues;
+    const { endSeqNumber, endInsCode, type, key } = entry;
+
+    let rI = resStart;
+    while (rI < resEnd) {
+        const seqNumber = label_seq_id.value(rI);
+
+        data.type[rI] = type;
+        data.key[rI] = key;
+
+        if ((seqNumber > endSeqNumber) ||
+            (seqNumber === endSeqNumber && pdbx_PDB_ins_code.value(rI) === endInsCode)) {
+            break;
+        }
+
+        rI++;
+    }
+}
+
+function assignSecondaryStructureRanges(hierarchy: AtomicHierarchy, map: SecondaryStructureMap, data: SecondaryStructureData) {
+    const { segments: chainSegments, count: chainCount } = hierarchy.chainSegments;
+    const { label_asym_id } = hierarchy.chains;
+    const { label_seq_id, pdbx_PDB_ins_code } = hierarchy.residues;
+
+    for (let cI = 0; cI < chainCount; cI++) {
+        const resStart = chainSegments[cI], resEnd = chainSegments[cI + 1];
+        const asymId = label_asym_id.value(cI);
+
+        if (map.has(asymId)) {
+            const entries = map.get(asymId)!;
+
+            for (let rI = resStart; rI < resEnd; rI++) {
+                const seqNumber = label_seq_id.value(rI);
+                if (entries.has(seqNumber)) {
+                    const entry = entries.get(seqNumber)!;
+                    const insCode = pdbx_PDB_ins_code.value(rI);
+                    if (entry.startInsCode !== insCode) continue;
+                    assignSecondaryStructureEntry(hierarchy, entry, rI, resEnd, data);
+                }
+            }
+        }
+    }
+}

+ 3 - 0
src/mol-model/structure/model/model.ts

@@ -11,6 +11,7 @@ import { AtomicHierarchy, AtomicConformation } from './properties/atomic'
 import { ModelSymmetry } from './properties/symmetry'
 import { CoarseHierarchy, CoarseConformation } from './properties/coarse'
 import { Entities } from './properties/common';
+import { SecondaryStructure } from './properties/seconday-structure';
 
 //import from_gro from './formats/gro'
 import from_mmCIF from './formats/mmcif'
@@ -34,6 +35,8 @@ interface Model extends Readonly<{
     atomicHierarchy: AtomicHierarchy,
     atomicConformation: AtomicConformation,
 
+    properties: { secondaryStructure: SecondaryStructure },
+
     coarseHierarchy: CoarseHierarchy,
     coarseConformation: CoarseConformation
 }> {

+ 9 - 6
src/mol-model/structure/model/properties/seconday-structure.ts

@@ -4,11 +4,14 @@
  * @author David Sehnal <david.sehnal@gmail.com>
  */
 
+import { SecondaryStructureType } from '../types';
+
 /** Secondary structure "indexed" by residues. */
-export interface SecondaryStructure {
-    type: number[],
-    index: number[],
-    flags: number[],
+interface SecondaryStructure {
+    // assign flags to each residue
+    readonly type: ArrayLike<SecondaryStructureType>,
     /** unique value for each "element". This is because single sheet is speficied by multiple records. */
-    key: number[]
-}
+    readonly key: ArrayLike<number>
+}
+
+export { SecondaryStructure }

+ 1 - 0
src/mol-model/structure/model/types.ts

@@ -94,6 +94,7 @@ export namespace SecondaryStructureType {
     export const Turn = ['s', 't', 'l', '']
 
     export const is: (ss: SecondaryStructureType, f: Flag) => boolean = BitFlags.has
+    export const create: (fs: Flag) => SecondaryStructureType = BitFlags.create
 
     export const enum Flag {
         None = 0x0,

+ 5 - 1
src/mol-model/structure/query/properties.ts

@@ -52,7 +52,11 @@ const residue = {
     auth_comp_id: Element.property(l => !Unit.isAtomic(l.unit) ? notAtomic() : l.unit.model.atomicHierarchy.residues.auth_comp_id.value(l.unit.residueIndex[l.element])),
     label_seq_id: Element.property(l => !Unit.isAtomic(l.unit) ? notAtomic() : l.unit.model.atomicHierarchy.residues.label_seq_id.value(l.unit.residueIndex[l.element])),
     auth_seq_id: Element.property(l => !Unit.isAtomic(l.unit) ? notAtomic() : l.unit.model.atomicHierarchy.residues.auth_seq_id.value(l.unit.residueIndex[l.element])),
-    pdbx_PDB_ins_code: Element.property(l => !Unit.isAtomic(l.unit) ? notAtomic() : l.unit.model.atomicHierarchy.residues.pdbx_PDB_ins_code.value(l.unit.residueIndex[l.element]))
+    pdbx_PDB_ins_code: Element.property(l => !Unit.isAtomic(l.unit) ? notAtomic() : l.unit.model.atomicHierarchy.residues.pdbx_PDB_ins_code.value(l.unit.residueIndex[l.element])),
+
+    // Properties
+    secondary_structure_type: Element.property(l => !Unit.isAtomic(l.unit) ? notAtomic() : l.unit.model.properties.secondaryStructure.type[l.unit.residueIndex[l.element]]),
+    secondary_structure_key: Element.property(l => !Unit.isAtomic(l.unit) ? notAtomic() : l.unit.model.properties.secondaryStructure.key[l.unit.residueIndex[l.element]]),
 }
 
 const chain = {

+ 1 - 1
src/mol-util/bit-flags.ts

@@ -4,7 +4,7 @@
  * @author David Sehnal <david.sehnal@gmail.com>
  */
 
-interface BitFlags<Flags> { '@type': Flags }
+interface BitFlags<Flags> extends Number { '@type': Flags }
 
 namespace BitFlags {
     export function create<F>(flags: F): BitFlags<F> { return flags as any; }