Browse Source

wip, moleculeType and chemCompMap usage

Alexander Rose 6 years ago
parent
commit
b674b102a5

+ 1 - 1
src/mol-model-formats/structure/mmcif/atomic.ts

@@ -101,7 +101,7 @@ export function getAtomicHierarchyAndConformation(format: mmCIF_Format, atom_sit
 
     const index = getAtomicIndex(hierarchyData, entities, hierarchySegments);
     const derived = getAtomicDerivedData(hierarchyData, index, formatData.chemicalComponentMap);
-    const hierarchyRanges = getAtomicRanges(hierarchyData, hierarchySegments, conformation, formatData.chemicalComponentMap);
+    const hierarchyRanges = getAtomicRanges(hierarchyData, hierarchySegments, conformation, derived.residue.moleculeType);
     const hierarchy: AtomicHierarchy = { ...hierarchyData, ...hierarchySegments, ...hierarchyRanges, index, derived };
     return { sameAsPrevious: false, hierarchy, conformation };
 }

+ 19 - 7
src/mol-model-formats/structure/mmcif/parser.ts

@@ -23,11 +23,12 @@ import { getSecondaryStructureMmCif } from './secondary-structure';
 import { getSequence } from './sequence';
 import { sortAtomSite } from './sort';
 import { StructConn } from './bonds/struct_conn';
-import { ChemicalComponent, ChemicalComponentMap, CommonChemicalComponentMap } from 'mol-model/structure/model/properties/chemical-component';
+import { ChemicalComponent, ChemicalComponentMap } from 'mol-model/structure/model/properties/chemical-component';
 import { ComponentType, getMoleculeType, MoleculeType } from 'mol-model/structure/model/types';
 import { ModelFormat } from '../format';
 import { SaccharideComponentMap, SaccharideComponent, SaccharidesSnfgMap, SaccharideCompIdMap, UnknownSaccharideComponent } from 'mol-model/structure/structure/carbohydrates/constants';
 import mmCIF_Format = ModelFormat.mmCIF
+import { memoize1 } from 'mol-util/memoize';
 
 export async function _parse_mmCif(format: mmCIF_Format, ctx: RuntimeContext) {
     const formatData = getFormatData(format)
@@ -78,6 +79,7 @@ function getNcsOperators(format: mmCIF_Format) {
     }
     return opers;
 }
+
 function getModifiedResidueNameMap(format: mmCIF_Format): Model['properties']['modifiedResidues'] {
     const data = format.data.pdbx_struct_mod_residue;
     const parentId = new Map<string, string>();
@@ -95,9 +97,9 @@ function getModifiedResidueNameMap(format: mmCIF_Format): Model['properties']['m
 }
 
 function getChemicalComponentMap(format: mmCIF_Format): ChemicalComponentMap {
+    const map = new Map<string, ChemicalComponent>();
     const { chem_comp } = format.data
     if (chem_comp._rowCount > 0) {
-        const map = new Map<string, ChemicalComponent>();
         const { id, type, name, pdbx_synonyms, formula, formula_weight } = format.data.chem_comp
         for (let i = 0, il = id.rowCount; i < il; ++i) {
             const _id = id.value(i)
@@ -113,10 +115,8 @@ function getChemicalComponentMap(format: mmCIF_Format): ChemicalComponentMap {
             }
             map.set(_id, cc)
         }
-        return map
-    } else {
-        return CommonChemicalComponentMap
     }
+    return map
 }
 
 function getSaccharideComponentMap(format: mmCIF_Format): SaccharideComponentMap {
@@ -147,12 +147,24 @@ function getSaccharideComponentMap(format: mmCIF_Format): SaccharideComponentMap
             }
         }
     } else {
-        // TODO check if present in format.data.atom_site.label_comp_id
-        SaccharideCompIdMap.forEach((v, k) => map.set(k, v))
+        const uniqueNames = getUniqueComponentNames(format)
+        SaccharideCompIdMap.forEach((v, k) => {
+            if (uniqueNames.has(k)) map.set(k, v)
+        })
     }
     return map
 }
 
+/**
+ * Collects the distinct component ids listed in the `atom_site` category.
+ * The `label_comp_id` column is read when it is defined, `auth_comp_id`
+ * otherwise. The collector is wrapped in `memoize1`.
+ */
+const getUniqueComponentNames = memoize1((format: mmCIF_Format) => {
+    const atomSite = format.data.atom_site
+    const column = atomSite.label_comp_id.isDefined ? atomSite.label_comp_id : atomSite.auth_comp_id
+    const names = new Set<string>()
+    const count = column.rowCount
+    let row = 0
+    while (row < count) {
+        names.add(column.value(row))
+        row += 1
+    }
+    return names
+})
+
 export interface FormatData {
     modifiedResidues: Model['properties']['modifiedResidues']
     chemicalComponentMap: Model['properties']['chemicalComponentMap']

+ 1 - 206
src/mol-model/structure/model/properties/chemical-component.ts

@@ -16,209 +16,4 @@ export interface ChemicalComponent {
     formulaWeight: number
 }
 
-export type ChemicalComponentMap = ReadonlyMap<string, ChemicalComponent>
-
-const CommonChemicalComponents: ChemicalComponent[] = [
-    {
-        id: 'ALA',
-        type: ComponentType['L-peptide linking'],
-        moleculeType: MoleculeType.protein,
-        name: 'ALANINE',
-        synonyms: [],
-        formula: 'C3 H7 N O2',
-        formulaWeight: 89.093
-    },
-    { 
-        id: 'ARG',
-        type: ComponentType['L-peptide linking'],
-        moleculeType: MoleculeType.protein,
-        name: 'ARGININE',
-        synonyms: [],
-        formula: 'C6 H15 N4 O2 1',
-        formulaWeight: 175.209
-    },
-    { 
-        id: 'ASN', 
-        type: ComponentType['L-peptide linking'],
-        moleculeType: MoleculeType.protein,
-        name: 'ASPARAGINE',
-        synonyms: [],
-        formula: 'C4 H8 N2 O3',
-        formulaWeight: 132.118
-    },
-    { 
-        id: 'ASP', 
-        type: ComponentType['L-peptide linking'],
-        moleculeType: MoleculeType.protein,
-        name: 'ASPARTIC ACID',
-        synonyms: [],
-        formula: 'C4 H7 N O4',
-        formulaWeight: 133.103
-    },
-    { 
-        id: 'CYS', 
-        type: ComponentType['L-peptide linking'],
-        moleculeType: MoleculeType.protein,
-        name: 'CYSTEINE',
-        synonyms: [],
-        formula: 'C3 H7 N O2 S',
-        formulaWeight: 121.158
-    },
-    { 
-        id: 'GLN', 
-        type: ComponentType['L-peptide linking'],
-        moleculeType: MoleculeType.protein,
-        name: 'GLUTAMINE',
-        synonyms: [],
-        formula: 'C5 H10 N2 O3',
-        formulaWeight: 146.144
-    },
-    { 
-        id: 'GLU', 
-        type: ComponentType['L-peptide linking'],
-        moleculeType: MoleculeType.protein,
-        name: 'GLUTAMIC ACID',
-        synonyms: [],
-        formula: 'C5 H9 N O4',
-        formulaWeight: 147.129
-    },
-    { 
-        id: 'GLY', 
-        type: ComponentType['peptide linking'],
-        moleculeType: MoleculeType.protein,
-        name: 'GLYCINE',
-        synonyms: [],
-        formula: 'C2 H5 N O2',
-        formulaWeight: 75.067
-    },
-    { 
-        id: 'HIS', 
-        type: ComponentType['L-peptide linking'],
-        moleculeType: MoleculeType.protein,
-        name: 'HISTIDINE',
-        synonyms: [],
-        formula: 'C6 H10 N3 O2 1',
-        formulaWeight: 156.162
-    },
-    { 
-        id: 'ILE', 
-        type: ComponentType['L-peptide linking'],
-        moleculeType: MoleculeType.protein,
-        name: 'ISOLEUCINE',
-        synonyms: [],
-        formula: 'C6 H13 N O2',
-        formulaWeight: 131.173
-    },
-    { 
-        id: 'LEU', 
-        type: ComponentType['L-peptide linking'],
-        moleculeType: MoleculeType.protein,
-        name: 'LEUCINE',
-        synonyms: [],
-        formula: 'C6 H13 N O2',
-        formulaWeight: 131.173
-    },
-    { 
-        id: 'LYS', 
-        type: ComponentType['L-peptide linking'],
-        moleculeType: MoleculeType.protein,
-        name: 'LYSINE',
-        synonyms: [],
-        formula: 'C6 H15 N2 O2 1',
-        formulaWeight: 147.195
-    },
-    { 
-        id: 'MET', 
-        type: ComponentType['L-peptide linking'],
-        moleculeType: MoleculeType.protein,
-        name: 'METHIONINE',
-        synonyms: [],
-        formula: 'C5 H11 N O2 S',
-        formulaWeight: 149.211 
-    },
-    { 
-        id: 'PHE', 
-        type: ComponentType['L-peptide linking'],
-        moleculeType: MoleculeType.protein,
-        name: 'PHENYLALANINE',
-        synonyms: [],
-        formula: 'C9 H11 N O2',
-        formulaWeight: 165.19 
-    },
-    { 
-        id: 'PRO', 
-        type: ComponentType['L-peptide linking'],
-        moleculeType: MoleculeType.protein,
-        name: 'PROLINE',
-        synonyms: [],
-        formula: 'C5 H9 N O2',
-        formulaWeight: 115.13
-    },
-    { // 'O' as per IUPAC definition
-        id: 'PYL', 
-        type: ComponentType['L-peptide linking'],
-        moleculeType: MoleculeType.protein,
-        name: 'PYRROLYSINE',
-        synonyms: [],
-        formula: 'C12 H21 N3 O3',
-        formulaWeight: 255.31
-    },
-    { // 'U' as per IUPAC definition
-        id: 'SEC', 
-        type: ComponentType['L-peptide linking'],
-        moleculeType: MoleculeType.protein,
-        name: 'SELENOCYSTEINE',
-        synonyms: [],
-        formula: 'C3 H7 N O2 Se',
-        formulaWeight: 168.05
-    },
-    {
-        id: 'SER', 
-        type: ComponentType['L-peptide linking'],
-        moleculeType: MoleculeType.protein,
-        name: 'SERINE',
-        synonyms: [],
-        formula: 'C3 H7 N O3',
-        formulaWeight: 105.09
-    },
-    {
-        id: 'THR', 
-        type: ComponentType['L-peptide linking'],
-        moleculeType: MoleculeType.protein,
-        name: 'THREONINE',
-        synonyms: [],
-        formula: 'C4 H9 N O3',
-        formulaWeight: 119.12
-    },
-    {
-        id: 'TRP', 
-        type: ComponentType['L-peptide linking'],
-        moleculeType: MoleculeType.protein,
-        name: 'TRYPTOPHAN',
-        synonyms: [],
-        formula: 'C11 H12 N2 O2',
-        formulaWeight: 204.22
-    },
-    {
-        id: 'TYR', 
-        type: ComponentType['L-peptide linking'],
-        moleculeType: MoleculeType.protein,
-        name: 'TYROSINE',
-        synonyms: [],
-        formula: 'C9 H11 N O3',
-        formulaWeight: 181.19
-    },
-    {
-        id: 'VAL', 
-        type: ComponentType['L-peptide linking'],
-        moleculeType: MoleculeType.protein,
-        name: 'VALINE',
-        synonyms: [],
-        formula: 'C5 H11 N O2',
-        formulaWeight: 117.15
-    }
-]
-export const CommonChemicalComponentMap = new Map()
-for (let i = 0, il = CommonChemicalComponents.length; i < il; ++i) {
-    CommonChemicalComponentMap.set(CommonChemicalComponents[i].id, CommonChemicalComponents[i])
-}
+export type ChemicalComponentMap = ReadonlyMap<string, ChemicalComponent>

+ 14 - 5
src/mol-model/structure/model/properties/utils/atomic-derived.ts

@@ -1,5 +1,5 @@
 /**
- * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ * Copyright (c) 2018-2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
  *
  * @author Alexander Rose <alexander.rose@weirdbyte.de>
  */
@@ -8,22 +8,32 @@ import { AtomicData } from '../atomic';
 import { ChemicalComponentMap } from '../chemical-component';
 import { AtomicIndex, AtomicDerivedData } from '../atomic/hierarchy';
 import { ElementIndex, ResidueIndex } from '../../indexing';
-import { MoleculeType } from '../../types';
+import { MoleculeType, getMoleculeType, getComponentType } from '../../types';
 import { getAtomIdForAtomRole } from 'mol-model/structure/util';
 
 export function getAtomicDerivedData(data: AtomicData, index: AtomicIndex, chemicalComponentMap: ChemicalComponentMap): AtomicDerivedData {
-    
     const { label_comp_id, _rowCount: n } = data.residues
 
     const traceElementIndex = new Uint32Array(n)
     const directionElementIndex = new Uint32Array(n)
     const moleculeType = new Uint8Array(n)
 
+    const moleculeTypeMap = new Map<string, MoleculeType>()
+
     for (let i = 0; i < n; ++i) {
         const compId = label_comp_id.value(i)
         const chemCompMap = chemicalComponentMap
         const cc = chemCompMap.get(compId)
-        const molType = cc ? cc.moleculeType : MoleculeType.unknown
+        let molType: MoleculeType
+        if (cc) {
+            molType = cc.moleculeType
+        } else if (moleculeTypeMap.has(compId)){
+            molType = moleculeTypeMap.get(compId)!
+        } else {
+            molType = getMoleculeType(getComponentType(compId), compId)
+            // TODO if unknown molecule type, use atom names to guess molecule type
+            moleculeTypeMap.set(compId, molType)
+        }
         moleculeType[i] = molType
 
         const traceAtomId = getAtomIdForAtomRole(molType, 'trace')
@@ -33,7 +43,6 @@ export function getAtomicDerivedData(data: AtomicData, index: AtomicIndex, chemi
         directionElementIndex[i] = index.findAtomOnResidue(i as ResidueIndex, directionAtomId)
     }
 
-
     return {
         residue: {
             traceElementIndex: traceElementIndex as unknown as ArrayLike<ElementIndex>,

+ 8 - 16
src/mol-model/structure/model/properties/utils/atomic-ranges.ts

@@ -8,7 +8,6 @@ import { AtomicSegments } from '../atomic';
 import { AtomicData, AtomicRanges } from '../atomic/hierarchy';
 import { Segmentation, Interval } from 'mol-data/int';
 import SortedRanges from 'mol-data/int/sorted-ranges';
-import { ChemicalComponentMap } from '../chemical-component';
 import { MoleculeType, isPolymer } from '../../types';
 import { ElementIndex, ResidueIndex } from '../../indexing';
 import { getAtomIdForAtomRole } from '../../../util';
@@ -17,11 +16,6 @@ import { Vec3 } from 'mol-math/linear-algebra';
 
 // TODO add gaps at the ends of the chains by comparing to the polymer sequence data
 
-function getMoleculeType(compId: string, chemicalComponentMap: ChemicalComponentMap) {
-    const cc = chemicalComponentMap.get(compId)
-    return cc ? cc.moleculeType : MoleculeType.unknown
-}
-
 function getElementIndexForAtomId(rI: ResidueIndex, atomId: string, data: AtomicData, segments: AtomicSegments): ElementIndex {
     const { offsets } = segments.residueAtomSegments
     const { label_atom_id } = data.atoms
@@ -31,10 +25,9 @@ function getElementIndexForAtomId(rI: ResidueIndex, atomId: string, data: Atomic
     return offsets[rI]
 }
 
-function areBackboneConnected(riStart: ResidueIndex, riEnd: ResidueIndex, data: AtomicData, segments: AtomicSegments, conformation: AtomicConformation, chemicalComponentMap: ChemicalComponentMap) {
-    const { label_comp_id } = data.residues
-    const mtStart = getMoleculeType(label_comp_id.value(riStart), chemicalComponentMap)
-    const mtEnd = getMoleculeType(label_comp_id.value(riEnd), chemicalComponentMap)
+function areBackboneConnected(riStart: ResidueIndex, riEnd: ResidueIndex, data: AtomicData, segments: AtomicSegments, conformation: AtomicConformation, moleculeType: ArrayLike<MoleculeType>) {
+    const mtStart = moleculeType[riStart]
+    const mtEnd = moleculeType[riEnd]
     if (!isPolymer(mtStart) || !isPolymer(mtEnd)) return false
 
     const startId = getAtomIdForAtomRole(mtStart, 'backboneStart')
@@ -49,13 +42,13 @@ function areBackboneConnected(riStart: ResidueIndex, riEnd: ResidueIndex, data:
     return Vec3.distance(pStart, pEnd) < 10
 }
 
-export function getAtomicRanges(data: AtomicData, segments: AtomicSegments, conformation: AtomicConformation, chemicalComponentMap: ChemicalComponentMap): AtomicRanges {
+export function getAtomicRanges(data: AtomicData, segments: AtomicSegments, conformation: AtomicConformation, moleculeType: ArrayLike<MoleculeType>): AtomicRanges {
     const polymerRanges: number[] = []
     const gapRanges: number[] = []
     const cyclicPolymerMap = new Map<ResidueIndex, ResidueIndex>()
     const chainIt = Segmentation.transientSegments(segments.chainAtomSegments, Interval.ofBounds(0, data.atoms._rowCount))
     const residueIt = Segmentation.transientSegments(segments.residueAtomSegments, Interval.ofBounds(0, data.atoms._rowCount))
-    const { label_seq_id, label_comp_id } = data.residues
+    const { label_seq_id } = data.residues
 
     let prevSeqId: number
     let prevStart: number
@@ -72,7 +65,7 @@ export function getAtomicRanges(data: AtomicData, segments: AtomicSegments, conf
 
         const riStart = segments.residueAtomSegments.index[chainSegment.start]
         const riEnd = segments.residueAtomSegments.index[chainSegment.end - 1]
-        if (areBackboneConnected(riStart, riEnd, data, segments, conformation, chemicalComponentMap)) {
+        if (areBackboneConnected(riStart, riEnd, data, segments, conformation, moleculeType)) {
             cyclicPolymerMap.set(riStart, riEnd)
             cyclicPolymerMap.set(riEnd, riStart)
         }
@@ -80,9 +73,8 @@ export function getAtomicRanges(data: AtomicData, segments: AtomicSegments, conf
         while (residueIt.hasNext) {
             const residueSegment = residueIt.move();
             const residueIndex = residueSegment.index
-            const moleculeType = getMoleculeType(label_comp_id.value(residueIndex), chemicalComponentMap)
             const seqId = label_seq_id.value(residueIndex)
-            if (isPolymer(moleculeType)) {
+            if (isPolymer(moleculeType[residueIndex])) {
                 if (startIndex !== -1) {
                     if (seqId !== prevSeqId + 1) {
                         polymerRanges.push(startIndex, prevEnd - 1)
@@ -93,7 +85,7 @@ export function getAtomicRanges(data: AtomicData, segments: AtomicSegments, conf
                     } else {
                         const riStart = segments.residueAtomSegments.index[residueSegment.start]
                         const riEnd = segments.residueAtomSegments.index[prevEnd - 1]
-                        if (!areBackboneConnected(riStart, riEnd, data, segments, conformation, chemicalComponentMap)) {
+                        if (!areBackboneConnected(riStart, riEnd, data, segments, conformation, moleculeType)) {
                             polymerRanges.push(startIndex, prevEnd - 1)
                             startIndex = residueSegment.start
                         }

+ 43 - 4
src/mol-model/structure/model/types.ts

@@ -6,6 +6,7 @@
  */
 
 import BitFlags from 'mol-util/bit-flags'
+import { SaccharideCompIdMap } from '../structure/carbohydrates/constants';
 
 const _esCache = (function () {
     const cache = Object.create(null);
@@ -158,9 +159,32 @@ export const WaterNames = [
     'SOL', 'WAT', 'HOH', 'H2O', 'W', 'DOD', 'D3O', 'TIP3', 'TIP4', 'SPC'
 ]
 
-export const ExtraSaccharideNames = [
-    'MLR'
-]
+export const AminoAcidOneLetterCodeMap = { // three-letter amino acid component id -> one-letter code
+    'HIS': 'H',
+    'ARG': 'R',
+    'LYS': 'K',
+    'ILE': 'I',
+    'PHE': 'F',
+    'LEU': 'L',
+    'TRP': 'W',
+    'ALA': 'A',
+    'MET': 'M',
+    'PRO': 'P',
+    'CYS': 'C',
+    'ASN': 'N',
+    'VAL': 'V',
+    'GLY': 'G',
+    'SER': 'S',
+    'GLN': 'Q',
+    'TYR': 'Y',
+    'ASP': 'D',
+    'GLU': 'E',
+    'THR': 'T',
+
+    'SEC': 'U',  // as per IUPAC definition
+    'PYL': 'O',  // as per IUPAC definition
+}
+export const AminoAcidNames = Object.keys(AminoAcidOneLetterCodeMap) // the three-letter ids used as keys of the map above
 
 export const RnaBaseNames = [ 'A', 'C', 'T', 'G', 'I', 'U' ]
 export const DnaBaseNames = [ 'DA', 'DC', 'DT', 'DG', 'DI', 'DU' ]
@@ -184,7 +208,7 @@ export function getMoleculeType(compType: string, compId: string) {
         return MoleculeType.RNA
     } else if (DNAComponentTypeNames.includes(compType)) {
         return MoleculeType.DNA
-    } else if (SaccharideComponentTypeNames.includes(compType) || ExtraSaccharideNames.includes(compId)) {
+    } else if (SaccharideComponentTypeNames.includes(compType)) {
         return MoleculeType.saccharide
     } else if (WaterNames.includes(compId)) {
         return MoleculeType.water
@@ -197,6 +221,21 @@ export function getMoleculeType(compType: string, compId: string) {
     }
 }
 
+/**
+ * Derives a component type string from a component id alone.
+ * The id is upper-cased and then checked, in this order, against the amino
+ * acid names, the RNA base names, the DNA base names and the saccharide
+ * component id map. The first match decides the result; an id that matches
+ * none of them yields 'other'.
+ */
+export function getComponentType(compId: string) {
+    const id = compId.toUpperCase()
+    if (AminoAcidNames.includes(id)) return 'peptide linking'
+    if (RnaBaseNames.includes(id)) return 'RNA linking'
+    if (DnaBaseNames.includes(id)) return 'DNA linking'
+    if (SaccharideCompIdMap.has(id)) return 'saccharide'
+    return 'other'
+}
+
 /** Tells whether the given molecule type is one of protein, DNA, RNA or PNA. */
 export function isPolymer(moleculeType: MoleculeType) {
     switch (moleculeType) {
         case MoleculeType.protein:
         case MoleculeType.DNA:
         case MoleculeType.RNA:
         case MoleculeType.PNA:
             return true
         default:
             return false
     }
 }