Browse Source

better handling of unknown saccharides

Alexander Rose 6 years ago
parent
commit
aad85a79e7

+ 12 - 4
src/mol-model/structure/model/formats/mmcif.ts

@@ -24,10 +24,10 @@ import { getSequence } from './mmcif/sequence';
 import { sortAtomSite } from './mmcif/sort';
 import { StructConn } from './mmcif/bonds/struct_conn';
 import { ChemicalComponent, ChemicalComponentMap } from '../properties/chemical-component';
-import { ComponentType, getMoleculeType } from '../types';
+import { ComponentType, getMoleculeType, MoleculeType } from '../types';
 
 import mmCIF_Format = Format.mmCIF
-import { SaccharideComponentMap, SaccharideComponent, SaccharidesSnfgMap, SaccharideCompIdMap } from 'mol-model/structure/structure/carbohydrates/constants';
+import { SaccharideComponentMap, SaccharideComponent, SaccharidesSnfgMap, SaccharideCompIdMap, UnknownSaccharideComponent } from 'mol-model/structure/structure/carbohydrates/constants';
 
 type AtomSite = mmCIF_Database['atom_site']
 
@@ -124,10 +124,18 @@ function getSaccharideComponentMap(format: mmCIF_Format): SaccharideComponentMap
                 }
             }
         }
-        return map
     } else {
-        return SaccharideCompIdMap
+        SaccharideCompIdMap.forEach((v, k) => map.set(k, v))
+        const { id, type  } = format.data.chem_comp
+        for (let i = 0, il = id.rowCount; i < il; ++i) {
+            const _id = id.value(i)
+            const _type = type.value(i)
+            if (!map.has(_id) && getMoleculeType(_type, _id) === MoleculeType.saccharide) {
+                map.set(_id, UnknownSaccharideComponent)
+            }
+        }
     }
+    return map
 }
 
 export interface FormatData {

+ 6 - 9
src/mol-model/structure/structure/carbohydrates/compute.ts

@@ -12,12 +12,11 @@ import { Vec3 } from 'mol-math/linear-algebra';
 import PrincipalAxes from 'mol-math/linear-algebra/matrix/principal-axes';
 import { fillSerial } from 'mol-util/array';
 import { ResidueIndex, Model } from '../../model';
-import { ElementSymbol, MoleculeType } from '../../model/types';
-import { getAtomicMoleculeType, getPositionMatrix } from '../../util';
+import { ElementSymbol } from '../../model/types';
+import { getPositionMatrix } from '../../util';
 import StructureElement from '../element';
 import Structure from '../structure';
 import Unit from '../unit';
-import { UnknownSaccharideComponent, SaccharideComponent } from './constants';
 import { CarbohydrateElement, CarbohydrateLink, Carbohydrates, CarbohydrateTerminalLink, PartialCarbohydrateElement } from './data';
 import { UnitRings, UnitRing } from '../unit/rings';
 import { ElementIndex } from '../../model/indexing';
@@ -118,8 +117,8 @@ function filterFusedRings(unitRings: UnitRings, rings: UnitRings.Index[] | undef
     }
 }
 
-function getSaccharideComp(compId: string, model: Model): SaccharideComponent {
-    return model.properties.saccharideComponentMap.get(compId) || UnknownSaccharideComponent
+function getSaccharideComp(compId: string, model: Model) {
+    return model.properties.saccharideComponentMap.get(compId)
 }
 
 export function computeCarbohydrates(structure: Structure): Carbohydrates {
@@ -167,9 +166,7 @@ export function computeCarbohydrates(structure: Structure): Carbohydrates {
                 const { index: residueIndex } = residueIt.move();
 
                 const saccharideComp = getSaccharideComp(label_comp_id.value(residueIndex), model)
-                if (saccharideComp === UnknownSaccharideComponent) {
-                    if (getAtomicMoleculeType(unit.model, residueIndex) !== MoleculeType.saccharide) continue
-                }
+                if (!saccharideComp) continue
 
                 if (!sugarResidueMap) {
                     sugarResidueMap = UnitRings.byFingerprintAndResidue(unit.rings, SugarRingFps);
@@ -402,7 +399,7 @@ function buildLookups (elements: CarbohydrateElement[], links: CarbohydrateLink[
         let k: string
         if (fromCarbohydrate) {
             k = terminalLinksKey(unit, anomericCarbon)
-        } else{
+        } else {
             k = terminalLinksKey(elementUnit, elementUnit.elements[elementIndex])
         }
         const e = terminalLinksMap.get(k)

+ 11 - 1
src/mol-model/structure/structure/carbohydrates/constants.ts

@@ -205,7 +205,10 @@ const CommonSaccharideNames: { [k: string]: string[] } = {
         'MLR', // via GlyFinder, tri-saccharide but homomer
     ],
     Man: ['MAN', 'BMA'],
-    Gal: ['GAL', 'GLA'],
+    Gal: [
+        'GAL', 'GLA',
+        'GXL' // via PubChem
+    ],
     Gul: ['GUP', 'GL0'],
     Alt: ['ALT'],
     All: ['ALL', 'AFD'],
@@ -296,6 +299,10 @@ const CommonSaccharideNames: { [k: string]: string[] } = {
     Psi: [],
 }
 
+const UnknownSaccharideNames = [
+    'NGZ', // via CCD
+]
+
 export const SaccharideCompIdMap = (function () {
     const map = new Map<string, SaccharideComponent>()
     for (let i = 0, il = Monosaccharides.length; i < il; ++i) {
@@ -307,6 +314,9 @@ export const SaccharideCompIdMap = (function () {
             }
         }
     }
+    for (let i = 0, il = UnknownSaccharideNames.length; i < il; ++i) {
+        map.set(UnknownSaccharideNames[i], UnknownSaccharideComponent)
+    }
     return map
 })()