Browse Source

filter fused rings, handle alt locs, anomeric carbon detection

Alexander Rose 6 years ago
parent
commit
e4603cae31

+ 2 - 2
src/mol-math/graph/int-adjacency-graph.ts

@@ -286,10 +286,10 @@ export namespace IntAdjacencyGraph {
      * Check if any vertex in `verticesA` is connected to any vertex in `verticesB`
      * via at most `maxDistance` edges.
      *
-     * Returns true if A and B are intersecting.
+     * Returns true if verticesA and verticesB are intersecting.
      */
     export function areVertexSetsConnected(graph: IntAdjacencyGraph, verticesA: SortedArray<number>, verticesB: SortedArray<number>, maxDistance: number): boolean {
-        // check if A and B are intersecting, this handles maxDepth = 0
+        // check if A and B are intersecting, this handles maxDistance = 0
         if (SortedArray.areIntersecting(verticesA, verticesB)) return true;
         if (maxDistance < 1) return false;
 

+ 99 - 28
src/mol-model/structure/structure/carbohydrates/compute.ts

@@ -2,9 +2,10 @@
  * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
  *
  * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ * @author David Sehnal <david.sehnal@gmail.com>
  */
 
-import { Segmentation } from 'mol-data/int';
+import { Segmentation, SortedArray } from 'mol-data/int';
 import { combinations } from 'mol-data/util/combination';
 import { IntAdjacencyGraph } from 'mol-math/graph';
 import { Vec3 } from 'mol-math/linear-algebra';
@@ -19,41 +20,103 @@ import Unit from '../unit';
 import { SaccharideNameMap, UnknownSaccharideComponent } from './constants';
 import { CarbohydrateElement, CarbohydrateLink, Carbohydrates, CarbohydrateTerminalLink } from './data';
 import { UnitRings, UnitRing } from '../unit/rings';
+import { ElementIndex } from '../../model/indexing';
 
 const C = ElementSymbol('C'), O = ElementSymbol('O');
 const SugarRingFps = [UnitRing.elementFingerprint([C, C, C, C, C, O]), UnitRing.elementFingerprint([C, C, C, C, O])]
 
-function getDirection(direction: Vec3, unit: Unit.Atomic, indices: ArrayLike<StructureElement.UnitIndex>, center: Vec3) {
-    let indexC1 = -1, indexC1X = -1, indexC = -1
+function getAnomericCarbon(unit: Unit.Atomic, ringAtoms: ArrayLike<StructureElement.UnitIndex>): ElementIndex {
+    let indexHasTwoOxygen = -1, indexHasOxygenAndCarbon = -1, indexHasC1Name = -1, indexIsCarbon = -1
     const { elements } = unit
-    const { position } = unit.conformation
-    const { label_atom_id, type_symbol } = unit.model.atomicHierarchy.atoms
-    for (let i = 0, il = indices.length; i < il; ++i) {
-        const ei = elements[indices[i]]
-        const atomId = label_atom_id.value(ei)
-        if (atomId === 'C1') {
-            indexC1 = ei
+    const { type_symbol, label_atom_id } = unit.model.atomicHierarchy.atoms
+    const { b: neighbor, offset } = unit.links;
+    for (let i = 0, il = ringAtoms.length; i < il; ++i) {
+        const ei = elements[ringAtoms[i]]
+        if (type_symbol.value(ei) !== C) continue
+        let linkedOxygenCount = 0
+        let linkedCarbonCount = 0
+        for (let j = offset[ringAtoms[i]], jl = offset[ringAtoms[i] + 1]; j < jl; ++j) {
+            const ej = elements[neighbor[j]]
+            const typeSymbol = type_symbol.value(ej)
+            if (typeSymbol === O) ++linkedOxygenCount
+            else if (typeSymbol === C) ++linkedCarbonCount
+        }
+        if (linkedOxygenCount === 2) {
+            // found anomeric carbon
+            indexHasTwoOxygen = ei
             break
-        } else if (indexC1X === -1 && atomId.startsWith('C1')) {
-            indexC1X = ei
-        } else if (indexC === -1 && type_symbol.value(ei) === C) {
-            indexC = ei
+        } else if (linkedOxygenCount === 1 && linkedCarbonCount === 1) {
+            // possibly an anomeric carbon if this is a mono-saccharide without a glycosidic bond
+            indexHasOxygenAndCarbon = ei
+        } else if (label_atom_id.value(ei).startsWith('C1')) {
+            // likely the anomeric carbon as it is name C1 by convention
+            indexHasC1Name = ei
+        } else {
+            // use any carbon as a fallback
+            indexIsCarbon = ei
         }
     }
-    const index = indexC1 !== -1 ? indexC1
-        : indexC1X !== -1 ? indexC1X
-            : indexC !== -1 ? indexC
-                : elements[indices[0]]
+    return (indexHasTwoOxygen !== -1 ? indexHasTwoOxygen
+                : indexHasOxygenAndCarbon !== -1 ? indexHasOxygenAndCarbon
+                    : indexHasC1Name !== -1 ? indexHasC1Name
+                        : indexIsCarbon !== -1 ? indexIsCarbon
+                            : elements[ringAtoms[0]]) as ElementIndex
+}
+
+/** Return first non-empty label_alt_id or an empty string */
+function getRingAltId(unit: Unit.Atomic, ringAtoms: SortedArray<StructureElement.UnitIndex>) {
+    const { elements } = unit
+    const { label_alt_id } = unit.model.atomicHierarchy.atoms
+    for (let i = 0, il = ringAtoms.length; i < il; ++i) {
+        const ei = elements[ringAtoms[i]]
+        const altId = label_alt_id.value(ei)
+        if (altId) return altId
+    }
+    return ''
+}
+
+function getAltId(unit: Unit.Atomic, index: StructureElement.UnitIndex) {
+    const { elements } = unit
+    const { label_alt_id } = unit.model.atomicHierarchy.atoms
+    return label_alt_id.value(elements[index])
+}
+
+function getDirection(direction: Vec3, unit: Unit.Atomic, index: ElementIndex, center: Vec3) {
+    const { position } = unit.conformation
     Vec3.normalize(direction, Vec3.sub(direction, center, position(index, direction)))
     return direction
 }
 
-function getAtomId(unit: Unit.Atomic, index: number) {
+function getAtomId(unit: Unit.Atomic, index: StructureElement.UnitIndex) {
     const { elements } = unit
     const { label_atom_id } = unit.model.atomicHierarchy.atoms
     return label_atom_id.value(elements[index])
 }
 
+function filterFusedRings(unitRings: UnitRings, rings: UnitRings.Index[] | undefined) {
+    if (!rings || !rings.length) return
+
+    const fusedRings = new Set<UnitRings.Index>()
+    const ringCombinations = combinations(fillSerial(new Array(rings.length) as number[]), 2)
+    for (let i = 0, il = ringCombinations.length; i < il; ++i) {
+        const rc = ringCombinations[i];
+        const r0 = unitRings.all[rings[rc[0]]], r1 = unitRings.all[rings[rc[1]]];
+        if (SortedArray.areIntersecting(r0, r1)) {
+            fusedRings.add(rings[rc[0]])
+            fusedRings.add(rings[rc[1]])
+        }
+    }
+
+    if (fusedRings.size) {
+        const filteredRings: UnitRings.Index[] = []
+        for (let i = 0, il = rings.length; i < il; ++i) {
+            if (!fusedRings.has(rings[i])) filteredRings.push(rings[i])
+        }
+        return filteredRings
+    } else {
+        return rings
+    }
+}
 
 export function computeCarbohydrates(structure: Structure): Carbohydrates {
     const links: CarbohydrateLink[] = []
@@ -62,8 +125,8 @@ export function computeCarbohydrates(structure: Structure): Carbohydrates {
 
     const elementsWithRingMap = new Map<string, number>()
 
-    function elementKey(residueIndex: number, unitId: number) {
-        return `${residueIndex}|${unitId}`
+    function elementKey(residueIndex: number, unitId: number, altId: string) {
+        return `${residueIndex}|${unitId}|${altId}`
     }
 
     function fixLinkDirection(iA: number, iB: number) {
@@ -107,7 +170,7 @@ export function computeCarbohydrates(structure: Structure): Carbohydrates {
                     sugarResidueMap = UnitRings.byFingerprintAndResidue(unit.rings, SugarRingFps);
                 }
 
-                const sugarRings = sugarResidueMap.get(residueIndex);
+                const sugarRings = filterFusedRings(unit.rings, sugarResidueMap.get(residueIndex));
 
                 if (!sugarRings || !sugarRings.length) {
                     elements.push({
@@ -122,16 +185,18 @@ export function computeCarbohydrates(structure: Structure): Carbohydrates {
 
                 for (let j = 0, jl = sugarRings.length; j < jl; ++j) {
                     const ringAtoms = rings.all[sugarRings[j]];
+                    const anomericCarbon = getAnomericCarbon(unit, ringAtoms)
 
                     const pa = new PrincipalAxes(getPositionMatrix(unit, ringAtoms))
                     const center = Vec3.copy(Vec3.zero(), pa.center)
                     const normal = Vec3.copy(Vec3.zero(), pa.normVecC)
-                    const direction = getDirection(Vec3.zero(), unit, ringAtoms, center)
+                    const direction = getDirection(Vec3.zero(), unit, anomericCarbon, center)
                     Vec3.orthogonalize(direction, normal, direction)
 
+                    const altId = getRingAltId(unit, ringAtoms)
                     const elementIndex = elements.length
                     ringElements.push(elementIndex)
-                    elementsWithRingMap.set(elementKey(residueIndex, unit.id), elementIndex)
+                    elementsWithRingMap.set(elementKey(residueIndex, unit.id, altId), elementIndex)
                     elements.push({
                         geometry: { center, normal, direction },
                         hasRing: true,
@@ -144,8 +209,10 @@ export function computeCarbohydrates(structure: Structure): Carbohydrates {
                 for (let j = 0, jl = ringCombinations.length; j < jl; ++j) {
                     const rc = ringCombinations[j];
                     const r0 = rings.all[sugarRings[rc[0]]], r1 = rings.all[sugarRings[rc[1]]];
-                    if (IntAdjacencyGraph.areVertexSetsConnected(unit.links, r0, r1, 2)) {
-                        // fix both directions as it is unclear where the C1 atom is
+                    // 1,6 glycosidic links are distance 3 and 1,4 glycosidic links are distance 2
+                    if (IntAdjacencyGraph.areVertexSetsConnected(unit.links, r0, r1, 3)) {
+                        // TODO handle better, for now fix both directions as it is unclear where the C1 atom is
+                        //  would need to know the path connecting the two rings
                         fixLinkDirection(ringElements[rc[0]], ringElements[rc[1]])
                         fixLinkDirection(ringElements[rc[1]], ringElements[rc[0]])
                         links.push({
@@ -162,6 +229,10 @@ export function computeCarbohydrates(structure: Structure): Carbohydrates {
         }
     }
 
+    function getElementIndex(unit: Unit.Atomic, index: StructureElement.UnitIndex) {
+        return elementsWithRingMap.get(elementKey(unit.getResidueIndex(index), unit.id, getAltId(unit, index)))
+    }
+
     // get carbohydrate links induced by inter-unit bonds
     for (let i = 0, il = structure.units.length; i < il; ++i) {
         const unit = structure.units[i]
@@ -172,8 +243,8 @@ export function computeCarbohydrates(structure: Structure): Carbohydrates {
                 pairBonds.getBonds(indexA).forEach(bondInfo => {
                     const { unitA, unitB } = pairBonds
                     const indexB = bondInfo.indexB
-                    const elementIndexA = elementsWithRingMap.get(elementKey(unitA.getResidueIndex(indexA), unitA.id))
-                    const elementIndexB = elementsWithRingMap.get(elementKey(unitB.getResidueIndex(indexB), unitB.id))
+                    const elementIndexA = getElementIndex(unitA, indexA)
+                    const elementIndexB = getElementIndex(unitB, indexB)
 
                     if (elementIndexA !== undefined && elementIndexB !== undefined) {
                         const atomIdA = getAtomId(unitA, indexA)

+ 1 - 0
src/mol-view/stage.ts

@@ -106,6 +106,7 @@ export class Stage {
         // this.loadPdbid('4zs9') // contains raffinose
         // this.loadPdbid('2yft') // contains kestose
         this.loadPdbid('2b5t') // contains large carbohydrate polymer
+        // this.loadPdbid('1b5f') // contains carbohydrate with alternate locations
         // this.loadMmcifUrl(`../../examples/1cbs_full.bcif`)
         // this.loadMmcifUrl(`../../examples/1cbs_updated.cif`)
         // this.loadMmcifUrl(`../../examples/1crn.cif`)