|
@@ -2,9 +2,10 @@
|
|
|
* Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
|
|
|
*
|
|
|
* @author Alexander Rose <alexander.rose@weirdbyte.de>
|
|
|
+ * @author David Sehnal <david.sehnal@gmail.com>
|
|
|
*/
|
|
|
|
|
|
-import { Segmentation } from 'mol-data/int';
|
|
|
+import { Segmentation, SortedArray } from 'mol-data/int';
|
|
|
import { combinations } from 'mol-data/util/combination';
|
|
|
import { IntAdjacencyGraph } from 'mol-math/graph';
|
|
|
import { Vec3 } from 'mol-math/linear-algebra';
|
|
@@ -19,41 +20,103 @@ import Unit from '../unit';
|
|
|
import { SaccharideNameMap, UnknownSaccharideComponent } from './constants';
|
|
|
import { CarbohydrateElement, CarbohydrateLink, Carbohydrates, CarbohydrateTerminalLink } from './data';
|
|
|
import { UnitRings, UnitRing } from '../unit/rings';
|
|
|
+import { ElementIndex } from '../../model/indexing';
|
|
|
|
|
|
const C = ElementSymbol('C'), O = ElementSymbol('O');
|
|
|
const SugarRingFps = [UnitRing.elementFingerprint([C, C, C, C, C, O]), UnitRing.elementFingerprint([C, C, C, C, O])]
|
|
|
|
|
|
-function getDirection(direction: Vec3, unit: Unit.Atomic, indices: ArrayLike<StructureElement.UnitIndex>, center: Vec3) {
|
|
|
- let indexC1 = -1, indexC1X = -1, indexC = -1
|
|
|
+function getAnomericCarbon(unit: Unit.Atomic, ringAtoms: ArrayLike<StructureElement.UnitIndex>): ElementIndex {
|
|
|
+ let indexHasTwoOxygen = -1, indexHasOxygenAndCarbon = -1, indexHasC1Name = -1, indexIsCarbon = -1
|
|
|
const { elements } = unit
|
|
|
- const { position } = unit.conformation
|
|
|
- const { label_atom_id, type_symbol } = unit.model.atomicHierarchy.atoms
|
|
|
- for (let i = 0, il = indices.length; i < il; ++i) {
|
|
|
- const ei = elements[indices[i]]
|
|
|
- const atomId = label_atom_id.value(ei)
|
|
|
- if (atomId === 'C1') {
|
|
|
- indexC1 = ei
|
|
|
+ const { type_symbol, label_atom_id } = unit.model.atomicHierarchy.atoms
|
|
|
+ const { b: neighbor, offset } = unit.links;
|
|
|
+ for (let i = 0, il = ringAtoms.length; i < il; ++i) {
|
|
|
+ const ei = elements[ringAtoms[i]]
|
|
|
+ if (type_symbol.value(ei) !== C) continue
|
|
|
+ let linkedOxygenCount = 0
|
|
|
+ let linkedCarbonCount = 0
|
|
|
+ for (let j = offset[ringAtoms[i]], jl = offset[ringAtoms[i] + 1]; j < jl; ++j) {
|
|
|
+ const ej = elements[neighbor[j]]
|
|
|
+ const typeSymbol = type_symbol.value(ej)
|
|
|
+ if (typeSymbol === O) ++linkedOxygenCount
|
|
|
+ else if (typeSymbol === C) ++linkedCarbonCount
|
|
|
+ }
|
|
|
+ if (linkedOxygenCount === 2) {
|
|
|
+ // found anomeric carbon
|
|
|
+ indexHasTwoOxygen = ei
|
|
|
break
|
|
|
- } else if (indexC1X === -1 && atomId.startsWith('C1')) {
|
|
|
- indexC1X = ei
|
|
|
- } else if (indexC === -1 && type_symbol.value(ei) === C) {
|
|
|
- indexC = ei
|
|
|
+ } else if (linkedOxygenCount === 1 && linkedCarbonCount === 1) {
|
|
|
+ // possibly an anomeric carbon if this is a mono-saccharide without a glycosidic bond
|
|
|
+ indexHasOxygenAndCarbon = ei
|
|
|
+ } else if (label_atom_id.value(ei).startsWith('C1')) {
|
|
|
+ // likely the anomeric carbon as it is name C1 by convention
|
|
|
+ indexHasC1Name = ei
|
|
|
+ } else {
|
|
|
+ // use any carbon as a fallback
|
|
|
+ indexIsCarbon = ei
|
|
|
}
|
|
|
}
|
|
|
- const index = indexC1 !== -1 ? indexC1
|
|
|
- : indexC1X !== -1 ? indexC1X
|
|
|
- : indexC !== -1 ? indexC
|
|
|
- : elements[indices[0]]
|
|
|
+ return (indexHasTwoOxygen !== -1 ? indexHasTwoOxygen
|
|
|
+ : indexHasOxygenAndCarbon !== -1 ? indexHasOxygenAndCarbon
|
|
|
+ : indexHasC1Name !== -1 ? indexHasC1Name
|
|
|
+ : indexIsCarbon !== -1 ? indexIsCarbon
|
|
|
+ : elements[ringAtoms[0]]) as ElementIndex
|
|
|
+}
|
|
|
+
|
|
|
+/** Return first non-empty label_alt_id or an empty string */
|
|
|
+function getRingAltId(unit: Unit.Atomic, ringAtoms: SortedArray<StructureElement.UnitIndex>) {
|
|
|
+ const { elements } = unit
|
|
|
+ const { label_alt_id } = unit.model.atomicHierarchy.atoms
|
|
|
+ for (let i = 0, il = ringAtoms.length; i < il; ++i) {
|
|
|
+ const ei = elements[ringAtoms[i]]
|
|
|
+ const altId = label_alt_id.value(ei)
|
|
|
+ if (altId) return altId
|
|
|
+ }
|
|
|
+ return ''
|
|
|
+}
|
|
|
+
|
|
|
+function getAltId(unit: Unit.Atomic, index: StructureElement.UnitIndex) {
|
|
|
+ const { elements } = unit
|
|
|
+ const { label_alt_id } = unit.model.atomicHierarchy.atoms
|
|
|
+ return label_alt_id.value(elements[index])
|
|
|
+}
|
|
|
+
|
|
|
+function getDirection(direction: Vec3, unit: Unit.Atomic, index: ElementIndex, center: Vec3) {
|
|
|
+ const { position } = unit.conformation
|
|
|
Vec3.normalize(direction, Vec3.sub(direction, center, position(index, direction)))
|
|
|
return direction
|
|
|
}
|
|
|
|
|
|
-function getAtomId(unit: Unit.Atomic, index: number) {
|
|
|
+function getAtomId(unit: Unit.Atomic, index: StructureElement.UnitIndex) {
|
|
|
const { elements } = unit
|
|
|
const { label_atom_id } = unit.model.atomicHierarchy.atoms
|
|
|
return label_atom_id.value(elements[index])
|
|
|
}
|
|
|
|
|
|
+function filterFusedRings(unitRings: UnitRings, rings: UnitRings.Index[] | undefined) {
|
|
|
+ if (!rings || !rings.length) return
|
|
|
+
|
|
|
+ const fusedRings = new Set<UnitRings.Index>()
|
|
|
+ const ringCombinations = combinations(fillSerial(new Array(rings.length) as number[]), 2)
|
|
|
+ for (let i = 0, il = ringCombinations.length; i < il; ++i) {
|
|
|
+ const rc = ringCombinations[i];
|
|
|
+ const r0 = unitRings.all[rings[rc[0]]], r1 = unitRings.all[rings[rc[1]]];
|
|
|
+ if (SortedArray.areIntersecting(r0, r1)) {
|
|
|
+ fusedRings.add(rings[rc[0]])
|
|
|
+ fusedRings.add(rings[rc[1]])
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if (fusedRings.size) {
|
|
|
+ const filteredRings: UnitRings.Index[] = []
|
|
|
+ for (let i = 0, il = rings.length; i < il; ++i) {
|
|
|
+ if (!fusedRings.has(rings[i])) filteredRings.push(rings[i])
|
|
|
+ }
|
|
|
+ return filteredRings
|
|
|
+ } else {
|
|
|
+ return rings
|
|
|
+ }
|
|
|
+}
|
|
|
|
|
|
export function computeCarbohydrates(structure: Structure): Carbohydrates {
|
|
|
const links: CarbohydrateLink[] = []
|
|
@@ -62,8 +125,8 @@ export function computeCarbohydrates(structure: Structure): Carbohydrates {
|
|
|
|
|
|
const elementsWithRingMap = new Map<string, number>()
|
|
|
|
|
|
- function elementKey(residueIndex: number, unitId: number) {
|
|
|
- return `${residueIndex}|${unitId}`
|
|
|
+ function elementKey(residueIndex: number, unitId: number, altId: string) {
|
|
|
+ return `${residueIndex}|${unitId}|${altId}`
|
|
|
}
|
|
|
|
|
|
function fixLinkDirection(iA: number, iB: number) {
|
|
@@ -107,7 +170,7 @@ export function computeCarbohydrates(structure: Structure): Carbohydrates {
|
|
|
sugarResidueMap = UnitRings.byFingerprintAndResidue(unit.rings, SugarRingFps);
|
|
|
}
|
|
|
|
|
|
- const sugarRings = sugarResidueMap.get(residueIndex);
|
|
|
+ const sugarRings = filterFusedRings(unit.rings, sugarResidueMap.get(residueIndex));
|
|
|
|
|
|
if (!sugarRings || !sugarRings.length) {
|
|
|
elements.push({
|
|
@@ -122,16 +185,18 @@ export function computeCarbohydrates(structure: Structure): Carbohydrates {
|
|
|
|
|
|
for (let j = 0, jl = sugarRings.length; j < jl; ++j) {
|
|
|
const ringAtoms = rings.all[sugarRings[j]];
|
|
|
+ const anomericCarbon = getAnomericCarbon(unit, ringAtoms)
|
|
|
|
|
|
const pa = new PrincipalAxes(getPositionMatrix(unit, ringAtoms))
|
|
|
const center = Vec3.copy(Vec3.zero(), pa.center)
|
|
|
const normal = Vec3.copy(Vec3.zero(), pa.normVecC)
|
|
|
- const direction = getDirection(Vec3.zero(), unit, ringAtoms, center)
|
|
|
+ const direction = getDirection(Vec3.zero(), unit, anomericCarbon, center)
|
|
|
Vec3.orthogonalize(direction, normal, direction)
|
|
|
|
|
|
+ const altId = getRingAltId(unit, ringAtoms)
|
|
|
const elementIndex = elements.length
|
|
|
ringElements.push(elementIndex)
|
|
|
- elementsWithRingMap.set(elementKey(residueIndex, unit.id), elementIndex)
|
|
|
+ elementsWithRingMap.set(elementKey(residueIndex, unit.id, altId), elementIndex)
|
|
|
elements.push({
|
|
|
geometry: { center, normal, direction },
|
|
|
hasRing: true,
|
|
@@ -144,8 +209,10 @@ export function computeCarbohydrates(structure: Structure): Carbohydrates {
|
|
|
for (let j = 0, jl = ringCombinations.length; j < jl; ++j) {
|
|
|
const rc = ringCombinations[j];
|
|
|
const r0 = rings.all[sugarRings[rc[0]]], r1 = rings.all[sugarRings[rc[1]]];
|
|
|
- if (IntAdjacencyGraph.areVertexSetsConnected(unit.links, r0, r1, 2)) {
|
|
|
- // fix both directions as it is unclear where the C1 atom is
|
|
|
+ // 1,6 glycosidic links are distance 3 and 1,4 glycosidic links are distance 2
|
|
|
+ if (IntAdjacencyGraph.areVertexSetsConnected(unit.links, r0, r1, 3)) {
|
|
|
+ // TODO handle better, for now fix both directions as it is unclear where the C1 atom is
|
|
|
+ // would need to know the path connecting the two rings
|
|
|
fixLinkDirection(ringElements[rc[0]], ringElements[rc[1]])
|
|
|
fixLinkDirection(ringElements[rc[1]], ringElements[rc[0]])
|
|
|
links.push({
|
|
@@ -162,6 +229,10 @@ export function computeCarbohydrates(structure: Structure): Carbohydrates {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+ function getElementIndex(unit: Unit.Atomic, index: StructureElement.UnitIndex) {
|
|
|
+ return elementsWithRingMap.get(elementKey(unit.getResidueIndex(index), unit.id, getAltId(unit, index)))
|
|
|
+ }
|
|
|
+
|
|
|
// get carbohydrate links induced by inter-unit bonds
|
|
|
for (let i = 0, il = structure.units.length; i < il; ++i) {
|
|
|
const unit = structure.units[i]
|
|
@@ -172,8 +243,8 @@ export function computeCarbohydrates(structure: Structure): Carbohydrates {
|
|
|
pairBonds.getBonds(indexA).forEach(bondInfo => {
|
|
|
const { unitA, unitB } = pairBonds
|
|
|
const indexB = bondInfo.indexB
|
|
|
- const elementIndexA = elementsWithRingMap.get(elementKey(unitA.getResidueIndex(indexA), unitA.id))
|
|
|
- const elementIndexB = elementsWithRingMap.get(elementKey(unitB.getResidueIndex(indexB), unitB.id))
|
|
|
+ const elementIndexA = getElementIndex(unitA, indexA)
|
|
|
+ const elementIndexB = getElementIndex(unitB, indexB)
|
|
|
|
|
|
if (elementIndexA !== undefined && elementIndexB !== undefined) {
|
|
|
const atomIdA = getAtomId(unitA, indexA)
|