compute.ts 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319
  1. /**
  2. * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * @author Alexander Rose <alexander.rose@weirdbyte.de>
  5. * @author David Sehnal <david.sehnal@gmail.com>
  6. */
  7. import { Segmentation, SortedArray } from 'mol-data/int';
  8. import { combinations } from 'mol-data/util/combination';
  9. import { IntAdjacencyGraph } from 'mol-math/graph';
  10. import { Vec3 } from 'mol-math/linear-algebra';
  11. import PrincipalAxes from 'mol-math/linear-algebra/matrix/principal-axes';
  12. import { fillSerial } from 'mol-util/array';
  13. import { ResidueIndex } from '../../model';
  14. import { ElementSymbol, MoleculeType } from '../../model/types';
  15. import { getMoleculeType, getPositionMatrix } from '../../util';
  16. import StructureElement from '../element';
  17. import Structure from '../structure';
  18. import Unit from '../unit';
  19. import { SaccharideNameMap, UnknownSaccharideComponent } from './constants';
  20. import { CarbohydrateElement, CarbohydrateLink, Carbohydrates, CarbohydrateTerminalLink, PartialCarbohydrateElement } from './data';
  21. import { UnitRings, UnitRing } from '../unit/rings';
  22. import { ElementIndex } from '../../model/indexing';
  23. const C = ElementSymbol('C'), O = ElementSymbol('O');
  24. const SugarRingFps = [UnitRing.elementFingerprint([C, C, C, C, C, O]), UnitRing.elementFingerprint([C, C, C, C, O])]
  25. function getAnomericCarbon(unit: Unit.Atomic, ringAtoms: ArrayLike<StructureElement.UnitIndex>): ElementIndex {
  26. let indexHasTwoOxygen = -1, indexHasOxygenAndCarbon = -1, indexHasC1Name = -1, indexIsCarbon = -1
  27. const { elements } = unit
  28. const { type_symbol, label_atom_id } = unit.model.atomicHierarchy.atoms
  29. const { b: neighbor, offset } = unit.links;
  30. for (let i = 0, il = ringAtoms.length; i < il; ++i) {
  31. const ei = elements[ringAtoms[i]]
  32. if (type_symbol.value(ei) !== C) continue
  33. let linkedOxygenCount = 0
  34. let linkedCarbonCount = 0
  35. for (let j = offset[ringAtoms[i]], jl = offset[ringAtoms[i] + 1]; j < jl; ++j) {
  36. const ej = elements[neighbor[j]]
  37. const typeSymbol = type_symbol.value(ej)
  38. if (typeSymbol === O) ++linkedOxygenCount
  39. else if (typeSymbol === C) ++linkedCarbonCount
  40. }
  41. if (linkedOxygenCount === 2) {
  42. // found anomeric carbon
  43. indexHasTwoOxygen = ei
  44. break
  45. } else if (linkedOxygenCount === 1 && linkedCarbonCount === 1) {
  46. // possibly an anomeric carbon if this is a mono-saccharide without a glycosidic bond
  47. indexHasOxygenAndCarbon = ei
  48. } else if (label_atom_id.value(ei).startsWith('C1')) {
  49. // likely the anomeric carbon as it is named C1 by convention
  50. indexHasC1Name = ei
  51. } else {
  52. // use any carbon as a fallback
  53. indexIsCarbon = ei
  54. }
  55. }
  56. return (indexHasTwoOxygen !== -1 ? indexHasTwoOxygen
  57. : indexHasOxygenAndCarbon !== -1 ? indexHasOxygenAndCarbon
  58. : indexHasC1Name !== -1 ? indexHasC1Name
  59. : indexIsCarbon !== -1 ? indexIsCarbon
  60. : elements[ringAtoms[0]]) as ElementIndex
  61. }
  62. /** Return first non-empty label_alt_id or an empty string */
  63. function getRingAltId(unit: Unit.Atomic, ringAtoms: SortedArray<StructureElement.UnitIndex>) {
  64. const { elements } = unit
  65. const { label_alt_id } = unit.model.atomicHierarchy.atoms
  66. for (let i = 0, il = ringAtoms.length; i < il; ++i) {
  67. const ei = elements[ringAtoms[i]]
  68. const altId = label_alt_id.value(ei)
  69. if (altId) return altId
  70. }
  71. return ''
  72. }
  73. function getAltId(unit: Unit.Atomic, index: StructureElement.UnitIndex) {
  74. const { elements } = unit
  75. const { label_alt_id } = unit.model.atomicHierarchy.atoms
  76. return label_alt_id.value(elements[index])
  77. }
  78. function getDirection(direction: Vec3, unit: Unit.Atomic, index: ElementIndex, center: Vec3) {
  79. const { position } = unit.conformation
  80. Vec3.normalize(direction, Vec3.sub(direction, center, position(index, direction)))
  81. return direction
  82. }
  83. function getAtomId(unit: Unit.Atomic, index: StructureElement.UnitIndex) {
  84. const { elements } = unit
  85. const { label_atom_id } = unit.model.atomicHierarchy.atoms
  86. return label_atom_id.value(elements[index])
  87. }
  88. function filterFusedRings(unitRings: UnitRings, rings: UnitRings.Index[] | undefined) {
  89. if (!rings || !rings.length) return
  90. const fusedRings = new Set<UnitRings.Index>()
  91. const ringCombinations = combinations(fillSerial(new Array(rings.length) as number[]), 2)
  92. for (let i = 0, il = ringCombinations.length; i < il; ++i) {
  93. const rc = ringCombinations[i];
  94. const r0 = unitRings.all[rings[rc[0]]], r1 = unitRings.all[rings[rc[1]]];
  95. if (SortedArray.areIntersecting(r0, r1)) {
  96. fusedRings.add(rings[rc[0]])
  97. fusedRings.add(rings[rc[1]])
  98. }
  99. }
  100. if (fusedRings.size) {
  101. const filteredRings: UnitRings.Index[] = []
  102. for (let i = 0, il = rings.length; i < il; ++i) {
  103. if (!fusedRings.has(rings[i])) filteredRings.push(rings[i])
  104. }
  105. return filteredRings
  106. } else {
  107. return rings
  108. }
  109. }
  110. export function computeCarbohydrates(structure: Structure): Carbohydrates {
  111. const links: CarbohydrateLink[] = []
  112. const terminalLinks: CarbohydrateTerminalLink[] = []
  113. const elements: CarbohydrateElement[] = []
  114. const partialElements: PartialCarbohydrateElement[] = []
  115. const elementsWithRingMap = new Map<string, number>()
  116. function ringElementKey(residueIndex: number, unitId: number, altId: string) {
  117. return `${residueIndex}|${unitId}|${altId}`
  118. }
  119. function fixLinkDirection(iA: number, iB: number) {
  120. Vec3.sub(elements[iA].geometry!.direction, elements[iB].geometry!.center, elements[iA].geometry!.center)
  121. Vec3.normalize(elements[iA].geometry!.direction, elements[iA].geometry!.direction)
  122. }
  123. const tmpV = Vec3.zero()
  124. function fixTerminalLinkDirection(iA: number, indexB: number, unitB: Unit.Atomic) {
  125. const pos = unitB.conformation.position, geo = elements[iA].geometry!;
  126. Vec3.sub(geo.direction, pos(unitB.elements[indexB], tmpV), geo.center)
  127. Vec3.normalize(geo.direction, geo.direction)
  128. }
  129. // get carbohydrate elements and carbohydrate links induced by intra-residue bonds
  130. for (let i = 0, il = structure.units.length; i < il; ++i) {
  131. const unit = structure.units[i]
  132. if (!Unit.isAtomic(unit)) continue
  133. const { model } = unit
  134. const { chainAtomSegments, residueAtomSegments, residues } = model.atomicHierarchy
  135. const { label_comp_id } = residues
  136. const chainIt = Segmentation.transientSegments(chainAtomSegments, unit.elements)
  137. const residueIt = Segmentation.transientSegments(residueAtomSegments, unit.elements)
  138. let sugarResidueMap: Map<ResidueIndex, UnitRings.Index[]> | undefined = void 0;
  139. while (chainIt.hasNext) {
  140. residueIt.setSegment(chainIt.move());
  141. while (residueIt.hasNext) {
  142. const { index: residueIndex } = residueIt.move();
  143. const saccharideComp = SaccharideNameMap.get(label_comp_id.value(residueIndex)) || UnknownSaccharideComponent
  144. if (saccharideComp === UnknownSaccharideComponent) {
  145. if (getMoleculeType(unit.model, residueIndex) !== MoleculeType.saccharide) continue
  146. }
  147. if (!sugarResidueMap) {
  148. sugarResidueMap = UnitRings.byFingerprintAndResidue(unit.rings, SugarRingFps);
  149. }
  150. const sugarRings = filterFusedRings(unit.rings, sugarResidueMap.get(residueIndex));
  151. if (!sugarRings || !sugarRings.length) {
  152. partialElements.push({ unit, residueIndex, component: saccharideComp })
  153. continue;
  154. }
  155. const rings = unit.rings;
  156. const ringElements: number[] = []
  157. for (let j = 0, jl = sugarRings.length; j < jl; ++j) {
  158. const ringAtoms = rings.all[sugarRings[j]];
  159. const anomericCarbon = getAnomericCarbon(unit, ringAtoms)
  160. const pa = new PrincipalAxes(getPositionMatrix(unit, ringAtoms))
  161. const center = Vec3.copy(Vec3.zero(), pa.center)
  162. const normal = Vec3.copy(Vec3.zero(), pa.normVecC)
  163. const direction = getDirection(Vec3.zero(), unit, anomericCarbon, center)
  164. Vec3.orthogonalize(direction, normal, direction)
  165. const altId = getRingAltId(unit, ringAtoms)
  166. const elementIndex = elements.length
  167. ringElements.push(elementIndex)
  168. elementsWithRingMap.set(ringElementKey(residueIndex, unit.id, altId), elementIndex)
  169. elements.push({
  170. geometry: { center, normal, direction },
  171. component: saccharideComp,
  172. unit, residueIndex, anomericCarbon
  173. })
  174. }
  175. // add carbohydrate links induced by intra-residue bonds
  176. const ringCombinations = combinations(fillSerial(new Array(sugarRings.length) as number[]), 2)
  177. for (let j = 0, jl = ringCombinations.length; j < jl; ++j) {
  178. const rc = ringCombinations[j];
  179. const r0 = rings.all[sugarRings[rc[0]]], r1 = rings.all[sugarRings[rc[1]]];
  180. // 1,6 glycosidic links are distance 3 and 1,4 glycosidic links are distance 2
  181. if (IntAdjacencyGraph.areVertexSetsConnected(unit.links, r0, r1, 3)) {
  182. // TODO handle better, for now fix both directions as it is unclear where the C1 atom is
  183. // would need to know the path connecting the two rings
  184. fixLinkDirection(ringElements[rc[0]], ringElements[rc[1]])
  185. fixLinkDirection(ringElements[rc[1]], ringElements[rc[0]])
  186. links.push({
  187. carbohydrateIndexA: ringElements[rc[0]],
  188. carbohydrateIndexB: ringElements[rc[1]]
  189. })
  190. links.push({
  191. carbohydrateIndexA: ringElements[rc[1]],
  192. carbohydrateIndexB: ringElements[rc[0]]
  193. })
  194. }
  195. }
  196. }
  197. }
  198. }
  199. function getRingElementIndex(unit: Unit.Atomic, index: StructureElement.UnitIndex) {
  200. return elementsWithRingMap.get(ringElementKey(unit.getResidueIndex(index), unit.id, getAltId(unit, index)))
  201. }
  202. // get carbohydrate links induced by inter-unit bonds
  203. for (let i = 0, il = structure.units.length; i < il; ++i) {
  204. const unit = structure.units[i]
  205. if (!Unit.isAtomic(unit)) continue
  206. structure.links.getLinkedUnits(unit).forEach(pairBonds => {
  207. pairBonds.linkedElementIndices.forEach(indexA => {
  208. pairBonds.getBonds(indexA).forEach(bondInfo => {
  209. const { unitA, unitB } = pairBonds
  210. const indexB = bondInfo.indexB
  211. const ringElementIndexA = getRingElementIndex(unitA, indexA)
  212. const ringElementIndexB = getRingElementIndex(unitB, indexB)
  213. if (ringElementIndexA !== undefined && ringElementIndexB !== undefined) {
  214. const atomIdA = getAtomId(unitA, indexA)
  215. if (atomIdA.startsWith('O1') || atomIdA.startsWith('C1')) {
  216. fixLinkDirection(ringElementIndexA, ringElementIndexB)
  217. }
  218. links.push({
  219. carbohydrateIndexA: ringElementIndexA,
  220. carbohydrateIndexB: ringElementIndexB
  221. })
  222. } else if (ringElementIndexA !== undefined) {
  223. const atomIdA = getAtomId(unitA, indexA)
  224. if (atomIdA.startsWith('O1') || atomIdA.startsWith('C1')) {
  225. fixTerminalLinkDirection(ringElementIndexA, indexB, unitB)
  226. }
  227. terminalLinks.push({
  228. carbohydrateIndex: ringElementIndexA,
  229. elementIndex: indexB,
  230. elementUnit: unitB,
  231. fromCarbohydrate: true
  232. })
  233. } else if (ringElementIndexB !== undefined) {
  234. terminalLinks.push({
  235. carbohydrateIndex: ringElementIndexB,
  236. elementIndex: indexA,
  237. elementUnit: unitA,
  238. fromCarbohydrate: false
  239. })
  240. }
  241. })
  242. })
  243. })
  244. }
  245. return { links, terminalLinks, elements, partialElements, ...buildLookups(elements, links) }
  246. }
  247. function buildLookups (elements: CarbohydrateElement[], links: CarbohydrateLink[]) {
  248. // element lookup
  249. function elementKey(unit: Unit, anomericCarbon: ElementIndex) {
  250. return `${unit.id}|${anomericCarbon}`
  251. }
  252. const elementMap = new Map<string, number>()
  253. for (let i = 0, il = elements.length; i < il; ++i) {
  254. const { unit, anomericCarbon } = elements[i]
  255. elementMap.set(elementKey(unit, anomericCarbon), i)
  256. }
  257. function getElementIndex(unit: Unit, anomericCarbon: ElementIndex) {
  258. return elementMap.get(elementKey(unit, anomericCarbon))
  259. }
  260. // link lookup
  261. function linkKey(unitA: Unit, anomericCarbonA: ElementIndex, unitB: Unit, anomericCarbonB: ElementIndex) {
  262. return `${unitA.id}|${anomericCarbonA}|${unitB.id}|${anomericCarbonB}`
  263. }
  264. const linkMap = new Map<string, number>()
  265. for (let i = 0, il = links.length; i < il; ++i) {
  266. const l = links[i]
  267. const { unit: unitA, anomericCarbon: anomericCarbonA } = elements[l.carbohydrateIndexA]
  268. const { unit: unitB, anomericCarbon: anomericCarbonB } = elements[l.carbohydrateIndexB]
  269. linkMap.set(linkKey(unitA, anomericCarbonA, unitB, anomericCarbonB), i)
  270. }
  271. function getLinkIndex(unitA: Unit, anomericCarbonA: ElementIndex, unitB: Unit, anomericCarbonB: ElementIndex) {
  272. return linkMap.get(linkKey(unitA, anomericCarbonA, unitB, anomericCarbonB))
  273. }
  274. return { getElementIndex, getLinkIndex }
  275. }