encoder.ts 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321
  1. /**
  2. * Copyright (c) 2020 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * @author Sebastian Bittrich <sebastian.bittrich@rcsb.org>
  5. */
  6. import { Category } from '../cif/encoder';
  7. import { LigandEncoder } from '../ligand-encoder';
  8. import { StringBuilder } from '../../../mol-util';
  9. import { getCategoryInstanceData } from '../cif/encoder/util';
  10. import { BondType } from '../../../mol-model/structure/model/types';
  11. import { ComponentBond } from '../../../mol-model-formats/structure/property/bonds/chem_comp';
  12. // type MOL_TYPE = 'SMALL' | 'BIOPOLYMER' | 'PROTEIN' | 'NUCLEIC_ACID' | 'SACCHARIDE';
  13. // type CHARGE_TYPE = 'NO_CHARGES' | 'DEL_RE' | 'GASTEIGER' | 'GAST_HUCK' | 'HUCKEL' | 'PULLMAN' | 'GAUSS80_CHARGES' | 'AMPAC_CHARGES' | 'MULLIKEN_CHARGES' | 'DICT_ CHARGES' | 'MMFF94_CHARGES' | 'USER_CHARGES';
  14. const NON_METAL_ATOMS = 'H D B C N O F Si P S Cl As Se Br Te I At He Ne Ar Kr Xe Rn'.split(' ');
  15. type BondMap = Map<string, { order: number, flags: number }>;
  16. // specification: http://chemyang.ccnu.edu.cn/ccb/server/AIMMS/mol2.pdf
  17. export class Mol2Encoder extends LigandEncoder {
  18. private out: StringBuilder;
  19. _writeCategory<Ctx>(category: Category<Ctx>, context?: Ctx): void {
  20. const a = StringBuilder.create();
  21. const b = StringBuilder.create();
  22. const { instance, source } = getCategoryInstanceData(category, context);
  23. // write header
  24. const name = this.getName(instance, source);
  25. StringBuilder.writeSafe(this.builder, `# Name: ${name}\n# Created by ${this.encoder}\n\n`);
  26. const bondMap = this.componentBondData.entries.get(name)!;
  27. let bondCount = 0;
  28. const atoms = this.getAtoms(instance, source);
  29. StringBuilder.writeSafe(a, '@<TRIPOS>ATOM\n');
  30. StringBuilder.writeSafe(b, '@<TRIPOS>BOND\n');
  31. atoms.forEach((atom1, label_atom_id1) => {
  32. const { index: i1 } = atom1;
  33. if (bondMap?.map) {
  34. bondMap.map.get(label_atom_id1)!.forEach((bond, label_atom_id2) => {
  35. const atom2 = atoms.get(label_atom_id2);
  36. if (!atom2) return;
  37. const { index: i2, type_symbol: type_symbol2 } = atom2;
  38. if (i1 < i2 && !this.skipHydrogen(type_symbol2)) {
  39. const { order, flags } = bond;
  40. const ar = BondType.is(BondType.Flag.Aromatic, flags);
  41. StringBuilder.writeSafe(b, `${++bondCount} ${i1 + 1} ${i2 + 1} ${ar ? 'ar' : order}`);
  42. StringBuilder.newline(b);
  43. }
  44. });
  45. }
  46. const sybyl = bondMap?.map ? this.mapToSybyl(label_atom_id1, atom1.type_symbol, bondMap) : atom1.type_symbol;
  47. StringBuilder.writeSafe(a, `${i1 + 1} ${label_atom_id1} ${atom1.Cartn_x.toFixed(3)} ${atom1.Cartn_y.toFixed(3)} ${atom1.Cartn_z.toFixed(3)} ${sybyl} 1 ${name} 0.000\n`);
  48. });
  49. // could write something like 'SMALL\nNO_CHARGES', for now let's write **** indicating non-optional, yet missing, string values
  50. StringBuilder.writeSafe(this.out, `@<TRIPOS>MOLECULE\n${name}\n${atoms.size} ${bondCount} 1\n****\n****\n\n`);
  51. StringBuilder.writeSafe(this.out, StringBuilder.getString(a));
  52. StringBuilder.writeSafe(this.out, StringBuilder.getString(b));
  53. StringBuilder.writeSafe(this.out, `@<TRIPOS>SUBSTRUCTURE\n1 ${name} 1\n`);
  54. }
  55. private count<K, V, C>(map: Map<K, V>, ctx: C, predicate: (k: K, v: V, ctx: C) => boolean): number {
  56. let count = 0;
  57. const iter = map.entries();
  58. let result = iter.next();
  59. while (!result.done) {
  60. if (predicate(result.value[0], result.value[1], ctx)) {
  61. count++;
  62. }
  63. result = iter.next();
  64. }
  65. return count;
  66. }
  67. private orderSum(map: BondMap): number {
  68. let sum = 0;
  69. const iter = map.values();
  70. let result = iter.next();
  71. while (!result.done) {
  72. sum += result.value.order;
  73. result = iter.next();
  74. }
  75. return sum;
  76. }
  77. private isNonMetalBond(label_atom_id: string): boolean {
  78. for (const a of NON_METAL_ATOMS) {
  79. if (label_atom_id.startsWith(a)) return true;
  80. }
  81. return false;
  82. }
  83. private extractNonmets(map: BondMap): BondMap {
  84. const ret = new Map<string, { order: number, flags: number }>();
  85. const iter = map.entries();
  86. let result = iter.next();
  87. while (!result.done) {
  88. const [k, v] = result.value;
  89. if (this.isNonMetalBond(k)) {
  90. ret.set(k, v);
  91. }
  92. result = iter.next();
  93. }
  94. return ret;
  95. }
  96. // see https://www.sdsc.edu/CCMS/Packages/cambridge/pluto/atom_types.html
  97. // cannot account for covalently bound amino acids etc
  98. private mapToSybyl(label_atom_id1: string, type_symbol1: string, bondMap: ComponentBond.Entry) {
  99. // TODO if altLoc: 'Du' // 1.1
  100. // TODO if end of polymeric bond: 'Du' // 1.2
  101. if (type_symbol1 === 'D') return 'H'; // 1.3
  102. if (type_symbol1 === 'P') return 'P.3'; // 1.4, 4mpo/ligand?encoding=mol2&auth_seq_id=203 (PO4)
  103. if (type_symbol1 === 'Co' || type_symbol1 === 'Ru') return type_symbol1 + '.oh'; // 1.5
  104. const bonds = bondMap.map.get(label_atom_id1)!;
  105. const numBonds = bonds.size;
  106. if (type_symbol1 === 'Ti' || type_symbol1 === 'Cr') { // 1.10
  107. return type_symbol1 + (numBonds <= 4 ? '.th' : '.oh'); // 1.10.1 & 1.10.2
  108. }
  109. if (type_symbol1 === 'C') { // 1.6
  110. if (numBonds >= 4 && this.count(bonds, this, (_k, v) => v.order === 1) >= 4) return 'C.3'; // 1.6.1, 3rga/ligand?encoding=mol2&auth_seq_id=307 (MOH)
  111. if (numBonds === 3 && this.isCat(bonds, bondMap)) return 'C.cat'; // 1.6.2, 1acj/ligand?encoding=mol2&auth_seq_id=44 (ARG), 5vjb/ligand?encoding=mol2&auth_seq_id=101 (GAI)
  112. if (numBonds >= 2 && this.count(bonds, this, (_k, v) => BondType.is(BondType.Flag.Aromatic, v.flags)) >= 2) return 'C.ar'; // 1.6.3, 1acj/ligand?encoding=mol2&auth_seq_id=30 (PHE), 1acj/ligand?encoding=mol2&auth_seq_id=63 (TYR), 1acj/ligand?encoding=mol2&auth_seq_id=84 (TRP), 1acj/ligand?encoding=mol2&auth_seq_id=999 (THA)
  113. if ((numBonds === 1 || numBonds === 2) && this.count(bonds, this, (_k, v) => v.order === 3)) return 'C.1'; // 1.6.4, 3i04/ligand?encoding=mol2&auth_asym_id=C&auth_seq_id=900 (CYN)
  114. return 'C.2'; // 1.6.5
  115. }
  116. // most of the time, bonds will equal non-metal bonds
  117. const nonmets = this.count(bonds, this, (k, _v, ctx) => ctx.isNonMetalBond(k)) === bonds.size ? bonds : this.extractNonmets(bonds);
  118. const numNonmets = nonmets.size;
  119. if (type_symbol1 === 'O') { // 1.7
  120. if (numNonmets === 1) { // 1.7.1
  121. if (this.isOC(nonmets, bondMap)) return 'O.co2'; // 1.7.1.1, 4h2v/ligand?encoding=mol2&auth_seq_id=403 (ACT)
  122. if (this.isOP(nonmets, bondMap)) return 'O.co2'; // 1.7.1.2, 4mpo/ligand?encoding=mol2&auth_seq_id=203 (PO4)
  123. }
  124. if (numNonmets >= 2 && this.count(bonds, this, (_k, v) => v.order === 1) === bonds.size) return 'O.3'; // 1.7.2, 1acj/ligand?encoding=mol2&auth_seq_id=601 (HOH), 3rga/ligand?encoding=mol2&auth_seq_id=307 (MOH)
  125. return 'O.2'; // 1.7.3, 1acj/ligand?encoding=mol2&auth_seq_id=4 (SER)
  126. }
  127. if (type_symbol1 === 'N') { // 1.8
  128. if (numNonmets === 4 && this.count(nonmets, this, (_k, v) => v.order === 1) === 4) return 'N.4'; // 1.8.1, 4ikf/ligand?encoding=mol2&auth_seq_id=403 (NH4)
  129. if (numBonds >= 2 && this.count(bonds, this, (_k, v) => BondType.is(BondType.Flag.Aromatic, v.flags)) >= 2) return 'N.ar'; // 1.8.2, 1acj/ligand?encoding=mol2&auth_seq_id=84 (TRP), 1acj/ligand?encoding=mol2&auth_seq_id=999 (THA)
  130. if (numNonmets === 1 && this.count(nonmets, this, (_k, v) => v.order === 3)) return 'N.1'; // 1.8.3, 3i04/ligand?encoding=mol2&auth_asym_id=C&auth_seq_id=900 (CYN)
  131. if (numNonmets === 2 && this.orderSum(nonmets) === 4) return 'N.1'; // 1.8.4, 3sbr/ligand?encoding=mol2&auth_seq_id=640&auth_asym_id=D (N2O)
  132. if (numNonmets === 3 && this.hasCOCS(nonmets, bondMap)) return 'N.am'; // 1.8.5, 3zfz/ligand?encoding=mol2&auth_seq_id=1669 (1W8)
  133. if (numNonmets === 3) { // 1.8.6
  134. if (this.count(nonmets, this, (_k, v) => v.order > 1) === 1) return 'N.pl3'; // 1.8.6.1, 4hon/ligand?encoding=mol2&auth_seq_id=407 (NO3)
  135. if (this.count(nonmets, this, (_k, v) => v.order === 1) === 3) {
  136. if (this.isNpl3(nonmets, bondMap)) return 'N.pl3'; // 1.8.6.1.1 & 1.8.6.1.2, 1acj/ligand?encoding=mol2&auth_seq_id=44 (ARG), 5vjb/ligand?encoding=mol2&auth_seq_id=101 (GAI)
  137. }
  138. return 'N.3';
  139. }
  140. return 'N.2'; // 1.8.7, 1acj/ligand?encoding=mol2&auth_seq_id=4 (SER)
  141. }
  142. if (type_symbol1 === 'S') { // 1.9
  143. if (numNonmets === 3 && this.countOfOxygenWithSingleNonmet(nonmets, bondMap) === 1) return 'S.o'; // 1.9.1, 4i03/ligand?encoding=mol2&auth_seq_id=312 (DMS)
  144. if (numNonmets === 4 && this.countOfOxygenWithSingleNonmet(nonmets, bondMap) === 2) return 'S.o2'; // 1.9.2, 1udt/ligand?encoding=mol2&auth_seq_id=1000 (VIA)
  145. if (numNonmets >= 2 && this.count(bonds, this, (_k, v) => v.order === 1) >= 2) return 'S.3'; // 1.9.3, 3zfz/ligand?encoding=mol2&auth_seq_id=1669 (1W8), 4gpc/ligand?encoding=mol2&auth_seq_id=902 (SO4)
  146. return 'S.2'; // 1.9.4
  147. }
  148. return type_symbol1; // 1.11
  149. }
  150. // 1.8.6.2.1: If one single bond is to an atom that forms a bond of type double, triple, aromatic or
  151. // delocalised .AND. one other single bond is to H then atom_type is N.pl3
  152. // 1.8.6.2.2: If one single bond is to an atom that forms a bond of type double, triple, aromatic or
  153. // delocalised .AND. neither of the other single bonds are to H .AND. sum_of_angles around N .ge. 350 deg then atom_type is N.pl3
  154. // TODO cannot check accurately for delocalized bonds
  155. private isNpl3(nonmets: BondMap, bondMap: ComponentBond.Entry): boolean {
  156. const iter = nonmets.keys();
  157. let result = iter.next();
  158. while (!result.done) {
  159. const label_atom_id = result.value;
  160. const adjacentBonds = bondMap.map.get(label_atom_id)!;
  161. if (this.count(adjacentBonds, this, (_k, v) => v.order > 1 || BondType.is(BondType.Flag.Aromatic, v.flags))) {
  162. // TODO check accurately for 2nd criterion with coordinates
  163. return true;
  164. }
  165. result = iter.next();
  166. }
  167. return false;
  168. }
  169. // If bond is to carbon .AND. carbon forms a total of 3 bonds, 2 of which are to an oxygen
  170. // forming only 1 non-metal bond then atom_type is O.co2
  171. private isOC(nonmets: BondMap, bondMap: ComponentBond.Entry): boolean {
  172. const nonmet = nonmets.entries().next()!.value as [string, { order: number, flags: number }];
  173. if (!nonmet[0].startsWith('C')) return false;
  174. const carbonBonds = bondMap.map.get(nonmet[0])!;
  175. if (carbonBonds.size !== 3) return false;
  176. let count = 0;
  177. const iter = carbonBonds.keys();
  178. let result = iter.next();
  179. while (!result.done) {
  180. const label_atom_id = result.value;
  181. if (label_atom_id.startsWith('O')) {
  182. const adjacentBonds = bondMap.map.get(label_atom_id)!;
  183. if (this.count(adjacentBonds, this, (k, _v, ctx) => ctx.isNonMetalBond(k)) === 1) count++;
  184. }
  185. result = iter.next();
  186. }
  187. return count === 2;
  188. }
  189. // If bond is to phosphorus .AND. phosphorus forms at least 2 bonds to an oxygen forming
  190. // only 1 non-metal bond then atom_type is O.co2
  191. private isOP(nonmets: BondMap, bondMap: ComponentBond.Entry): boolean {
  192. const nonmet = nonmets.entries().next()!.value as [string, { order: number, flags: number }];
  193. if (!nonmet[0].startsWith('P')) return false;
  194. const phosphorusBonds = bondMap.map.get(nonmet[0])!;
  195. if (phosphorusBonds.size < 2) return false;
  196. let count = 0;
  197. const iter = phosphorusBonds.keys();
  198. let result = iter.next();
  199. while (!result.done) {
  200. const label_atom_id = result.value;
  201. if (label_atom_id.startsWith('O')) {
  202. const adjacentBonds = bondMap.map.get(label_atom_id)!;
  203. if (this.count(adjacentBonds, this, (k, _v, ctx) => ctx.isNonMetalBond(k)) === 1) count++;
  204. }
  205. result = iter.next();
  206. }
  207. return count >= 2;
  208. }
  209. // If num_bond .eq. 3 .AND. all bonds are acyclic .AND. all bonds are to nitrogen .AND. each
  210. // nitrogen forms bonds to 2 other atoms both of which are not oxygen then atom_type is C.cat.
  211. private isCat(currentBondMap: BondMap, bondMap: ComponentBond.Entry): boolean {
  212. const iter1 = currentBondMap.keys();
  213. let result1 = iter1.next();
  214. while (!result1.done) {
  215. const label_atom_id = result1.value;
  216. if (!label_atom_id.startsWith('N')) return false;
  217. const adjacentBonds = bondMap.map.get(label_atom_id)!;
  218. if (adjacentBonds.size < 2) return false;
  219. const iter2 = adjacentBonds.keys();
  220. let result2 = iter2.next();
  221. while (!result2.done) {
  222. if (result2.value.startsWith('O')) return false;
  223. result2 = iter2.next();
  224. }
  225. result1 = iter1.next();
  226. }
  227. // TODO ensure no cycles
  228. return true;
  229. }
  230. private countOfOxygenWithSingleNonmet(nonmets: BondMap, bondMap: ComponentBond.Entry): number {
  231. let count = 0;
  232. const iter = nonmets.keys();
  233. let result = iter.next();
  234. while (!result.done) {
  235. const label_atom_id = result.value;
  236. if (label_atom_id.startsWith('O')) {
  237. const adjacentBonds = bondMap.map.get(label_atom_id)!;
  238. if (this.count(adjacentBonds, this, (k, _v, ctx) => ctx.isNonMetalBond(k))) count++;
  239. }
  240. result = iter.next();
  241. }
  242. return count;
  243. }
  244. // If num_nonmet .eq. 3 .AND. one bond is to C=O or C=S then atom_type is N.am
  245. private hasCOCS(nonmets: BondMap, bondMap: ComponentBond.Entry): boolean {
  246. const iter = nonmets.keys();
  247. let result = iter.next();
  248. while (!result.done) {
  249. const label_atom_id = result.value;
  250. if (label_atom_id.startsWith('C')) {
  251. const adjacentBonds = bondMap.map.get(label_atom_id)!;
  252. if (this.count(adjacentBonds, this, (k, v) => k.startsWith('O') || k.startsWith('S') && v.order === 2)) return true;
  253. }
  254. result = iter.next();
  255. }
  256. return false;
  257. }
  258. protected writeFullCategory<Ctx>(sb: StringBuilder, category: Category<Ctx>, context?: Ctx) {
  259. const { instance, source } = getCategoryInstanceData(category, context);
  260. const fields = instance.fields;
  261. const src = source[0];
  262. const data = src.data;
  263. const it = src.keys();
  264. const key = it.move();
  265. for (let _f = 0; _f < fields.length; _f++) {
  266. const f = fields[_f]!;
  267. StringBuilder.writeSafe(sb, `# ${category.name}.${f.name}: `);
  268. const val = f.value(key, data, 0);
  269. StringBuilder.writeSafe(sb, val as string);
  270. StringBuilder.newline(sb);
  271. }
  272. StringBuilder.newline(sb);
  273. }
  274. encode(): void {
  275. // write meta-information, do so after ctab
  276. if (this.error || this.metaInformation) {
  277. StringBuilder.writeSafe(this.builder, StringBuilder.getString(this.meta));
  278. }
  279. StringBuilder.writeSafe(this.builder, StringBuilder.getString(this.out));
  280. this.encoded = true;
  281. }
  282. constructor(encoder: string, metaInformation: boolean, hydrogens: boolean) {
  283. super(encoder, metaInformation, hydrogens);
  284. this.out = StringBuilder.create();
  285. }
  286. }