encoder.ts 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319
  1. /**
  2. * Copyright (c) 2020 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * @author Sebastian Bittrich <sebastian.bittrich@rcsb.org>
  5. */
  6. import { Category } from '../cif/encoder';
  7. import { LigandEncoder } from '../ligand-encoder';
  8. import { StringBuilder } from '../../../mol-util';
  9. import { getCategoryInstanceData } from '../cif/encoder/util';
  10. import { BondType } from '../../../mol-model/structure/model/types';
  11. import { ComponentBond } from '../../../mol-model-formats/structure/property/bonds/chem_comp';
  12. // type MOL_TYPE = 'SMALL' | 'BIOPOLYMER' | 'PROTEIN' | 'NUCLEIC_ACID' | 'SACCHARIDE';
  13. // type CHARGE_TYPE = 'NO_CHARGES' | 'DEL_RE' | 'GASTEIGER' | 'GAST_HUCK' | 'HUCKEL' | 'PULLMAN' | 'GAUSS80_CHARGES' | 'AMPAC_CHARGES' | 'MULLIKEN_CHARGES' | 'DICT_CHARGES' | 'MMFF94_CHARGES' | 'USER_CHARGES';
  // Symbols treated as non-metals when classifying the bond partners of an atom.
  // They are compared as prefixes against atom label ids by the encoder below.
  const NON_METAL_ATOMS = [
      'H', 'D', 'B', 'C', 'N', 'O', 'F', 'Si', 'P', 'S', 'Cl', 'As',
      'Se', 'Br', 'Te', 'I', 'At', 'He', 'Ne', 'Ar', 'Kr', 'Xe', 'Rn'
  ];
  // Bonds of a single atom: partner atom label id -> bond order and bond-type flags.
  type BondMap = Map<string, { order: number; flags: number }>;
  16. // specification: http://chemyang.ccnu.edu.cn/ccb/server/AIMMS/mol2.pdf
  /**
   * Ligand encoder that writes one chemical component as Tripos Mol2 text:
   * a `@<TRIPOS>MOLECULE` header followed by `ATOM`, `BOND` and `SUBSTRUCTURE` records.
   *
   * SYBYL atom types are derived from the element symbol and the component's bond table
   * (see `mapToSybyl`); the numbered comments (1.x) refer to the rule list linked there.
   */
  export class Mol2Encoder extends LigandEncoder {
      /** Collects the Mol2 records of all written categories; flushed by `encode()`. */
      private out: StringBuilder;

      /**
       * Writes the Mol2 records for the component described by `category`.
       *
       * The `# Name` / `# Created by` header goes directly to `this.builder`; the actual
       * records are buffered in `this.out` until `encode()` is called.
       *
       * If no bond table is registered for the component, or an atom has no entry in it,
       * no bonds are written for it instead of throwing; without any bond table the atom
       * type falls back to the plain element symbol.
       *
       * @param category category to encode
       * @param context optional context forwarded to `getCategoryInstanceData`
       */
      _writeCategory<Ctx>(category: Category<Ctx>, context?: Ctx): void {
          const a = StringBuilder.create();
          const b = StringBuilder.create();
          const { instance, source } = getCategoryInstanceData(category, context);

          // write header
          const name = this.getName(instance, source);
          StringBuilder.writeSafe(this.builder, `# Name: ${name}\n# Created by ${this.encoder}\n\n`);

          // undefined when there is no bond information for this component
          const bondMap = this.componentBondData.entries.get(name);

          let bondCount = 0;
          const atoms = this.getAtoms(instance, source);
          StringBuilder.writeSafe(a, '@<TRIPOS>ATOM\n');
          StringBuilder.writeSafe(b, '@<TRIPOS>BOND\n');
          atoms.forEach((atom1, label_atom_id1) => {
              const { index: i1 } = atom1;

              // atoms without any listed bond (e.g. single-atom components) simply contribute no BOND records
              const bonds = bondMap ? bondMap.map.get(label_atom_id1) : undefined;
              if (bonds) {
                  bonds.forEach((bond, label_atom_id2) => {
                      const atom2 = atoms.get(label_atom_id2);
                      // partner is not part of the written atom set
                      if (!atom2) return;

                      const { index: i2, type_symbol: type_symbol2 } = atom2;
                      // i1 < i2 ensures each bond is emitted once, although it is visited from both of its atoms
                      if (i1 < i2 && !this.skipHydrogen(type_symbol2)) {
                          const { order, flags } = bond;
                          const ar = BondType.is(BondType.Flag.Aromatic, flags);
                          StringBuilder.writeSafe(b, `${++bondCount} ${i1 + 1} ${i2 + 1} ${ar ? 'ar' : order}`);
                          StringBuilder.newline(b);
                      }
                  });
              }

              const sybyl = bondMap ? this.mapToSybyl(label_atom_id1, atom1.type_symbol, bondMap) : atom1.type_symbol;
              StringBuilder.writeSafe(a, `${i1 + 1} ${label_atom_id1} ${atom1.Cartn_x.toFixed(3)} ${atom1.Cartn_y.toFixed(3)} ${atom1.Cartn_z.toFixed(3)} ${sybyl} 1 ${name} 0.000\n`);
          });

          // could write something like 'SMALL\nNO_CHARGES', for now let's write **** indicating non-optional, yet missing, string values
          StringBuilder.writeSafe(this.out, `@<TRIPOS>MOLECULE\n${name}\n${atoms.size} ${bondCount} 1\n****\n****\n\n`);
          StringBuilder.writeSafe(this.out, StringBuilder.getString(a));
          StringBuilder.writeSafe(this.out, StringBuilder.getString(b));
          StringBuilder.writeSafe(this.out, `@<TRIPOS>SUBSTRUCTURE\n1 ${name} 1\n`);
      }

      /** Returns the bonds of `label_atom_id`, or an empty map if the bond table has no entry for it. */
      private bondsOf(bondMap: ComponentBond.Entry, label_atom_id: string): BondMap {
          return bondMap.map.get(label_atom_id) || new Map<string, { order: number, flags: number }>();
      }

      /** Number of entries of `map` for which `predicate(key, value)` holds. */
      private count<K, V>(map: Map<K, V>, predicate: (k: K, v: V) => boolean): number {
          let count = 0;
          map.forEach((v, k) => {
              if (predicate(k, v)) count++;
          });
          return count;
      }

      /** Sum of the bond orders of all bonds in `map`. */
      private orderSum(map: BondMap): number {
          let sum = 0;
          map.forEach(v => {
              sum += v.order;
          });
          return sum;
      }

      /**
       * Whether the bond partner with the given label id counts as a non-metal.
       *
       * NOTE(review): this is a case-sensitive prefix match of the atom *label* against
       * NON_METAL_ATOMS, not a check of the element symbol -- any label starting with
       * e.g. 'C' matches. Confirm this heuristic is acceptable for metal-containing components.
       */
      private isNonMetalBond(label_atom_id: string): boolean {
          for (const a of NON_METAL_ATOMS) {
              if (label_atom_id.startsWith(a)) return true;
          }
          return false;
      }

      /** Returns a new map holding only the bonds of `map` whose partner passes `isNonMetalBond`. */
      private extractNonmets(map: BondMap): BondMap {
          const ret = new Map<string, { order: number, flags: number }>();
          map.forEach((v, k) => {
              if (this.isNonMetalBond(k)) ret.set(k, v);
          });
          return ret;
      }

      // see https://www.sdsc.edu/CCMS/Packages/cambridge/pluto/atom_types.html
      // cannot account for covalently bound amino acids etc
      /**
       * Determines the SYBYL atom type of one atom.
       *
       * @param label_atom_id1 label id of the atom, used to look up its bonds
       * @param type_symbol1 element symbol of the atom
       * @param bondMap bond table of the component the atom belongs to
       * @returns the SYBYL type, or `type_symbol1` unchanged if no rule applies
       */
      private mapToSybyl(label_atom_id1: string, type_symbol1: string, bondMap: ComponentBond.Entry): string {
          // TODO if altLoc: 'Du' // 1.1
          // TODO if end of polymeric bond: 'Du' // 1.2
          if (type_symbol1 === 'D') return 'H'; // 1.3
          if (type_symbol1 === 'P') return 'P.3'; // 1.4, 4mpo/ligand?encoding=mol2&auth_seq_id=203 (PO4)
          if (type_symbol1 === 'Co' || type_symbol1 === 'Ru') return type_symbol1 + '.oh'; // 1.5

          // an atom without listed bonds is treated as having zero bonds
          const bonds = this.bondsOf(bondMap, label_atom_id1);
          const numBonds = bonds.size;

          if (type_symbol1 === 'Ti' || type_symbol1 === 'Cr') { // 1.10
              return type_symbol1 + (numBonds <= 4 ? '.th' : '.oh'); // 1.10.1 & 1.10.2
          }
          if (type_symbol1 === 'C') { // 1.6
              if (numBonds >= 4 && this.count(bonds, (_k, v) => v.order === 1) >= 4) return 'C.3'; // 1.6.1, 3rga/ligand?encoding=mol2&auth_seq_id=307 (MOH)
              if (numBonds === 3 && this.isCat(bonds, bondMap)) return 'C.cat'; // 1.6.2, 1acj/ligand?encoding=mol2&auth_seq_id=44 (ARG), 5vjb/ligand?encoding=mol2&auth_seq_id=101 (GAI)
              if (numBonds >= 2 && this.count(bonds, (_k, v) => BondType.is(BondType.Flag.Aromatic, v.flags)) >= 2) return 'C.ar'; // 1.6.3, 1acj/ligand?encoding=mol2&auth_seq_id=30 (PHE), 1acj/ligand?encoding=mol2&auth_seq_id=63 (TYR), 1acj/ligand?encoding=mol2&auth_seq_id=84 (TRP), 1acj/ligand?encoding=mol2&auth_seq_id=999 (THA)
              if ((numBonds === 1 || numBonds === 2) && this.count(bonds, (_k, v) => v.order === 3)) return 'C.1'; // 1.6.4, 3i04/ligand?encoding=mol2&auth_asym_id=C&auth_seq_id=900 (CYN)
              return 'C.2'; // 1.6.5
          }

          // most of the time, bonds will equal non-metal bonds
          const nonmets = this.count(bonds, (k, _v) => this.isNonMetalBond(k)) === bonds.size ? bonds : this.extractNonmets(bonds);
          const numNonmets = nonmets.size;

          if (type_symbol1 === 'O') { // 1.7
              if (numNonmets === 1) { // 1.7.1
                  if (this.isOC(nonmets, bondMap)) return 'O.co2'; // 1.7.1.1, 4h2v/ligand?encoding=mol2&auth_seq_id=403 (ACT)
                  if (this.isOP(nonmets, bondMap)) return 'O.co2'; // 1.7.1.2, 4mpo/ligand?encoding=mol2&auth_seq_id=203 (PO4)
              }
              if (numNonmets >= 2 && this.count(bonds, (_k, v) => v.order === 1) === bonds.size) return 'O.3'; // 1.7.2, 1acj/ligand?encoding=mol2&auth_seq_id=601 (HOH), 3rga/ligand?encoding=mol2&auth_seq_id=307 (MOH)
              return 'O.2'; // 1.7.3, 1acj/ligand?encoding=mol2&auth_seq_id=4 (SER)
          }
          if (type_symbol1 === 'N') { // 1.8
              if (numNonmets === 4 && this.count(nonmets, (_k, v) => v.order === 1) === 4) return 'N.4'; // 1.8.1, 4ikf/ligand?encoding=mol2&auth_seq_id=403 (NH4)
              if (numBonds >= 2 && this.count(bonds, (_k, v) => BondType.is(BondType.Flag.Aromatic, v.flags)) >= 2) return 'N.ar'; // 1.8.2, 1acj/ligand?encoding=mol2&auth_seq_id=84 (TRP), 1acj/ligand?encoding=mol2&auth_seq_id=999 (THA)
              if (numNonmets === 1 && this.count(nonmets, (_k, v) => v.order === 3)) return 'N.1'; // 1.8.3, 3i04/ligand?encoding=mol2&auth_asym_id=C&auth_seq_id=900 (CYN)
              if (numNonmets === 2 && this.orderSum(nonmets) === 4) return 'N.1'; // 1.8.4, 3sbr/ligand?encoding=mol2&auth_seq_id=640&auth_asym_id=D (N2O)
              if (numNonmets === 3 && this.hasCOCS(nonmets, bondMap)) return 'N.am'; // 1.8.5, 3zfz/ligand?encoding=mol2&auth_seq_id=1669 (1W8)
              if (numNonmets === 3) { // 1.8.6
                  if (this.count(nonmets, (_k, v) => v.order > 1) === 1) return 'N.pl3'; // 1.8.6.1, 4hon/ligand?encoding=mol2&auth_seq_id=407 (NO3)
                  if (this.count(nonmets, (_k, v) => v.order === 1) === 3) {
                      if (this.isNpl3(nonmets, bondMap)) return 'N.pl3'; // 1.8.6.1.1 & 1.8.6.1.2, 1acj/ligand?encoding=mol2&auth_seq_id=44 (ARG), 5vjb/ligand?encoding=mol2&auth_seq_id=101 (GAI)
                  }
                  return 'N.3';
              }
              return 'N.2'; // 1.8.7, 1acj/ligand?encoding=mol2&auth_seq_id=4 (SER)
          }
          if (type_symbol1 === 'S') { // 1.9
              if (numNonmets === 3 && this.countOfOxygenWithSingleNonmet(nonmets, bondMap) === 1) return 'S.o'; // 1.9.1, 4i03/ligand?encoding=mol2&auth_seq_id=312 (DMS)
              if (numNonmets === 4 && this.countOfOxygenWithSingleNonmet(nonmets, bondMap) === 2) return 'S.o2'; // 1.9.2, 1udt/ligand?encoding=mol2&auth_seq_id=1000 (VIA)
              if (numNonmets >= 2 && this.count(bonds, (_k, v) => v.order === 1) >= 2) return 'S.3'; // 1.9.3, 3zfz/ligand?encoding=mol2&auth_seq_id=1669 (1W8), 4gpc/ligand?encoding=mol2&auth_seq_id=902 (SO4)
              return 'S.2'; // 1.9.4
          }
          return type_symbol1; // 1.11
      }

      // 1.8.6.2.1: If one single bond is to an atom that forms a bond of type double, triple, aromatic or
      // delocalised .AND. one other single bond is to H then atom_type is N.pl3
      // 1.8.6.2.2: If one single bond is to an atom that forms a bond of type double, triple, aromatic or
      // delocalised .AND. neither of the other single bonds are to H .AND. sum_of_angles around N .ge. 350 deg then atom_type is N.pl3
      // TODO cannot check accurately for delocalized bonds
      /**
       * Approximation of the rules above: true as soon as any neighbor in `nonmets`
       * has a bond of order > 1 or an aromatic bond. The hydrogen/angle criteria are not checked.
       */
      private isNpl3(nonmets: BondMap, bondMap: ComponentBond.Entry): boolean {
          const iter = nonmets.keys();
          let result = iter.next();
          while (!result.done) {
              const label_atom_id = result.value;
              const adjacentBonds = this.bondsOf(bondMap, label_atom_id);
              if (this.count(adjacentBonds, (_k, v) => v.order > 1 || BondType.is(BondType.Flag.Aromatic, v.flags))) {
                  // TODO check accurately for 2nd criterion with coordinates
                  return true;
              }
              result = iter.next();
          }
          return false;
      }

      // If bond is to carbon .AND. carbon forms a total of 3 bonds, 2 of which are to an oxygen
      // forming only 1 non-metal bond then atom_type is O.co2
      /** Expects `nonmets` to hold exactly one bond (the caller checks this); atoms are recognized by label prefix. */
      private isOC(nonmets: BondMap, bondMap: ComponentBond.Entry): boolean {
          const nonmet = nonmets.entries().next()!.value as [string, { order: number, flags: number }];
          if (!nonmet[0].startsWith('C')) return false;
          const carbonBonds = this.bondsOf(bondMap, nonmet[0]);
          if (carbonBonds.size !== 3) return false;

          let count = 0;
          const iter = carbonBonds.keys();
          let result = iter.next();
          while (!result.done) {
              const label_atom_id = result.value;
              if (label_atom_id.startsWith('O')) {
                  const adjacentBonds = this.bondsOf(bondMap, label_atom_id);
                  if (this.count(adjacentBonds, (k, _v) => this.isNonMetalBond(k)) === 1) count++;
              }
              result = iter.next();
          }
          return count === 2;
      }

      // If bond is to phosphorus .AND. phosphorus forms at least 2 bonds to an oxygen forming
      // only 1 non-metal bond then atom_type is O.co2
      /** Expects `nonmets` to hold exactly one bond (the caller checks this); atoms are recognized by label prefix. */
      private isOP(nonmets: BondMap, bondMap: ComponentBond.Entry): boolean {
          const nonmet = nonmets.entries().next()!.value as [string, { order: number, flags: number }];
          if (!nonmet[0].startsWith('P')) return false;
          const phosphorusBonds = this.bondsOf(bondMap, nonmet[0]);
          if (phosphorusBonds.size < 2) return false;

          let count = 0;
          const iter = phosphorusBonds.keys();
          let result = iter.next();
          while (!result.done) {
              const label_atom_id = result.value;
              if (label_atom_id.startsWith('O')) {
                  const adjacentBonds = this.bondsOf(bondMap, label_atom_id);
                  if (this.count(adjacentBonds, (k, _v) => this.isNonMetalBond(k)) === 1) count++;
              }
              result = iter.next();
          }
          return count >= 2;
      }

      // If num_bond .eq. 3 .AND. all bonds are acyclic .AND. all bonds are to nitrogen .AND. each
      // nitrogen forms bonds to 2 other atoms both of which are not oxygen then atom_type is C.cat.
      /** The bond count of the carbon itself is checked by the caller; acyclicity is not verified (see TODO). */
      private isCat(currentBondMap: BondMap, bondMap: ComponentBond.Entry): boolean {
          const iter1 = currentBondMap.keys();
          let result1 = iter1.next();
          while (!result1.done) {
              const label_atom_id = result1.value;
              if (!label_atom_id.startsWith('N')) return false;

              const adjacentBonds = this.bondsOf(bondMap, label_atom_id);
              if (adjacentBonds.size < 2) return false;

              const iter2 = adjacentBonds.keys();
              let result2 = iter2.next();
              while (!result2.done) {
                  if (result2.value.startsWith('O')) return false;
                  result2 = iter2.next();
              }
              result1 = iter1.next();
          }
          // TODO ensure no cycles
          return true;
      }

      /**
       * Number of oxygen neighbors in `nonmets` that form exactly one non-metal bond,
       * as required by the S.o (1.9.1) and S.o2 (1.9.2) rules.
       */
      private countOfOxygenWithSingleNonmet(nonmets: BondMap, bondMap: ComponentBond.Entry): number {
          let count = 0;
          const iter = nonmets.keys();
          let result = iter.next();
          while (!result.done) {
              const label_atom_id = result.value;
              if (label_atom_id.startsWith('O')) {
                  const adjacentBonds = this.bondsOf(bondMap, label_atom_id);
                  // exactly one: an oxygen with further non-metal partners (e.g. a hydroxyl) must not be counted
                  if (this.count(adjacentBonds, (k, _v) => this.isNonMetalBond(k)) === 1) count++;
              }
              result = iter.next();
          }
          return count;
      }

      // If num_nonmet .eq. 3 .AND. one bond is to C=O or C=S then atom_type is N.am
      /** True if any carbon neighbor in `nonmets` has a double bond to an oxygen or a sulfur. */
      private hasCOCS(nonmets: BondMap, bondMap: ComponentBond.Entry): boolean {
          const iter = nonmets.keys();
          let result = iter.next();
          while (!result.done) {
              const label_atom_id = result.value;
              if (label_atom_id.startsWith('C')) {
                  const adjacentBonds = this.bondsOf(bondMap, label_atom_id);
                  // parentheses are required: the double-bond check has to apply to O as well as to S
                  if (this.count(adjacentBonds, (k, v) => (k.startsWith('O') || k.startsWith('S')) && v.order === 2)) return true;
              }
              result = iter.next();
          }
          return false;
      }

      /**
       * Writes every field of `category` as a `# <category>.<field>: <value>` comment line,
       * followed by an empty line. Only the first source and its first key are evaluated.
       *
       * @param sb builder receiving the lines
       * @param category category whose fields are written
       * @param context optional context forwarded to `getCategoryInstanceData`
       */
      protected writeFullCategory<Ctx>(sb: StringBuilder, category: Category<Ctx>, context?: Ctx) {
          const { instance, source } = getCategoryInstanceData(category, context);
          const fields = instance.fields;
          const src = source[0];
          const data = src.data;

          const it = src.keys();
          const key = it.move();
          for (let _f = 0; _f < fields.length; _f++) {
              const f = fields[_f]!;

              StringBuilder.writeSafe(sb, `# ${category.name}.${f.name}: `);
              const val = f.value(key, data, 0);
              StringBuilder.writeSafe(sb, val as string);
              StringBuilder.newline(sb);
          }
          StringBuilder.newline(sb);
      }

      /**
       * Finalizes the output: appends the meta block (on error or if meta-information was
       * requested) and then the buffered Mol2 records to `this.builder`, and marks the
       * encoder as encoded.
       */
      encode(): void {
          // write meta-information; it is placed in front of the buffered Mol2 records
          if (this.error || this.metaInformation) {
              StringBuilder.writeSafe(this.builder, StringBuilder.getString(this.meta));
          }

          StringBuilder.writeSafe(this.builder, StringBuilder.getString(this.out));

          this.encoded = true;
      }

      /**
       * All arguments are forwarded unchanged to the `LigandEncoder` constructor.
       *
       * @param encoder written into the `# Created by` header line
       * @param metaInformation if true, `encode()` emits the meta block
       * @param hydrogens forwarded to the base class -- presumably controls whether hydrogens are written; confirm there
       */
      constructor(encoder: string, metaInformation: boolean, hydrogens: boolean) {
          super(encoder, metaInformation, hydrogens);
          this.out = StringBuilder.create();
      }
  }