keywords.ts 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388
  1. /**
  2. * Copyright (c) 2017-2021 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. * @author Alexander Rose <alexander.rose@weirdbyte.de> * @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com> *
  4. * @author Koya Sakuma * This module is based on the Jmol transpiler from MolQL and was modified in a similar manner to the PyMOL and VMD transpilers.
  5. **/
  6. import { MolScriptBuilder } from '../../../mol-script/language/builder';
  7. const B = MolScriptBuilder;
  8. import * as h from '../helper';
  9. import { KeywordDict } from '../types';
  /**
   * Atom names treated as backbone atoms, keyed by polymer kind.
   *
   * The nucleic list holds the sugar/phosphate atom names in both the prime (')
   * and the asterisk (*) spelling; the protein list holds C, N, CA and O.
   */
  const Backbone = {
      nucleic: [
          'P',
          "O3'", "O5'", "C5'", "C4'", "C3'",
          'OP1', 'OP2',
          'O3*', 'O5*', 'C5*', 'C4*', 'C3*',
      ],
      protein: ['C', 'N', 'CA', 'O'],
  };
  /**
   * Builds the selection expression behind the `nucleic` keyword.
   *
   * Three queries are merged:
   *  1. atom groups whose residue passes a label_comp_id membership test
   *     against the nucleotide names listed below;
   *  2. residueKey groups picked by the test "atom count equals 1 and
   *     label_atom_id equals P";
   *  3. residueKey groups picked by an isSubset test of the sugar atom names
   *     (prime or asterisk spelling) against the group's label_atom_id set.
   *
   * @returns the merged MolScript expression
   */
  function nucleicExpr() {
      const nucleotideNames = [
          'G', 'C', 'A', 'T', 'U', 'I',
          'DG', 'DC', 'DA', 'DT', 'DU', 'DI',
          '+G', '+C', '+A', '+T', '+U', '+I',
      ];
      const sugarAtomsPrime = ["C1'", "C2'", "O3'", "C3'", "C4'", "C5'", "O5'"];
      const sugarAtomsStar = ['C1*', 'C2*', 'O3*', 'C3*', 'C4*', 'C5*', 'O5*'];

      // (a) residues recognised by name
      const byResidueName = B.struct.generator.atomGroups({
          'residue-test': B.core.set.has([
              B.set(...nucleotideNames),
              B.ammp('label_comp_id'),
          ]),
      });

      // (b) single-atom groups whose only atom is named P
      const lonePhosphorus = B.struct.filter.pick({
          0: B.struct.generator.atomGroups({ 'group-by': B.ammp('residueKey') }),
          test: B.core.logic.and([
              B.core.rel.eq([B.struct.atomSet.atomCount(), 1]),
              B.core.rel.eq([B.ammp('label_atom_id'), B.atomName('P')]),
          ]),
      });

      // (c) groups tested against the sugar atom names, in either spelling
      const bySugarAtoms = B.struct.filter.pick({
          0: B.struct.generator.atomGroups({ 'group-by': B.ammp('residueKey') }),
          test: B.core.logic.or([
              B.core.set.isSubset([h.atomNameSet(sugarAtomsPrime), B.ammpSet('label_atom_id')]),
              B.core.set.isSubset([h.atomNameSet(sugarAtomsStar), B.ammpSet('label_atom_id')]),
          ]),
      });

      return B.struct.combinator.merge([byResidueName, lonePhosphorus, bySugarAtoms]);
  }
  /**
   * Residue-name (label_comp_id) lists backing the residue-class keywords.
   *
   * Each key names a class; the value is the list of residue names that belong
   * to it. The lists are passed to `h.resnameExpr` by the keyword table.
   */
  const ResDict = {
      aliphatic: ['ALA', 'GLY', 'ILE', 'LEU', 'VAL'],
      amino: ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL', 'ASX', 'GLX', 'UNK'],
      acidic: ['ASP', 'GLU'],
      aromatic: ['HIS', 'PHE', 'TRP', 'TYR'],
      basic: ['ARG', 'HIS', 'LYS'],
      buried: ['ALA', 'CYS', 'ILE', 'LEU', 'MET', 'PHE', 'TRP', 'VAL'],
      cg: ['CYT', 'C', 'GUA', 'G'],
      cyclic: ['HIS', 'PHE', 'PRO', 'TRP', 'TYR'],
      hydrophobic: ['ALA', 'GLY', 'ILE', 'LEU', 'MET', 'PHE', 'PRO', 'TRP', 'TYR', 'VAL'],
      large: ['ARG', 'GLU', 'GLN', 'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'TRP', 'TYR'],
      medium: ['ASN', 'ASP', 'CYS', 'PRO', 'THR', 'VAL'],
      small: ['ALA', 'GLY', 'SER'],
      nucleic: ['A', 'C', 'T', 'G', 'U', 'DA', 'DC', 'DT', 'DG', 'DU'],
      protein: ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'CYX', 'GLN', 'GLU', 'GLY', 'HIS', 'HID', 'HIE', 'HIP', 'ILE', 'LEU', 'LYS', 'MET', 'MSE', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL'],
      // 'H2O' (letter O) is the water residue name; the original list only had
      // 'H20' (digit zero), which looks like a typo. 'H20' is kept so that any
      // input relying on the old spelling still matches.
      solvent: ['HOH', 'WAT', 'H2O', 'H20', 'TIP', 'SOL']
  };
  /** Atom groups whose residue passes a label_comp_id membership test against `names`. */
  function compIdInExpr(names: string[]) {
      return B.struct.generator.atomGroups({
          'residue-test': B.core.set.has([
              B.set(...names),
              B.ammp('label_comp_id')
          ])
      });
  }

  /** Atom groups grouped by residueKey; used as the input of per-group `filter.pick` tests. */
  function residueGroupsExpr() {
      return B.struct.generator.atomGroups({
          'group-by': B.ammp('residueKey')
      });
  }

  /** Test expression: the current group's label_atom_id set has the atom name `name`. */
  function hasAtomNameTest(name: string) {
      return B.core.set.has([
          B.ammpSet('label_atom_id'),
          B.atomName(name)
      ]);
  }

  /** Test expression: the current group has atom O2* or atom O2'. */
  function hasSugarO2Test() {
      return B.core.logic.or([
          hasAtomNameTest('O2*'),
          hasAtomNameTest("O2'")
      ]);
  }

  /**
   * Keyword table of the Jmol transpiler.
   *
   * Every entry carries a human-readable '@desc'. Entries may additionally
   * provide `abbr` (alternative spellings) and `map` (a thunk that builds the
   * MolScript expression for the keyword). Several entries below have a
   * description only and no `map`.
   */
  export const keywords: KeywordDict = {
      // general terms
      all: {
          '@desc': 'all atoms; same as *',
          abbr: ['*'],
          map: () => B.struct.generator.all()
      },
      none: {
          '@desc': 'no atoms',
          map: () => B.struct.generator.empty()
      },
      selected: {
          '@desc': 'atoms that have been selected; defaults to all when a file is first loaded'
      },
      unitcell: {
          '@desc': 'atoms within the current unitcell, which may be offset. This includes atoms on the faces and at the vertices of the unitcell.'
      },
      ions: {
          '@desc': '(specifically the PDB designations "PO4" and "SO4")'
      },
      ligand: {
          '@desc': '(originally "hetero and not solvent"; changed to "!(protein,nucleic,water,UREA)" for Jmol 12.2)'
      },
      nucleic: {
          '@desc': 'any group that (a) has one of the following group names: G, C, A, T, U, I, DG, DC, DA, DT, DU, DI, +G, +C, +A, +T, +U, +I; or (b) can be identified as a group that is only one atom, with name "P"; or (c) has all of the following atoms (prime, \', can replace * here): C1*, C2*, C3*, O3*, C4*, C5*, and O5*.',
          map: () => nucleicExpr()
      },
      purine: {
          '@desc': 'any nucleic group that (a) has one of the following group names: A, G, I, DA, DG, DI, +A, +G, or +I; or (b) also has atoms N7, C8, and N9.',
          map: () => B.struct.modifier.intersectBy({
              0: nucleicExpr(),
              by: B.struct.combinator.merge([
                  compIdInExpr(['A', 'G', 'I', 'DA', 'DG', 'DI', '+A', '+G', '+I']),
                  B.struct.filter.pick({
                      0: residueGroupsExpr(),
                      test: B.core.set.isSubset([
                          h.atomNameSet(['N7', 'C8', 'N9']),
                          B.ammpSet('label_atom_id')
                      ])
                  })
              ])
          })
      },
      pyrimidine: {
          '@desc': 'any nucleic group that (a) has one of the following group names: C, T, U, DC, DT, DU, +C, +T, +U; or (b) also has atom O2.',
          map: () => B.struct.modifier.intersectBy({
              0: nucleicExpr(),
              by: B.struct.combinator.merge([
                  compIdInExpr(['C', 'T', 'U', 'DC', 'DT', 'DU', '+C', '+T', '+U']),
                  B.struct.filter.pick({
                      0: residueGroupsExpr(),
                      // Criterion (b) is the atom O2, as the description states.
                      // The previous test looked for O2* / O2' (the same test
                      // the `rna` keyword uses), so every group carrying that
                      // atom was reported as a pyrimidine.
                      test: hasAtomNameTest('O2')
                  })
              ])
          })
      },
      dna: {
          '@desc': 'any nucleic group that (a) has one of the following group names: DG, DC, DA, DT, DU, DI, T, +G, +C, +A, +T; or (b) has neither atom O2* or O2\'.',
          map: () => B.struct.modifier.intersectBy({
              0: nucleicExpr(),
              by: B.struct.combinator.merge([
                  compIdInExpr(['DG', 'DC', 'DA', 'DT', 'DU', 'DI', 'T', '+G', '+C', '+A', '+T']),
                  B.struct.filter.pick({
                      0: residueGroupsExpr(),
                      test: B.core.logic.not([
                          hasSugarO2Test()
                      ])
                  })
              ])
          })
      },
      rna: {
          '@desc': 'any nucleic group that (a) has one of the following group names: G, C, A, U, I, +U, +I; or (b) has atom O2* or O2\'.',
          map: () => B.struct.modifier.intersectBy({
              0: nucleicExpr(),
              by: B.struct.combinator.merge([
                  compIdInExpr(['G', 'C', 'A', 'U', 'I', '+U', '+I']),
                  B.struct.filter.pick({
                      0: residueGroupsExpr(),
                      test: hasSugarO2Test()
                  })
              ])
          })
      },
      protein: {
          '@desc': 'defined as a group that (a) has one of the following group names: ALA, ARG, ASN, ASP, CYS, GLN, GLU, GLY, HIS, ILE, LEU}, LYS, MET, PHE, PRO, SER, THR, TRP, TYR, VAL, ASX, GLX, or UNK; or (b) contains PDB atom designations [C, O, CA, and N] bonded correctly; or (c) does not contain "O" but contains [C, CA, and N] bonded correctly; or (d) has only one atom, which has name CA and does not have the group name CA (indicating a calcium atom).',
          map: () => proteinExpr()
      },
      acidic: {
          '@desc': 'ASP GLU',
          map: () => h.resnameExpr(ResDict.acidic)
      },
      acyclic: {
          '@desc': 'amino and not cyclic',
          map: () => B.struct.modifier.intersectBy({
              0: h.resnameExpr(ResDict.amino),
              by: h.invertExpr(h.resnameExpr(ResDict.cyclic))
          })
      },
      aliphatic: {
          '@desc': 'ALA GLY ILE LEU VAL',
          map: () => h.resnameExpr(ResDict.aliphatic)
      },
      amino: {
          '@desc': 'all twenty standard amino acids, plus ASX, GLX, UNK',
          map: () => h.resnameExpr(ResDict.amino)
      },
      aromatic: {
          '@desc': 'HIS PHE TRP TYR (see also "isaromatic" for aromatic bonds)',
          map: () => h.resnameExpr(ResDict.aromatic)
      },
      basic: {
          '@desc': 'ARG HIS LYS',
          map: () => h.resnameExpr(ResDict.basic)
      },
      buried: {
          '@desc': 'ALA CYS ILE LEU MET PHE TRP VAL',
          map: () => h.resnameExpr(ResDict.buried)
      },
      charged: {
          '@desc': 'same as acidic or basic -- ASP GLU, ARG HIS LYS',
          map: () => h.resnameExpr(ResDict.acidic.concat(ResDict.basic))
      },
      cyclic: {
          '@desc': 'HIS PHE PRO TRP TYR',
          map: () => h.resnameExpr(ResDict.cyclic)
      },
      helix: {
          '@desc': 'secondary structure-related.',
          map: () => B.struct.generator.atomGroups({
              'residue-test': B.core.flags.hasAny([
                  B.struct.type.secondaryStructureFlags(['helix']),
                  B.ammp('secondaryStructureFlags')
              ])
          })
      },
      hetero: {
          '@desc': 'PDB atoms designated as HETATM',
          map: () => B.struct.generator.atomGroups({
              'atom-test': B.ammp('isHet')
          })
      },
      hydrophobic: {
          '@desc': 'ALA GLY ILE LEU MET PHE PRO TRP TYR VAL',
          map: () => h.resnameExpr(ResDict.hydrophobic)
      },
      large: {
          '@desc': 'ARG GLU GLN HIS ILE LEU LYS MET PHE TRP TYR',
          map: () => h.resnameExpr(ResDict.large)
      },
      medium: {
          '@desc': 'ASN ASP CYS PRO THR VAL',
          map: () => h.resnameExpr(ResDict.medium)
      },
      negative: {
          '@desc': 'same as acidic -- ASP GLU',
          map: () => h.resnameExpr(ResDict.acidic)
      },
      neutral: {
          '@desc': 'amino and not (acidic or basic)',
          map: () => B.struct.modifier.intersectBy({
              0: h.resnameExpr(ResDict.amino),
              by: h.invertExpr(h.resnameExpr(ResDict.acidic.concat(ResDict.basic)))
          })
      },
      polar: {
          '@desc': 'amino and not hydrophobic',
          map: () => B.struct.modifier.intersectBy({
              0: h.resnameExpr(ResDict.amino),
              by: h.invertExpr(h.resnameExpr(ResDict.hydrophobic))
          })
      },
      positive: {
          '@desc': 'same as basic -- ARG HIS LYS',
          map: () => h.resnameExpr(ResDict.basic)
      },
      sheet: {
          '@desc': 'secondary structure-related',
          map: () => B.struct.generator.atomGroups({
              'residue-test': B.core.flags.hasAny([
                  B.struct.type.secondaryStructureFlags(['sheet']),
                  B.ammp('secondaryStructureFlags')
              ])
          })
      },
      small: {
          '@desc': 'ALA GLY SER',
          map: () => h.resnameExpr(ResDict.small)
      },
      turn: {
          '@desc': 'secondary structure-related',
          map: () => B.struct.generator.atomGroups({
              'residue-test': B.core.flags.hasAny([
                  B.struct.type.secondaryStructureFlags(['turn']),
                  B.ammp('secondaryStructureFlags')
              ])
          })
      },
      alpha: {
          '@desc': '(*.CA)',
          map: () => B.struct.generator.atomGroups({
              'atom-test': B.core.rel.eq([
                  B.atomName('CA'),
                  B.ammp('label_atom_id')
              ])
          })
      },
      base: {
          '@desc': '(nucleic bases)'
      },
      backbone: {
          '@desc': '(*.C, *.CA, *.N, and all nucleic other than the bases themselves)',
          abbr: ['mainchain'],
          map: () => backboneExpr()
      },
      sidechain: {
          '@desc': '((protein or nucleic) and not backbone)'
      },
      solvent: {
          '@desc': 'PDB "HOH", water, also the connected set of H-O-H in any model',
          map: () => h.resnameExpr(ResDict.solvent)
      },
  };
  /**
   * Builds the selection expression behind the `backbone` keyword.
   *
   * Two `queryInSelection` expressions are merged: one runs an atom-name test
   * with `Backbone.protein` inside `proteinExpr()`, the other runs the same
   * kind of test with `Backbone.nucleic` inside `nucleicExpr()`.
   *
   * @returns the merged MolScript expression
   */
  function backboneExpr() {
      // backbone atom names looked up within the protein selection
      const proteinPart = B.struct.generator.queryInSelection({
          0: proteinExpr(),
          query: B.struct.generator.atomGroups({
              'atom-test': B.core.set.has([
                  h.atomNameSet(Backbone.protein),
                  B.ammp('label_atom_id'),
              ]),
          }),
      });

      // backbone atom names looked up within the nucleic selection
      const nucleicPart = B.struct.generator.queryInSelection({
          0: nucleicExpr(),
          query: B.struct.generator.atomGroups({
              'atom-test': B.core.set.has([
                  h.atomNameSet(Backbone.nucleic),
                  B.ammp('label_atom_id'),
              ]),
          }),
      });

      return B.struct.combinator.merge([proteinPart, nucleicPart]);
  }
  /**
   * Builds the selection expression behind the `protein` keyword.
   *
   * Atom groups are formed per residueKey and picked with a
   * `core.set.isSubset` test of the atom names C, N, CA and O against the
   * group's label_atom_id set.
   *
   * NOTE(review): only this backbone-atom criterion is implemented here; the
   * residue-name and single-CA cases listed in the keyword's '@desc' are not
   * part of this expression.
   *
   * @returns the MolScript pick expression
   */
  function proteinExpr() {
      const perResidue = B.struct.generator.atomGroups({
          'group-by': B.ammp('residueKey'),
      });
      const backboneAtomsTest = B.core.set.isSubset([
          h.atomNameSet(['C', 'N', 'CA', 'O']),
          B.ammpSet('label_atom_id'),
      ]);

      return B.struct.filter.pick({ 0: perResidue, test: backboneAtomsTest });
  }
  375. }