keywords.ts 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391
  /**
   * Copyright (c) 2017-2021 mol* contributors, licensed under MIT, See LICENSE file for more info.
   *
   * @author Alexander Rose <alexander.rose@weirdbyte.de>
   * @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com>
   * @author Koya Sakuma
   *
   * This module is based on the Jmol transpiler from MolQL and was modified in a
   * similar manner to the PyMOL and VMD transpilers.
   */
  6. import { MolScriptBuilder } from '../../../mol-script/language/builder';
  7. const B = MolScriptBuilder;
  8. import * as h from '../helper';
  9. import { KeywordDict } from '../types';
  /**
   * Atom names (label_atom_id values) treated as backbone, keyed by polymer type.
   *
   * The nucleic list holds every primed name twice: once spelled with the
   * prime (') and once with the asterisk (*) in its place.
   */
  const Backbone = (() => {
      const primedNames = ["O3'", "O5'", "C5'", "C4'", "C3'"];
      // Same names, same order, with the asterisk spelling.
      const starredNames = primedNames.map(name => name.replace("'", '*'));

      return {
          nucleic: ['P', ...primedNames, 'OP1', 'OP2', ...starredNames],
          protein: ['C', 'N', 'CA', 'O']
      };
  })();
  /**
   * Builds the expression behind the Jmol `nucleic` keyword as the union of
   * three residue-level criteria:
   *
   *  (a) the residue name (label_comp_id) is one of the listed nucleotide names;
   *  (b) the residue is a single atom named P;
   *  (c) the residue contains all of C1', C2', O3', C3', C4', C5' and O5',
   *      or all of the same names spelled with * instead of '.
   *
   * Only builds the expression; nothing is evaluated here.
   *
   * @returns the merged expression produced by `B.struct.combinator.merge`.
   */
  function nucleicExpr() {
      // One atom set per residue, so that the tests below see a whole residue.
      const perResidue = () => B.struct.generator.atomGroups({
          'group-by': B.ammp('residueKey')
      });

      // True when every name in `names` occurs among the residue's atom names.
      const hasAllAtoms = (names: string[]) => B.core.set.isSubset([
          h.atomNameSet(names),
          B.ammpSet('label_atom_id')
      ]);

      // (a) residue name is a known nucleotide name
      const byResidueName = B.struct.generator.atomGroups({
          'residue-test': B.core.set.has([
              B.core.type.set(['G', 'C', 'A', 'T', 'U', 'I', 'DG', 'DC', 'DA', 'DT', 'DU', 'DI', '+G', '+C', '+A', '+T', '+U', '+I']),
              B.ammp('label_comp_id')
          ])
      });

      // (b) residue made of exactly one atom, named P
      const phosphorusOnly = B.struct.filter.pick({
          0: perResidue(),
          test: B.core.logic.and([
              hasAllAtoms(['P']),
              // Without the count check every residue that merely contains a
              // P atom would match, not only single-atom P residues.
              B.core.rel.eq([B.struct.atomSet.atomCount(), 1])
          ])
      });

      // (c) residue carries the full set of listed atoms, in either spelling
      const byAtomNames = B.struct.filter.pick({
          0: perResidue(),
          test: B.core.logic.or([
              hasAllAtoms(["C1'", "C2'", "O3'", "C3'", "C4'", "C5'", "O5'"]),
              hasAllAtoms(['C1*', 'C2*', 'O3*', 'C3*', 'C4*', 'C5*', 'O5*'])
          ])
      });

      return B.struct.combinator.merge([byResidueName, phosphorusOnly, byAtomNames]);
  }
  /**
   * Residue-name lists for the residue-class keywords. Each list is handed to
   * `h.resnameExpr` by the keyword of the same name; `cg`, `nucleic` and
   * `protein` are not referenced by the keyword table in this file.
   */
  const ResDict = {
      aliphatic: ['ALA', 'GLY', 'ILE', 'LEU', 'VAL'],
      // The twenty standard amino acids plus ASX, GLX and UNK.
      amino: ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL', 'ASX', 'GLX', 'UNK'],
      acidic: ['ASP', 'GLU'],
      aromatic: ['HIS', 'PHE', 'TRP', 'TYR'],
      basic: ['ARG', 'HIS', 'LYS'],
      buried: ['ALA', 'CYS', 'ILE', 'LEU', 'MET', 'PHE', 'TRP', 'VAL'],
      cg: ['CYT', 'C', 'GUA', 'G'],
      cyclic: ['HIS', 'PHE', 'PRO', 'TRP', 'TYR'],
      hydrophobic: ['ALA', 'GLY', 'ILE', 'LEU', 'MET', 'PHE', 'PRO', 'TRP', 'TYR', 'VAL'],
      large: ['ARG', 'GLU', 'GLN', 'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'TRP', 'TYR'],
      medium: ['ASN', 'ASP', 'CYS', 'PRO', 'THR', 'VAL'],
      small: ['ALA', 'GLY', 'SER'],
      nucleic: ['A', 'C', 'T', 'G', 'U', 'DA', 'DC', 'DT', 'DG', 'DU'],
      protein: ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'CYX', 'GLN', 'GLU', 'GLY', 'HIS', 'HID', 'HIE', 'HIP', 'ILE', 'LEU', 'LYS', 'MET', 'MSE', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL'],
      // 'H2O' (letter O) sits next to the older 'H20' (digit zero) spelling;
      // the latter is kept so residues it already matched still match.
      solvent: ['HOH', 'WAT', 'H2O', 'H20', 'TIP', 'SOL']
  };
  /** Atom groups whose residue name (label_comp_id) is one of `names`. */
  function residuesNamed(names: string[]) {
      return B.struct.generator.atomGroups({
          'residue-test': B.core.set.has([
              B.set(...names),
              B.ammp('label_comp_id')
          ])
      });
  }

  /** Atom groups grouped by residueKey, i.e. one atom set per residue. */
  function perResidueGroups() {
      return B.struct.generator.atomGroups({
          'group-by': B.ammp('residueKey')
      });
  }

  /** Test that is true when the current atom set has an atom named O2* or O2'. */
  function hasO2PrimeAtom() {
      return B.core.logic.or([
          B.core.set.has([
              B.ammpSet('label_atom_id'),
              B.atomName('O2*')
          ]),
          B.core.set.has([
              B.ammpSet('label_atom_id'),
              B.atomName("O2'")
          ])
      ]);
  }

  /** Atom groups whose residue carries the given secondary-structure flag. */
  function secondaryStructureExpr(flag: 'helix' | 'sheet' | 'turn') {
      return B.struct.generator.atomGroups({
          'residue-test': B.core.flags.hasAny([
              B.struct.type.secondaryStructureFlags([flag]),
              B.ammp('secondaryStructureFlags')
          ])
      });
  }

  /**
   * Jmol keyword table.
   *
   * Each entry has a human-readable '@desc', optional alternative spellings in
   * `abbr`, and an optional `map` callback that builds the corresponding
   * MolScript expression. Entries without `map` (selected, unitcell, ions,
   * ligand, base, sidechain) are described only; no expression is defined for
   * them here.
   */
  export const keywords: KeywordDict = {
      // general terms
      all: {
          '@desc': 'all atoms; same as *',
          abbr: ['*'],
          map: () => B.struct.generator.all()
      },
      none: {
          '@desc': 'no atoms',
          map: () => B.struct.generator.empty()
      },
      selected: {
          '@desc': 'atoms that have been selected; defaults to all when a file is first loaded'
      },
      unitcell: {
          '@desc': 'atoms within the current unitcell, which may be offset. This includes atoms on the faces and at the vertices of the unitcell.'
      },
      ions: {
          '@desc': '(specifically the PDB designations "PO4" and "SO4")'
      },
      ligand: {
          '@desc': '(originally "hetero and not solvent"; changed to "!(protein,nucleic,water,UREA)" for Jmol 12.2)'
      },

      // nucleic acid classes; each one is restricted to nucleicExpr()
      nucleic: {
          '@desc': 'any group that (a) has one of the following group names: G, C, A, T, U, I, DG, DC, DA, DT, DU, DI, +G, +C, +A, +T, +U, +I; or (b) can be identified as a group that is only one atom, with name "P"; or (c) has all of the following atoms (prime, \', can replace * here): C1*, C2*, C3*, O3*, C4*, C5*, and O5*.',
          map: () => nucleicExpr()
      },
      purine: {
          '@desc': 'any nucleic group that (a) has one of the following group names: A, G, I, DA, DG, DI, +A, +G, or +I; or (b) also has atoms N7, C8, and N9.',
          map: () => B.struct.modifier.intersectBy({
              0: nucleicExpr(),
              by: B.struct.combinator.merge([
                  residuesNamed(['A', 'G', 'I', 'DA', 'DG', 'DI', '+A', '+G', '+I']),
                  B.struct.filter.pick({
                      0: perResidueGroups(),
                      test: B.core.set.isSubset([
                          h.atomNameSet(['N7', 'C8', 'N9']),
                          B.ammpSet('label_atom_id')
                      ])
                  })
              ])
          })
      },
      pyrimidine: {
          '@desc': 'any nucleic group that (a) has one of the following group names: C, T, U, DC, DT, DU, +C, +T, +U; or (b) also has atom O2.',
          map: () => B.struct.modifier.intersectBy({
              0: nucleicExpr(),
              by: B.struct.combinator.merge([
                  residuesNamed(['C', 'T', 'U', 'DC', 'DT', 'DU', '+C', '+T', '+U']),
                  B.struct.filter.pick({
                      0: perResidueGroups(),
                      // Criterion (b) asks for the atom O2. Testing for O2* / O2'
                      // instead would reproduce criterion (b) of `rna`.
                      test: B.core.set.has([
                          B.ammpSet('label_atom_id'),
                          B.atomName('O2')
                      ])
                  })
              ])
          })
      },
      dna: {
          '@desc': 'any nucleic group that (a) has one of the following group names: DG, DC, DA, DT, DU, DI, T, +G, +C, +A, +T; or (b) has neither atom O2* or O2\'.',
          map: () => B.struct.modifier.intersectBy({
              0: nucleicExpr(),
              by: B.struct.combinator.merge([
                  residuesNamed(['DG', 'DC', 'DA', 'DT', 'DU', 'DI', 'T', '+G', '+C', '+A', '+T']),
                  B.struct.filter.pick({
                      0: perResidueGroups(),
                      test: B.core.logic.not([
                          hasO2PrimeAtom()
                      ])
                  })
              ])
          })
      },
      rna: {
          '@desc': 'any nucleic group that (a) has one of the following group names: G, C, A, U, I, +U, +I; or (b) has atom O2* or O2\'.',
          map: () => B.struct.modifier.intersectBy({
              0: nucleicExpr(),
              by: B.struct.combinator.merge([
                  residuesNamed(['G', 'C', 'A', 'U', 'I', '+U', '+I']),
                  B.struct.filter.pick({
                      0: perResidueGroups(),
                      test: hasO2PrimeAtom()
                  })
              ])
          })
      },

      // protein and residue classes
      protein: {
          '@desc': 'defined as a group that (a) has one of the following group names: ALA, ARG, ASN, ASP, CYS, GLN, GLU, GLY, HIS, ILE, LEU, LYS, MET, PHE, PRO, SER, THR, TRP, TYR, VAL, ASX, GLX, or UNK; or (b) contains PDB atom designations [C, O, CA, and N] bonded correctly; or (c) does not contain "O" but contains [C, CA, and N] bonded correctly; or (d) has only one atom, which has name CA and does not have the group name CA (indicating a calcium atom).',
          map: () => proteinExpr()
      },
      acidic: {
          '@desc': 'ASP GLU',
          map: () => h.resnameExpr(ResDict.acidic)
      },
      acyclic: {
          '@desc': 'amino and not cyclic',
          map: () => B.struct.modifier.intersectBy({
              0: h.resnameExpr(ResDict.amino),
              by: h.invertExpr(h.resnameExpr(ResDict.cyclic))
          })
      },
      aliphatic: {
          '@desc': 'ALA GLY ILE LEU VAL',
          map: () => h.resnameExpr(ResDict.aliphatic)
      },
      amino: {
          '@desc': 'all twenty standard amino acids, plus ASX, GLX, UNK',
          map: () => h.resnameExpr(ResDict.amino)
      },
      aromatic: {
          '@desc': 'HIS PHE TRP TYR (see also "isaromatic" for aromatic bonds)',
          map: () => h.resnameExpr(ResDict.aromatic)
      },
      basic: {
          '@desc': 'ARG HIS LYS',
          map: () => h.resnameExpr(ResDict.basic)
      },
      buried: {
          '@desc': 'ALA CYS ILE LEU MET PHE TRP VAL',
          map: () => h.resnameExpr(ResDict.buried)
      },
      charged: {
          '@desc': 'same as acidic or basic -- ASP GLU, ARG HIS LYS',
          map: () => h.resnameExpr(ResDict.acidic.concat(ResDict.basic))
      },
      cyclic: {
          '@desc': 'HIS PHE PRO TRP TYR',
          map: () => h.resnameExpr(ResDict.cyclic)
      },
      helix: {
          '@desc': 'secondary structure-related.',
          map: () => secondaryStructureExpr('helix')
      },
      hetero: {
          '@desc': 'PDB atoms designated as HETATM',
          map: () => B.struct.generator.atomGroups({
              'atom-test': B.ammp('isHet')
          })
      },
      hydrophobic: {
          '@desc': 'ALA GLY ILE LEU MET PHE PRO TRP TYR VAL',
          map: () => h.resnameExpr(ResDict.hydrophobic)
      },
      large: {
          '@desc': 'ARG GLU GLN HIS ILE LEU LYS MET PHE TRP TYR',
          map: () => h.resnameExpr(ResDict.large)
      },
      medium: {
          '@desc': 'ASN ASP CYS PRO THR VAL',
          map: () => h.resnameExpr(ResDict.medium)
      },
      negative: {
          '@desc': 'same as acidic -- ASP GLU',
          map: () => h.resnameExpr(ResDict.acidic)
      },
      neutral: {
          '@desc': 'amino and not (acidic or basic)',
          map: () => B.struct.modifier.intersectBy({
              0: h.resnameExpr(ResDict.amino),
              by: h.invertExpr(h.resnameExpr(ResDict.acidic.concat(ResDict.basic)))
          })
      },
      polar: {
          '@desc': 'amino and not hydrophobic',
          map: () => B.struct.modifier.intersectBy({
              0: h.resnameExpr(ResDict.amino),
              by: h.invertExpr(h.resnameExpr(ResDict.hydrophobic))
          })
      },
      positive: {
          '@desc': 'same as basic -- ARG HIS LYS',
          map: () => h.resnameExpr(ResDict.basic)
      },
      sheet: {
          '@desc': 'secondary structure-related',
          map: () => secondaryStructureExpr('sheet')
      },
      small: {
          '@desc': 'ALA GLY SER',
          map: () => h.resnameExpr(ResDict.small)
      },
      turn: {
          '@desc': 'secondary structure-related',
          map: () => secondaryStructureExpr('turn')
      },

      // atom-level classes
      alpha: {
          '@desc': '(*.CA)',
          map: () => B.struct.generator.atomGroups({
              'atom-test': B.core.rel.eq([
                  B.atomName('CA'),
                  B.ammp('label_atom_id')
              ])
          })
      },
      base: {
          '@desc': '(nucleic bases)'
      },
      backbone: {
          '@desc': '(*.C, *.CA, *.N, and all nucleic other than the bases themselves)',
          abbr: ['mainchain'],
          map: () => backboneExpr()
      },
      sidechain: {
          '@desc': '((protein or nucleic) and not backbone)'
      },
      solvent: {
          '@desc': 'PDB "HOH", water, also the connected set of H-O-H in any model',
          map: () => h.resnameExpr(ResDict.solvent)
      },
  };
  /**
   * Builds the expression behind the `backbone` keyword: the merge of
   *  - atoms within proteinExpr() whose label_atom_id is in Backbone.protein, and
   *  - atoms within nucleicExpr() whose label_atom_id is in Backbone.nucleic.
   *
   * @returns the merged expression produced by `B.struct.combinator.merge`.
   */
  function backboneExpr() {
      // Atom-level query: keep atoms whose name is one of `names`.
      const atomsNamed = (names: string[]) => B.struct.generator.atomGroups({
          'atom-test': B.core.set.has([
              h.atomNameSet(names),
              B.ammp('label_atom_id')
          ])
      });

      const proteinBackbone = B.struct.generator.queryInSelection({
          0: proteinExpr(),
          query: atomsNamed(Backbone.protein)
      });

      const nucleicBackbone = B.struct.generator.queryInSelection({
          0: nucleicExpr(),
          query: atomsNamed(Backbone.nucleic)
      });

      return B.struct.combinator.merge([proteinBackbone, nucleicBackbone]);
  }
  /**
   * Builds the expression behind the `protein` keyword: residue groups (grouped
   * by residueKey) for which the atom names C, N, CA and O form a subset of the
   * group's label_atom_id set.
   *
   * @returns the expression produced by `B.struct.filter.pick`.
   */
  function proteinExpr() {
      const residueGroups = B.struct.generator.atomGroups({
          'group-by': B.ammp('residueKey')
      });

      const requiredAtomNames = h.atomNameSet(['C', 'N', 'CA', 'O']);
      const hasRequiredAtoms = B.core.set.isSubset([
          requiredAtomNames,
          B.ammpSet('label_atom_id')
      ]);

      return B.struct.filter.pick({
          0: residueGroups,
          test: hasRequiredAtoms
      });
  }