parser.ts 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446
  1. /**
  2. * Copyright (c) 2017-2021 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. * @author Alexander Rose <alexander.rose@weirdbyte.de>
  4. * @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com>
  5. * @author Koya Sakuma
  6. * This module is based on the Jmol transpiler from MolQL and was modified in a similar manner to the PyMOL and VMD transpilers.
  7. **/
  8. import * as P from '../../../mol-util/monadic-parser';
  9. import * as h from '../helper';
  10. import { MolScriptBuilder } from '../../../mol-script/language/builder';
  11. const B = MolScriptBuilder;
  12. import { properties, structureMap, structureDict } from './properties';
  13. import { special_properties } from './special_properties';
  14. import { special_keywords } from './special_keywords';
  15. import { special_operators } from './special_operators';
  16. import { operators } from './operators';
  17. import { keywords } from './keywords';
  18. import { AtomGroupArgs } from '../types';
  19. import { Transpiler } from '../transpiler';
  20. import { OperatorList } from '../types';
  // Parser rules for the special properties; the grammar below reads them by
  // property name (chain, name, resn, resi). An earlier variant, left commented
  // out in the original, built this table from `properties` instead.
  const propertiesDict = h.getPropertyRules(special_properties);

  // Builds a parser for one literal token.
  const literal = (token: string) => P.MonadicParser.string(token);

  // Punctuation tokens shared by the grammar rules.
  const slash = literal('/');
  const dot = literal('.');
  const colon = literal(':');
  const comma = literal(',');
  const star = literal('*');
  const bra = literal('[');
  const ket = literal(']');

  // A single decimal digit.
  const numbers = P.MonadicParser.regexp(/[0-9]/);
  /* NOTE(review), kept from the original: is the Parser -> MonadicParser substitution correct? */
  /**
   * Makes a rule optional by combining it (via `or`) with a fallback parser
   * created by `P.MonadicParser.of(null)`.
   *
   * @param rule the parser to try first
   * @returns the combined parser; presumably it yields `null` when `rule` does not match
   */
  function orNull(rule: P.MonadicParser<any>) {
      const nothing = P.MonadicParser.of(null);
      return rule.or(nothing);
  }
  /**
   * Converts the record produced by an atom selection macro into an
   * atom-groups generator expression.
   *
   * @param x maps property names (keys of `special_properties`) to test
   *          expressions; entries whose value is `null` are ignored
   * @returns the result of `B.struct.generator.atomGroups` for the collected tests
   * @throws Error when a key of `x` has no entry in `special_properties`
   */
  function atomSelectionQuery2(x: any) {
      // Bucket the non-null expressions by the level declared for their property.
      const byLevel: { [k: string]: any[] } = {};
      for (const key in x) {
          const spec = special_properties[key];
          if (!spec) {
              throw new Error(`property '${key}' not supported, value '${x[key]}'`);
          }

          const value = x[key];
          if (value === null) continue;

          const bucket = byLevel[spec.level] || (byLevel[spec.level] = []);
          bucket.push(value);
      }

      // Each level gets a single test: the and-combination of its expressions.
      const tests: AtomGroupArgs = {};
      Object.keys(byLevel).forEach(level => {
          tests[level] = h.andExpr(byLevel[level]);
      });

      return B.struct.generator.atomGroups(tests);
  }
  /**
   * Value comparison operators: <, <=, =, >=, >, != and LIKE.
   *
   * The single entry parses any of these operators (case-insensitively,
   * surrounded by optional whitespace) and maps `(op, e1, e2)` to an
   * atom-groups generator with one 'atom-test':
   *   - the literal 'structure' on either side -> secondary structure flag test
   *   - a regex operand on either side          -> string match against the other operand
   *   - LIKE                                    -> anchored, case-insensitive string match
   *   - everything else                         -> the matching `B.core.rel` comparison
   *
   * @throws Error from `map` when the operator is none of the supported ones
   */
  const valueOperators: OperatorList = [
      {
          '@desc': 'value comparisons',
          '@examples': [],
          name: '=',
          abbr: ['=='],
          type: h.binaryLeft,
          rule: P.MonadicParser.regexp(/\s*(LIKE|>=|<=|=|!=|>|<)\s*/i, 1),
          map: (op, e1, e2) => {
              // An operand counts as a regex only when it is an expression whose
              // head is named 'core.type.regex'. Other expressions (e.g. property
              // accessors) must NOT take the regex branches, otherwise the LIKE
              // branch and a regex on the opposite side are never considered.
              const isRegex = (e: any) => e.head !== undefined && e.head.name === 'core.type.regex';

              let expr;
              if (e1 === 'structure') {
                  expr = B.core.flags.hasAny([B.ammp('secondaryStructureFlags'), structureMap(e2)]);
              } else if (e2 === 'structure') {
                  expr = B.core.flags.hasAny([B.ammp('secondaryStructureFlags'), structureMap(e1)]);
              } else if (isRegex(e1)) {
                  expr = B.core.str.match([e1, B.core.type.str([e2])]);
              } else if (isRegex(e2)) {
                  expr = B.core.str.match([e2, B.core.type.str([e1])]);
              } else if (op.toUpperCase() === 'LIKE') {
                  // The operand that is an expression supplies the string; the
                  // other operand is the pattern.
                  if (e1.head) {
                      expr = B.core.str.match([
                          B.core.type.regex([`^${e2}$`, 'i']),
                          B.core.type.str([e1])
                      ]);
                  } else {
                      expr = B.core.str.match([
                          B.core.type.regex([`^${e1}$`, 'i']),
                          B.core.type.str([e2])
                      ]);
                  }
              }

              if (!expr) {
                  // Plain relational comparison: wrap the raw value with respect to
                  // the expression on the opposite side before comparing.
                  if (e1.head) e2 = h.wrapValue(e1, e2);
                  if (e2.head) e1 = h.wrapValue(e2, e1);
                  switch (op) {
                      case '=':
                          expr = B.core.rel.eq([e1, e2]);
                          break;
                      case '!=':
                          expr = B.core.rel.neq([e1, e2]);
                          break;
                      case '>':
                          expr = B.core.rel.gr([e1, e2]);
                          break;
                      case '<':
                          expr = B.core.rel.lt([e1, e2]);
                          break;
                      case '>=':
                          expr = B.core.rel.gre([e1, e2]);
                          break;
                      case '<=':
                          expr = B.core.rel.lte([e1, e2]);
                          break;
                      default: throw new Error(`value operator '${op}' not supported`);
                  }
              }
              return B.struct.generator.atomGroups({ 'atom-test': expr });
          }
      }
  ];
  /**
   * Builds an atom-groups generator from the result of the `AtomExpression` rule.
   *
   * @param x parse result; `x[1]` is the inner sequence
   *          `[resnoRange, resno, inscode, chainname, atomname, altloc, model]`
   *          where every slot may be `null`
   * @returns the result of `B.struct.generator.atomGroups` with chain, residue and
   *          atom tests for the slots that are present
   */
  function atomExpressionQuery(x: any[]) {
      // The sequence starts with the residue-number-range slot, which is not used
      // here, so it is skipped to keep the remaining fields aligned. The trailing
      // model slot is not used either.
      const [, resno, inscode, chainname, atomname, altloc] = x[1];
      const tests: AtomGroupArgs = {};

      if (chainname) {
          // should be configurable, there is an option in Jmol to use auth or label
          tests['chain-test'] = B.core.rel.eq([B.ammp('auth_asym_id'), chainname]);
      }

      const resProps: any[] = [];
      // Compare against null/undefined explicitly: residue number 0 is a real value.
      if (resno !== null && resno !== undefined) {
          resProps.push(B.core.rel.eq([B.ammp('auth_seq_id'), resno]));
      }
      if (inscode) resProps.push(B.core.rel.eq([B.ammp('pdbx_PDB_ins_code'), inscode]));
      if (resProps.length) tests['residue-test'] = h.andExpr(resProps);

      const atomProps: any[] = [];
      if (atomname) atomProps.push(B.core.rel.eq([B.ammp('auth_atom_id'), atomname]));
      if (altloc) atomProps.push(B.core.rel.eq([B.ammp('label_alt_id'), altloc]));
      if (atomProps.length) tests['atom-test'] = h.andExpr(atomProps);

      return B.struct.generator.atomGroups(tests);
  }
  /**
   * Grammar of the selection language. Each entry is a rule factory; rules refer
   * to each other through the `r` argument supplied by `createLanguage`.
   * `Query` is the top-level rule used by the exported transpiler.
   */
  const lang = P.MonadicParser.createLanguage({
      /** Optionally signed run of decimal digits, converted with `Number`. */
      Integer: () => P.MonadicParser.regexp(/-?[0-9]+/).map(Number).desc('integer'),

      /** A nested group, operator expression or plain expression wrapped in '(' and ')'. */
      Parens: function (r: any) {
          return P.MonadicParser.alt(
              r.Parens,
              r.Operator,
              r.Expression
          ).wrap(P.MonadicParser.string('('), P.MonadicParser.string(')'));
      },

      /**
       * A basic selection expression. Only the atom selection macro is enabled;
       * the other alternatives are kept below, commented out.
       */
      Expression: function (r: any) {
          return P.MonadicParser.alt(
              // r.NamedAtomProperties,
              // r.AtomExpression.map(atomExpressionQuery),
              r.AtomSelectionMacro.map(atomSelectionQuery2),
              // r.AtomSelectionMacroResi.map(atomSelectionQuery2),
              // r.Keywords,
              // r.Resno.lookahead(P.MonadicParser.regexp(/\s*(?!(LIKE|>=|<=|!=|[:^%/.=><]))/i)).map((x: any) => B.struct.generator.atomGroups({
              // 'residue-test': B.core.rel.eq([B.ammp('auth_seq_id'), x])
              // })),
              // r.ValueQuery,
              // r.Element.map((x: string) => B.struct.generator.atomGroups({
              // 'atom-test': B.core.rel.eq([B.acp('elementSymbol'), B.struct.type.elementSymbol(x)])
              // })),
              // r.Resname.map((x: string) => B.struct.generator.atomGroups({
              // 'residue-test': B.core.rel.eq([B.ammp('label_comp_id'), x])
              // })),
          );
      },

      /**
       * Atom selection macro. Yields a record with some of the keys
       * `resn`, `chain`, `name`; parts that are absent are `null`.
       *
       * Examples:
       *   lys:a.ca -> resn lys and chain A and name ca
       *   lys*a.ca -> resn lys and chain A and name ca
       *
       *   :a.ca    -> chain A and name ca
       *   *a.ca    -> chain A and name ca
       *
       *   *.cg     -> name cg
       *   :.cg     -> name cg
       */
      AtomSelectionMacro: function (r: any) {
          return P.MonadicParser.alt(
              // Leading ':' — chain and/or atom name without a residue name.
              colon.then(P.MonadicParser.alt(
                  P.MonadicParser.seq(
                      orNull(propertiesDict.chain).skip(dot),
                      orNull(propertiesDict.name)
                  ).map(x => { return { chain: x[0], name: x[1] }; }),
                  P.MonadicParser.seq(
                      orNull(propertiesDict.name).skip(dot)
                  ).map(x => { return { name: x[0] }; }),
              )),
              // Leading '*' — same alternatives as for ':'.
              star.then(P.MonadicParser.alt(
                  P.MonadicParser.seq(
                      orNull(propertiesDict.chain).skip(dot),
                      orNull(propertiesDict.name)
                  ).map(x => { return { chain: x[0], name: x[1] }; }),
                  P.MonadicParser.seq(
                      orNull(propertiesDict.name).skip(dot)
                  ).map(x => { return { name: x[0] }; }),
              )),
              // 1-100 lys:a.ca lys:a lys lys.ca
              // numbers.then(P.MonadicParser.alt(
              // P.MonadicParser.alt(
              // P.MonadicParser.seq(
              // orNull(propertiesDict.resi),
              // ).map(x => { return { resi: x[0] };})
              // ))),
              // Forms starting with a residue name, tried from most to least specific.
              P.MonadicParser.alt(
                  P.MonadicParser.seq(
                      orNull(propertiesDict.resn).skip(colon),
                      orNull(propertiesDict.chain).skip(dot),
                      orNull(propertiesDict.name)
                  ).map(x => { return { resn: x[0], chain: x[1], name: x[2] }; }),
                  P.MonadicParser.seq(
                      orNull(propertiesDict.resn).skip(star),
                      orNull(propertiesDict.chain).skip(dot),
                      orNull(propertiesDict.name)
                  ).map(x => { return { resn: x[0], chain: x[1], name: x[2] }; }),
                  P.MonadicParser.seq(
                      orNull(propertiesDict.resn).skip(colon),
                      orNull(propertiesDict.chain),
                  ).map(x => { return { resn: x[0], chain: x[1] }; }),
                  P.MonadicParser.seq(
                      orNull(propertiesDict.resn).skip(star),
                      orNull(propertiesDict.chain),
                  ).map(x => { return { resn: x[0], chain: x[1] }; }),
                  P.MonadicParser.seq(
                      orNull(propertiesDict.resn).skip(dot),
                      orNull(propertiesDict.name),
                  ).map(x => { return { resn: x[0], name: x[1] }; }),
                  P.MonadicParser.seq(
                      orNull(propertiesDict.resn),
                  ).map(x => { return { resn: x[0] }; }),
              ),
          );
      },

      /** Residue-index-only macro; yields `{ resi }` (`null` when absent). */
      AtomSelectionMacroResi: function (r: any) {
          return P.MonadicParser.alt(
              P.MonadicParser.alt(
                  P.MonadicParser.seq(
                      orNull(propertiesDict.resi)
                  ).map(x => { return { resi: x[0] }; }),
              ),
          );
      },

      /**
       * An identifier ([A-Z0-9_]+, case-insensitive) that does not start with a
       * reserved word from the special properties, keywords or operators.
       */
      ObjectProperty: () => {
          const w = h.getReservedWords(special_properties, special_keywords, special_operators)
              .sort(h.strLenSortFn).map(h.escapeRegExp).join('|');
          return P.MonadicParser.regexp(new RegExp(`(?!(${w}))[A-Z0-9_]+`, 'i'));
      },

      /**
       * An object property not followed by '/'. Object selections are not
       * supported, so a successful match always throws.
       */
      Object: (r: any) => {
          return r.ObjectProperty.notFollowedBy(slash)
              .map((x: any) => { throw new Error(`property 'object' not supported, value '${x}'`); });
      },

      /** Any of the named property rules derived from `properties`. */
      NamedAtomProperties: function () {
          return P.MonadicParser.alt(...h.getNamedPropertyRules(properties));
      },

      /** Two values separated by '-'; yields `{ range: [from, to] }`. */
      ValueRange: function (r: any) {
          return P.MonadicParser.seq(
              r.Value
                  .skip(P.MonadicParser.regexp(/-/i)),
              r.Value
          ).map(x => ({ range: x }));
      },

      /**
       * A property name, whitespace, then a comma-separated list of values and
       * value ranges. Ranges become `inRange` tests, single values go through
       * `h.valuesTest`; when both kinds occur they are or-combined.
       */
      RangeListProperty: function (r: any) {
          return P.MonadicParser.seq(
              P.MonadicParser.alt(...h.getPropertyNameRules(special_properties, /\s/))
                  .skip(P.MonadicParser.whitespace),
              P.MonadicParser.alt(
                  r.ValueRange,
                  r.Value
              ).sepBy1(comma)
          ).map(x => {
              const [property, values] = x;
              const listValues: (string | number)[] = [];
              const rangeValues: any[] = [];

              values.forEach((v: any) => {
                  if (v.range) {
                      rangeValues.push(
                          B.core.rel.inRange([property, v.range[0], v.range[1]])
                      );
                  } else {
                      listValues.push(h.wrapValue(property, v, structureDict));
                  }
              });

              const rangeTest = h.orExpr(rangeValues);
              const listTest = h.valuesTest(property, listValues);

              let test;
              if (rangeTest && listTest) {
                  test = B.core.logic.or([rangeTest, listTest]);
              } else {
                  test = rangeTest ? rangeTest : listTest;
              }

              return B.struct.generator.atomGroups({ [h.testLevel(property)]: test });
          });
      },

      /** Operator expressions over parenthesized groups and plain expressions. */
      Operator: function (r: any) {
          return h.combineOperators(operators, P.MonadicParser.alt(r.Parens, r.Expression));
      },

      /**
       * Atom expression: a lookahead for an atom prefix, followed by the optional
       * parts in this order:
       * [resnoRange, resno, inscode, chainname, atomname, altloc, model].
       * Parts that are absent are `null`.
       */
      AtomExpression: function (r: any) {
          return P.MonadicParser.seq(
              P.MonadicParser.lookahead(r.AtomPrefix),
              P.MonadicParser.seq(
                  r.ResnoRange.or(P.MonadicParser.of(null)),
                  r.Resno.or(P.MonadicParser.of(null)),
                  // r.Resno2.or(P.MonadicParser.of(null)),
                  r.Inscode.or(P.MonadicParser.of(null)),
                  r.Chainname.or(P.MonadicParser.of(null)),
                  // r.Chainname2.or(P.MonadicParser.of(null)),
                  r.Atomname.or(P.MonadicParser.of(null)),
                  r.Altloc.or(P.MonadicParser.of(null)),
                  r.Model.or(P.MonadicParser.of(null))),
          );
      },

      /** One character that can start an atom expression: a digit or one of : ^ % / . */
      AtomPrefix: () => P.MonadicParser.regexp(/[0-9:^%/.]/).desc('atom-prefix'),

      /** ':' followed by one to three letters; yields the letters. */
      Chainname: () => P.MonadicParser.regexp(/:([A-Za-z]{1,3})/, 1).desc('chainname'),
      // Chainname2: () => P.MonadicParser.regexp(/\*([A-Za-z]{1,3})/, 1).desc('chainname'),

      /** '/' followed by digits; yields the digits as a number. */
      Model: () => P.MonadicParser.regexp(/\/([0-9]+)/, 1).map(Number).desc('model'),

      /** '_' followed by one to three letters; yields the letters. */
      Element: () => P.MonadicParser.regexp(/_([A-Za-z]{1,3})/, 1).desc('element'),

      /** '.' followed by one to four alphanumerics, passed through `B.atomName`. */
      Atomname: () => P.MonadicParser.regexp(/\.([a-zA-Z0-9]{1,4})/, 1).map(B.atomName).desc('atomname'),

      /** One to four alphanumerics. */
      Resname: () => P.MonadicParser.regexp(/[a-zA-Z0-9]{1,4}/).desc('resname'),

      /** A residue number: an `Integer`. */
      Resno: (r: any) => r.Integer.desc('resno'),
      // Resno2: (r: any) => r.split(',').Integer.desc('resno'),

      /** '%' followed by one alphanumeric; yields that character. */
      Altloc: () => P.MonadicParser.regexp(/%([a-zA-Z0-9])/, 1).desc('altloc'),

      /** '^' followed by one alphanumeric; yields that character. */
      Inscode: () => P.MonadicParser.regexp(/\^([a-zA-Z0-9])/, 1).desc('inscode'),

      // function listMap(x: string) { return x.split(',').map(x => x.replace(/^["']|["']$/g, '')); }

      /**
       * Currently matches '.' followed by one to four alphanumerics.
       * NOTE(review): the original example for this rule is "[0SD]", which this
       * pattern does not match — looks like a placeholder; confirm intent.
       */
      BracketedResname: function (r: any) {
          return P.MonadicParser.regexp(/\.([a-zA-Z0-9]{1,4})/, 1)
              .desc('bracketed-resname');
          // [0SD]
      },

      /**
       * Currently matches '.' followed by one to three whitespace characters.
       * NOTE(review): the original examples for this rule are "123-200" and
       * "-12--3", which this pattern does not match — looks like a placeholder;
       * confirm intent.
       */
      ResnoRange: function (r: any) {
          return P.MonadicParser.regexp(/\.([\s]){1,3}/, 1)
              .desc('resno-range');
          // 123-200
          // -12--3
      },

      /** Any of the keyword rules derived from `keywords`. */
      Keywords: () => P.MonadicParser.alt(...h.getKeywordRules(keywords)),

      /** Top-level rule: an operator expression, group or plain expression, trimmed of optional whitespace. */
      Query: function (r: any) {
          return P.MonadicParser.alt(
              r.Operator,
              r.Parens,
              r.Expression
          ).trim(P.MonadicParser.optWhitespace);
      },

      /** A decimal number with optional sign, fraction and exponent, converted with `Number`. */
      Number: function () {
          return P.MonadicParser.regexp(/-?(0|[1-9][0-9]*)([.][0-9]+)?([eE][+-]?[0-9]+)?/)
              .map(Number)
              .desc('number');
      },

      /**
       * A string value, in one of three forms:
       *   - a bare word ([A-Z0-9_]+, case-insensitive) that does not start with a reserved word
       *   - a single-quoted string, yielded without the quotes
       *   - a double-quoted string, turned into an anchored, case-insensitive regex expression
       */
      String: function () {
          const w = h.getReservedWords(properties, keywords, operators)
              .sort(h.strLenSortFn).map(h.escapeRegExp).join('|');
          return P.MonadicParser.alt(
              P.MonadicParser.regexp(new RegExp(`(?!(${w}))[A-Z0-9_]+`, 'i')),
              // The body excludes unescaped single quotes so the match stops at the
              // closing quote (and double quotes are allowed inside).
              P.MonadicParser.regexp(/'((?:[^'\\]|\\.)*)'/, 1),
              P.MonadicParser.regexp(/"((?:[^"\\]|\\.)*)"/, 1).map(x => B.core.type.regex([`^${x}$`, 'i']))
          );
      },

      /** A number or a string. */
      Value: function (r: any) {
          return P.MonadicParser.alt(r.Number, r.String);
      },

      /** A nested value group, value operator expression or value expression wrapped in '(' and ')'. */
      ValueParens: function (r: any) {
          return P.MonadicParser.alt(
              r.ValueParens,
              r.ValueOperator,
              r.ValueExpressions
          ).wrap(P.MonadicParser.string('('), P.MonadicParser.string(')'));
      },

      /** Property names from `properties`, as usable on either side of a value comparison. */
      ValuePropertyNames: function () {
          return P.MonadicParser.alt(...h.getPropertyNameRules(properties, /LIKE|>=|<=|=|!=|>|<|\)|\s/i));
      },

      /** Value comparison expressions built from `valueOperators`. */
      ValueOperator: function (r: any) {
          return h.combineOperators(valueOperators, P.MonadicParser.alt(r.ValueParens, r.ValueExpressions));
      },

      /** An operand of a value comparison: a value or a property name. */
      ValueExpressions: function (r: any) {
          return P.MonadicParser.alt(
              r.Value,
              r.ValuePropertyNames
          );
      },

      /**
       * A value comparison used as a selection. Generator expressions are passed
       * through; a bare string of at most four characters is treated as a residue
       * name (`label_comp_id`) test.
       *
       * @throws Error for any other value, which must be part of a comparison
       */
      ValueQuery: function (r: any) {
          return P.MonadicParser.alt(
              r.ValueOperator.map((x: any) => {
                  // Plain values (strings, numbers) have no `head`; check for it
                  // before reading `head.name` so they reach the branch below
                  // instead of raising a TypeError.
                  if (x.head) {
                      if (x.head.name && x.head.name.startsWith('structure-query.generator')) return x;
                  } else {
                      if (typeof x === 'string' && x.length <= 4) {
                          return B.struct.generator.atomGroups({
                              'residue-test': B.core.rel.eq([B.ammp('label_comp_id'), x])
                          });
                      }
                  }
                  throw new Error(`values must be part of an comparison, value '${x}'`);
              })
          );
      }
  });
  /**
   * Transpiler entry point: parses the selection string with the grammar's
   * `Query` rule and returns whatever `tryParse` produces.
   */
  export const transpiler: Transpiler = (str) => {
      return lang.Query.tryParse(str);
  };