parser.ts 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272
  /**
   * Copyright (c) 2017-2022 mol* contributors, licensed under MIT, See LICENSE file for more info.
   *
   * @author Koya Sakuma <koya.sakuma.work@gmail.com>
   * @author Alexander Rose <alexander.rose@weirdbyte.de>
   *
   * Adapted from MolQL project
   **/
  9. import * as P from '../../../mol-util/monadic-parser';
  10. import * as h from '../helper';
  11. import { MolScriptBuilder } from '../../../mol-script/language/builder';
  12. const B = MolScriptBuilder;
  13. import { properties, structureMap } from './properties';
  14. import { operators } from './operators';
  15. import { keywords } from './keywords';
  16. import { AtomGroupArgs } from '../types';
  17. import { Transpiler } from '../transpiler';
  18. import { OperatorList } from '../types';
  19. import { Expression } from '../../language/expression';
  /**
   * Value comparison operators: <, <=, =, >=, >, != and LIKE.
   *
   * A single operator entry handles all of them: the rule captures the operator
   * token and `map` turns the two operands into an atom-groups query whose
   * 'atom-test' is the resulting comparison.
   */
  const valueOperators: OperatorList = [
      {
          '@desc': 'value comparisons',
          '@examples': [],
          name: '=',
          abbr: ['=='],
          type: h.binaryLeft,
          // Capture group 1 is the operator token; surrounding whitespace is consumed.
          rule: P.MonadicParser.regexp(/\s*(LIKE|>=|<=|=|!=|>|<)\s*/i, 1),
          map: (op, e1, e2) => {
              // True for operands produced by the double-quoted string rule (a regex expression).
              const isRegex = (e: any) => e.head !== undefined && e.head.name === 'core.type.regex';

              let expr;
              if (e1 === 'structure') {
                  // `structure <op> value`: test the secondary structure flags.
                  expr = B.core.flags.hasAny([B.ammp('secondaryStructureFlags'), structureMap(e2)]);
              } else if (e2 === 'structure') {
                  expr = B.core.flags.hasAny([B.ammp('secondaryStructureFlags'), structureMap(e1)]);
              } else if (isRegex(e1)) {
                  // A regex operand is matched against the other operand, whatever the operator is.
                  expr = B.core.str.match([e1, B.core.type.str([e2])]);
              } else if (isRegex(e2)) {
                  // Checked independently of e1 so that `property = "regex"` (expression on the
                  // left, regex on the right) is matched instead of falling through to a comparison.
                  expr = B.core.str.match([e2, B.core.type.str([e1])]);
              } else if (op.toUpperCase() === 'LIKE') {
                  // The plain (head-less) operand is the pattern, the other one the tested value.
                  if (e1.head && !e2.head) {
                      expr = B.core.str.match([
                          B.core.type.regex([`^${e2}$`, 'i']),
                          B.core.type.str([e1])
                      ]);
                  } else if (!e1.head) {
                      expr = B.core.str.match([
                          B.core.type.regex([`^${e1}$`, 'i']),
                          B.core.type.str([e2])
                      ]);
                  }
                  // Both operands are expressions: there is no literal pattern, so `expr`
                  // stays unset and the switch below rejects the operator.
              }

              if (!expr) {
                  // NOTE(review): wrapValue presumably coerces the plain operand to the value
                  // type of the expression operand - confirm in ../helper.
                  if (e1.head) e2 = h.wrapValue(e1, e2);
                  if (e2.head) e1 = h.wrapValue(e2, e1);
                  switch (op) {
                      case '=':
                          expr = B.core.rel.eq([e1, e2]);
                          break;
                      case '!=':
                          expr = B.core.rel.neq([e1, e2]);
                          break;
                      case '>':
                          expr = B.core.rel.gr([e1, e2]);
                          break;
                      case '<':
                          expr = B.core.rel.lt([e1, e2]);
                          break;
                      case '>=':
                          expr = B.core.rel.gre([e1, e2]);
                          break;
                      case '<=':
                          expr = B.core.rel.lte([e1, e2]);
                          break;
                      default: throw new Error(`value operator '${op}' not supported`);
                  }
              }
              return B.struct.generator.atomGroups({ 'atom-test': expr });
          }
      }
  ];
  /**
   * Builds an atom-groups query from the result of the `AtomExpression` rule.
   *
   * @param x parse result; only `x[1]` is read. Its first six entries are taken as
   * residue name, residue number, insertion code, chain name, atom name and
   * alternate location; parts missing from the input are `null`.
   * @returns the `B.struct.generator.atomGroups` expression holding the chain,
   * residue and atom tests that could be built from the present parts
   */
  function atomExpressionQuery(x: any[]) {
      // NOTE(review): the AtomExpression rule also yields a model number as a seventh
      // entry; it is not used here.
      const [resname, resno, inscode, chainname, atomname, altloc] = x[1];
      const tests: AtomGroupArgs = {};

      if (chainname) {
          // TODO: should be configurable, there is an option in Jmol to use auth or label
          tests['chain-test'] = B.core.rel.eq([B.ammp('auth_asym_id'), chainname]);
      }

      const resProps = [];
      if (resname) resProps.push(B.core.rel.eq([B.ammp('label_comp_id'), resname]));
      // Compare against null/undefined instead of truthiness: residue number 0 is a
      // valid integer and must not be dropped from the query.
      if (resno !== null && resno !== undefined) resProps.push(B.core.rel.eq([B.ammp('auth_seq_id'), resno]));
      if (inscode) resProps.push(B.core.rel.eq([B.ammp('pdbx_PDB_ins_code'), inscode]));
      if (resProps.length) tests['residue-test'] = h.andExpr(resProps);

      const atomProps = [];
      if (atomname) atomProps.push(B.core.rel.eq([B.ammp('auth_atom_id'), atomname]));
      if (altloc) atomProps.push(B.core.rel.eq([B.ammp('label_alt_id'), altloc]));
      if (atomProps.length) tests['atom-test'] = h.andExpr(atomProps);

      return B.struct.generator.atomGroups(tests);
  }
  /**
   * Grammar of the selection language handled by this transpiler, expressed as a
   * set of mutually recursive monadic parser rules. `Query` is the entry rule
   * used by `transpiler`.
   */
  const lang = P.MonadicParser.createLanguage({
      Integer: () => P.MonadicParser.regexp(/-?[0-9]+/).map(Number).desc('integer'),

      // A parenthesized sub-query; whitespace inside the parentheses is consumed.
      Parens: function (r: any) {
          return P.MonadicParser.alt(
              r.Parens,
              r.Operator,
              r.Expression
          ).wrap(P.MonadicParser.regexp(/\(\s*/), P.MonadicParser.regexp(/\s*\)/));
      },

      // A single selection term, mapped to the corresponding query expression.
      Expression: function (r: any) {
          return P.MonadicParser.alt(
              r.Keywords,
              // `a-b`: residues with a <= auth_seq_id <= b
              r.ResnoRange.map((x: [number, number]) => B.struct.generator.atomGroups({
                  'residue-test': B.core.logic.and([
                      B.core.rel.gre([B.ammp('auth_seq_id'), x[0]]),
                      B.core.rel.lte([B.ammp('auth_seq_id'), x[1]])
                  ])
              })),
              // A bare residue number, unless it is followed by a comparison operator
              // or by one of the atom-expression separators.
              // NOTE(review): `\s*` can match zero characters, so whitespace between the
              // number and an operator appears to defeat the negative lookahead - confirm.
              r.Resno.lookahead(P.MonadicParser.regexp(/\s*(?!(LIKE|>=|<=|!=|[\[:^%/.=><]))/i)).map((x: number) => B.struct.generator.atomGroups({
                  'residue-test': B.core.rel.eq([B.ammp('auth_seq_id'), x])
              })),
              r.AtomExpression.map(atomExpressionQuery),
              r.Within.map((x: [number, Expression]) => B.struct.modifier.includeSurroundings({ 0: x[1], radius: x[0] })),
              r.ValueQuery,
              r.Element.map((x: string) => B.struct.generator.atomGroups({
                  'atom-test': B.core.rel.eq([B.acp('elementSymbol'), B.struct.type.elementSymbol(x)])
              })),
              r.Resname.map((x: string) => B.struct.generator.atomGroups({
                  'residue-test': B.core.rel.eq([B.ammp('label_comp_id'), x])
              })),
          );
      },

      // Terms combined with the operators imported from './operators'.
      Operator: function (r: any) {
          return h.combineOperators(operators, P.MonadicParser.alt(r.Parens, r.Expression));
      },

      // [resname]resno^inscode:chain.atom%altloc/model, every part optional; the
      // lookahead requires the input to start with one of the prefix characters.
      // Yields a pair whose second entry is the list of the seven parts
      // (`null` for a part that is absent).
      AtomExpression: function (r: any) {
          return P.MonadicParser.seq(
              P.MonadicParser.lookahead(r.AtomPrefix),
              P.MonadicParser.seq(
                  r.BracketedResname.or(P.MonadicParser.of(null)),
                  r.Resno.or(P.MonadicParser.of(null)),
                  r.Inscode.or(P.MonadicParser.of(null)),
                  r.Chainname.or(P.MonadicParser.of(null)),
                  r.Atomname.or(P.MonadicParser.of(null)),
                  r.Altloc.or(P.MonadicParser.of(null)),
                  r.Model.or(P.MonadicParser.of(null))
              )
          ).desc('expression');
      },

      AtomPrefix: () => P.MonadicParser.regexp(/[\[0-9:^%/.]/).desc('atom-prefix'),
      Chainname: () => P.MonadicParser.regexp(/:([A-Za-z]{1,3})/, 1).desc('chainname'),
      Model: () => P.MonadicParser.regexp(/\/([0-9]+)/, 1).map(Number).desc('model'),
      Element: () => P.MonadicParser.regexp(/_([A-Za-z]{1,3})/, 1).desc('element'),
      Atomname: () => P.MonadicParser.regexp(/\.([a-zA-Z0-9]{1,4})/, 1).map(B.atomName).desc('atomname'),
      Resname: () => P.MonadicParser.regexp(/[a-zA-Z0-9]{1,4}/).desc('resname'),
      Resno: (r: any) => r.Integer.desc('resno'),
      Altloc: () => P.MonadicParser.regexp(/%([a-zA-Z0-9])/, 1).desc('altloc'),
      Inscode: () => P.MonadicParser.regexp(/\^([a-zA-Z0-9])/, 1).desc('inscode'),
      BracketedResname: () => P.MonadicParser.regexp(/\[([a-zA-Z0-9]{1,4})\]/, 1).desc('bracketed-resname'),

      // Two integers separated by '-' (optional whitespace around the dash); yields [from, to].
      ResnoRange: (r: any) => {
          return P.MonadicParser.seq(
              r.Integer.skip(P.MonadicParser.seq(
                  P.MonadicParser.optWhitespace,
                  P.MonadicParser.string('-'),
                  P.MonadicParser.optWhitespace
              )),
              r.Integer
          ).desc('resno-range');
      },

      // within(<integer>, <query>); yields [radius, query].
      Within: (r: any) => {
          return P.MonadicParser.regexp(/within/i)
              .skip(P.MonadicParser.regexp(/\s*\(\s*/))
              .then(P.MonadicParser.seq(
                  r.Integer.skip(P.MonadicParser.regexp(/\s*,\s*/)),
                  r.Query
              ))
              .skip(P.MonadicParser.regexp(/\)/));
      },

      Keywords: () => P.MonadicParser.alt(...h.getKeywordRules(keywords)).desc('keyword'),

      // Entry rule: a complete query with optional surrounding whitespace.
      Query: function (r: any) {
          return P.MonadicParser.alt(
              r.Operator,
              r.Parens,
              r.Expression
          ).trim(P.MonadicParser.optWhitespace);
      },

      Number: function () {
          return P.MonadicParser.regexp(/-?(0|[1-9][0-9]*)([.][0-9]+)?([eE][+-]?[0-9]+)?/)
              .map(Number)
              .desc('number');
      },

      // A string value: an unquoted word that does not start with a reserved word,
      // a single-quoted literal, or a double-quoted literal. The double-quoted form
      // becomes a case-insensitive, fully anchored regex expression.
      String: function () {
          const w = h.getReservedWords(properties, keywords, operators)
              .sort(h.strLenSortFn).map(h.escapeRegExp).join('|');
          return P.MonadicParser.alt(
              P.MonadicParser.regexp(new RegExp(`(?!(${w}))[A-Z0-9_]+`, 'i')),
              // The excluded character must be the delimiter itself ('), otherwise the
              // match can run past an inner unescaped quote and '"' is wrongly rejected.
              P.MonadicParser.regexp(/'((?:[^'\\]|\\.)*)'/, 1),
              P.MonadicParser.regexp(/"((?:[^"\\]|\\.)*)"/, 1).map(x => B.core.type.regex([`^${x}$`, 'i']))
          ).desc('string');
      },

      Value: function (r: any) {
          return P.MonadicParser.alt(r.Number, r.String);
      },

      ValueParens: function (r: any) {
          return P.MonadicParser.alt(
              r.ValueParens,
              r.ValueOperator,
              r.ValueExpressions
          ).wrap(P.MonadicParser.string('('), P.MonadicParser.string(')'));
      },

      ValuePropertyNames: function () {
          return P.MonadicParser.alt(...h.getPropertyNameRules(properties, /LIKE|>=|<=|=|!=|>|<|\)|\s/i));
      },

      // Values and property names combined with the comparison operators of `valueOperators`.
      ValueOperator: function (r: any) {
          return h.combineOperators(valueOperators, P.MonadicParser.alt(r.ValueParens, r.ValueExpressions));
      },

      ValueExpressions: function (r: any) {
          return P.MonadicParser.alt(
              r.Value,
              r.ValuePropertyNames
          );
      },

      // Accepts a value comparison only when it produced a structure generator;
      // a lone string of at most four characters is read as a residue name.
      // Anything else is rejected with an error.
      ValueQuery: function (r: any) {
          return P.MonadicParser.alt(
              r.ValueOperator.map((x: any) => {
                  if (x.head) {
                      if (x.head.name.startsWith('structure-query.generator')) return x;
                  } else {
                      if (typeof x === 'string' && x.length <= 4) {
                          return B.struct.generator.atomGroups({
                              'residue-test': B.core.rel.eq([B.ammp('label_comp_id'), x])
                          });
                      }
                  }
                  throw new Error(`values must be part of an comparison, value '${x}'`);
              })
          );
      }
  });
  /**
   * Transpiles a selection string by parsing it with the `Query` rule of `lang`
   * and returning the parse result.
   * NOTE(review): `tryParse` presumably throws when the input does not parse - confirm.
   */
  export const transpiler: Transpiler = (str) => {
      const query = lang.Query;
      return query.tryParse(str);
  };