helper.ts 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399
  1. /**
  2. * Copyright (c) 2017-2022 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * @author Alexander Rose <alexander.rose@weirdbyte.de>
  5. * @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com>
  6. * @author Koya Sakuma <koya.sakuma.work@gmail.com>
  7. *
  8. * Adapted from MolQL project
  9. */
  10. import * as P from '../../mol-util/monadic-parser';
  11. import { MolScriptBuilder } from '../../mol-script/language/builder';
  12. const B = MolScriptBuilder;
  13. import { Expression } from '../language/expression';
  14. import { KeywordDict, PropertyDict, FunctionDict, OperatorList } from './types';
  15. export function escapeRegExp(s: String) {
  16. return String(s).replace(/[\\^$*+?.()|[\]{}]/g, '\\$&');
  17. }
  18. // Takes a parser for the prefix operator, and a parser for the base thing being
  19. // parsed, and parses as many occurrences as possible of the prefix operator.
  20. // Note that the parser is created using `P.lazy` because it's recursive. It's
  21. // valid for there to be zero occurrences of the prefix operator.
  22. export function prefix(opParser: P.MonadicParser<any>, nextParser: P.MonadicParser<any>, mapFn: any) {
  23. const parser: P.MonadicParser<any> = P.MonadicParser.lazy(() => {
  24. return P.MonadicParser.seq(opParser, parser)
  25. .map(x => mapFn(...x))
  26. .or(nextParser);
  27. });
  28. return parser;
  29. }
  30. export function prefixRemoveKet(opParser: P.MonadicParser<any>, nextParser: P.MonadicParser<any>, mapFn: any) {
  31. const parser: P.MonadicParser<any> = P.MonadicParser.lazy(() => {
  32. return P.MonadicParser.seq(opParser, parser.skip(P.MonadicParser.regexp(/\s*\)/)))
  33. .map(x => mapFn(...x))
  34. .or(nextParser);
  35. });
  36. return parser;
  37. }
  38. // Ideally this function would be just like `PREFIX` but reordered like
  39. // `P.seq(parser, opParser).or(nextParser)`, but that doesn't work. The
  40. // reason for that is that Parsimmon will get stuck in infinite recursion, since
  41. // the very first rule. Inside `parser` is to match parser again. Alternatively,
  42. // you might think to try `nextParser.or(P.seq(parser, opParser))`, but
  43. // that won't work either because in a call to `.or` (aka `P.alt`), Parsimmon
  44. // takes the first possible match, even if subsequent matches are longer, so the
  45. // parser will never actually look far enough ahead to see the postfix
  46. // operators.
  47. export function postfix(opParser: P.MonadicParser<any>, nextParser: P.MonadicParser<any>, mapFn: any) {
  48. // Because we can't use recursion like stated above, we just match a flat list
  49. // of as many occurrences of the postfix operator as possible, then use
  50. // `.reduce` to manually nest the list.
  51. //
  52. // Example:
  53. //
  54. // INPUT :: "4!!!"
  55. // PARSE :: [4, "factorial", "factorial", "factorial"]
  56. // REDUCE :: ["factorial", ["factorial", ["factorial", 4]]]
  57. return P.MonadicParser.seqMap(
  58. nextParser,
  59. opParser.many(),
  60. (x: any, suffixes: any) =>
  61. suffixes.reduce((acc: any, x: any) => {
  62. return mapFn(x, acc);
  63. }, x)
  64. );
  65. }
  66. // Takes a parser for all the operators at this precedence level, and a parser
  67. // that parsers everything at the next precedence level, and returns a parser
  68. // that parses as many binary operations as possible, associating them to the
  69. // right. (e.g. 1^2^3 is 1^(2^3) not (1^2)^3)
  70. export function binaryRight(opParser: P.MonadicParser<any>, nextParser: P.MonadicParser<any>, mapFn: any) {
  71. const parser: P.MonadicParser<any> = P.MonadicParser.lazy(() =>
  72. nextParser.chain(next =>
  73. P.MonadicParser.seq(
  74. opParser,
  75. P.MonadicParser.of(next),
  76. parser
  77. ).map((x) => {
  78. return x;
  79. }).or(P.MonadicParser.of(next))
  80. )
  81. );
  82. return parser;
  83. }
  84. // Takes a parser for all the operators at this precedence level, and a parser
  85. // that parsers everything at the next precedence level, and returns a parser
  86. // that parses as many binary operations as possible, associating them to the
  87. // left. (e.g. 1-2-3 is (1-2)-3 not 1-(2-3))
  88. export function binaryLeft(opParser: P.MonadicParser<any>, nextParser: P.MonadicParser<any>, mapFn: any) {
  89. // We run into a similar problem as with the `POSTFIX` parser above where we
  90. // can't recurse in the direction we want, so we have to resort to parsing an
  91. // entire list of operator chunks and then using `.reduce` to manually nest
  92. // them again.
  93. //
  94. // Example:
  95. //
  96. // INPUT :: "1+2+3"
  97. // PARSE :: [1, ["+", 2], ["+", 3]]
  98. // REDUCE :: ["+", ["+", 1, 2], 3]
  99. return P.MonadicParser.seqMap(
  100. nextParser,
  101. P.MonadicParser.seq(opParser, nextParser).many(),
  102. (first: any, rest: any) => {
  103. return rest.reduce((acc: any, ch: any) => {
  104. const [op, another] = ch;
  105. return mapFn(op, acc, another);
  106. }, first);
  107. }
  108. );
  109. }
  110. /**
  111. * combine operators of decreasing binding strength
  112. */
  113. export function combineOperators(opList: any[], rule: P.MonadicParser<any>) {
  114. const x = opList.reduce(
  115. (acc, level) => {
  116. const map = level.isUnsupported ? makeError(`operator '${level.name}' not supported`) : level.map;
  117. return level.type(level.rule, acc, map);
  118. },
  119. rule
  120. );
  121. return x;
  122. }
  123. export function infixOp(re: RegExp, group: number = 0) {
  124. return P.MonadicParser.optWhitespace.then(P.MonadicParser.regexp(re, group).skip(P.MonadicParser.optWhitespace));
  125. }
  126. export function prefixOp(re: RegExp, group: number = 0) {
  127. return P.MonadicParser.regexp(re, group).skip(P.MonadicParser.optWhitespace);
  128. }
  129. export function prefixOpNoWhiteSpace(re: RegExp, group: number = 0) {
  130. return P.MonadicParser.regexp(re, group).skip(P.MonadicParser.regexp(/\s*/));
  131. }
  132. export function postfixOp(re: RegExp, group: number = 0) {
  133. return P.MonadicParser.optWhitespace.then(P.MonadicParser.regexp(re, group));
  134. }
  135. export function ofOp(name: string, short?: string) {
  136. const op = short ? `${name}|${escapeRegExp(short)}` : name;
  137. const re = RegExp(`(${op})\\s+([-+]?[0-9]*\\.?[0-9]+)\\s+OF`, 'i');
  138. return infixOp(re, 2).map(parseFloat);
  139. }
  140. export function makeError(msg: string) {
  141. return function () {
  142. throw new Error(msg);
  143. };
  144. }
  145. export function andExpr(selections: any[]) {
  146. if (selections.length === 1) {
  147. return selections[0];
  148. } else if (selections.length > 1) {
  149. return B.core.logic.and(selections);
  150. } else {
  151. return undefined;
  152. }
  153. }
  154. export function orExpr(selections: any[]) {
  155. if (selections.length === 1) {
  156. return selections[0];
  157. } else if (selections.length > 1) {
  158. return B.core.logic.or(selections);
  159. } else {
  160. return undefined;
  161. }
  162. }
  163. export function testExpr(property: any, args: any) {
  164. if (args && args.op !== undefined && args.val !== undefined) {
  165. const opArgs = [property, args.val];
  166. switch (args.op) {
  167. case '=': return B.core.rel.eq(opArgs);
  168. case '!=': return B.core.rel.neq(opArgs);
  169. case '>': return B.core.rel.gr(opArgs);
  170. case '<': return B.core.rel.lt(opArgs);
  171. case '>=': return B.core.rel.gre(opArgs);
  172. case '<=': return B.core.rel.lte(opArgs);
  173. default: throw new Error(`operator '${args.op}' not supported`);
  174. }
  175. } else if (args && args.flags !== undefined) {
  176. return B.core.flags.hasAny([property, args.flags]);
  177. } else if (args && args.min !== undefined && args.max !== undefined) {
  178. return B.core.rel.inRange([property, args.min, args.max]);
  179. } else if (!Array.isArray(args)) {
  180. return B.core.rel.eq([property, args]);
  181. } else if (args.length > 1) {
  182. return B.core.set.has([B.core.type.set(args), property]);
  183. } else {
  184. return B.core.rel.eq([property, args[0]]);
  185. }
  186. }
  187. export function invertExpr(selection: Expression) {
  188. return B.struct.generator.queryInSelection({
  189. 0: selection, query: B.struct.generator.all(), 'in-complement': true }
  190. );
  191. }
  192. export function strLenSortFn(a: string, b: string) {
  193. return a.length < b.length ? 1 : -1;
  194. }
  195. function getNamesRegex(name: string, abbr?: string[]) {
  196. const names = (abbr ? [name].concat(abbr) : [name])
  197. .sort(strLenSortFn).map(escapeRegExp).join('|');
  198. return RegExp(`${names}`, 'i');
  199. }
  200. export function getPropertyRules(properties: PropertyDict) {
  201. // in keyof typeof properties
  202. const propertiesDict: { [name: string]: P.MonadicParser<any> } = {};
  203. Object.keys(properties).sort(strLenSortFn).forEach(name => {
  204. const ps = properties[name];
  205. const errorFn = makeError(`property '${name}' not supported`);
  206. const rule = P.MonadicParser.regexp(ps.regex).map((x: any) => {
  207. if (ps.isUnsupported) errorFn();
  208. return testExpr(ps.property, ps.map(x));
  209. });
  210. if (!ps.isNumeric) {
  211. propertiesDict[name] = rule;
  212. }
  213. });
  214. return propertiesDict;
  215. }
  216. export function getNamedPropertyRules(properties: PropertyDict) {
  217. const namedPropertiesList: P.MonadicParser<any>[] = [];
  218. Object.keys(properties).sort(strLenSortFn).forEach(name => {
  219. const ps = properties[name];
  220. const errorFn = makeError(`property '${name}' not supported`);
  221. const rule = P.MonadicParser.regexp(ps.regex).map((x: any) => {
  222. if (ps.isUnsupported) errorFn();
  223. return testExpr(ps.property, ps.map(x));
  224. });
  225. const nameRule = P.MonadicParser.regexp(getNamesRegex(name, ps.abbr)).trim(P.MonadicParser.optWhitespace);
  226. const groupMap = (x: any) => B.struct.generator.atomGroups({ [ps.level]: x });
  227. if (ps.isNumeric) {
  228. namedPropertiesList.push(
  229. nameRule.then(P.MonadicParser.seq(
  230. P.MonadicParser.regexp(/>=|<=|=|!=|>|</).trim(P.MonadicParser.optWhitespace),
  231. P.MonadicParser.regexp(ps.regex).map(ps.map)
  232. )).map((x: any) => {
  233. if (ps.isUnsupported) errorFn();
  234. return testExpr(ps.property, { op: x[0], val: x[1] });
  235. }).map(groupMap)
  236. );
  237. } else {
  238. namedPropertiesList.push(nameRule.then(rule).map(groupMap));
  239. }
  240. });
  241. return namedPropertiesList;
  242. }
  243. export function getKeywordRules(keywords: KeywordDict) {
  244. const keywordsList: P.MonadicParser<any>[] = [];
  245. Object.keys(keywords).sort(strLenSortFn).forEach(name => {
  246. const ks = keywords[name];
  247. const mapFn = ks.map ? ks.map : makeError(`keyword '${name}' not supported`);
  248. const rule = P.MonadicParser.regexp(getNamesRegex(name, ks.abbr)).map(mapFn);
  249. keywordsList.push(rule);
  250. });
  251. return keywordsList;
  252. }
  253. export function getFunctionRules(functions: FunctionDict, argRule: P.MonadicParser<any>) {
  254. const functionsList: P.MonadicParser<any>[] = [];
  255. const begRule = P.MonadicParser.regexp(/\(\s*/);
  256. const endRule = P.MonadicParser.regexp(/\s*\)/);
  257. Object.keys(functions).sort(strLenSortFn).forEach(name => {
  258. const fs = functions[name];
  259. const mapFn = fs.map ? fs.map : makeError(`function '${name}' not supported`);
  260. const rule = P.MonadicParser.regexp(new RegExp(name, 'i')).skip(begRule).then(argRule).skip(endRule).map(mapFn);
  261. functionsList.push(rule);
  262. });
  263. return functionsList;
  264. }
  265. export function getPropertyNameRules(properties: PropertyDict, lookahead: RegExp) {
  266. const list: P.MonadicParser<any>[] = [];
  267. Object.keys(properties).sort(strLenSortFn).forEach(name => {
  268. const ps = properties[name];
  269. const errorFn = makeError(`property '${name}' not supported`);
  270. const rule = (P.MonadicParser as any).regexp(getNamesRegex(name, ps.abbr)).lookahead(lookahead).map(() => {
  271. if (ps.isUnsupported) errorFn();
  272. return ps.property;
  273. });
  274. list.push(rule);
  275. });
  276. return list;
  277. }
  278. export function getReservedWords(properties: PropertyDict, keywords: KeywordDict, operators: OperatorList, functions?: FunctionDict) {
  279. const w: string[] = [];
  280. for (const name in properties) {
  281. w.push(name);
  282. if (properties[name].abbr) w.push(...properties[name].abbr!);
  283. }
  284. for (const name in keywords) {
  285. w.push(name);
  286. if (keywords[name].abbr) w.push(...keywords[name].abbr!);
  287. }
  288. operators.forEach(o => {
  289. w.push(o.name);
  290. if (o.abbr) w.push(...o.abbr);
  291. });
  292. return w;
  293. }
  294. export function atomNameSet(ids: string[]) {
  295. return B.core.type.set(ids.map(B.atomName));
  296. }
  297. export function asAtoms(e: Expression) {
  298. return B.struct.generator.queryInSelection({
  299. 0: e,
  300. query: B.struct.generator.all()
  301. });
  302. }
  303. export function wrapValue(property: any, value: any, sstrucDict?: any) {
  304. switch (property.head.name) {
  305. case 'structure-query.atom-property.macromolecular.label_atom_id':
  306. return B.atomName(value);
  307. case 'structure-query.atom-property.core.element-symbol':
  308. return B.es(value);
  309. case 'structure-query.atom-property.macromolecular.secondary-structure-flags':
  310. if (sstrucDict) {
  311. value = [sstrucDict[value.toUpperCase()] || 'none'];
  312. }
  313. return B.struct.type.secondaryStructureFlags([value]);
  314. default:
  315. return value;
  316. }
  317. }
  318. const propPrefix = 'structure-query.atom-property.macromolecular.';
  319. const entityProps = ['entityKey', 'label_entity_id', 'entityType'];
  320. const chainProps = ['chainKey', 'label_asym_id', 'label_entity_id', 'auth_asym_id', 'entityType'];
  321. const residueProps = ['residueKey', 'label_comp_id', 'label_seq_id', 'auth_comp_id', 'auth_seq_id', 'pdbx_formal_charge', 'secondaryStructureKey', 'secondaryStructureFlags', 'isModified', 'modifiedParentName'];
  322. export function testLevel(property: any) {
  323. if (property.head.name.startsWith(propPrefix)) {
  324. const name = property.head.name.substr(propPrefix.length);
  325. if (entityProps.indexOf(name) !== -1) return 'entity-test' as string;
  326. if (chainProps.indexOf(name) !== -1) return 'chain-test' as string;
  327. if (residueProps.indexOf(name) !== -1) return 'residue-test' as string;
  328. }
  329. return 'atom-test' as string;
  330. }
  331. const flagProps = [
  332. 'structure-query.atom-property.macromolecular.secondary-structure-flags'
  333. ];
  334. export function valuesTest(property: any, values: any[]) {
  335. if (flagProps.indexOf(property.head.name) !== -1) {
  336. const name = values[0].head;
  337. const flags: any[] = [];
  338. values.forEach(v => flags.push(...v.args[0]));
  339. return B.core.flags.hasAny([property, { head: name, args: flags }]);
  340. } else {
  341. if (values.length === 1) {
  342. return B.core.rel.eq([property, values[0]]);
  343. } else if (values.length > 1) {
  344. return B.core.set.has([B.core.type.set(values), property]);
  345. }
  346. }
  347. }
  348. export function resnameExpr(resnameList: string[]) {
  349. return B.struct.generator.atomGroups({
  350. 'residue-test': B.core.set.has([
  351. B.core.type.set(resnameList),
  352. B.ammp('label_comp_id')
  353. ])
  354. });
  355. }