helper.ts 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394
  1. /**
  2. * Copyright (c) 2017-2021 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * @author Alexander Rose <alexander.rose@weirdbyte.de>
  5. * @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com>
  6. */
  7. // import * as Q from 'parsimmon';
  8. import * as P from '../../mol-util/monadic-parser';
  9. import { MolScriptBuilder } from '../../mol-script/language/builder';
  10. const B = MolScriptBuilder;
  11. import { Expression } from '../language/expression';
  12. import { KeywordDict, PropertyDict, FunctionDict, OperatorList } from './types';
  /**
   * Backslash-escapes every regular-expression metacharacter in `s` so the
   * result can be embedded in a pattern and match the input text literally.
   *
   * @param s text to escape; it is converted with `String(...)` first
   * @returns the escaped text
   */
  export function escapeRegExp(s: String) {
      const metaChars = /[\\^$*+?.()|[\]{}]/g;
      const text = String(s);
      // `$&` re-inserts the matched metacharacter after the added backslash.
      return text.replace(metaChars, '\\$&');
  }
  /**
   * Builds a parser for zero or more occurrences of a prefix operator followed
   * by the base expression.
   *
   * The parser refers to itself, so it is created through `lazy`. When the
   * operator does not match, the parser falls back to `nextParser`, which is
   * why zero occurrences of the operator are valid.
   *
   * @param opParser parser for the prefix operator
   * @param nextParser parser for the thing the operator applies to
   * @param mapFn called as `mapFn(op, operand)` for every operator that matched
   */
  export function prefix(opParser: P.MonadicParser<any>, nextParser: P.MonadicParser<any>, mapFn: any) {
      const self: P.MonadicParser<any> = P.MonadicParser.lazy(() => {
          const applied = P.MonadicParser.seq(opParser, self).map(pair => mapFn(...pair));
          return applied.or(nextParser);
      });
      return self;
  }
  /**
   * Builds a parser for a base expression followed by zero or more postfix
   * operators.
   *
   * Ideally this would mirror `prefix` with the order swapped, i.e.
   * `seq(parser, opParser).or(nextParser)`, but that recurses forever because
   * the very first rule inside `parser` is to match `parser` again. Writing
   * `nextParser.or(seq(parser, opParser))` does not work either: `.or` takes
   * the first alternative that matches, even if a later one would match more
   * input, so the parser would never look far enough ahead to see the postfix
   * operators.
   *
   * Instead a flat list of as many postfix operators as possible is matched
   * and then nested manually with `reduce`.
   *
   * Example:
   *
   *   INPUT  :: "4!!!"
   *   PARSE  :: [4, "factorial", "factorial", "factorial"]
   *   REDUCE :: ["factorial", ["factorial", ["factorial", 4]]]
   *
   * @param opParser parser for the postfix operator
   * @param nextParser parser for the operand
   * @param mapFn called as `mapFn(op, accumulated)` once per matched operator,
   *        left to right, starting with the parsed operand
   */
  export function postfix(opParser: P.MonadicParser<any>, nextParser: P.MonadicParser<any>, mapFn: any) {
      // `seq(...).map(...)` is used instead of Parsimmon's `seqMap`, which this
      // file noted as missing from monadic-parser.ts.
      return P.MonadicParser.seq(nextParser, opParser.many()).map((parsed: any) => {
          const [operand, suffixes] = parsed;
          return suffixes.reduce((acc: any, op: any) => mapFn(op, acc), operand);
      });
  }
  /**
   * Takes a parser for all the operators at this precedence level and a parser
   * that parses everything at the next precedence level, and returns a parser
   * that parses as many binary operations as possible, associating them to the
   * right (e.g. 1^2^3 is 1^(2^3), not (1^2)^3).
   *
   * @param opParser parser for the operators of this precedence level
   * @param nextParser parser for the operands (next precedence level)
   * @param mapFn called as `mapFn(op, left, right)` for every operation, the
   *        same argument order `binaryLeft` uses
   */
  export function binaryRight(opParser: P.MonadicParser<any>, nextParser: P.MonadicParser<any>, mapFn: any) {
      const parser: P.MonadicParser<any> = P.MonadicParser.lazy(() =>
          nextParser.chain(next =>
              P.MonadicParser.seq(
                  opParser,
                  // Re-inject the already parsed left operand into the sequence.
                  P.MonadicParser.of(next),
                  // Recursing on the right-hand side gives right associativity.
                  parser
              ).map((parts: any) => {
                  // Previously the raw [op, left, right] tuple was logged and
                  // returned, so `mapFn` was never applied.
                  const [op, left, right] = parts;
                  return mapFn(op, left, right);
              }).or(P.MonadicParser.of(next)) // no operator: just the operand
          )
      );
      return parser;
  }
  /**
   * Takes a parser for all the operators at this precedence level and a parser
   * that parses everything at the next precedence level, and returns a parser
   * that parses as many binary operations as possible, associating them to the
   * left (e.g. 1-2-3 is (1-2)-3, not 1-(2-3)).
   *
   * As with `postfix`, recursing in the desired direction is not possible, so
   * the whole list of operator chunks is parsed first and then nested manually
   * with `reduce`.
   *
   * Example:
   *
   *   INPUT  :: "1+2+3"
   *   PARSE  :: [1, ["+", 2], ["+", 3]]
   *   REDUCE :: ["+", ["+", 1, 2], 3]
   *
   * @param opParser parser for the operators of this precedence level
   * @param nextParser parser for the operands (next precedence level)
   * @param mapFn called as `mapFn(op, accumulated, operand)` per operation
   */
  export function binaryLeft(opParser: P.MonadicParser<any>, nextParser: P.MonadicParser<any>, mapFn: any) {
      const chunk = P.MonadicParser.seq(opParser, nextParser);
      // `seq(...).map(...)` is used rather than `seqMap`, which this file notes
      // is not provided by monadic-parser.ts.
      return P.MonadicParser.seq(nextParser, chunk.many()).map((parsed: any) => {
          const [first, rest] = parsed;
          return rest.reduce((acc: any, ch: any) => {
              const [op, another] = ch;
              return mapFn(op, acc, another);
          }, first);
      });
  }
  /**
   * Combine operators of decreasing binding strength.
   *
   * Starting from `rule`, each entry of `opList` wraps the parser built so far
   * by calling `level.type(level.rule, parserSoFar, map)`. For entries flagged
   * `isUnsupported` the map function is replaced by one that throws
   * "operator '<name>' not supported".
   *
   * @param opList operator descriptions, processed in array order
   * @param rule innermost parser the operators are layered on
   * @returns the parser produced by the last operator level (or `rule` itself
   *          when `opList` is empty)
   */
  export function combineOperators(opList: any[], rule: P.MonadicParser<any>) {
      let combined: any = rule;
      opList.forEach(level => {
          const mapFn = level.isUnsupported
              ? makeError(`operator '${level.name}' not supported`)
              : level.map;
          combined = level.type(level.rule, combined, mapFn);
      });
      return combined;
  }
  /**
   * Parser for an infix operator: `whitespace`, then the text matched by `re`,
   * then `whitespace` again. Only the regexp match (capture `group`) is kept.
   *
   * @param re pattern of the operator
   * @param group capture group handed to `regexp`; defaults to 0
   */
  export function infixOp(re: RegExp, group: number = 0) {
      const ws = P.MonadicParser.whitespace;
      const operator = P.MonadicParser.regexp(re, group).skip(ws);
      return ws.then(operator);
      // Alternatives kept for reference:
      // return P.optWhitespace.then(P.MonadicParser.regexp(re, group).lookahead(P.whitespace))
      // return P.MonadicParser.regexp(re, group).skip(P.whitespace)
  }
  /**
   * Parser for a prefix operator: the text matched by `re` followed by
   * `whitespace`, which is skipped.
   *
   * @param re pattern of the operator
   * @param group capture group handed to `regexp`; defaults to 0
   */
  export function prefixOp(re: RegExp, group: number = 0) {
      const operator = P.MonadicParser.regexp(re, group);
      return operator.skip(P.MonadicParser.whitespace);
  }
  /**
   * Parser for a postfix operator: `whitespace` followed by the text matched
   * by `re`; only the regexp match is kept.
   *
   * @param re pattern of the operator
   * @param group capture group handed to `regexp`; defaults to 0
   */
  export function postfixOp(re: RegExp, group: number = 0) {
      const operator = P.MonadicParser.regexp(re, group);
      return P.MonadicParser.whitespace.then(operator);
  }
  125. // export function functionOp (re: RegExp, rule: P.MonadicParser<any>) {
  126. // return P.MonadicParser.regexp(re, group).wrap(P.string('('), P.string(')'))
  127. // }
  /**
   * Infix operator parser for the form `<name> <number> OF` (case-insensitive),
   * yielding the number as a float.
   *
   * `name` is inserted into the pattern as-is, while the optional `short`
   * alternative is regexp-escaped first.
   *
   * @param name operator name (used verbatim as a pattern fragment)
   * @param short optional short form accepted instead of `name`
   */
  export function ofOp(name: string, short?: string) {
      let alternatives = name;
      if (short) {
          alternatives = `${name}|${escapeRegExp(short)}`;
      }
      const pattern = RegExp(`(${alternatives})\\s+([-+]?[0-9]*\\.?[0-9]+)\\s+OF`, 'i');
      // Capture group 2 is the numeric part of the match.
      return infixOp(pattern, 2).map(parseFloat);
  }
  /**
   * Creates a function that, whenever it is called, throws an `Error` carrying
   * `msg`. Any arguments passed to the returned function are ignored.
   *
   * @param msg message of the thrown error
   */
  export function makeError(msg: string) {
      return () => {
          throw new Error(msg);
      };
  }
  /**
   * Combines selections with a logical AND.
   *
   * @param selections expressions to combine
   * @returns the single element when there is exactly one, a
   *          `B.core.logic.and` expression over all of them when there are
   *          several, and `undefined` for an empty list
   */
  export function andExpr(selections: any[]) {
      const count = selections.length;
      if (count === 1) return selections[0];
      if (count > 1) return B.core.logic.and(selections);
      return undefined;
  }
  /**
   * Combines selections with a logical OR.
   *
   * @param selections expressions to combine
   * @returns the single element when there is exactly one, a
   *          `B.core.logic.or` expression over all of them when there are
   *          several, and `undefined` for an empty list
   */
  export function orExpr(selections: any[]) {
      const count = selections.length;
      if (count === 1) return selections[0];
      if (count > 1) return B.core.logic.or(selections);
      return undefined;
  }
  /**
   * Builds the test expression comparing `property` against `args`. The shape
   * of `args` selects the kind of test, checked in this order:
   *
   * 1. `{ op, val }`   - relational test for `=`, `!=`, `>`, `<`, `>=`, `<=`
   * 2. `{ flags }`     - `flags.hasAny`
   * 3. `{ min, max }`  - `rel.inRange`
   * 4. non-array value - `rel.eq` against the value itself
   * 5. array, length > 1 - `set.has` membership in the set of values
   * 6. any other array - `rel.eq` against its first element
   *
   * @throws Error when `args.op` is not one of the operators listed above
   */
  export function testExpr(property: any, args: any) {
      const isComparison = args && args.op !== undefined && args.val !== undefined;
      if (isComparison) {
          const operands = [property, args.val];
          switch (args.op) {
              case '=': return B.core.rel.eq(operands);
              case '!=': return B.core.rel.neq(operands);
              case '>': return B.core.rel.gr(operands);
              case '<': return B.core.rel.lt(operands);
              case '>=': return B.core.rel.gre(operands);
              case '<=': return B.core.rel.lte(operands);
              default: throw new Error(`operator '${args.op}' not supported`);
          }
      }
      if (args && args.flags !== undefined) {
          return B.core.flags.hasAny([property, args.flags]);
      }
      if (args && args.min !== undefined && args.max !== undefined) {
          return B.core.rel.inRange([property, args.min, args.max]);
      }
      if (!Array.isArray(args)) {
          return B.core.rel.eq([property, args]);
      }
      return args.length > 1
          ? B.core.set.has([B.core.type.set(args), property])
          : B.core.rel.eq([property, args[0]]);
  }
  /**
   * Wraps `selection` in a `queryInSelection` generator that queries all atom
   * groups with the 'in-complement' option set to true.
   *
   * @param selection expression passed as argument 0 of `queryInSelection`
   */
  export function invertExpr(selection: Expression) {
      const allAtomGroups = B.struct.generator.atomGroups();
      return B.struct.generator.queryInSelection({
          0: selection,
          query: allAtomGroups,
          'in-complement': true
      });
  }
  /**
   * Comparator that orders strings by descending length (longest first).
   *
   * Returns 0 for strings of equal length so the comparator is consistent, as
   * `Array.prototype.sort` requires; the earlier version returned -1 for both
   * `(a, b)` and `(b, a)` in that case.
   *
   * @returns a positive number when `b` is longer, a negative number when `a`
   *          is longer, and 0 when both have the same length
   */
  export function strLenSortFn(a: string, b: string) {
      return b.length - a.length;
  }
  /**
   * Builds a case-insensitive regexp matching `name` or any of its
   * abbreviations. The alternatives are regexp-escaped and sorted with
   * `strLenSortFn` before being joined with `|`.
   *
   * @param name primary name
   * @param abbr optional abbreviations; the array itself is not modified
   */
  function getNamesRegex(name: string, abbr?: string[]) {
      const candidates = [name];
      if (abbr) candidates.push(...abbr);
      const pattern = candidates.sort(strLenSortFn).map(escapeRegExp).join('|');
      return RegExp(pattern, 'i');
  }
  /**
   * Creates one value parser per non-numeric property, keyed by property name.
   *
   * Each parser matches the property's `regex`, converts the match with the
   * property's `map` function and turns it into a test via `testExpr`. If the
   * property is flagged `isUnsupported`, the parser throws
   * "property '<name>' not supported" when it maps a match. Properties flagged
   * `isNumeric` are left out of the result.
   *
   * @param properties property definitions
   * @returns dictionary from property name to its parser
   */
  export function getPropertyRules(properties: PropertyDict) {
      const rulesByName: { [name: string]: P.MonadicParser<any> } = {};
      const names = Object.keys(properties).sort(strLenSortFn);
      for (const name of names) {
          const spec = properties[name];
          const fail = makeError(`property '${name}' not supported`);
          const valueRule = P.MonadicParser.regexp(spec.regex).map((matched: any) => {
              if (spec.isUnsupported) fail();
              return testExpr(spec.property, spec.map(matched));
          });
          if (spec.isNumeric) continue;
          rulesByName[name] = valueRule;
      }
      return rulesByName;
  }
  /**
   * Creates one parser per property for the "<name> <value>" form, where the
   * name may be the property name or one of its abbreviations (surrounded by
   * optional whitespace).
   *
   * - Numeric properties expect a comparison operator (`>=`, `<=`, `=`, `!=`,
   *   `>`, `<`) followed by the value and produce an `{ op, val }` test.
   * - All other properties expect just the value.
   *
   * In both cases the test is wrapped in an `atomGroups` generator under the
   * key given by the property's `level`. Unsupported properties throw
   * "property '<name>' not supported" when a match is mapped.
   *
   * @param properties property definitions
   * @returns list of parsers, created in `strLenSortFn` order of the names
   */
  export function getNamedPropertyRules(properties: PropertyDict) {
      const rules: P.MonadicParser<any>[] = [];
      const optWs = P.MonadicParser.optWhitespace;
      for (const name of Object.keys(properties).sort(strLenSortFn)) {
          const spec = properties[name];
          const fail = makeError(`property '${name}' not supported`);
          const valueRule = P.MonadicParser.regexp(spec.regex).map((matched: any) => {
              if (spec.isUnsupported) fail();
              return testExpr(spec.property, spec.map(matched));
          });
          const nameRule = P.MonadicParser.regexp(getNamesRegex(name, spec.abbr)).trim(optWs);
          const toAtomGroups = (test: any) => B.struct.generator.atomGroups({ [spec.level]: test });

          if (!spec.isNumeric) {
              rules.push(nameRule.then(valueRule).map(toAtomGroups));
              continue;
          }

          const comparison = P.MonadicParser.seq(
              P.MonadicParser.regexp(/>=|<=|=|!=|>|</).trim(optWs),
              P.MonadicParser.regexp(spec.regex).map(spec.map)
          );
          const numericRule = nameRule.then(comparison).map((pair: any) => {
              if (spec.isUnsupported) fail();
              return testExpr(spec.property, { op: pair[0], val: pair[1] });
          });
          rules.push(numericRule.map(toAtomGroups));
      }
      return rules;
  }
  /**
   * Creates one parser per keyword. Each parser matches the keyword name or
   * one of its abbreviations (case-insensitive) and maps the match with the
   * keyword's `map` function. Keywords without a `map` throw
   * "keyword '<name>' not supported" when matched.
   *
   * @param keywords keyword definitions
   * @returns list of parsers, created in `strLenSortFn` order of the names
   */
  export function getKeywordRules(keywords: KeywordDict) {
      const names = Object.keys(keywords).sort(strLenSortFn);
      const rules: P.MonadicParser<any>[] = names.map(name => {
          const spec = keywords[name];
          const mapFn = spec.map ? spec.map : makeError(`keyword '${name}' not supported`);
          return P.MonadicParser.regexp(getNamesRegex(name, spec.abbr)).map(mapFn);
      });
      return rules;
  }
  /**
   * Creates one parser per function for the form `name( <argument> )`.
   *
   * The name is matched case-insensitively and is used as a regexp pattern
   * without escaping. Whitespace after `(` and before `)` is consumed with the
   * parentheses. The parsed argument is mapped with the function's `map`;
   * functions without a `map` throw "function '<name>' not supported".
   *
   * @param functions function definitions
   * @param argRule parser for the text between the parentheses
   * @returns list of parsers, created in `strLenSortFn` order of the names
   */
  export function getFunctionRules(functions: FunctionDict, argRule: P.MonadicParser<any>) {
      const open = P.MonadicParser.regexp(/\(\s*/);
      const close = P.MonadicParser.regexp(/\s*\)/);
      const rules: P.MonadicParser<any>[] = [];
      for (const name of Object.keys(functions).sort(strLenSortFn)) {
          const spec = functions[name];
          const mapFn = spec.map ? spec.map : makeError(`function '${name}' not supported`);
          const nameRule = P.MonadicParser.regexp(new RegExp(name, 'i'));
          rules.push(nameRule.skip(open).then(argRule).skip(close).map(mapFn));
      }
      return rules;
  }
  258. // const rule = P.regex(getNamesRegex(name, ps.abbr)).lookahead(lookahead).map(() => {
  259. // if (ps.isUnsupported) errorFn()
  260. // return ps.property
  261. // })
  /**
   * Creates one parser per property that matches the property name (or an
   * abbreviation) when it is followed by `lookahead`, and yields the
   * property's `property` value. Unsupported properties throw
   * "property '<name>' not supported" when matched.
   *
   * @param properties property definitions
   * @param lookahead pattern handed to the parser's `lookahead` step
   * @returns list of parsers, created in `strLenSortFn` order of the names
   */
  export function getPropertyNameRules(properties: PropertyDict, lookahead: RegExp) {
      const rules: P.MonadicParser<any>[] = [];
      for (const name of Object.keys(properties).sort(strLenSortFn)) {
          const spec = properties[name];
          const fail = makeError(`property '${name}' not supported`);
          // The `any` cast is kept from the original code, which calls
          // `.lookahead` on the parser instance.
          const nameRule = (P.MonadicParser as any).regexp(getNamesRegex(name, spec.abbr));
          rules.push(nameRule.lookahead(lookahead).map(() => {
              if (spec.isUnsupported) fail();
              return spec.property;
          }));
      }
      return rules;
  }
  /**
   * Collects the names and abbreviations of all properties, keywords and
   * operators into one flat list, in that order.
   *
   * NOTE(review): `functions` is accepted but not read, so function names are
   * not part of the returned list - confirm whether that is intended.
   *
   * @param properties property definitions
   * @param keywords keyword definitions
   * @param operators operator definitions
   * @param functions currently unused
   * @returns each entry's name followed by its abbreviations (if any)
   */
  export function getReservedWords(properties: PropertyDict, keywords: KeywordDict, operators: OperatorList, functions?: FunctionDict) {
      const words: string[] = [];
      for (const name in properties) {
          words.push(name, ...(properties[name].abbr || []));
      }
      for (const name in keywords) {
          words.push(name, ...(keywords[name].abbr || []));
      }
      operators.forEach(op => {
          words.push(op.name, ...(op.abbr || []));
      });
      return words;
  }
  /**
   * Builds a set expression from `ids`, mapping each id through `B.atomName`
   * first.
   *
   * @param ids atom names
   */
  export function atomNameSet(ids: string[]) {
      const names = ids.map(B.atomName);
      return B.core.type.set(names);
  }
  /**
   * Wraps `e` in a `queryInSelection` generator that queries all atom groups
   * within it.
   *
   * @param e expression passed as argument 0 of `queryInSelection`
   */
  export function asAtoms(e: Expression) {
      const allAtomGroups = B.struct.generator.atomGroups();
      return B.struct.generator.queryInSelection({ 0: e, query: allAtomGroups });
  }
  /**
   * Wraps a raw value in the builder type matching `property.head`:
   *
   * - `label_atom_id`  - `B.atomName(value)`
   * - `element-symbol` - `B.es(value)`
   * - `secondary-structure-flags` - `secondaryStructureFlags([value])`; when
   *   `sstrucDict` is given, the value is first replaced by a one-element
   *   array holding `sstrucDict[value.toUpperCase()]`, or `'none'` when that
   *   lookup is falsy
   *
   * Any other property returns the value unchanged.
   *
   * @param property expression whose `head` identifies the property
   * @param value raw value to wrap
   * @param sstrucDict optional lookup keyed by upper-cased value
   */
  export function wrapValue(property: any, value: any, sstrucDict?: any) {
      const head = property.head;
      if (head === 'structure-query.atom-property.macromolecular.label_atom_id') {
          return B.atomName(value);
      }
      if (head === 'structure-query.atom-property.core.element-symbol') {
          return B.es(value);
      }
      if (head === 'structure-query.atom-property.macromolecular.secondary-structure-flags') {
          const flagValue = sstrucDict
              ? [sstrucDict[value.toUpperCase()] || 'none']
              : value;
          return B.struct.type.secondaryStructureFlags([flagValue]);
      }
      return value;
  }
  /** Common prefix of the macromolecular atom-property symbol ids. */
  const propPrefix = 'structure-query.atom-property.macromolecular.';
  /** Property names (without the prefix) tested at entity level. */
  const entityProps = ['entityKey', 'label_entity_id', 'entityType'];
  /** Property names tested at chain level; entity-level names win if listed in both. */
  const chainProps = ['chainKey', 'label_asym_id', 'label_entity_id', 'auth_asym_id', 'entityType'];
  /** Property names tested at residue level. */
  const residueProps = ['residueKey', 'label_comp_id', 'label_seq_id', 'auth_comp_id', 'auth_seq_id', 'pdbx_formal_charge', 'secondaryStructureKey', 'secondaryStructureFlags', 'isModified', 'modifiedParentName'];

  /**
   * Determines which test level a property belongs to.
   *
   * @param property expression whose `head` is the property's symbol id
   * @returns `'entity-test'`, `'chain-test'` or `'residue-test'` when the head
   *          starts with the macromolecular prefix and the remainder is in the
   *          matching list (checked in that order); otherwise `'atom-test'`
   */
  export function testLevel(property: any) {
      const head = property.head;
      if (!head.startsWith(propPrefix)) return 'atom-test' as string;

      const name = head.slice(propPrefix.length);
      const levels: [string[], string][] = [
          [entityProps, 'entity-test'],
          [chainProps, 'chain-test'],
          [residueProps, 'residue-test'],
      ];
      for (const [props, level] of levels) {
          if (props.indexOf(name) !== -1) return level;
      }
      return 'atom-test' as string;
  }
  /** Symbol ids of properties whose values are flag sets. */
  const flagProps = [
      'structure-query.atom-property.macromolecular.secondary-structure-flags'
  ];

  /**
   * Builds the test comparing `property` with one or more wrapped values.
   *
   * - Flag properties: the first argument (`args[0]`) of every value is
   *   spread into one flag list, which is tested with `flags.hasAny` under the
   *   head of the first value.
   * - Other properties: `rel.eq` for a single value, `set.has` for several.
   *
   * @param property expression whose `head` identifies the property
   * @param values wrapped values to test against
   * @returns the test expression, or `undefined` when `values` is empty and
   *          the property is not a flag property
   */
  export function valuesTest(property: any, values: any[]) {
      const isFlagProperty = flagProps.indexOf(property.head) !== -1;
      if (isFlagProperty) {
          const name = values[0].head;
          const flags = values.reduce<any[]>((collected, v) => {
              collected.push(...v.args[0]);
              return collected;
          }, []);
          return B.core.flags.hasAny([property, { head: name, args: flags }]);
      }
      if (values.length === 1) {
          return B.core.rel.eq([property, values[0]]);
      }
      if (values.length > 1) {
          return B.core.set.has([B.core.type.set(values), property]);
      }
      return undefined;
  }
  /**
   * Builds an `atomGroups` generator whose 'residue-test' checks that
   * `B.ammp('label_comp_id')` is a member of the set made from `resnameList`.
   *
   * @param resnameList residue names to accept
   */
  export function resnameExpr(resnameList: string[]) {
      const allowedNames = B.core.type.set(resnameList);
      const residueTest = B.core.set.has([allowedNames, B.ammp('label_comp_id')]);
      return B.struct.generator.atomGroups({ 'residue-test': residueTest });
  }
  352. }