helper.ts 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410
  1. /**
  2. * Copyright (c) 2017-2021 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * @author Alexander Rose <alexander.rose@weirdbyte.de>
  5. * @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com>
  6. * @author Koya Sakuma
  7. */
  8. // import * as Q from 'parsimmon';
  9. import * as P from '../../mol-util/monadic-parser';
  10. import { MolScriptBuilder } from '../../mol-script/language/builder';
  11. const B = MolScriptBuilder;
  12. import { Expression } from '../language/expression';
  13. import { KeywordDict, PropertyDict, FunctionDict, OperatorList } from './types';
  /**
   * Escapes every regular-expression metacharacter in `s` with a backslash so
   * that the result can be embedded in a pattern and match the input literally.
   *
   * @param s text to escape; it is converted with `String()` before processing
   * @returns the escaped text
   */
  export function escapeRegExp(s: String) {
      const metaChars = /[\\^$*+?.()|[\]{}]/g;
      const text = String(s);
      return text.replace(metaChars, match => '\\' + match);
  }
  /**
   * Builds a parser for zero or more prefix operators followed by an operand.
   *
   * The parser refers to itself, so it is created through
   * `P.MonadicParser.lazy`. Each time `opParser` matches, the parser recurses
   * and hands the sequence results (operator, parsed remainder) to `mapFn` as
   * spread arguments; when `opParser` does not match it falls back to
   * `nextParser`, which makes zero operator occurrences valid.
   *
   * @param opParser parser for the prefix operator
   * @param nextParser parser for the operand
   * @param mapFn called as `mapFn(op, operand)` for every operator occurrence
   */
  export function prefix(opParser: P.MonadicParser<any>, nextParser: P.MonadicParser<any>, mapFn: any) {
      const parser: P.MonadicParser<any> = P.MonadicParser.lazy(() => {
          const withOperator = P.MonadicParser.seq(opParser, parser).map(parts => mapFn(...parts));
          return withOperator.or(nextParser);
      });
      return parser;
  }
  /**
   * Variant of `prefix` that also expects a closing `)` after the recursively
   * parsed operand. The `)` is matched via `skip`, so it is not part of the
   * values handed to `mapFn`.
   *
   * @param opParser parser for the prefix operator
   * @param nextParser parser used when `opParser` does not match
   * @param mapFn called with the sequence results (operator, operand) spread as arguments
   */
  export function prefixRemoveKet(opParser: P.MonadicParser<any>, nextParser: P.MonadicParser<any>, mapFn: any) {
      const parser: P.MonadicParser<any> = P.MonadicParser.lazy(() => {
          const closingKet = P.MonadicParser.string(')');
          const operand = parser.skip(closingKet);
          const withOperator = P.MonadicParser.seq(opParser, operand).map(parts => mapFn(...parts));
          return withOperator.or(nextParser);
      });
      return parser;
  }
  /**
   * Builds a parser for an operand followed by zero or more postfix operators.
   *
   * Ideally this would mirror `prefix` with the order swapped, i.e.
   * `seq(parser, opParser).or(nextParser)`, but that cannot work: the very
   * first rule inside `parser` would be to match `parser` again, so the parser
   * would recurse forever. Trying `nextParser.or(seq(parser, opParser))` does
   * not help either, because `.or` takes the first alternative that matches,
   * even if a later one would match more input, so the postfix operators would
   * never be looked at.
   *
   * Instead a flat list of as many postfix operators as possible is matched and
   * then nested manually with `reduce`.
   *
   * Example:
   *
   *   INPUT  :: "4!!!"
   *   PARSE  :: [4, "factorial", "factorial", "factorial"]
   *   REDUCE :: ["factorial", ["factorial", ["factorial", 4]]]
   *
   * Implemented with `seq(...).map(...)` so that it does not depend on a
   * `seqMap` helper being available on the parser module.
   *
   * @param opParser parser for the postfix operator
   * @param nextParser parser for the operand
   * @param mapFn called as `mapFn(op, accumulated)` for each operator, left to right
   */
  export function postfix(opParser: P.MonadicParser<any>, nextParser: P.MonadicParser<any>, mapFn: any) {
      return P.MonadicParser.seq(nextParser, opParser.many()).map((parts: any) => {
          const [operand, suffixes] = parts;
          return suffixes.reduce((acc: any, op: any) => mapFn(op, acc), operand);
      });
  }
  /**
   * Takes a parser for all the operators at this precedence level and a parser
   * for everything at the next precedence level, and returns a parser that
   * parses as many binary operations as possible, associating them to the
   * right (e.g. 1^2^3 is 1^(2^3), not (1^2)^3).
   *
   * The left operand is parsed first; if an operator follows, the parser
   * recurses for the right-hand side and combines the pieces with `mapFn`.
   * Otherwise the left operand is returned unchanged.
   *
   * @param opParser parser for the operators of this level
   * @param nextParser parser for the operands (next precedence level)
   * @param mapFn called as `mapFn(op, left, right)`, matching the argument
   *   order used by `binaryLeft`
   */
  export function binaryRight(opParser: P.MonadicParser<any>, nextParser: P.MonadicParser<any>, mapFn: any) {
      const parser: P.MonadicParser<any> = P.MonadicParser.lazy(() =>
          nextParser.chain(next =>
              P.MonadicParser.seq(
                  opParser,
                  P.MonadicParser.of(next),
                  parser
              // Apply the level's mapping (previously the raw tuple was only
              // logged to the console and returned unmapped).
              ).map(x => mapFn(...x)).or(P.MonadicParser.of(next))
          )
      );
      return parser;
  }
  /**
   * Takes a parser for all the operators at this precedence level and a parser
   * for everything at the next precedence level, and returns a parser that
   * parses as many binary operations as possible, associating them to the
   * left (e.g. 1-2-3 is (1-2)-3, not 1-(2-3)).
   *
   * As with `postfix`, recursing in the desired direction is not possible, so
   * the whole list of (operator, operand) chunks is parsed first and then
   * nested manually with `reduce`.
   *
   * Example:
   *
   *   INPUT  :: "1+2+3"
   *   PARSE  :: [1, ["+", 2], ["+", 3]]
   *   REDUCE :: ["+", ["+", 1, 2], 3]
   *
   * Implemented with `seq(...).map(...)` so that it does not depend on a
   * `seqMap` helper being available on the parser module.
   *
   * @param opParser parser for the operators of this level
   * @param nextParser parser for the operands (next precedence level)
   * @param mapFn called as `mapFn(op, left, right)` for each chunk, left to right
   */
  export function binaryLeft(opParser: P.MonadicParser<any>, nextParser: P.MonadicParser<any>, mapFn: any) {
      return P.MonadicParser.seq(
          nextParser,
          P.MonadicParser.seq(opParser, nextParser).many()
      ).map((parts: any) => {
          const [first, rest] = parts;
          return rest.reduce((acc: any, chunk: any) => {
              const [op, another] = chunk;
              return mapFn(op, acc, another);
          }, first);
      });
  }
  /**
   * Combines operators of decreasing binding strength into a single parser.
   *
   * Starting from `rule`, every entry of `opList` wraps the parser built so far
   * by calling its `type` combinator with its own `rule` and mapping function.
   * Entries flagged `isUnsupported` get a mapping function that throws
   * `operator '<name>' not supported` instead of their `map`.
   *
   * @param opList operator levels, processed in array order
   * @param rule the innermost parser the operators are layered on
   * @returns the parser produced by the last operator level
   */
  export function combineOperators(opList: any[], rule: P.MonadicParser<any>) {
      let combined = rule;
      opList.forEach(level => {
          const mapFn = level.isUnsupported
              ? makeError(`operator '${level.name}' not supported`)
              : level.map;
          combined = level.type(level.rule, combined, mapFn);
      });
      return combined;
  }
  /**
   * Parser for an infix operator: `re` must be preceded and followed by
   * `P.MonadicParser.whitespace`; only the regexp match is yielded.
   *
   * @param re pattern of the operator
   * @param group capture group of `re` to yield (defaults to 0)
   */
  export function infixOp(re: RegExp, group: number = 0) {
      const ws = P.MonadicParser.whitespace;
      const operator = P.MonadicParser.regexp(re, group).skip(ws);
      return ws.then(operator);
  }
  /**
   * Parser for a prefix operator: matches `re` and then skips
   * `P.MonadicParser.whitespace` after it.
   *
   * @param re pattern of the operator
   * @param group capture group of `re` to yield (defaults to 0)
   */
  export function prefixOp(re: RegExp, group: number = 0) {
      const operator = P.MonadicParser.regexp(re, group);
      return operator.skip(P.MonadicParser.whitespace);
  }
  /**
   * Parser for a prefix operator whose trailing whitespace is optional:
   * matches `re` and then skips whatever `\s*` matches after it.
   *
   * @param re pattern of the operator
   * @param group capture group of `re` to yield (defaults to 0)
   */
  export function prefixOpNoWhiteSpace(re: RegExp, group: number = 0) {
      const operator = P.MonadicParser.regexp(re, group);
      const trailingSpace = P.MonadicParser.regexp(/\s*/);
      return operator.skip(trailingSpace);
  }
  /**
   * Parser for a postfix operator: `P.MonadicParser.whitespace` must come
   * first, then `re`; the regexp match is what gets yielded.
   *
   * @param re pattern of the operator
   * @param group capture group of `re` to yield (defaults to 0)
   */
  export function postfixOp(re: RegExp, group: number = 0) {
      const operator = P.MonadicParser.regexp(re, group);
      return P.MonadicParser.whitespace.then(operator);
  }
  138. // export function functionOp (re: RegExp, rule: P.MonadicParser<any>) {
  139. // return P.MonadicParser.regexp(re, group).wrap(P.string('('), P.string(')'))
  140. // }
  /**
   * Infix parser for expressions of the form `<name> <number> OF`
   * (case-insensitive). The number (second capture group) is yielded after
   * conversion with `parseFloat`.
   *
   * @param name operator name; inserted into the pattern as-is (not escaped)
   * @param short optional alternative spelling; escaped before insertion
   */
  export function ofOp(name: string, short?: string) {
      let alternatives = name;
      if (short) {
          alternatives = `${name}|${escapeRegExp(short)}`;
      }
      const pattern = `(${alternatives})\\s+([-+]?[0-9]*\\.?[0-9]+)\\s+OF`;
      return infixOp(RegExp(pattern, 'i'), 2).map(parseFloat);
  }
  /**
   * Creates a function that throws `new Error(msg)` whenever it is called.
   * Nothing is thrown when `makeError` itself is called.
   *
   * @param msg message of the error thrown by the returned function
   * @returns a function that always throws
   */
  export function makeError(msg: string) {
      const raise = function (): never {
          throw new Error(msg);
      };
      return raise;
  }
  /**
   * Combines selections with a logical AND.
   *
   * @param selections expressions to combine
   * @returns the only element when there is exactly one, `B.core.logic.and`
   *   of all elements when there are several, `undefined` when the list is empty
   */
  export function andExpr(selections: any[]) {
      const count = selections.length;
      if (count > 1) return B.core.logic.and(selections);
      return count === 1 ? selections[0] : undefined;
  }
  /**
   * Combines selections with a logical OR.
   *
   * @param selections expressions to combine
   * @returns the only element when there is exactly one, `B.core.logic.or`
   *   of all elements when there are several, `undefined` when the list is empty
   */
  export function orExpr(selections: any[]) {
      const count = selections.length;
      if (count > 1) return B.core.logic.or(selections);
      return count === 1 ? selections[0] : undefined;
  }
  /**
   * Builds the test expression comparing `property` against `args`.
   *
   * The shape of `args` selects the kind of test, checked in this order:
   *  - `{ op, val }`: relational test (`=`, `!=`, `>`, `<`, `>=`, `<=`)
   *  - `{ flags }`: `B.core.flags.hasAny`
   *  - `{ min, max }`: `B.core.rel.inRange`
   *  - any other non-array value (including a falsy one): equality with `args`
   *  - array with more than one element: membership in the set of `args`
   *  - any other array: equality with `args[0]`
   *
   * @param property expression of the property under test
   * @param args value description as outlined above
   * @throws Error when `args.op` is not one of the supported operators
   */
  export function testExpr(property: any, args: any) {
      if (args) {
          if (args.op !== undefined && args.val !== undefined) {
              const operands = [property, args.val];
              switch (args.op) {
                  case '=': return B.core.rel.eq(operands);
                  case '!=': return B.core.rel.neq(operands);
                  case '>': return B.core.rel.gr(operands);
                  case '<': return B.core.rel.lt(operands);
                  case '>=': return B.core.rel.gre(operands);
                  case '<=': return B.core.rel.lte(operands);
                  default: throw new Error(`operator '${args.op}' not supported`);
              }
          }
          if (args.flags !== undefined) {
              return B.core.flags.hasAny([property, args.flags]);
          }
          if (args.min !== undefined && args.max !== undefined) {
              return B.core.rel.inRange([property, args.min, args.max]);
          }
      }

      if (!Array.isArray(args)) {
          return B.core.rel.eq([property, args]);
      }
      return args.length > 1
          ? B.core.set.has([B.core.type.set(args), property])
          : B.core.rel.eq([property, args[0]]);
  }
  /**
   * Wraps `selection` in a `queryInSelection` over `B.struct.generator.all()`
   * with the `in-complement` flag set.
   *
   * @param selection expression to invert
   */
  export function invertExpr(selection: Expression) {
      const generator = B.struct.generator;
      return generator.queryInSelection({
          0: selection,
          query: generator.all(),
          'in-complement': true
      });
  }
  /**
   * Comparator that orders strings by decreasing length (longest first).
   *
   * Strings of equal length compare as equal (0), which keeps the comparator
   * consistent so that `Array.prototype.sort` yields a well-defined order.
   *
   * @returns a positive number when `b` is longer than `a`, a negative number
   *   when `a` is longer than `b`, and 0 when both have the same length
   */
  export function strLenSortFn(a: string, b: string) {
      return b.length - a.length;
  }
  /**
   * Builds a case-insensitive regular expression that matches `name` or any of
   * its abbreviations. The alternatives are ordered with `strLenSortFn` and
   * escaped with `escapeRegExp` before being joined with `|`.
   *
   * @param name primary name
   * @param abbr optional abbreviations; the passed array is not modified
   */
  function getNamesRegex(name: string, abbr?: string[]) {
      const candidates = abbr ? [name, ...abbr] : [name];
      const pattern = candidates
          .sort(strLenSortFn)
          .map(escapeRegExp)
          .join('|');
      return RegExp(pattern, 'i');
  }
  /**
   * Creates a value parser for every non-numeric property.
   *
   * Each parser matches the property's `regex`, converts the match with the
   * property's `map` and turns it into a test via `testExpr`. Parsing a value
   * of a property flagged `isUnsupported` throws
   * `property '<name>' not supported`. Properties flagged `isNumeric` are left
   * out of the result.
   *
   * @param properties property definitions keyed by name
   * @returns parsers keyed by property name, inserted in `strLenSortFn` order
   */
  export function getPropertyRules(properties: PropertyDict) {
      const rulesByName: { [name: string]: P.MonadicParser<any> } = {};
      const names = Object.keys(properties).sort(strLenSortFn);

      for (const name of names) {
          const spec = properties[name];
          const fail = makeError(`property '${name}' not supported`);
          const valueRule = P.MonadicParser.regexp(spec.regex).map((match: any) => {
              if (spec.isUnsupported) fail();
              return testExpr(spec.property, spec.map(match));
          });

          if (spec.isNumeric) continue;
          rulesByName[name] = valueRule;
      }

      return rulesByName;
  }
  /**
   * Creates one parser per property that matches the property name (or one of
   * its abbreviations) followed by a value.
   *
   * - Numeric properties expect a comparison operator
   *   (`>=`, `<=`, `=`, `!=`, `>`, `<`) and a value; the result is
   *   `testExpr(property, { op, val })`.
   * - All other properties expect just a value, handled like in
   *   `getPropertyRules`.
   *
   * In both cases the test is wrapped in `B.struct.generator.atomGroups` under
   * the key given by the property's `level`. Parsing a property flagged
   * `isUnsupported` throws `property '<name>' not supported`.
   *
   * @param properties property definitions keyed by name
   * @returns the parsers, ordered by `strLenSortFn` on the property names
   */
  export function getNamedPropertyRules(properties: PropertyDict) {
      const rules: P.MonadicParser<any>[] = [];

      for (const name of Object.keys(properties).sort(strLenSortFn)) {
          const spec = properties[name];
          const fail = makeError(`property '${name}' not supported`);
          const valueRule = P.MonadicParser.regexp(spec.regex).map((match: any) => {
              if (spec.isUnsupported) fail();
              return testExpr(spec.property, spec.map(match));
          });
          const nameRule = P.MonadicParser.regexp(getNamesRegex(name, spec.abbr)).trim(P.MonadicParser.optWhitespace);
          const toAtomGroups = (test: any) => B.struct.generator.atomGroups({ [spec.level]: test });

          if (!spec.isNumeric) {
              rules.push(nameRule.then(valueRule).map(toAtomGroups));
              continue;
          }

          // Numeric properties: "<name> <op> <value>"
          const comparison = P.MonadicParser.seq(
              P.MonadicParser.regexp(/>=|<=|=|!=|>|</).trim(P.MonadicParser.optWhitespace),
              P.MonadicParser.regexp(spec.regex).map(spec.map)
          );
          const numericRule = nameRule.then(comparison).map((pair: any) => {
              if (spec.isUnsupported) fail();
              return testExpr(spec.property, { op: pair[0], val: pair[1] });
          }).map(toAtomGroups);
          rules.push(numericRule);
      }

      return rules;
  }
  /**
   * Creates one parser per keyword that matches the keyword name (or one of
   * its abbreviations) and maps the match with the keyword's `map`. Keywords
   * without a `map` throw `keyword '<name>' not supported` when matched.
   *
   * @param keywords keyword definitions keyed by name
   * @returns the parsers, ordered by `strLenSortFn` on the keyword names
   */
  export function getKeywordRules(keywords: KeywordDict) {
      const names = Object.keys(keywords).sort(strLenSortFn);
      const keywordsList: P.MonadicParser<any>[] = names.map(name => {
          const spec = keywords[name];
          const mapFn = spec.map ? spec.map : makeError(`keyword '${name}' not supported`);
          return P.MonadicParser.regexp(getNamesRegex(name, spec.abbr)).map(mapFn);
      });
      return keywordsList;
  }
  /**
   * Creates one parser per function of the form `<name>( <argument> )`.
   *
   * The function name is matched case-insensitively; note that it is used as
   * the regular-expression source as-is (it is not escaped). Whitespace inside
   * the parentheses is consumed by the bracket rules. The parsed argument is
   * mapped with the function's `map`; functions without a `map` throw
   * `function '<name>' not supported` when matched.
   *
   * @param functions function definitions keyed by name
   * @param argRule parser for the argument between the parentheses
   * @returns the parsers, ordered by `strLenSortFn` on the function names
   */
  export function getFunctionRules(functions: FunctionDict, argRule: P.MonadicParser<any>) {
      const begRule = P.MonadicParser.regexp(/\(\s*/);
      const endRule = P.MonadicParser.regexp(/\s*\)/);
      const names = Object.keys(functions).sort(strLenSortFn);

      const functionsList: P.MonadicParser<any>[] = names.map(name => {
          const spec = functions[name];
          const mapFn = spec.map ? spec.map : makeError(`function '${name}' not supported`);
          const nameRule = P.MonadicParser.regexp(new RegExp(name, 'i'));
          return nameRule.skip(begRule).then(argRule).skip(endRule).map(mapFn);
      });
      return functionsList;
  }
  271. // const rule = P.regex(getNamesRegex(name, ps.abbr)).lookahead(lookahead).map(() => {
  272. // if (ps.isUnsupported) errorFn()
  273. // return ps.property
  274. // })
  /**
   * Creates one parser per property that matches the property name (or one of
   * its abbreviations), applies `lookahead` after it, and yields the
   * property's `property` expression. Matching a property flagged
   * `isUnsupported` throws `property '<name>' not supported`.
   *
   * @param properties property definitions keyed by name
   * @param lookahead pattern passed to the parser's `lookahead`
   * @returns the parsers, ordered by `strLenSortFn` on the property names
   */
  export function getPropertyNameRules(properties: PropertyDict, lookahead: RegExp) {
      const names = Object.keys(properties).sort(strLenSortFn);
      const list: P.MonadicParser<any>[] = names.map(name => {
          const spec = properties[name];
          const fail = makeError(`property '${name}' not supported`);
          const nameRule = (P.MonadicParser as any).regexp(getNamesRegex(name, spec.abbr));
          return nameRule.lookahead(lookahead).map(() => {
              if (spec.isUnsupported) fail();
              return spec.property;
          });
      });
      return list;
  }
  /**
   * Collects the names and abbreviations of all properties, keywords and
   * operators, in that order.
   *
   * Note: the `functions` argument is accepted but not read; function names
   * are not part of the result.
   *
   * @param properties property definitions keyed by name
   * @param keywords keyword definitions keyed by name
   * @param operators operator definitions
   * @param functions currently unused
   * @returns the collected words (duplicates are not removed)
   */
  export function getReservedWords(properties: PropertyDict, keywords: KeywordDict, operators: OperatorList, functions?: FunctionDict) {
      const words: string[] = [];
      const add = (name: string, abbr?: string[]) => {
          words.push(name);
          if (abbr) words.push(...abbr);
      };

      for (const name in properties) {
          add(name, properties[name].abbr);
      }
      for (const name in keywords) {
          add(name, keywords[name].abbr);
      }
      operators.forEach(o => add(o.name, o.abbr));

      return words;
  }
  /**
   * Builds a set expression (`B.core.type.set`) from the given ids, each
   * converted with `B.atomName`.
   *
   * @param ids atom names
   */
  export function atomNameSet(ids: string[]) {
      const names = ids.map(B.atomName);
      return B.core.type.set(names);
  }
  /**
   * Wraps `e` in a `queryInSelection` whose query is
   * `B.struct.generator.all()`.
   *
   * @param e selection expression
   */
  export function asAtoms(e: Expression) {
      const generator = B.struct.generator;
      const args = { 0: e, query: generator.all() };
      return generator.queryInSelection(args);
  }
  /**
   * Converts a raw value into the representation required by `property`,
   * selected by the property's symbol name (`property.head.name`):
   *  - `label_atom_id`: `B.atomName(value)`
   *  - `element-symbol`: `B.es(value)`
   *  - `secondary-structure-flags`: `B.struct.type.secondaryStructureFlags`;
   *    when `sstrucDict` is given, the upper-cased value is looked up in it
   *    first (falling back to `'none'`) and wrapped in an array
   *  - anything else: the value is returned unchanged
   *
   * @param property property expression the value belongs to
   * @param value raw value
   * @param sstrucDict optional lookup table for secondary-structure values
   */
  export function wrapValue(property: any, value: any, sstrucDict?: any) {
      const symbolName = property.head.name;

      if (symbolName === 'structure-query.atom-property.macromolecular.label_atom_id') {
          return B.atomName(value);
      }
      if (symbolName === 'structure-query.atom-property.core.element-symbol') {
          return B.es(value);
      }
      if (symbolName === 'structure-query.atom-property.macromolecular.secondary-structure-flags') {
          const flag = sstrucDict ? [sstrucDict[value.toUpperCase()] || 'none'] : value;
          return B.struct.type.secondaryStructureFlags([flag]);
      }
      return value;
  }
  // Common prefix of the macromolecular atom-property symbol names.
  const propPrefix = 'structure-query.atom-property.macromolecular.';
  // Property names (without prefix) grouped by the level they are tested on.
  // NOTE(review): some entries are camelCase (e.g. `secondaryStructureFlags`),
  // while elsewhere in this file the same symbol is spelled
  // `secondary-structure-flags`; such entries may never match - confirm.
  const entityProps = ['entityKey', 'label_entity_id', 'entityType'];
  const chainProps = ['chainKey', 'label_asym_id', 'label_entity_id', 'auth_asym_id', 'entityType'];
  const residueProps = ['residueKey', 'label_comp_id', 'label_seq_id', 'auth_comp_id', 'auth_seq_id', 'pdbx_formal_charge', 'secondaryStructureKey', 'secondaryStructureFlags', 'isModified', 'modifiedParentName'];

  /**
   * Determines the test level for `property` from its symbol name.
   *
   * Only names starting with `propPrefix` are looked up; the entity list is
   * checked first, then the chain list, then the residue list.
   *
   * @param property property expression (its `head.name` is read)
   * @returns `'entity-test'`, `'chain-test'`, `'residue-test'`, or
   *   `'atom-test'` when nothing else applies
   */
  export function testLevel(property: any): string {
      const fullName = property.head.name;
      if (!fullName.startsWith(propPrefix)) return 'atom-test';

      const name = fullName.slice(propPrefix.length);
      const levels: [string[], string][] = [
          [entityProps, 'entity-test'],
          [chainProps, 'chain-test'],
          [residueProps, 'residue-test'],
      ];
      for (const [props, level] of levels) {
          if (props.indexOf(name) !== -1) return level;
      }
      return 'atom-test';
  }
  // Symbol names of properties whose values are flag sets.
  const flagProps = [
      'structure-query.atom-property.macromolecular.secondary-structure-flags'
  ];

  /**
   * Builds the test of `property` against a list of already wrapped values.
   *
   * - Flag properties: the first argument (`args[0]`) of every value is merged
   *   into one flag list, which is tested with `B.core.flags.hasAny` using the
   *   `head` of the first value.
   * - Other properties: equality for a single value, set membership for
   *   several values, `undefined` for an empty list.
   *
   * @param property property expression (its `head.name` is read)
   * @param values wrapped values to test against
   */
  export function valuesTest(property: any, values: any[]) {
      const isFlagProperty = flagProps.indexOf(property.head.name) !== -1;

      if (isFlagProperty) {
          const head = values[0].head;
          const merged: any[] = [];
          values.forEach(v => {
              merged.push(...v.args[0]);
          });
          return B.core.flags.hasAny([property, { head: head, args: merged }]);
      }

      if (values.length === 1) {
          return B.core.rel.eq([property, values[0]]);
      }
      if (values.length > 1) {
          return B.core.set.has([B.core.type.set(values), property]);
      }
      return undefined;
  }
  /**
   * Builds an `atomGroups` query whose `residue-test` checks that
   * `B.ammp('label_comp_id')` is contained in the set of the given names.
   *
   * @param resnameList residue names to select
   */
  export function resnameExpr(resnameList: string[]) {
      const nameSet = B.core.type.set(resnameList);
      const compId = B.ammp('label_comp_id');
      const residueTest = B.core.set.has([nameSet, compId]);
      return B.struct.generator.atomGroups({ 'residue-test': residueTest });
  }
  365. }