helper.ts 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395
  1. /**
  2. * Copyright (c) 2017-2021 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * @author Alexander Rose <alexander.rose@weirdbyte.de>
  5. * @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com>
  6. * @author Koya Sakuma
  7. */
  8. // import * as Q from 'parsimmon';
  9. import * as P from '../../mol-util/monadic-parser';
  10. import { MolScriptBuilder } from '../../mol-script/language/builder';
  11. const B = MolScriptBuilder;
  12. import { Expression } from '../language/expression';
  13. import { KeywordDict, PropertyDict, FunctionDict, OperatorList } from './types';
  /**
   * Escapes every character that has a special meaning in a regular expression
   * so that the returned text can be embedded in a pattern and match literally.
   *
   * @param s text to embed in a regular-expression source
   * @returns `s` with each of `\ ^ $ * + ? . ( ) | [ ] { }` prefixed by a backslash
   */
  export function escapeRegExp(s: string): string {
      // The `String(...)` coercion is kept so that untyped callers passing a
      // non-string value get exactly the same result as before.
      return String(s).replace(/[\\^$*+?.()|[\]{}]/g, '\\$&');
  }
  /**
   * Builds a parser for zero or more prefix operators followed by an operand.
   *
   * The returned parser first tries `opParser` followed by itself; the parsed
   * values are spread into `mapFn`. When that alternative fails it falls back to
   * `nextParser`, so an input without any prefix operator is valid. The parser
   * is wrapped in `lazy` because it refers to itself.
   *
   * @param opParser parser for the prefix operator
   * @param nextParser parser for the operand
   * @param mapFn called as `mapFn(op, operand)` for every operator that was parsed
   */
  export function prefix(opParser: P.MonadicParser<any>, nextParser: P.MonadicParser<any>, mapFn: any) {
      const self: P.MonadicParser<any> = P.MonadicParser.lazy(() => {
          const operatorThenOperand = P.MonadicParser.seq(opParser, self);
          const applied = operatorThenOperand.map(parts => mapFn(...parts));
          return applied.or(nextParser);
      });
      return self;
  }
  /**
   * Builds a parser for an operand followed by zero or more postfix operators.
   *
   * Ideally this would mirror `prefix` with the order swapped
   * (`seq(parser, opParser).or(nextParser)`), but that recurses forever because
   * the first thing `parser` would do is run `parser` again. Trying
   * `nextParser.or(seq(parser, opParser))` does not help either: `or` takes the
   * first alternative that matches, so the postfix operators would never be
   * looked at.
   *
   * Instead the operand is parsed once, followed by a flat list of as many
   * operators as possible, and the list is folded into nested `mapFn` calls.
   *
   * Example:
   *
   *   INPUT  :: "4!!!"
   *   PARSE  :: [4, ["factorial", "factorial", "factorial"]]
   *   REDUCE :: mapFn("factorial", mapFn("factorial", mapFn("factorial", 4)))
   *
   * @param opParser parser for the postfix operator
   * @param nextParser parser for the operand
   * @param mapFn called as `mapFn(op, accumulated)` for each operator, left to right
   */
  export function postfix(opParser: P.MonadicParser<any>, nextParser: P.MonadicParser<any>, mapFn: any) {
      // `seq(...).map(...)` is used instead of `seqMap`, which was noted as missing
      // from monadic-parser.ts; `seq` and `map` are already used by `prefix`.
      return P.MonadicParser.seq(nextParser, opParser.many()).map((parsed: any) => {
          const [operand, suffixes] = parsed;
          return suffixes.reduce((acc: any, op: any) => mapFn(op, acc), operand);
      });
  }
  /**
   * Builds a parser for right-associative binary operators
   * (e.g. 1^2^3 is 1^(2^3), not (1^2)^3).
   *
   * The left operand is parsed with `nextParser`; then either an operator
   * followed by a recursive parse of the right-hand side is consumed and passed
   * to `mapFn`, or, when no operator follows, the left operand is returned as is.
   *
   * @param opParser parser for all operators at this precedence level
   * @param nextParser parser for everything at the next precedence level
   * @param mapFn called as `mapFn(op, left, right)`, the same argument order
   *     `binaryLeft` uses
   */
  export function binaryRight(opParser: P.MonadicParser<any>, nextParser: P.MonadicParser<any>, mapFn: any) {
      const parser: P.MonadicParser<any> = P.MonadicParser.lazy(() =>
          nextParser.chain(next =>
              P.MonadicParser.seq(
                  opParser,
                  // Re-inject the already parsed left operand so the sequence
                  // yields [op, left, right].
                  P.MonadicParser.of(next),
                  parser
              )
                  // Apply the caller's mapper. Previously the raw triple was
                  // logged to the console and returned, leaving `mapFn` unused.
                  .map((parts: any) => mapFn(...parts))
                  .or(P.MonadicParser.of(next))
          )
      );
      return parser;
  }
  /**
   * Builds a parser for left-associative binary operators
   * (e.g. 1-2-3 is (1-2)-3, not 1-(2-3)).
   *
   * As with `postfix`, recursing in the desired direction is not possible, so
   * the first operand is parsed, followed by a flat list of `[op, operand]`
   * chunks, and the list is folded into nested `mapFn` calls.
   *
   * Example:
   *
   *   INPUT  :: "1+2+3"
   *   PARSE  :: [1, [["+", 2], ["+", 3]]]
   *   REDUCE :: mapFn("+", mapFn("+", 1, 2), 3)
   *
   * @param opParser parser for all operators at this precedence level
   * @param nextParser parser for everything at the next precedence level
   * @param mapFn called as `mapFn(op, left, right)` for each chunk, left to right
   */
  export function binaryLeft(opParser: P.MonadicParser<any>, nextParser: P.MonadicParser<any>, mapFn: any) {
      // `seq(...).map(...)` replaces `seqMap`, which `postfix` notes is not
      // available in monadic-parser.ts.
      return P.MonadicParser.seq(
          nextParser,
          P.MonadicParser.seq(opParser, nextParser).many()
      ).map((parsed: any) => {
          const [first, rest] = parsed;
          return rest.reduce((acc: any, chunk: any) => {
              const [op, operand] = chunk;
              return mapFn(op, acc, operand);
          }, first);
      });
  }
  /**
   * Combines operators of decreasing binding strength into a single parser.
   *
   * Starting from `rule`, every entry of `opList` wraps the parser built so far
   * by calling `level.type(level.rule, parserSoFar, mapFn)`. For entries flagged
   * `isUnsupported` the mapper is replaced by a function that throws
   * `operator '<name>' not supported`.
   *
   * @param opList operator descriptions; `type`, `rule`, `map`, `name` and
   *     `isUnsupported` are the fields read here
   * @param rule parser for the operands of the first operator in the list
   * @returns the parser produced by the last entry, or `rule` for an empty list
   */
  export function combineOperators(opList: any[], rule: P.MonadicParser<any>) {
      let combined = rule;
      opList.forEach(level => {
          const mapFn = level.isUnsupported
              ? makeError(`operator '${level.name}' not supported`)
              : level.map;
          combined = level.type(level.rule, combined, mapFn);
      });
      return combined;
  }
  /**
   * Parser for an infix operator: the `whitespace` parser, then `re`, then the
   * `whitespace` parser again. Only the result of the regexp parser is kept.
   *
   * Earlier experiments used `optWhitespace` with a whitespace lookahead, or
   * only skipped trailing whitespace; the current form surrounds the operator
   * with `whitespace` on both sides.
   *
   * @param re pattern of the operator
   * @param group forwarded to `regexp` (presumably the capture group to return)
   */
  export function infixOp(re: RegExp, group: number = 0) {
      const operator = P.MonadicParser.regexp(re, group);
      const operatorThenSpace = operator.skip(P.MonadicParser.whitespace);
      return P.MonadicParser.whitespace.then(operatorThenSpace);
  }
  /**
   * Parser for a prefix operator: `re` followed by the `whitespace` parser,
   * whose result is discarded.
   *
   * @param re pattern of the operator
   * @param group forwarded to `regexp` (presumably the capture group to return)
   */
  export function prefixOp(re: RegExp, group: number = 0) {
      const operator = P.MonadicParser.regexp(re, group);
      return operator.skip(P.MonadicParser.whitespace);
  }
  /**
   * Parser for a postfix operator: the `whitespace` parser, whose result is
   * discarded, followed by `re`.
   *
   * @param re pattern of the operator
   * @param group forwarded to `regexp` (presumably the capture group to return)
   */
  export function postfixOp(re: RegExp, group: number = 0) {
      const operator = P.MonadicParser.regexp(re, group);
      return P.MonadicParser.whitespace.then(operator);
  }
  126. // export function functionOp (re: RegExp, rule: P.MonadicParser<any>) {
  127. // return P.MonadicParser.regexp(re, group).wrap(P.string('('), P.string(')'))
  128. // }
  /**
   * Parser for operators of the form `<name> <number> OF` (case-insensitive).
   * The parsed result is the number, converted with `parseFloat`.
   *
   * @param name operator name; inserted into the pattern without escaping
   * @param short optional alternative spelling; escaped before insertion
   */
  export function ofOp(name: string, short?: string) {
      let alternatives = name;
      if (short) {
          alternatives = `${name}|${escapeRegExp(short)}`;
      }
      const pattern = RegExp(`(${alternatives})\\s+([-+]?[0-9]*\\.?[0-9]+)\\s+OF`, 'i');
      // Group 2 is the numeric distance.
      return infixOp(pattern, 2).map(parseFloat);
  }
  /**
   * Creates a function that throws an `Error` with the given message whenever
   * it is called, whatever arguments it receives.
   *
   * @param msg message of the thrown error
   * @returns a function that never returns normally
   */
  export function makeError(msg: string) {
      return (): never => {
          throw new Error(msg);
      };
  }
  /**
   * Joins selections with a logical AND.
   *
   * @param selections expressions to combine
   * @returns the single element when there is exactly one, a
   *     `B.core.logic.and` expression when there are several, and `undefined`
   *     for an empty list
   */
  export function andExpr(selections: any[]) {
      const count = selections.length;
      if (count > 1) return B.core.logic.and(selections);
      return count === 1 ? selections[0] : undefined;
  }
  /**
   * Joins selections with a logical OR.
   *
   * @param selections expressions to combine
   * @returns the single element when there is exactly one, a
   *     `B.core.logic.or` expression when there are several, and `undefined`
   *     for an empty list
   */
  export function orExpr(selections: any[]) {
      const count = selections.length;
      if (count > 1) return B.core.logic.or(selections);
      return count === 1 ? selections[0] : undefined;
  }
  /**
   * Builds the test expression that compares `property` with `args`.
   *
   * The shape of `args` selects the kind of test, checked in this order:
   *  - `{ op, val }`   -> relational test (`=`, `!=`, `>`, `<`, `>=`, `<=`)
   *  - `{ flags }`     -> `B.core.flags.hasAny`
   *  - `{ min, max }`  -> `B.core.rel.inRange`
   *  - non-array value -> equality with that value
   *  - array, length > 1 -> membership in the set of the values
   *  - any other array -> equality with the first element
   *
   * @param property expression of the property being tested
   * @param args value description as listed above
   * @throws Error when `args.op` is not one of the supported operators
   */
  export function testExpr(property: any, args: any) {
      if (args) {
          if (args.op !== undefined && args.val !== undefined) {
              const operands = [property, args.val];
              switch (args.op) {
                  case '=': return B.core.rel.eq(operands);
                  case '!=': return B.core.rel.neq(operands);
                  case '>': return B.core.rel.gr(operands);
                  case '<': return B.core.rel.lt(operands);
                  case '>=': return B.core.rel.gre(operands);
                  case '<=': return B.core.rel.lte(operands);
                  default: throw new Error(`operator '${args.op}' not supported`);
              }
          }
          if (args.flags !== undefined) {
              return B.core.flags.hasAny([property, args.flags]);
          }
          if (args.min !== undefined && args.max !== undefined) {
              return B.core.rel.inRange([property, args.min, args.max]);
          }
      }

      // Falsy values and plain (non-array) values are compared directly.
      if (!Array.isArray(args)) {
          return B.core.rel.eq([property, args]);
      }

      return args.length > 1
          ? B.core.set.has([B.core.type.set(args), property])
          : B.core.rel.eq([property, args[0]]);
  }
  /**
   * Wraps `selection` in a `queryInSelection` over `B.struct.generator.all()`
   * with the `in-complement` flag set.
   *
   * @param selection expression to invert
   */
  export function invertExpr(selection: Expression) {
      const everything = B.struct.generator.all();
      return B.struct.generator.queryInSelection({
          0: selection,
          query: everything,
          'in-complement': true
      });
  }
  /**
   * Sort comparator that orders strings from longest to shortest.
   *
   * Strings of equal length compare as equal (0). A comparator that never
   * returns 0 is inconsistent (`cmp(a, b)` and `cmp(b, a)` would both be
   * negative), which leaves the resulting order up to the sort implementation.
   *
   * @returns a positive number when `b` is longer than `a`, a negative number
   *     when `a` is longer than `b`, and 0 when both have the same length
   */
  export function strLenSortFn(a: string, b: string) {
      return b.length - a.length;
  }
  /**
   * Builds a case-insensitive regular expression that matches `name` or any of
   * its abbreviations. The alternatives are ordered with `strLenSortFn` and
   * escaped with `escapeRegExp` before being joined with `|`.
   *
   * @param name full name
   * @param abbr optional abbreviations of the name
   */
  function getNamesRegex(name: string, abbr?: string[]) {
      const candidates = abbr ? [name].concat(abbr) : [name];
      candidates.sort(strLenSortFn);
      const source = candidates.map(escapeRegExp).join('|');
      return RegExp(`${source}`, 'i');
  }
  /**
   * Creates one value parser per non-numeric property.
   *
   * Each parser matches the property's `regex`, converts the match with the
   * property's `map` function and turns it into a test expression via
   * `testExpr`. Properties flagged `isUnsupported` throw
   * `property '<name>' not supported` when their parser matches. Properties
   * flagged `isNumeric` are left out of the result.
   *
   * @param properties property descriptions keyed by name
   * @returns parsers keyed by property name, inserted in `strLenSortFn` order
   */
  export function getPropertyRules(properties: PropertyDict) {
      const rulesByName: { [name: string]: P.MonadicParser<any> } = {};
      const sortedNames = Object.keys(properties).sort(strLenSortFn);

      for (const name of sortedNames) {
          const spec = properties[name];
          const raiseUnsupported = makeError(`property '${name}' not supported`);
          const valueRule = P.MonadicParser.regexp(spec.regex).map((matched: any) => {
              if (spec.isUnsupported) raiseUnsupported();
              return testExpr(spec.property, spec.map(matched));
          });

          if (spec.isNumeric) continue;
          rulesByName[name] = valueRule;
      }

      return rulesByName;
  }
  /**
   * Creates one parser per property that matches the property name (or one of
   * its abbreviations) followed by a value.
   *
   * - Numeric properties expect a comparison operator (`>=`, `<=`, `=`, `!=`,
   *   `>`, `<`) and a value; both are passed to `testExpr` as `{ op, val }`.
   * - All other properties expect a value that is converted with the property's
   *   `map` function and passed to `testExpr`.
   *
   * In both cases the resulting test is wrapped in
   * `B.struct.generator.atomGroups` under the key given by the property's
   * `level`. Properties flagged `isUnsupported` throw
   * `property '<name>' not supported` when their parser matches.
   *
   * @param properties property descriptions keyed by name
   * @returns the parsers, in `strLenSortFn` order of the property names
   */
  export function getNamedPropertyRules(properties: PropertyDict) {
      const rules: P.MonadicParser<any>[] = [];

      for (const name of Object.keys(properties).sort(strLenSortFn)) {
          const spec = properties[name];
          const raiseUnsupported = makeError(`property '${name}' not supported`);
          const valueRule = P.MonadicParser.regexp(spec.regex).map((matched: any) => {
              if (spec.isUnsupported) raiseUnsupported();
              return testExpr(spec.property, spec.map(matched));
          });
          const nameRule = P.MonadicParser.regexp(getNamesRegex(name, spec.abbr)).trim(P.MonadicParser.optWhitespace);
          const toAtomGroups = (test: any) => B.struct.generator.atomGroups({ [spec.level]: test });

          if (!spec.isNumeric) {
              rules.push(nameRule.then(valueRule).map(toAtomGroups));
              continue;
          }

          const operatorRule = P.MonadicParser.regexp(/>=|<=|=|!=|>|</).trim(P.MonadicParser.optWhitespace);
          const operandRule = P.MonadicParser.regexp(spec.regex).map(spec.map);
          const comparisonRule = nameRule
              .then(P.MonadicParser.seq(operatorRule, operandRule))
              .map((pair: any) => {
                  if (spec.isUnsupported) raiseUnsupported();
                  return testExpr(spec.property, { op: pair[0], val: pair[1] });
              })
              .map(toAtomGroups);
          rules.push(comparisonRule);
      }

      return rules;
  }
  /**
   * Creates one parser per keyword. Each parser matches the keyword name or
   * one of its abbreviations and maps the match with the keyword's `map`
   * function; keywords without a `map` function throw
   * `keyword '<name>' not supported` when matched.
   *
   * @param keywords keyword descriptions keyed by name
   * @returns the parsers, in `strLenSortFn` order of the keyword names
   */
  export function getKeywordRules(keywords: KeywordDict) {
      const sortedNames = Object.keys(keywords).sort(strLenSortFn);
      return sortedNames.map((name): P.MonadicParser<any> => {
          const spec = keywords[name];
          const mapFn = spec.map ? spec.map : makeError(`keyword '${name}' not supported`);
          return P.MonadicParser.regexp(getNamesRegex(name, spec.abbr)).map(mapFn);
      });
  }
  /**
   * Creates one parser per function of the form `name( argument )`.
   *
   * The function name is matched case-insensitively and used as a pattern
   * without escaping. The argument is parsed with `argRule` and mapped with the
   * function's `map`; functions without a `map` throw
   * `function '<name>' not supported` when matched.
   *
   * @param functions function descriptions keyed by name
   * @param argRule parser for the argument between the parentheses
   * @returns the parsers, in `strLenSortFn` order of the function names
   */
  export function getFunctionRules(functions: FunctionDict, argRule: P.MonadicParser<any>) {
      const openParen = P.MonadicParser.regexp(/\(\s*/);
      const closeParen = P.MonadicParser.regexp(/\s*\)/);

      const sortedNames = Object.keys(functions).sort(strLenSortFn);
      return sortedNames.map((name): P.MonadicParser<any> => {
          const spec = functions[name];
          const mapFn = spec.map ? spec.map : makeError(`function '${name}' not supported`);
          const nameRule = P.MonadicParser.regexp(new RegExp(name, 'i'));
          return nameRule.skip(openParen).then(argRule).skip(closeParen).map(mapFn);
      });
  }
  259. // const rule = P.regex(getNamesRegex(name, ps.abbr)).lookahead(lookahead).map(() => {
  260. // if (ps.isUnsupported) errorFn()
  261. // return ps.property
  262. // })
  /**
   * Creates one parser per property that matches the property name (or one of
   * its abbreviations) when it is followed by `lookahead`, and yields the
   * property's `property` value. Properties flagged `isUnsupported` throw
   * `property '<name>' not supported` when matched.
   *
   * @param properties property descriptions keyed by name
   * @param lookahead pattern handed to the parser's `lookahead` method
   * @returns the parsers, in `strLenSortFn` order of the property names
   */
  export function getPropertyNameRules(properties: PropertyDict, lookahead: RegExp) {
      const sortedNames = Object.keys(properties).sort(strLenSortFn);
      return sortedNames.map((name): P.MonadicParser<any> => {
          const spec = properties[name];
          const raiseUnsupported = makeError(`property '${name}' not supported`);
          const nameRule = (P.MonadicParser as any).regexp(getNamesRegex(name, spec.abbr));
          return nameRule.lookahead(lookahead).map(() => {
              if (spec.isUnsupported) raiseUnsupported();
              return spec.property;
          });
      });
  }
  /**
   * Collects the names and abbreviations of all properties, keywords and
   * operators, in that order.
   *
   * @param properties property descriptions keyed by name
   * @param keywords keyword descriptions keyed by name
   * @param operators operator descriptions
   * @param functions accepted for callers that pass it, but not consulted here
   * @returns every name, each followed by its abbreviations (if any)
   */
  export function getReservedWords(properties: PropertyDict, keywords: KeywordDict, operators: OperatorList, functions?: FunctionDict) {
      const words: string[] = [];
      const add = (name: string, abbr?: string[]) => {
          words.push(name);
          if (abbr) words.push(...abbr);
      };

      for (const name in properties) {
          add(name, properties[name].abbr);
      }
      for (const name in keywords) {
          add(name, keywords[name].abbr);
      }
      operators.forEach(o => add(o.name, o.abbr));

      return words;
  }
  /**
   * Maps every id through `B.atomName` and wraps the results in a
   * `B.core.type.set` expression.
   *
   * @param ids atom names
   */
  export function atomNameSet(ids: string[]) {
      const atomNames = ids.map(B.atomName);
      return B.core.type.set(atomNames);
  }
  /**
   * Wraps `e` in a `queryInSelection` over `B.struct.generator.all()`.
   *
   * @param e selection expression
   */
  export function asAtoms(e: Expression) {
      const everything = B.struct.generator.all();
      return B.struct.generator.queryInSelection({ 0: e, query: everything });
  }
  /**
   * Converts a raw value into the expression expected by `property`, chosen by
   * the property's symbol name (`property.head.name`):
   *  - `label_atom_id`             -> `B.atomName(value)`
   *  - `element-symbol`            -> `B.es(value)`
   *  - `secondary-structure-flags` -> `B.struct.type.secondaryStructureFlags`;
   *    when `sstrucDict` is given the upper-cased value is looked up in it
   *    first, falling back to `'none'`
   *  - anything else               -> the value unchanged
   *
   * @param property property expression; only `head.name` is read
   * @param value raw value
   * @param sstrucDict optional lookup from upper-cased secondary-structure
   *     names to flag names
   */
  export function wrapValue(property: any, value: any, sstrucDict?: any) {
      const symbolName = property.head.name;

      if (symbolName === 'structure-query.atom-property.macromolecular.label_atom_id') {
          return B.atomName(value);
      }
      if (symbolName === 'structure-query.atom-property.core.element-symbol') {
          return B.es(value);
      }
      if (symbolName === 'structure-query.atom-property.macromolecular.secondary-structure-flags') {
          const flags = sstrucDict ? [sstrucDict[value.toUpperCase()] || 'none'] : value;
          return B.struct.type.secondaryStructureFlags([flags]);
      }
      return value;
  }
  const propPrefix = 'structure-query.atom-property.macromolecular.';
  const entityProps = ['entityKey', 'label_entity_id', 'entityType'];
  const chainProps = ['chainKey', 'label_asym_id', 'label_entity_id', 'auth_asym_id', 'entityType'];
  const residueProps = ['residueKey', 'label_comp_id', 'label_seq_id', 'auth_comp_id', 'auth_seq_id', 'pdbx_formal_charge', 'secondaryStructureKey', 'secondaryStructureFlags', 'isModified', 'modifiedParentName'];

  /**
   * Determines the test level for a property from its symbol name
   * (`property.head.name`).
   *
   * Names that start with `propPrefix` are looked up, without the prefix, in
   * the entity, chain and residue lists, in that order, so a name listed more
   * than once (e.g. `label_entity_id`) resolves to the first level. Every other
   * name is tested per atom.
   *
   * NOTE(review): elsewhere in this file the secondary-structure symbol is
   * spelled `secondary-structure-flags`, whereas the lists above use camelCase
   * (`secondaryStructureFlags`, `residueKey`, ...). Confirm that the camelCase
   * entries match the real symbol names.
   *
   * @param property property expression; only `head.name` is read
   * @returns `'entity-test'`, `'chain-test'`, `'residue-test'` or `'atom-test'`
   */
  export function testLevel(property: any): string {
      const symbolName = property.head.name;
      if (symbolName.startsWith(propPrefix)) {
          const name = symbolName.slice(propPrefix.length);
          const levels: [string[], string][] = [
              [entityProps, 'entity-test'],
              [chainProps, 'chain-test'],
              [residueProps, 'residue-test']
          ];
          for (const [props, level] of levels) {
              if (props.indexOf(name) !== -1) return level;
          }
      }
      return 'atom-test';
  }
  const flagProps = [
      'structure-query.atom-property.macromolecular.secondary-structure-flags'
  ];

  /**
   * Builds the test expression that checks `property` against `values`.
   *
   * For flag properties (those listed in `flagProps`) the first argument of
   * every value is merged into a single flags expression, which reuses the head
   * of the first value and is tested with `B.core.flags.hasAny`. For all other
   * properties a single value is tested for equality and several values for
   * set membership.
   *
   * @param property property expression; `head.name` selects the kind of test
   * @param values value expressions; for flag properties each must provide
   *     `head` and an iterable `args[0]`
   * @returns the test expression, or `undefined` for an empty list of values
   *     of a non-flag property
   */
  export function valuesTest(property: any, values: any[]) {
      const isFlagProperty = flagProps.indexOf(property.head.name) !== -1;

      if (isFlagProperty) {
          const name = values[0].head;
          const flags: any[] = [];
          values.forEach(value => {
              flags.push(...value.args[0]);
          });
          return B.core.flags.hasAny([property, { head: name, args: flags }]);
      }

      if (values.length === 1) {
          return B.core.rel.eq([property, values[0]]);
      }
      if (values.length > 1) {
          return B.core.set.has([B.core.type.set(values), property]);
      }
      return undefined;
  }
  /**
   * Builds an `atomGroups` expression whose `residue-test` checks that
   * `label_comp_id` is a member of `resnameList`.
   *
   * @param resnameList residue names to select
   */
  export function resnameExpr(resnameList: string[]) {
      const residueTest = B.core.set.has([
          B.core.type.set(resnameList),
          B.ammp('label_comp_id')
      ]);
      return B.struct.generator.atomGroups({ 'residue-test': residueTest });
  }