monadic-parser.ts 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553
  1. /**
  2. * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * @author David Sehnal <david.sehnal@gmail.com>
  5. */
  6. /**
  7. * Adapted from Parsimmon (https://github.com/jneen/parsimmon)
  8. * Copyright (c) 2011-present J. Adkisson (http://jneen.net).
  9. */
  10. export class MonadicParser<A> {
  11. constructor(public _: MonadicParser.Action<A>) { }
  12. parse(input: string): MonadicParser.ParseResult<A> {
  13. const result = this.skip(MonadicParser.eof)._(input, 0);
  14. if (result.status) {
  15. return { success: true, value: result.value };
  16. }
  17. return { success: false, index: makeLineColumnIndex(input, result.furthest), expected: result.expected };
  18. };
  19. tryParse(str: string) {
  20. const result = this.parse(str);
  21. if (result.success) {
  22. return result.value;
  23. } else {
  24. const msg = formatError(str, result);
  25. const err = new Error(msg);
  26. throw err;
  27. }
  28. }
  29. or<B>(alternative: MonadicParser<B>): MonadicParser<A | B> {
  30. return MonadicParser.alt(this, alternative);
  31. }
  32. trim<B>(parser: MonadicParser<B> | string): MonadicParser<A> {
  33. return this.wrap(parser, parser);
  34. }
  35. wrap<L, R>(leftParser: MonadicParser<L> | string, rightParser: MonadicParser<R> | string): MonadicParser<A> {
  36. return seqPick(1,
  37. typeof leftParser === 'string' ? MonadicParser.string(leftParser) : leftParser,
  38. this,
  39. typeof rightParser === 'string' ? MonadicParser.string(rightParser) : rightParser);
  40. }
  41. thru<B>(wrapper: (p: MonadicParser<A>) => MonadicParser<B>) {
  42. return wrapper(this);
  43. }
  44. then<B>(next: MonadicParser<B>): MonadicParser<B> {
  45. return seqPick(1, this, next);
  46. }
  47. many() {
  48. return new MonadicParser((input, i) => {
  49. const accum: A[] = [];
  50. let result: MonadicParser.Result<A> | undefined = void 0;
  51. while (true) {
  52. result = mergeReplies(this._(input, i), result);
  53. if (result.status) {
  54. if (i === result.index) {
  55. throw new Error('infinite loop detected in .many() parser --- calling .many() on a parser which can accept zero characters is usually the cause');
  56. }
  57. i = result.index;
  58. accum.push(result.value);
  59. } else {
  60. return mergeReplies(makeSuccess(i, accum), result);
  61. }
  62. }
  63. });
  64. };
  65. times(min: number, _max?: number): MonadicParser<A[]> {
  66. const max = typeof _max === 'undefined' ? min : _max;
  67. return new MonadicParser((input, i) => {
  68. const accum: A[] = [];
  69. let result: MonadicParser.Result<A> | undefined = void 0;
  70. let prevResult: MonadicParser.Result<A> | undefined = void 0;
  71. let times: number;
  72. for (times = 0; times < min; times++) {
  73. result = this._(input, i);
  74. prevResult = mergeReplies(result, prevResult);
  75. if (result.status) {
  76. i = result.index;
  77. accum.push(result.value);
  78. } else {
  79. return prevResult as any;
  80. }
  81. }
  82. for (; times < max; times += 1) {
  83. result = this._(input, i);
  84. prevResult = mergeReplies(result, prevResult);
  85. if (result.status) {
  86. i = result.index;
  87. accum.push(result.value);
  88. } else {
  89. break;
  90. }
  91. }
  92. return mergeReplies(makeSuccess(i, accum), prevResult);
  93. });
  94. };
  95. result<B>(res: B) {
  96. return this.map(() => res);
  97. };
  98. atMost(n: number) {
  99. return this.times(0, n);
  100. };
  101. atLeast(n: number) {
  102. return MonadicParser.seq(this.times(n), this.many()).map(r => [...r[0], ...r[1]]);
  103. };
  104. map<B>(f: (a: A) => B): MonadicParser<B> {
  105. return new MonadicParser((input, i) => {
  106. const result = this._(input, i);
  107. if (!result.status) {
  108. return result;
  109. }
  110. return mergeReplies(makeSuccess(result.index, f(result.value)), result);
  111. });
  112. }
  113. skip<B>(next: MonadicParser<B>): MonadicParser<A> {
  114. return seqPick(0, this, next);
  115. }
  116. mark(): MonadicParser<MonadicParser.Mark<A>> {
  117. return MonadicParser.seq(MonadicParser.index, this, MonadicParser.index).map(r => ({ start: r[0], value: r[1], end: r[2] }));
  118. }
  119. node(name: string): MonadicParser<MonadicParser.Node<A>> {
  120. return MonadicParser.seq(MonadicParser.index, this, MonadicParser.index).map(r => ({ name, start: r[0], value: r[1], end: r[2] }));
  121. };
  122. sepBy<B>(separator: MonadicParser<B>): MonadicParser<A[]> {
  123. return MonadicParser.sepBy(this, separator);
  124. }
  125. sepBy1<B>(separator: MonadicParser<B>): MonadicParser<A[]> {
  126. return MonadicParser.sepBy1(this, separator);
  127. }
  128. lookahead<B>(x: MonadicParser<B>) {
  129. return this.skip(MonadicParser.lookahead(x));
  130. };
  131. notFollowedBy<B>(x: MonadicParser<B>) {
  132. return this.skip(MonadicParser.notFollowedBy(x));
  133. };
  134. desc(expected: string) {
  135. return new MonadicParser((input, i) => {
  136. const reply = this._(input, i);
  137. if (!reply.status) {
  138. reply.expected = [expected];
  139. }
  140. return reply;
  141. });
  142. };
  143. fallback<B>(result: B) {
  144. return this.or(MonadicParser.succeed(result));
  145. };
  146. ap<B>(other: MonadicParser<(x: A) => B>): MonadicParser<B> {
  147. return MonadicParser.seq(other, this).map(([f, x]) => f(x));
  148. };
  149. chain<B>(f: (a: A) => MonadicParser<B>): MonadicParser<B> {
  150. return new MonadicParser<B>((input, i) => {
  151. const result = this._(input, i);
  152. if (!result.status) {
  153. return result as any;
  154. }
  155. const nextParser = f(result.value);
  156. return mergeReplies(nextParser._(input, result.index), result);
  157. });
  158. };
  159. }
  160. export namespace MonadicParser {
  161. export type Action<T> = (input: string, i: number) => MonadicParser.Result<T>
  162. export type ParseResult<T> = ParseSuccess<T> | ParseFailure;
  163. export interface Index {
  164. /** zero-based character offset */
  165. offset: number;
  166. /** one-based line offset */
  167. line: number;
  168. /** one-based column offset */
  169. column: number;
  170. }
  171. export interface ParseSuccess<T> {
  172. success: true,
  173. value: T
  174. }
  175. export interface ParseFailure {
  176. success: false,
  177. index: Index,
  178. expected: string[],
  179. }
  180. export interface Mark<T> {
  181. start: Index;
  182. end: Index;
  183. value: T;
  184. }
  185. export interface Node<T> extends Mark<T> {
  186. name: string
  187. }
  188. export interface Success<T> {
  189. status: true,
  190. value: T,
  191. index: number
  192. }
  193. export interface Failure {
  194. status: false,
  195. furthest: number,
  196. expected: string[]
  197. }
  198. export type Result<T> = Success<T> | Failure
  199. // export function createLanguage(parsers: any) {
  200. // const language: any = {};
  201. // for (const key of Object.keys(parsers)) {
  202. // (function (key) {
  203. // language[key] = lazy(() => parsers[key](language));
  204. // })(key);
  205. // }
  206. // return language;
  207. // }
  208. export function seq<A>(a: MonadicParser<A>): MonadicParser<[A]>
  209. export function seq<A, B>(a: MonadicParser<A>, b: MonadicParser<B>): MonadicParser<[A, B]>
  210. export function seq<A, B, C>(a: MonadicParser<A>, b: MonadicParser<B>, c: MonadicParser<C>): MonadicParser<[A, B, C]>
  211. export function seq<A, B, C, D>(a: MonadicParser<A>, b: MonadicParser<B>, c: MonadicParser<C>, d: MonadicParser<D>): MonadicParser<[A, B, C, D]>
  212. export function seq<A, B, C, D, E>(a: MonadicParser<A>, b: MonadicParser<B>, c: MonadicParser<C>, d: MonadicParser<D>, e: MonadicParser<E>): MonadicParser<[A, B, C, D, E]>
  213. export function seq<T>(...parsers: MonadicParser<T>[]): MonadicParser<T[]>
  214. export function seq(...parsers: MonadicParser<any>[]): MonadicParser<any[]> {
  215. const numParsers = parsers.length;
  216. return new MonadicParser<any[]>((input, index) => {
  217. let result: MonadicParser.Result<any> | undefined;
  218. const accum = new Array(numParsers);
  219. let i = index;
  220. for (let j = 0; j < numParsers; j++) {
  221. result = mergeReplies(parsers[j]._(input, i), result);
  222. if (!result.status) {
  223. return result;
  224. }
  225. accum[j] = result.value;
  226. i = result.index;
  227. }
  228. return mergeReplies(makeSuccess(i, accum), result);
  229. });
  230. }
  231. export function alt<A>(a: MonadicParser<A>): MonadicParser<A>
  232. export function alt<A, B>(a: MonadicParser<A>, b: MonadicParser<B>): MonadicParser<A | B>
  233. export function alt<A, B, C>(a: MonadicParser<A>, b: MonadicParser<B>, c: MonadicParser<C>): MonadicParser<A | B | C>
  234. export function alt<A, B, C, D>(a: MonadicParser<A>, b: MonadicParser<B>, c: MonadicParser<C>, d: MonadicParser<D>): MonadicParser<A | B | C | D>
  235. export function alt<A, B, C, D, E>(a: MonadicParser<A>, b: MonadicParser<B>, c: MonadicParser<C>, d: MonadicParser<D>, e: MonadicParser<E>): MonadicParser<A | B | C | D | E>
  236. export function alt<T>(...parsers: MonadicParser<T>[]): MonadicParser<T[]>
  237. export function alt(...parsers: MonadicParser<any>[]): MonadicParser<any> {
  238. const numParsers = parsers.length;
  239. if (numParsers === 0) {
  240. return fail('zero alternates');
  241. }
  242. return new MonadicParser((input, i) => {
  243. let result: MonadicParser.Result<any> | undefined;
  244. for (let j = 0; j < parsers.length; j++) {
  245. result = mergeReplies(parsers[j]._(input, i), result);
  246. if (result.status) {
  247. return result;
  248. }
  249. }
  250. return result!;
  251. });
  252. }
  253. export function sepBy<A, B>(parser: MonadicParser<A>, separator: MonadicParser<B>): MonadicParser<A[]> {
  254. return sepBy1(parser, separator).or(succeed([]));
  255. }
  256. export function sepBy1<A, B>(parser: MonadicParser<A>, separator: MonadicParser<B>) {
  257. const pairs = separator.then(parser).many();
  258. return seq(parser, pairs).map(r => [r[0], ...r[1]]);
  259. }
  260. export function string(str: string) {
  261. const expected = `'${str}'`;
  262. if (str.length === 1) {
  263. const code = str.charCodeAt(0);
  264. return new MonadicParser((input, i) => input.charCodeAt(i) === code ? makeSuccess(i + 1, str) : makeFailure(i, expected));
  265. }
  266. return new MonadicParser((input, i) => {
  267. const j = i + str.length;
  268. if (input.slice(i, j) === str) return makeSuccess(j, str);
  269. else return makeFailure(i, expected);
  270. });
  271. }
  272. function flags(re: RegExp) {
  273. const s = '' + re;
  274. return s.slice(s.lastIndexOf('/') + 1);
  275. }
  276. function anchoredRegexp(re: RegExp) {
  277. return RegExp('^(?:' + re.source + ')', flags(re));
  278. }
  279. export function regexp(re: RegExp, group = 0) {
  280. const anchored = anchoredRegexp(re);
  281. const expected = '' + re;
  282. return new MonadicParser(function (input, i) {
  283. const match = anchored.exec(input.slice(i));
  284. if (match) {
  285. if (0 <= group && group <= match.length) {
  286. const fullMatch = match[0];
  287. const groupMatch = match[group];
  288. return makeSuccess(i + fullMatch.length, groupMatch);
  289. }
  290. const message = `invalid match group (0 to ${match.length}) in ${expected}`;
  291. return makeFailure(i, message);
  292. }
  293. return makeFailure(i, expected);
  294. });
  295. }
  296. export function succeed<A>(value: A) {
  297. return new MonadicParser((input, i) => makeSuccess(i, value));
  298. }
  299. export function fail(expected: string): MonadicParser<any> {
  300. return new MonadicParser((input, i) => makeFailure(i, expected));
  301. }
  302. export function lookahead<A>(x: MonadicParser<A> | string | RegExp): MonadicParser<null> {
  303. if (isParser(x)) {
  304. return new MonadicParser((input, i) => {
  305. const result = x._(input, i);
  306. if (result.status) {
  307. result.index = i;
  308. result.value = null as any;
  309. }
  310. return result as any;
  311. });
  312. } else if (typeof x === 'string') {
  313. return lookahead(string(x));
  314. } else if (x instanceof RegExp) {
  315. return lookahead(regexp(x));
  316. }
  317. throw new Error('not a string, regexp, or parser: ' + x);
  318. }
  319. export function notFollowedBy<A>(parser: MonadicParser<A>): MonadicParser<null> {
  320. return new MonadicParser((input, i) => {
  321. const result = parser._(input, i);
  322. return result.status
  323. ? makeFailure(i, 'not "' + input.slice(i, result.index) + '"')
  324. : makeSuccess(i, null);
  325. });
  326. }
  327. export function test(predicate: (char: string) => boolean): MonadicParser<string> {
  328. return new MonadicParser((input, i) => {
  329. const char = input.charAt(i);
  330. if (i < input.length && predicate(char)) {
  331. return makeSuccess(i + 1, char);
  332. } else {
  333. return makeFailure(i, 'a character ' + predicate);
  334. }
  335. });
  336. }
  337. export function oneOf(str: string) {
  338. return test(ch => str.indexOf(ch) >= 0);
  339. }
  340. export function noneOf(str: string) {
  341. return test(ch => str.indexOf(ch) < 0);
  342. }
  343. export function range(begin: string, end: string) {
  344. return test(ch => begin <= ch && ch <= end).desc(begin + '-' + end);
  345. }
  346. export function takeWhile(predicate: (ch: string) => boolean) {
  347. return new MonadicParser((input, i) => {
  348. let j = i;
  349. while (j < input.length && predicate(input.charAt(j))) {
  350. j++;
  351. }
  352. return makeSuccess(j, input.slice(i, j));
  353. });
  354. }
  355. export function lazy<T>(f: () => MonadicParser<T>) {
  356. const parser = new MonadicParser((input, i) => {
  357. const a = f()._;
  358. parser._ = a;
  359. return a(input, i);
  360. });
  361. return parser;
  362. }
  363. export function empty() {
  364. return fail('empty');
  365. }
  366. export const index = new MonadicParser(function (input, i) {
  367. return makeSuccess(i, makeLineColumnIndex(input, i));
  368. });
  369. export const anyChar = new MonadicParser<string>((input, i) => {
  370. if (i >= input.length) {
  371. return makeFailure(i, 'any character');
  372. }
  373. return makeSuccess(i + 1, input.charAt(i));
  374. });
  375. export const all = new MonadicParser(function (input, i) {
  376. return makeSuccess(input.length, input.slice(i));
  377. });
  378. export const eof = new MonadicParser(function (input, i) {
  379. if (i < input.length) {
  380. return makeFailure(i, 'EOF');
  381. }
  382. return makeSuccess(i, null);
  383. });
  384. export const digit = regexp(/[0-9]/).desc('a digit');
  385. export const digits = regexp(/[0-9]*/).desc('optional digits');
  386. export const letter = regexp(/[a-z]/i).desc('a letter');
  387. export const letters = regexp(/[a-z]*/i).desc('optional letters');
  388. export const optWhitespace = regexp(/\s*/).desc('optional whitespace');
  389. export const whitespace = regexp(/\s+/).desc('whitespace');
  390. export const cr = string('\r');
  391. export const lf = string('\n');
  392. export const crlf = string('\r\n');
  393. export const newline = alt(crlf, lf, cr).desc('newline');
  394. export const end = alt(newline, eof);
  395. }
  396. function seqPick(idx: number, ...parsers: MonadicParser<any>[]): MonadicParser<any> {
  397. const numParsers = parsers.length;
  398. return new MonadicParser<any[]>((input, index) => {
  399. let result: MonadicParser.Result<any> | undefined;
  400. let picked: any;
  401. let i = index;
  402. for (let j = 0; j < numParsers; j++) {
  403. result = mergeReplies(parsers[j]._(input, i), result);
  404. if (!result.status) {
  405. return result;
  406. }
  407. if (idx === j) picked = result.value;
  408. i = result.index;
  409. }
  410. return mergeReplies(makeSuccess(i, picked), result);
  411. });
  412. }
  413. function makeSuccess<T>(index: number, value: T): MonadicParser.Success<T> {
  414. return { status: true, index, value };
  415. }
  416. function makeFailure(index: number, expected: string): MonadicParser.Failure {
  417. return { status: false, furthest: index, expected: [expected] };
  418. }
  419. function mergeReplies<A, B>(result: MonadicParser.Result<A>, last?: MonadicParser.Result<B>): MonadicParser.Result<A> {
  420. if (!last || result.status || last.status || result.furthest > last.furthest) {
  421. return result;
  422. }
  423. const expected = result.furthest === last.furthest
  424. ? unsafeUnion(result.expected, last.expected)
  425. : last.expected;
  426. return { status: result.status, furthest: last.furthest, expected };
  427. }
  428. function makeLineColumnIndex(input: string, i: number): MonadicParser.Index {
  429. const lines = input.slice(0, i).split('\n');
  430. // Note that unlike the character offset, the line and column offsets are
  431. // 1-based.
  432. const lineWeAreUpTo = lines.length;
  433. const columnWeAreUpTo = lines[lines.length - 1].length + 1;
  434. return { offset: i, line: lineWeAreUpTo, column: columnWeAreUpTo };
  435. }
  436. function formatExpected(expected: string[]) {
  437. if (expected.length === 1) {
  438. return expected[0];
  439. }
  440. return 'one of ' + expected.join(', ');
  441. }
  442. function formatGot(input: string, error: MonadicParser.ParseFailure) {
  443. const index = error.index;
  444. const i = index.offset;
  445. if (i === input.length) {
  446. return ', got the end of the input';
  447. }
  448. const prefix = i > 0 ? '\'...' : '\'';
  449. const suffix = input.length - i > 12 ? '...\'' : '\'';
  450. return ` at line ${index.line} column ${index.column}, got ${prefix}${input.slice(i, i + 12)}${suffix}`;
  451. }
  452. function formatError(input: string, error: MonadicParser.ParseFailure) {
  453. return `expected ${formatExpected(error.expected)}${formatGot(input, error)}`;
  454. }
  455. function unsafeUnion(xs: string[], ys: string[]) {
  456. const xn = xs.length;
  457. const yn = ys.length;
  458. if (xn === 0) return ys;
  459. else if (yn === 0) return xs;
  460. const set = new Set<string>();
  461. const ret: string[] = [];
  462. for (let i = 0; i < xn; i++) {
  463. if (!set.has(xs[i])) {
  464. ret[ret.length] = xs[i];
  465. set.add(xs[i]);
  466. }
  467. }
  468. for (let i = 0; i < yn; i++) {
  469. if (!set.has(ys[i])) {
  470. ret[ret.length] = ys[i];
  471. set.add(ys[i]);
  472. }
  473. }
  474. ret.sort();
  475. return ret;
  476. }
  477. function isParser(obj: any): obj is MonadicParser<any> {
  478. return obj instanceof MonadicParser;
  479. }