monadic-parser.ts 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554
  1. /**
  2. * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * @author David Sehnal <david.sehnal@gmail.com>
  5. */
  6. /**
  7. * Adapted from Parsimmon (https://github.com/jneen/parsimmon)
  8. * Copyright (c) 2011-present J. Adkisson (http://jneen.net).
  9. */
  10. export class MonadicParser<A> {
  11. constructor(public _: MonadicParser.Action<A>) { }
  12. parse(input: string): MonadicParser.ParseResult<A> {
  13. const result = this.skip(MonadicParser.eof)._(input, 0);
  14. if (result.status) {
  15. return { success: true, value: result.value };
  16. }
  17. return { success: false, index: makeLineColumnIndex(input, result.furthest), expected: result.expected };
  18. };
  19. tryParse(str: string) {
  20. const result = this.parse(str);
  21. if (result.success) {
  22. return result.value;
  23. } else {
  24. const msg = formatError(str, result);
  25. const err = new Error(msg);
  26. throw err;
  27. }
  28. }
  29. or<B>(alternative: MonadicParser<B>): MonadicParser<A | B> {
  30. return MonadicParser.alt(this, alternative);
  31. }
  32. trim<B>(parser: MonadicParser<B> | string): MonadicParser<A> {
  33. return this.wrap(parser, parser);
  34. }
  35. wrap<L, R>(leftParser: MonadicParser<L> | string, rightParser: MonadicParser<R> | string): MonadicParser<A> {
  36. return seqPick(1,
  37. typeof leftParser === 'string' ? MonadicParser.string(leftParser) : leftParser,
  38. this,
  39. typeof rightParser === 'string' ? MonadicParser.string(rightParser) : rightParser);
  40. }
  41. thru<B>(wrapper: (p: MonadicParser<A>) => MonadicParser<B>) {
  42. return wrapper(this);
  43. }
  44. then<B>(next: MonadicParser<B>): MonadicParser<B> {
  45. return seqPick(1, this, next);
  46. }
  47. many() {
  48. return new MonadicParser((input, i) => {
  49. const accum: A[] = [];
  50. let result: MonadicParser.Result<A> | undefined = void 0;
  51. while (true) {
  52. result = mergeReplies(this._(input, i), result);
  53. if (result.status) {
  54. if (i === result.index) {
  55. throw new Error('infinite loop detected in .many() parser --- calling .many() on a parser which can accept zero characters is usually the cause');
  56. }
  57. i = result.index;
  58. accum.push(result.value);
  59. } else {
  60. return mergeReplies(makeSuccess(i, accum), result);
  61. }
  62. }
  63. });
  64. };
  65. times(min: number, _max?: number): MonadicParser<A[]> {
  66. const max = typeof _max === 'undefined' ? min : _max;
  67. return new MonadicParser((input, i) => {
  68. const accum: A[] = [];
  69. let result: MonadicParser.Result<A> | undefined = void 0;
  70. let prevResult: MonadicParser.Result<A> | undefined = void 0;
  71. let times: number;
  72. for (times = 0; times < min; times++) {
  73. result = this._(input, i);
  74. prevResult = mergeReplies(result, prevResult);
  75. if (result.status) {
  76. i = result.index;
  77. accum.push(result.value);
  78. } else {
  79. return prevResult as any;
  80. }
  81. }
  82. for (; times < max; times += 1) {
  83. result = this._(input, i);
  84. prevResult = mergeReplies(result, prevResult);
  85. if (result.status) {
  86. i = result.index;
  87. accum.push(result.value);
  88. } else {
  89. break;
  90. }
  91. }
  92. return mergeReplies(makeSuccess(i, accum), prevResult);
  93. });
  94. };
  95. result<B>(res: B) {
  96. return this.map(() => res);
  97. };
  98. atMost(n: number) {
  99. return this.times(0, n);
  100. };
  101. atLeast(n: number) {
  102. return MonadicParser.seq(this.times(n), this.many()).map(r => [...r[0], ...r[1]]);
  103. };
  104. map<B>(f: (a: A) => B): MonadicParser<B> {
  105. return new MonadicParser((input, i) => {
  106. const result = this._(input, i);
  107. if (!result.status) {
  108. return result;
  109. }
  110. return mergeReplies(makeSuccess(result.index, f(result.value)), result);
  111. });
  112. }
  113. skip<B>(next: MonadicParser<B>): MonadicParser<A> {
  114. return seqPick(0, this, next);
  115. }
  116. mark(): MonadicParser<MonadicParser.Mark<A>> {
  117. return MonadicParser.seq(MonadicParser.index, this, MonadicParser.index).map(r => ({ start: r[0], value: r[1], end: r[2] }));
  118. }
  119. node(name: string): MonadicParser<MonadicParser.Node<A>> {
  120. return MonadicParser.seq(MonadicParser.index, this, MonadicParser.index).map(r => ({ name, start: r[0], value: r[1], end: r[2] }));
  121. };
  122. sepBy<B>(separator: MonadicParser<B>): MonadicParser<A[]> {
  123. return MonadicParser.sepBy(this, separator);
  124. }
  125. sepBy1<B>(separator: MonadicParser<B>): MonadicParser<A[]> {
  126. return MonadicParser.sepBy1(this, separator);
  127. }
  128. lookahead<B>(x: MonadicParser<B>) {
  129. return this.skip(MonadicParser.lookahead(x));
  130. };
  131. notFollowedBy<B>(x: MonadicParser<B>) {
  132. return this.skip(MonadicParser.notFollowedBy(x));
  133. };
  134. desc(expected: string) {
  135. return new MonadicParser((input, i) => {
  136. const reply = this._(input, i);
  137. if (!reply.status) {
  138. reply.expected = [expected];
  139. }
  140. return reply;
  141. });
  142. };
  143. fallback<B>(result: B) {
  144. return this.or(MonadicParser.succeed(result));
  145. };
  146. ap<B>(other: MonadicParser<(x: A) => B>): MonadicParser<B> {
  147. return MonadicParser.seq(other, this).map(([f, x]) => f(x));
  148. };
  149. chain<B>(f: (a: A) => MonadicParser<B>): MonadicParser<B> {
  150. return new MonadicParser<B>((input, i) => {
  151. const result = this._(input, i);
  152. if (!result.status) {
  153. return result as any;
  154. }
  155. const nextParser = f(result.value);
  156. return mergeReplies(nextParser._(input, result.index), result);
  157. });
  158. };
  159. }
  160. export namespace MonadicParser {
  161. export type Action<T> = (input: string, i: number) => MonadicParser.Result<T>
  162. export type ParseResult<T> = ParseSuccess<T> | ParseFailure;
  163. export interface Index {
  164. /** zero-based character offset */
  165. offset: number;
  166. /** one-based line offset */
  167. line: number;
  168. /** one-based column offset */
  169. column: number;
  170. }
  171. export interface ParseSuccess<T> {
  172. success: true,
  173. value: T
  174. }
  175. export interface ParseFailure {
  176. success: false,
  177. index: Index,
  178. expected: string[],
  179. }
  180. export interface Mark<T> {
  181. start: Index;
  182. end: Index;
  183. value: T;
  184. }
  185. export interface Node<T> extends Mark<T> {
  186. name: string
  187. }
  188. export interface Success<T> {
  189. status: true,
  190. value: T,
  191. index: number
  192. }
  193. export interface Failure {
  194. status: false,
  195. furthest: number,
  196. expected: string[]
  197. }
  198. export type Result<T> = Success<T> | Failure
  199. // export function createLanguage(parsers: any) {
  200. // const language: any = {};
  201. // for (const key of Object.keys(parsers)) {
  202. // (function (key) {
  203. // language[key] = lazy(() => parsers[key](language));
  204. // })(key);
  205. // }
  206. // return language;
  207. // }
  208. export function seq<A>(a: MonadicParser<A>): MonadicParser<[A]>
  209. export function seq<A, B>(a: MonadicParser<A>, b: MonadicParser<B>): MonadicParser<[A, B]>
  210. export function seq<A, B, C>(a: MonadicParser<A>, b: MonadicParser<B>, c: MonadicParser<C>): MonadicParser<[A, B, C]>
  211. export function seq<A, B, C, D>(a: MonadicParser<A>, b: MonadicParser<B>, c: MonadicParser<C>, d: MonadicParser<D>): MonadicParser<[A, B, C, D]>
  212. export function seq<A, B, C, D, E>(a: MonadicParser<A>, b: MonadicParser<B>, c: MonadicParser<C>, d: MonadicParser<D>, e: MonadicParser<E>): MonadicParser<[A, B, C, D, E]>
  213. export function seq<T>(...parsers: MonadicParser<T>[]): MonadicParser<T[]>
  214. export function seq(...parsers: MonadicParser<any>[]): MonadicParser<any[]> {
  215. const numParsers = parsers.length;
  216. return new MonadicParser<any[]>((input, index) => {
  217. let result: MonadicParser.Result<any> | undefined;
  218. let accum = new Array(numParsers);
  219. let i = index;
  220. for (let j = 0; j < numParsers; j++) {
  221. result = mergeReplies(parsers[j]._(input, i), result);
  222. if (!result.status) {
  223. return result;
  224. }
  225. accum[j] = result.value;
  226. i = result.index;
  227. }
  228. return mergeReplies(makeSuccess(i, accum), result);
  229. });
  230. }
  231. export function alt<A>(a: MonadicParser<A>): MonadicParser<A>
  232. export function alt<A, B>(a: MonadicParser<A>, b: MonadicParser<B>): MonadicParser<A | B>
  233. export function alt<A, B, C>(a: MonadicParser<A>, b: MonadicParser<B>, c: MonadicParser<C>): MonadicParser<A | B | C>
  234. export function alt<A, B, C, D>(a: MonadicParser<A>, b: MonadicParser<B>, c: MonadicParser<C>, d: MonadicParser<D>): MonadicParser<A | B | C | D>
  235. export function alt<A, B, C, D, E>(a: MonadicParser<A>, b: MonadicParser<B>, c: MonadicParser<C>, d: MonadicParser<D>, e: MonadicParser<E>): MonadicParser<A | B | C | D | E>
  236. export function alt<T>(...parsers: MonadicParser<T>[]): MonadicParser<T[]>
  237. export function alt(...parsers: MonadicParser<any>[]): MonadicParser<any> {
  238. const numParsers = parsers.length;
  239. if (numParsers === 0) {
  240. return fail('zero alternates');
  241. }
  242. return new MonadicParser((input, i) => {
  243. let result: MonadicParser.Result<any> | undefined;
  244. for (let j = 0; j < parsers.length; j++) {
  245. result = mergeReplies(parsers[j]._(input, i), result);
  246. if (result.status) {
  247. return result;
  248. }
  249. }
  250. return result!;
  251. });
  252. }
  253. export function sepBy<A, B>(parser: MonadicParser<A>, separator: MonadicParser<B>): MonadicParser<A[]> {
  254. return sepBy1(parser, separator).or(succeed([]));
  255. }
  256. export function sepBy1<A, B>(parser: MonadicParser<A>, separator: MonadicParser<B>) {
  257. const pairs = separator.then(parser).many();
  258. return seq(parser, pairs).map(r => [r[0], ...r[1]]);
  259. }
  260. export function string(str: string) {
  261. const expected = `'${str}'`;
  262. if (str.length === 1) {
  263. const code = str.charCodeAt(0);
  264. return new MonadicParser((input, i) => input.charCodeAt(i) === code ? makeSuccess(i + 1, str) : makeFailure(i, expected));
  265. }
  266. return new MonadicParser((input, i) => {
  267. const j = i + str.length;
  268. if (input.slice(i, j) === str) return makeSuccess(j, str);
  269. else return makeFailure(i, expected);
  270. });
  271. }
  272. function flags(re: RegExp) {
  273. const s = '' + re;
  274. return s.slice(s.lastIndexOf('/') + 1);
  275. }
  276. function anchoredRegexp(re: RegExp) {
  277. return RegExp('^(?:' + re.source + ')', flags(re));
  278. }
  279. export function regexp(re: RegExp, groupNumber?: number) {
  280. const anchored = anchoredRegexp(re);
  281. const expected = '' + re;
  282. const group = groupNumber || 0;
  283. return new MonadicParser(function (input, i) {
  284. const match = anchored.exec(input.slice(i));
  285. if (match) {
  286. if (0 <= group && group <= match.length) {
  287. const fullMatch = match[0];
  288. const groupMatch = match[group];
  289. return makeSuccess(i + fullMatch.length, groupMatch);
  290. }
  291. const message = `invalid match group (0 to ${match.length}) in ${expected}`;
  292. return makeFailure(i, message);
  293. }
  294. return makeFailure(i, expected);
  295. });
  296. }
  297. export function succeed<A>(value: A) {
  298. return new MonadicParser((input, i) => makeSuccess(i, value));
  299. }
  300. export function fail(expected: string): MonadicParser<any> {
  301. return new MonadicParser((input, i) => makeFailure(i, expected));
  302. }
  303. export function lookahead<A>(x: MonadicParser<A> | string | RegExp): MonadicParser<null> {
  304. if (isParser(x)) {
  305. return new MonadicParser((input, i) => {
  306. const result = x._(input, i);
  307. if (result.status) {
  308. result.index = i;
  309. result.value = null as any;
  310. }
  311. return result as any;
  312. });
  313. } else if (typeof x === 'string') {
  314. return lookahead(string(x));
  315. } else if (x instanceof RegExp) {
  316. return lookahead(regexp(x));
  317. }
  318. throw new Error('not a string, regexp, or parser: ' + x);
  319. }
  320. export function notFollowedBy<A>(parser: MonadicParser<A>): MonadicParser<null> {
  321. return new MonadicParser((input, i) => {
  322. const result = parser._(input, i);
  323. return result.status
  324. ? makeFailure(i, 'not "' + input.slice(i, result.index) + '"')
  325. : makeSuccess(i, null);
  326. });
  327. }
  328. export function test(predicate: (char: string) => boolean): MonadicParser<string> {
  329. return new MonadicParser((input, i) => {
  330. const char = input.charAt(i);
  331. if (i < input.length && predicate(char)) {
  332. return makeSuccess(i + 1, char);
  333. } else {
  334. return makeFailure(i, 'a character ' + predicate);
  335. }
  336. });
  337. }
  338. export function oneOf(str: string) {
  339. return test(ch => str.indexOf(ch) >= 0);
  340. }
  341. export function noneOf(str: string) {
  342. return test(ch => str.indexOf(ch) < 0);
  343. }
  344. export function range(begin: string, end: string) {
  345. return test(ch => begin <= ch && ch <= end).desc(begin + '-' + end);
  346. }
  347. export function takeWhile(predicate: (ch: string) => boolean) {
  348. return new MonadicParser((input, i) => {
  349. let j = i;
  350. while (j < input.length && predicate(input.charAt(j))) {
  351. j++;
  352. }
  353. return makeSuccess(j, input.slice(i, j));
  354. });
  355. }
  356. export function lazy<T>(f: () => MonadicParser<T>) {
  357. const parser = new MonadicParser((input, i) => {
  358. const a = f()._;
  359. parser._ = a;
  360. return a(input, i);
  361. });
  362. return parser;
  363. }
  364. export function empty() {
  365. return fail('empty');
  366. }
  367. export const index = new MonadicParser(function (input, i) {
  368. return makeSuccess(i, makeLineColumnIndex(input, i));
  369. });
  370. export const anyChar = new MonadicParser<string>((input, i) => {
  371. if (i >= input.length) {
  372. return makeFailure(i, 'any character');
  373. }
  374. return makeSuccess(i + 1, input.charAt(i));
  375. });
  376. export const all = new MonadicParser(function (input, i) {
  377. return makeSuccess(input.length, input.slice(i));
  378. });
  379. export const eof = new MonadicParser(function (input, i) {
  380. if (i < input.length) {
  381. return makeFailure(i, 'EOF');
  382. }
  383. return makeSuccess(i, null);
  384. });
  385. export const digit = regexp(/[0-9]/).desc('a digit');
  386. export const digits = regexp(/[0-9]*/).desc('optional digits');
  387. export const letter = regexp(/[a-z]/i).desc('a letter');
  388. export const letters = regexp(/[a-z]*/i).desc('optional letters');
  389. export const optWhitespace = regexp(/\s*/).desc('optional whitespace');
  390. export const whitespace = regexp(/\s+/).desc('whitespace');
  391. export const cr = string('\r');
  392. export const lf = string('\n');
  393. export const crlf = string('\r\n');
  394. export const newline = alt(crlf, lf, cr).desc('newline');
  395. export const end = alt(newline, eof);
  396. }
  397. function seqPick(idx: number, ...parsers: MonadicParser<any>[]): MonadicParser<any> {
  398. const numParsers = parsers.length;
  399. return new MonadicParser<any[]>((input, index) => {
  400. let result: MonadicParser.Result<any> | undefined;
  401. let picked: any;
  402. let i = index;
  403. for (let j = 0; j < numParsers; j++) {
  404. result = mergeReplies(parsers[j]._(input, i), result);
  405. if (!result.status) {
  406. return result;
  407. }
  408. if (idx === j) picked = result.value;
  409. i = result.index;
  410. }
  411. return mergeReplies(makeSuccess(i, picked), result);
  412. });
  413. }
  414. function makeSuccess<T>(index: number, value: T): MonadicParser.Success<T> {
  415. return { status: true, index, value };
  416. }
  417. function makeFailure(index: number, expected: string): MonadicParser.Failure {
  418. return { status: false, furthest: index, expected: [expected] };
  419. }
  420. function mergeReplies<A, B>(result: MonadicParser.Result<A>, last?: MonadicParser.Result<B>): MonadicParser.Result<A> {
  421. if (!last || result.status || last.status || result.furthest > last.furthest) {
  422. return result;
  423. }
  424. const expected = result.furthest === last.furthest
  425. ? unsafeUnion(result.expected, last.expected)
  426. : last.expected;
  427. return { status: result.status, furthest: last.furthest, expected };
  428. }
  429. function makeLineColumnIndex(input: string, i: number): MonadicParser.Index {
  430. const lines = input.slice(0, i).split('\n');
  431. // Note that unlike the character offset, the line and column offsets are
  432. // 1-based.
  433. const lineWeAreUpTo = lines.length;
  434. const columnWeAreUpTo = lines[lines.length - 1].length + 1;
  435. return { offset: i, line: lineWeAreUpTo, column: columnWeAreUpTo };
  436. }
  437. function formatExpected(expected: string[]) {
  438. if (expected.length === 1) {
  439. return expected[0];
  440. }
  441. return 'one of ' + expected.join(', ');
  442. }
  443. function formatGot(input: string, error: MonadicParser.ParseFailure) {
  444. const index = error.index;
  445. const i = index.offset;
  446. if (i === input.length) {
  447. return ', got the end of the input';
  448. }
  449. const prefix = i > 0 ? '\'...' : '\'';
  450. const suffix = input.length - i > 12 ? '...\'' : '\'';
  451. return ` at line ${index.line} column ${index.column}, got ${prefix}${input.slice(i, i + 12)}${suffix}`;
  452. }
  453. function formatError(input: string, error: MonadicParser.ParseFailure) {
  454. return `expected ${formatExpected(error.expected)}${formatGot(input, error)}`;
  455. }
  456. function unsafeUnion(xs: string[], ys: string[]) {
  457. const xn = xs.length;
  458. const yn = ys.length;
  459. if (xn === 0) return ys;
  460. else if (yn === 0) return xs;
  461. const set = new Set<string>();
  462. const ret: string[] = [];
  463. for (let i = 0; i < xn; i++) {
  464. if (!set.has(xs[i])) {
  465. ret[ret.length] = xs[i];
  466. set.add(xs[i]);
  467. }
  468. }
  469. for (let i = 0; i < yn; i++) {
  470. if (!set.has(ys[i])) {
  471. ret[ret.length] = ys[i];
  472. set.add(ys[i]);
  473. }
  474. }
  475. ret.sort();
  476. return ret;
  477. }
  478. function isParser(obj: any): obj is MonadicParser<any> {
  479. return obj instanceof MonadicParser;
  480. }