parser.ts 9.6 KB


  1. /**
  2. * Copyright (c) 2022 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * @author Alexander Rose <alexander.rose@weirdbyte.de>
  5. */
  6. import { Task, RuntimeContext } from '../../../mol-task';
  7. import { Tokenizer, TokenBuilder } from '../common/text/tokenizer';
  8. import { ReaderResult as Result } from '../result';
  9. import { TokenColumnProvider as TokenColumn } from '../common/text/column/token';
  10. import { Column, Table } from '../../../mol-data/db';
  11. import { Mutable } from '../../../mol-util/type-helpers';
  12. // https://manual.gromacs.org/2021-current/reference-manual/file-formats.html#top
  /**
   * Column types of one row of an `[ atoms ]` section.
   *
   * The keys are listed in the order in which the fields are read from a row:
   * nr, type, resnr, residu, atom, cgnr, charge, mass.
   */
  const AtomsSchema = {
      // integer field
      nr: Column.Schema.Int(),
      // string field
      type: Column.Schema.Str(),
      // integer field
      resnr: Column.Schema.Int(),
      // string field
      residu: Column.Schema.Str(),
      // string field
      atom: Column.Schema.Str(),
      // integer field
      cgnr: Column.Schema.Int(),
      // floating point field
      charge: Column.Schema.Float(),
      // floating point field
      mass: Column.Schema.Float(),
  };
  /**
   * Column types of one row of a `[ bonds ]` section: the first two fields of
   * the row, both integers. Any further fields of a bond row are not stored.
   */
  const BondsSchema = {
      // first integer field of the row
      ai: Column.Schema.Int(),
      // second integer field of the row
      aj: Column.Schema.Int(),
  };
  /**
   * Column types of one row of the `[ molecules ]` section: a string field
   * followed by an integer field.
   */
  const MoleculesSchema = {
      // first field of the row, kept as a string
      compound: Column.Schema.Str(),
      // second field of the row, read as an integer
      molCount: Column.Schema.Int(),
  };
  /**
   * Tables collected for a single `[ moleculetype ]`.
   */
  type Compound = {
      /** Rows of the `[ atoms ]` section. */
      atoms: Table<typeof AtomsSchema>;
      /** Rows of the `[ bonds ]` section; absent when no such section was read. */
      bonds?: Table<typeof BondsSchema>;
  }
  /**
   * Result of parsing a topology file.
   */
  export interface TopFile {
      /** Trimmed text of the data line of the `[ system ]` section. */
      readonly system: string;
      /** Rows of the `[ molecules ]` section. */
      readonly molecules: Table<typeof MoleculesSchema>;
      /** Compounds keyed by the name read from their `[ moleculetype ]` section. */
      readonly compounds: Record<string, Compound>;
  }
  // Tokenizer helpers used throughout this file, pulled into module scope for brevity.
  const {
      eatLine,
      eatValue,
      markLine,
      markStart,
      readLine,
      skipWhitespace,
  } = Tokenizer;
  /**
   * Bundles the tokenizer and the runtime context into a single object so that
   * the section handlers can take both as one argument.
   *
   * @param tokenizer tokenizer positioned over the file contents
   * @param runtimeCtx runtime context of the running task
   * @returns an object holding both arguments under their parameter names
   */
  function State(tokenizer: Tokenizer, runtimeCtx: RuntimeContext) {
      const bundle = { tokenizer, runtimeCtx };
      return bundle;
  }
  /** Type of the object created by the `State` factory. */
  type State = ReturnType<typeof State>
  /**
   * Matches a section header such as `[ atoms ]` and captures the section name
   * in group 1.
   *
   * Whitespace between the brackets and the name is optional and not part of
   * the capture, so `[atoms]` and `[  atoms  ]` both yield `atoms`. The name
   * ends at the first `]`, so trailing text on the header line (for example a
   * comment containing brackets) does not end up in the captured name. A header
   * without a name, such as `[ ]`, does not match.
   */
  const reField = /\[\s*([^\s\]][^\]]*?)\s*\]/;
  /** Run of whitespace, used to split a line into fields. */
  const reWhitespace = /\s+/;
  /**
   * Reads the body of a `[ moleculetype ]` section and returns the molecule name.
   *
   * Lines whose first non-whitespace character is `;` or `*` are skipped. The
   * first whitespace-separated field of the remaining data line is the name;
   * the rest of that line is ignored. Reading stops at the next `[` or at the
   * end of the input.
   *
   * @param state parser state holding the tokenizer to read from
   * @returns the molecule name
   * @throws Error if the section contains no data line, or more than one
   */
  function handleMoleculetype(state: State): string {
      const { tokenizer } = state;

      let molName: string | undefined = undefined;

      while (tokenizer.tokenEnd < tokenizer.length) {
          skipWhitespace(tokenizer);
          // Same end-of-input guard as in handleMolecules: with nothing left to
          // read, the code below would otherwise treat the end of the input as
          // one more (empty) data line.
          if (tokenizer.position >= tokenizer.length) break;

          const c = tokenizer.data[tokenizer.position];
          if (c === '[') break;
          if (c === ';' || c === '*') {
              markLine(tokenizer);
              continue;
          }

          if (molName !== undefined) throw new Error('more than one molName');

          // only the first field is the name, further fields are not used here
          const line = readLine(tokenizer);
          molName = line.split(reWhitespace)[0];
      }

      if (molName === undefined) throw new Error('missing molName');
      return molName;
  }
  /**
   * Reads the rows of an `[ atoms ]` section into a table.
   *
   * Lines whose first non-whitespace character is `;` or `*` are skipped. For
   * every other line eight whitespace-separated fields are recorded, in the
   * order nr, type, resnr, residu, atom, cgnr, charge, mass; whatever follows
   * on the line is discarded. Reading stops at the next `[` or at the end of
   * the input.
   *
   * NOTE(review): fields are collected with `skipWhitespace` + `eatValue`. If
   * `skipWhitespace` also steps over line breaks, a row with fewer than eight
   * fields would take its missing values from the following line — confirm
   * against the Tokenizer implementation.
   *
   * @param state parser state holding the tokenizer to read from
   * @returns table with one row per data line of the section
   */
  function handleAtoms(state: State) {
      const { tokenizer } = state;

      // one token builder per column
      const nr = TokenBuilder.create(tokenizer.data, 64);
      const type = TokenBuilder.create(tokenizer.data, 64);
      const resnr = TokenBuilder.create(tokenizer.data, 64);
      const residu = TokenBuilder.create(tokenizer.data, 64);
      const atom = TokenBuilder.create(tokenizer.data, 64);
      const cgnr = TokenBuilder.create(tokenizer.data, 64);
      const charge = TokenBuilder.create(tokenizer.data, 64);
      const mass = TokenBuilder.create(tokenizer.data, 64);

      // builders in the order in which the fields appear in a row
      const rowBuilders = [nr, type, resnr, residu, atom, cgnr, charge, mass];

      while (tokenizer.tokenEnd < tokenizer.length) {
          skipWhitespace(tokenizer);
          // Same end-of-input guard as in handleMolecules: with nothing left to
          // read, the field loop below would otherwise run once more and add a
          // row without data.
          if (tokenizer.position >= tokenizer.length) break;

          const c = tokenizer.data[tokenizer.position];
          if (c === '[') break;
          if (c === ';' || c === '*') {
              markLine(tokenizer);
              continue;
          }

          for (const builder of rowBuilders) {
              skipWhitespace(tokenizer);
              markStart(tokenizer);
              eatValue(tokenizer);
              TokenBuilder.add(builder, tokenizer.tokenStart, tokenizer.tokenEnd);
          }
          // ignore any extra columns
          markLine(tokenizer);
      }

      return Table.ofColumns(AtomsSchema, {
          nr: TokenColumn(nr)(Column.Schema.int),
          type: TokenColumn(type)(Column.Schema.str),
          resnr: TokenColumn(resnr)(Column.Schema.int),
          residu: TokenColumn(residu)(Column.Schema.str),
          atom: TokenColumn(atom)(Column.Schema.str),
          cgnr: TokenColumn(cgnr)(Column.Schema.int),
          charge: TokenColumn(charge)(Column.Schema.float),
          mass: TokenColumn(mass)(Column.Schema.float),
      });
  }
  /**
   * Reads the rows of a `[ bonds ]` section into a table.
   *
   * Lines whose first non-whitespace character is `;` or `*` are skipped. For
   * every other line the first two whitespace-separated fields are recorded as
   * `ai` and `aj`; whatever follows on the line is discarded. Reading stops at
   * the next `[` or at the end of the input.
   *
   * @param state parser state holding the tokenizer to read from
   * @returns table with one row per data line of the section
   */
  function handleBonds(state: State) {
      const { tokenizer } = state;

      const ai = TokenBuilder.create(tokenizer.data, 64);
      const aj = TokenBuilder.create(tokenizer.data, 64);

      // builders in the order in which the fields appear in a row
      const rowBuilders = [ai, aj];

      while (tokenizer.tokenEnd < tokenizer.length) {
          skipWhitespace(tokenizer);
          // Same end-of-input guard as in handleMolecules: with nothing left to
          // read, the field loop below would otherwise run once more and add a
          // row without data.
          if (tokenizer.position >= tokenizer.length) break;

          const c = tokenizer.data[tokenizer.position];
          if (c === '[') break;
          if (c === ';' || c === '*') {
              markLine(tokenizer);
              continue;
          }

          for (const builder of rowBuilders) {
              skipWhitespace(tokenizer);
              markStart(tokenizer);
              eatValue(tokenizer);
              TokenBuilder.add(builder, tokenizer.tokenStart, tokenizer.tokenEnd);
          }
          // ignore any extra columns
          markLine(tokenizer);
      }

      return Table.ofColumns(BondsSchema, {
          ai: TokenColumn(ai)(Column.Schema.int),
          aj: TokenColumn(aj)(Column.Schema.int),
      });
  }
  /**
   * Reads the body of a `[ system ]` section and returns the system name.
   *
   * Lines whose first non-whitespace character is `;` or `*` are skipped. The
   * remaining data line, trimmed, is the name. Reading stops at the next `[`
   * or at the end of the input.
   *
   * @param state parser state holding the tokenizer to read from
   * @returns the trimmed system name
   * @throws Error if the section contains no data line, or more than one
   */
  function handleSystem(state: State): string {
      const { tokenizer } = state;

      let system: string | undefined = undefined;

      while (tokenizer.tokenEnd < tokenizer.length) {
          skipWhitespace(tokenizer);
          // Same end-of-input guard as in handleMolecules: with nothing left to
          // read, the code below would otherwise treat the end of the input as
          // one more (empty) data line.
          if (tokenizer.position >= tokenizer.length) break;

          const c = tokenizer.data[tokenizer.position];
          if (c === '[') break;
          if (c === ';' || c === '*') {
              markLine(tokenizer);
              continue;
          }

          if (system !== undefined) throw new Error('more than one system');
          system = readLine(tokenizer).trim();
      }

      if (system === undefined) throw new Error('missing system');
      return system;
  }
  /**
   * Reads the rows of the `[ molecules ]` section into a table.
   *
   * Lines whose first non-whitespace character is `;` or `*` are skipped. For
   * every other line the first two whitespace-separated fields are recorded as
   * `compound` and `molCount`; whatever follows on the line is discarded.
   * Reading stops at the next `[` or once the input is exhausted.
   *
   * @param state parser state holding the tokenizer to read from
   * @returns table with one row per data line of the section
   */
  function handleMolecules(state: State) {
      const { tokenizer } = state;

      const compound = TokenBuilder.create(tokenizer.data, 64);
      const molCount = TokenBuilder.create(tokenizer.data, 64);

      while (tokenizer.tokenEnd < tokenizer.length) {
          skipWhitespace(tokenizer);

          // stop when the input is exhausted or the next section header starts
          const exhausted = tokenizer.position >= tokenizer.length;
          if (exhausted || tokenizer.data[tokenizer.position] === '[') break;

          const first = tokenizer.data[tokenizer.position];
          if (first === ';' || first === '*') {
              // comment line
              markLine(tokenizer);
          } else {
              // first field: compound
              skipWhitespace(tokenizer);
              markStart(tokenizer);
              eatValue(tokenizer);
              TokenBuilder.add(compound, tokenizer.tokenStart, tokenizer.tokenEnd);

              // second field: molCount
              skipWhitespace(tokenizer);
              markStart(tokenizer);
              eatValue(tokenizer);
              TokenBuilder.add(molCount, tokenizer.tokenStart, tokenizer.tokenEnd);

              // ignore any extra columns
              eatLine(tokenizer);
              markStart(tokenizer);
          }
      }

      const columns = {
          compound: TokenColumn(compound)(Column.Schema.str),
          molCount: TokenColumn(molCount)(Column.Schema.int),
      };
      return Table.ofColumns(MoleculesSchema, columns);
  }
  /**
   * Parses the text of a topology file section by section.
   *
   * Empty lines and lines starting with `*` or `;` are skipped. A line starting
   * with `[` is a section header: `moleculetype`, `atoms`, `bonds`, `system`
   * and `molecules` are handed to their handlers, the lines of any other
   * section are skipped. Atoms and bonds are attached to the most recently
   * read `moleculetype`; a compound is stored under its name once the next
   * `moleculetype`, the `molecules` section or the end of the input is reached,
   * provided it has a name and an atoms table.
   *
   * `system` and `molecules` of the result stay unset when the input has no
   * such section.
   *
   * @param data contents of the topology file
   * @param ctx runtime context used for progress updates
   * @returns a success result wrapping the parsed file
   * @throws Error on an `#include` line, on a header without a section name,
   *         and whatever the section handlers throw
   */
  async function parseInternal(data: string, ctx: RuntimeContext): Promise<Result<TopFile>> {
      const t = Tokenizer(data);
      const state = State(t, ctx);

      const result: Mutable<TopFile> = Object.create(null);
      result.compounds = {};
      let prevPosition = 0;

      let currentCompound: Partial<Compound> = {};
      let currentMolName = '';

      // Stores the compound collected so far (if it is complete) and always
      // starts a fresh one, so that tables of an incomplete compound cannot
      // end up in the next moleculetype.
      function addMol() {
          if (currentMolName && currentCompound.atoms) {
              result.compounds[currentMolName] = currentCompound as Compound;
          }
          currentCompound = {};
          currentMolName = '';
      }

      while (t.tokenEnd < t.length) {
          // report progress after more than 100000 characters since the last report
          if (t.position - prevPosition > 100000 && ctx.shouldUpdate) {
              prevPosition = t.position;
              await ctx.update({ current: t.position, max: t.length });
          }

          const line = readLine(t).trim();
          if (!line || line[0] === '*' || line[0] === ';') {
              continue;
          }

          if (line.startsWith('#include')) {
              throw new Error('#include statements not allowed');
          }

          if (!line.startsWith('[')) continue;

          const fieldMatch = line.match(reField);
          if (fieldMatch === null) throw new Error('expected field name');

          switch (fieldMatch[1]) {
              case 'moleculetype':
                  addMol();
                  currentMolName = handleMoleculetype(state);
                  break;
              case 'atoms':
                  currentCompound.atoms = handleAtoms(state);
                  break;
              case 'bonds':
                  currentCompound.bonds = handleBonds(state);
                  break;
              case 'system':
                  result.system = handleSystem(state);
                  break;
              case 'molecules':
                  addMol(); // add the last compound
                  result.molecules = handleMolecules(state);
                  break;
              default:
                  // Unknown section: skip its lines. Leading whitespace is
                  // skipped first so that an indented header ends the section,
                  // in line with the trimmed header check above.
                  while (t.tokenEnd < t.length) {
                      skipWhitespace(t);
                      if (t.data[t.position] === '[') break;
                      markLine(t);
                  }
          }
      }

      // keep the last compound when the input ends without a molecules section
      addMol();

      return Result.success(result);
  }
  /**
   * Creates a task named 'Parse TOP' that parses the given topology file text.
   *
   * @param data contents of the topology file
   * @returns a task resolving to the parse result
   */
  export function parseTop(data: string) {
      return Task.create<Result<TopFile>>('Parse TOP', async ctx => {
          const parsed = await parseInternal(data, ctx);
          return parsed;
      });
  }