parser.ts 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213
  1. /**
  2. * Copyright (c) 2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * @author Alexander Rose <alexander.rose@weirdbyte.de>
  5. */
  6. import { Task, RuntimeContext, chunkedSubtask } from '../../../mol-task'
  7. import { Tokenizer, TokenBuilder } from '../common/text/tokenizer'
  8. import { ReaderResult as Result } from '../result'
  9. import TokenColumn from '../common/text/column/token';
  10. import { Column } from '../../../mol-data/db';
  // PSF file format description (NAMD tutorial): http://www.ks.uiuc.edu/Training/Tutorials/namd/namd-tutorial-unix-html/node23.html
  /**
   * Parsed contents of a PSF file.
   */
  export interface PsfFile {
      /** First line of the file, trimmed. */
      readonly id: string;
      /** Lines of the `!NTITLE` section; empty when the file has no such section. */
      readonly title: string[];
      /** Columns of the `!NATOM` section, one entry per atom record. */
      readonly atoms: {
          /** Number of atom records announced by the section header. */
          readonly count: number;
          /** 1st field of each record, read as an integer. */
          readonly atomId: Column<number>;
          /** 2nd field of each record, read as a string. */
          readonly segmentName: Column<string>;
          /** 3rd field of each record, read as an integer. */
          readonly residueId: Column<number>;
          /** 4th field of each record, read as a string. */
          readonly residueName: Column<string>;
          /** 5th field of each record, read as a string. */
          readonly atomName: Column<string>;
          /** 6th field of each record, read as a string. */
          readonly atomType: Column<string>;
          /** 7th field of each record, read as a float. */
          readonly charge: Column<number>;
          /** 8th field of each record, read as a float. */
          readonly mass: Column<number>;
      };
      /** Columns of the `!NBOND` section, one entry per bond. */
      readonly bonds: {
          /** Number of bonds announced by the section header. */
          readonly count: number;
          /** First atom id of each bond pair, read as an integer. */
          readonly atomIdA: Column<number>;
          /** Second atom id of each bond pair, read as an integer. */
          readonly atomIdB: Column<number>;
      };
  }
  // Local aliases for the Tokenizer helpers used by the parsing routines in this file.
  const readLine = Tokenizer.readLine;
  const skipWhitespace = Tokenizer.skipWhitespace;
  const eatValue = Tokenizer.eatValue;
  const eatLine = Tokenizer.eatLine;
  const markStart = Tokenizer.markStart;
  /** Separator used to split a section header line into its whitespace-delimited fields. */
  const reWhitespace = /\s+/
  /**
   * Prefix removed from a title line: optional leading whitespace, an optional
   * `*`, and an optional `REMARK`/`REMARKS` keyword.
   *
   * The pattern is anchored and tolerates leading whitespace. The previous,
   * unanchored `/(^\*|REMARK)*/` matched the empty string at index 0 on lines
   * such as ` REMARKS text` and therefore stripped nothing; it also left a
   * stray `S` behind on `REMARKS text`.
   * `\b` keeps words that merely start with `REMARK` (e.g. `REMARKABLE`) intact.
   */
  const reTitle = /^\s*\*?\s*(?:REMARKS?\b)?/
  /**
   * Bundles the tokenizer and the runtime context into the single object that
   * is handed to the section parsers.
   *
   * @param tokenizer tokenizer positioned on the data being parsed
   * @param runtimeCtx runtime context of the running task
   * @returns an object holding both arguments under the same names
   */
  function State(tokenizer: Tokenizer, runtimeCtx: RuntimeContext) {
      const bundle = { tokenizer: tokenizer, runtimeCtx: runtimeCtx };
      return bundle;
  }
  /** Shape of the object produced by `State`. */
  type State = ReturnType<typeof State>
  /**
   * Tokenizes `count` atom records, starting at the tokenizer's current position.
   *
   * The first eight whitespace-separated values of every record are stored, in
   * order, as atom id, segment name, residue id, residue name, atom name,
   * atom type, charge and mass; whatever remains on the line is skipped.
   * Records are processed through `chunkedSubtask`, which also receives the
   * progress callback.
   *
   * @param state tokenizer and runtime context to work with
   * @param count number of atom records to read
   * @returns the columns built from the recorded tokens, plus `count`
   */
  async function handleAtoms(state: State, count: number): Promise<PsfFile['atoms']> {
      const { tokenizer, runtimeCtx } = state
      const source = tokenizer.data
      const capacity = count * 2

      const atomId = TokenBuilder.create(source, capacity)
      const segmentName = TokenBuilder.create(source, capacity)
      const residueId = TokenBuilder.create(source, capacity)
      const residueName = TokenBuilder.create(source, capacity)
      const atomName = TokenBuilder.create(source, capacity)
      const atomType = TokenBuilder.create(source, capacity)
      const charge = TokenBuilder.create(source, capacity)
      const mass = TokenBuilder.create(source, capacity)

      // Builders listed in the order their fields appear within a record.
      const fieldBuilders = [atomId, segmentName, residueId, residueName, atomName, atomType, charge, mass]
      const total = tokenizer.length

      let recordsDone = 0
      await chunkedSubtask(runtimeCtx, 10, void 0, chunkSize => {
          const batch = Math.min(count - recordsDone, chunkSize)
          for (let record = 0; record < batch; ++record) {
              for (const builder of fieldBuilders) {
                  skipWhitespace(tokenizer)
                  markStart(tokenizer)
                  eatValue(tokenizer)
                  TokenBuilder.addUnchecked(builder, tokenizer.tokenStart, tokenizer.tokenEnd)
              }
              // ignore any extra columns
              eatLine(tokenizer)
              markStart(tokenizer)
          }
          recordsDone += batch
          return batch
      }, ctx => ctx.update({ message: 'Parsing...', current: tokenizer.position, max: total }))

      const Schema = Column.Schema
      return {
          count,
          atomId: TokenColumn(atomId)(Schema.int),
          segmentName: TokenColumn(segmentName)(Schema.str),
          residueId: TokenColumn(residueId)(Schema.int),
          residueName: TokenColumn(residueName)(Schema.str),
          atomName: TokenColumn(atomName)(Schema.str),
          atomType: TokenColumn(atomType)(Schema.str),
          charge: TokenColumn(charge)(Schema.float),
          mass: TokenColumn(mass)(Schema.float)
      }
  }
  /**
   * Tokenizes `count` bonds, starting at the tokenizer's current position.
   *
   * Every bond is read as two consecutive whitespace-separated values: the
   * first goes to `atomIdA`, the second to `atomIdB`. No end-of-line handling
   * is done between pairs (line breaks are presumably skipped as whitespace by
   * `skipWhitespace` - confirm against the tokenizer).
   *
   * @param state tokenizer and runtime context to work with
   * @param count number of bonds (value pairs) to read
   * @returns the two integer columns built from the recorded tokens, plus `count`
   */
  async function handleBonds(state: State, count: number): Promise<PsfFile['bonds']> {
      const { tokenizer, runtimeCtx } = state
      const capacity = count * 2

      const atomIdA = TokenBuilder.create(tokenizer.data, capacity)
      const atomIdB = TokenBuilder.create(tokenizer.data, capacity)

      // Builders listed in the order the two ids appear within a pair.
      const pairBuilders = [atomIdA, atomIdB]
      const total = tokenizer.length

      let pairsDone = 0
      await chunkedSubtask(runtimeCtx, 10, void 0, chunkSize => {
          const batch = Math.min(count - pairsDone, chunkSize)
          for (let pair = 0; pair < batch; ++pair) {
              for (const builder of pairBuilders) {
                  skipWhitespace(tokenizer)
                  markStart(tokenizer)
                  eatValue(tokenizer)
                  TokenBuilder.addUnchecked(builder, tokenizer.tokenStart, tokenizer.tokenEnd)
              }
          }
          pairsDone += batch
          return batch
      }, ctx => ctx.update({ message: 'Parsing...', current: tokenizer.position, max: total }))

      const Schema = Column.Schema
      return {
          count,
          atomIdA: TokenColumn(atomIdA)(Schema.int),
          atomIdB: TokenColumn(atomIdB)(Schema.int),
      }
  }
  /**
   * Reads the next `count` lines from the tokenizer as title lines.
   *
   * From each line the first match of `reTitle` is removed and the remainder
   * is trimmed.
   *
   * @param state tokenizer and runtime context to work with
   * @param count number of lines to consume
   * @returns the cleaned lines, in file order
   */
  function parseTitle(state: State, count: number) {
      const lines: string[] = []
      for (let remaining = count; remaining > 0; --remaining) {
          const raw = readLine(state.tokenizer)
          const cleaned = raw.replace(reTitle, '').trim()
          lines.push(cleaned)
      }
      return lines
  }
  /**
   * Reads the leading record count of a trimmed section header line
   * (for example `12 !NATOM`).
   *
   * @returns the count, or `undefined` when the line does not start with a
   * non-negative decimal integer
   */
  function parseSectionCount(header: string): number | undefined {
      const [first = ''] = header.split(reWhitespace)
      // explicit radix: counts are decimal, never hex/octal
      const count = parseInt(first, 10)
      return (Number.isNaN(count) || count < 0) ? undefined : count
  }

  /**
   * Parses PSF text into a `PsfFile`.
   *
   * The first line becomes the id. After that, lines are scanned for section
   * headers; `!NTITLE`, `!NATOM` and `!NBOND` are parsed, and scanning stops
   * once the bonds have been read.
   *
   * @param data complete text of the PSF file
   * @param ctx runtime context used for chunked parsing and progress updates
   * @returns a success result holding the parsed file, or an error result when
   * the atom or bond section is missing or announces an invalid record count
   */
  async function parseInternal(data: string, ctx: RuntimeContext): Promise<Result<PsfFile>> {
      const tokenizer = Tokenizer(data)
      const state = State(tokenizer, ctx)

      let title: string[] | undefined
      let atoms: PsfFile['atoms'] | undefined
      let bonds: PsfFile['bonds'] | undefined

      const id = readLine(state.tokenizer).trim()

      while (tokenizer.tokenEnd < tokenizer.length) {
          const line = readLine(state.tokenizer).trim()
          if (line.includes('!NTITLE')) {
              const numTitle = parseSectionCount(line)
              // the title is optional, so an unreadable count just yields no title lines
              title = parseTitle(state, numTitle === undefined ? 0 : numTitle)
          } else if (line.includes('!NATOM')) {
              const numAtoms = parseSectionCount(line)
              if (numAtoms === undefined) {
                  // without this guard a NaN/negative count would reach the token builders
                  return Result.error('invalid atom count in !NATOM section header')
              }
              atoms = await handleAtoms(state, numAtoms)
          } else if (line.includes('!NBOND')) {
              const numBonds = parseSectionCount(line)
              if (numBonds === undefined) {
                  return Result.error('invalid bond count in !NBOND section header')
              }
              bonds = await handleBonds(state, numBonds)
              break // TODO: don't break when the sections below are implemented
          }
          // TODO: sections not parsed yet: !NTHETA, !NPHI, !NIMPHI, !NDON, !NACC,
          // !NNB, !NGRP NST2, !MOLNT, !NUMLP NUMLPH, !NCRTERM
      }

      if (atoms === undefined) {
          return Result.error('no atoms data')
      }
      if (bonds === undefined) {
          return Result.error('no bonds data')
      }

      const result: PsfFile = {
          id,
          title: title === undefined ? [] : title,
          atoms,
          bonds
      }
      return Result.success(result)
  }
  /**
   * Creates the task that parses PSF text.
   *
   * @param data complete text of the PSF file
   * @returns a task named 'Parse PSF' that resolves to the parse result
   */
  export function parsePsf(data: string) {
      const run = async (ctx: RuntimeContext): Promise<Result<PsfFile>> => {
          const parsed = await parseInternal(data, ctx)
          return parsed
      }
      return Task.create<Result<PsfFile>>('Parse PSF', run)
  }
  189. }