parser.ts 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164
  1. /**
  2. * Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * @author Alexander Rose <alexander.rose@weirdbyte.de>
  5. * @author David Sehnal <david.sehnal@gmail.com>
  6. */
  7. import { Column } from 'mol-data/db'
  8. import { Tokenizer } from '../common/text/tokenizer'
  9. import FixedColumn from '../common/text/column/fixed'
  10. import * as Schema from './schema'
  11. import { ReaderResult as Result } from '../result'
  12. import { Task, RuntimeContext } from 'mol-task'
  13. interface State {
  14. tokenizer: Tokenizer,
  15. header: Schema.GroHeader,
  16. numberOfAtoms: number,
  17. runtimeCtx: RuntimeContext
  18. }
  19. function createEmptyHeader(): Schema.GroHeader {
  20. return {
  21. title: '',
  22. timeInPs: 0,
  23. hasVelocities: false,
  24. precision: { position: 0, velocity: 0 },
  25. box: [0, 0, 0]
  26. };
  27. }
  28. function State(tokenizer: Tokenizer, runtimeCtx: RuntimeContext): State {
  29. return {
  30. tokenizer,
  31. header: createEmptyHeader(),
  32. numberOfAtoms: 0,
  33. runtimeCtx
  34. };
  35. }
  36. /**
  37. * title string (free format string, optional time in ps after 't=')
  38. */
  39. function handleTitleString(state: State) {
  40. const { tokenizer, header } = state;
  41. let line = Tokenizer.readLine(tokenizer);
  42. // skip potential empty lines...
  43. if (line.trim().length === 0) {
  44. line = Tokenizer.readLine(tokenizer);
  45. }
  46. const timeOffset = line.lastIndexOf('t=');
  47. if (timeOffset >= 0) {
  48. header.timeInPs = parseFloat(line.substring(timeOffset + 2));
  49. header.title = line.substring(0, timeOffset).trim();
  50. if (header.title && header.title[header.title.length - 1] === ',') {
  51. header.title = header.title.substring(0, header.title.length - 1);
  52. }
  53. } else {
  54. header.title = line;
  55. }
  56. }
  57. /**
  58. * number of atoms (free format integer)
  59. */
  60. function handleNumberOfAtoms(state: State) {
  61. const { tokenizer } = state;
  62. Tokenizer.markLine(tokenizer);
  63. const line = Tokenizer.getTokenString(tokenizer);
  64. state.numberOfAtoms = parseInt(line);
  65. }
  66. /**
  67. * This format is fixed, ie. all columns are in a fixed position.
  68. * Optionally (for now only yet with trjconv) you can write gro files
  69. * with any number of decimal places, the format will then be n+5
  70. * positions with n decimal places (n+1 for velocities) in stead
  71. * of 8 with 3 (with 4 for velocities). Upon reading, the precision
  72. * will be inferred from the distance between the decimal points
  73. * (which will be n+5). Columns contain the following information
  74. * (from left to right):
  75. * residue number (5 positions, integer)
  76. * residue name (5 characters)
  77. * atom name (5 characters)
  78. * atom number (5 positions, integer)
  79. * position (in nm, x y z in 3 columns, each 8 positions with 3 decimal places)
  80. * velocity (in nm/ps (or km/s), x y z in 3 columns, each 8 positions with 4 decimal places)
  81. */
  82. async function handleAtoms(state: State): Promise<Schema.GroAtoms> {
  83. const { tokenizer, numberOfAtoms } = state;
  84. const lines = await Tokenizer.readLinesAsync(tokenizer, numberOfAtoms, state.runtimeCtx, 100000);
  85. const positionSample = tokenizer.data.substring(lines.indices[0], lines.indices[1]).substring(20);
  86. const precisions = positionSample.match(/\.\d+/g)!;
  87. const hasVelocities = precisions.length === 6;
  88. state.header.hasVelocities = hasVelocities;
  89. state.header.precision.position = precisions[0].length - 1;
  90. state.header.precision.velocity = hasVelocities ? precisions[3].length - 1 : 0;
  91. const pO = 20;
  92. const pW = state.header.precision.position + 5;
  93. const vO = pO + 3 * pW;
  94. const vW = state.header.precision.velocity + 4;
  95. const col = FixedColumn(lines);
  96. const undef = Column.Undefined(state.numberOfAtoms, Column.Schema.float);
  97. const ret = {
  98. count: state.numberOfAtoms,
  99. residueNumber: col(0, 5, Column.Schema.int),
  100. residueName: col(5, 5, Column.Schema.str),
  101. atomName: col(10, 5, Column.Schema.str),
  102. atomNumber: col(15, 5, Column.Schema.int),
  103. x: col(pO, pW, Column.Schema.float),
  104. y: col(pO + pW, pW, Column.Schema.float),
  105. z: col(pO + 2 * pW, pW, Column.Schema.float),
  106. vx: hasVelocities ? col(vO, vW, Column.Schema.float) : undef,
  107. vy: hasVelocities ? col(vO + vW, vW, Column.Schema.float) : undef,
  108. vz: hasVelocities ? col(vO + 2 * vW, vW, Column.Schema.float) : undef,
  109. };
  110. return ret;
  111. }
  112. /**
  113. * box vectors (free format, space separated reals), values:
  114. * v1(x) v2(y) v3(z) v1(y) v1(z) v2(x) v2(z) v3(x) v3(y),
  115. * the last 6 values may be omitted (they will be set to zero).
  116. * Gromacs only supports boxes with v1(y)=v1(z)=v2(z)=0.
  117. */
  118. function handleBoxVectors(state: State) {
  119. const { tokenizer } = state;
  120. const values = Tokenizer.readLine(tokenizer).trim().split(/\s+/g);
  121. state.header.box = [+values[0], +values[1], +values[2]];
  122. }
  123. async function parseInternal(data: string, ctx: RuntimeContext): Promise<Result<Schema.GroFile>> {
  124. const tokenizer = Tokenizer(data);
  125. await ctx.update({ message: 'Parsing...', current: 0, max: data.length });
  126. const structures: Schema.GroStructure[] = [];
  127. while (tokenizer.position < data.length) {
  128. const state = State(tokenizer, ctx);
  129. handleTitleString(state);
  130. handleNumberOfAtoms(state);
  131. const atoms = await handleAtoms(state);
  132. handleBoxVectors(state);
  133. structures.push({ header: state.header, atoms });
  134. }
  135. const result: Schema.GroFile = { structures };
  136. return Result.success(result);
  137. }
  138. export function parse(data: string) {
  139. return Task.create<Result<Schema.GroFile>>('Parse GRO', async ctx => {
  140. return await parseInternal(data, ctx);
  141. });
  142. }
  143. export default parse;