parser.ts 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. /*
  2. * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * @author Alexander Rose <alexander.rose@weirdbyte.de>
  5. * @author David Sehnal <david.sehnal@gmail.com>
  6. */
  7. import Tokenizer from '../common/text/tokenizer'
  8. import FixedColumn from '../common/text/column/fixed'
  9. import { ColumnType, UndefinedColumn } from '../common/column'
  10. import * as Schema from './schema'
  11. import Result from '../result'
  /**
   * Mutable parsing context handed to every handler while one structure
   * (title, atom count, atom records, box line) is being read.
   */
  interface State {
    /** Tokenizer the handlers read their lines from. */
    tokenizer: Tokenizer;
    /** Header values collected so far for the current structure. */
    header: Schema.Header;
    /** Atom count taken from the line that follows the title. */
    numberOfAtoms: number;
  }
  /**
   * Builds a header filled with neutral defaults: empty title, zero time,
   * no velocities, zero precisions and an all-zero box.
   *
   * A new object (including new nested `precision` and `box` values) is
   * created on every call, so callers may mutate the result freely.
   */
  function createEmptyHeader(): Schema.Header {
    const precision = { position: 0, velocity: 0 };
    const box: [number, number, number] = [0, 0, 0];
    return { title: '', timeInPs: 0, hasVelocities: false, precision, box };
  }
  /**
   * Creates the parsing state for one structure: the given tokenizer, a fresh
   * default header and an atom count of zero.
   */
  function State(tokenizer: Tokenizer): State {
    const header = createEmptyHeader();
    return { tokenizer, header, numberOfAtoms: 0 };
  }
  /**
   * Reads the title line: a free format string, optionally followed by the
   * time in ps after 't='.
   *
   * Writes `state.header.title` and, when a time could be parsed,
   * `state.header.timeInPs`. If the line contains no 't=' marker, or the text
   * after the last 't=' does not start with a number, the whole line is kept
   * as the title and the time is left untouched.
   */
  function handleTitleString(state: State) {
    const { tokenizer, header } = state;

    let line = Tokenizer.readLine(tokenizer);
    // Skip one potential empty line in front of the title.
    if (line.trim().length === 0) {
      line = Tokenizer.readLine(tokenizer);
    }

    const timeOffset = line.lastIndexOf('t=');
    const timeInPs = timeOffset >= 0 ? parseFloat(line.substring(timeOffset + 2)) : NaN;

    if (isNaN(timeInPs)) {
      // Either there is no 't=' at all, or it is not followed by a number
      // (e.g. the title merely contains a word ending in 't='). Storing NaN
      // as the time and cutting the title at that point would lose data.
      header.title = line;
      return;
    }

    header.timeInPs = timeInPs;

    let title = line.substring(0, timeOffset).trim();
    // Drop the comma that may separate the title from the 't=' part.
    if (title && title[title.length - 1] === ',') {
      title = title.substring(0, title.length - 1);
    }
    header.title = title;
  }
  /**
   * Reads the number of atoms (free format integer) from the current line and
   * stores it in `state.numberOfAtoms`.
   *
   * The value is `NaN` when the line does not start with an integer.
   */
  function handleNumberOfAtoms(state: State) {
    const { tokenizer } = state;
    Tokenizer.markLine(tokenizer);
    const line = Tokenizer.getTokenString(tokenizer);
    // Explicit radix: the count is a decimal integer and must never be
    // interpreted as a prefixed literal such as '0x...'.
    state.numberOfAtoms = parseInt(line, 10);
  }
  /**
   * Reads `state.numberOfAtoms` atom lines and exposes them as fixed-width
   * columns. Also fills in `hasVelocities` and both precisions of the header.
   *
   * The format is fixed, i.e. all columns are in a fixed position.
   * Optionally (currently only with trjconv) gro files can be written with
   * any number of decimal places; the format is then n+5 positions with
   * n decimal places (n+1 for velocities) instead of 8 with 3 (with 4 for
   * velocities). Upon reading, the precision is inferred from the distance
   * between the decimal points (which will be n+5). Columns contain the
   * following information (from left to right):
   *   residue number (5 positions, integer)
   *   residue name (5 characters)
   *   atom name (5 characters)
   *   atom number (5 positions, integer)
   *   position (in nm, x y z in 3 columns, each 8 positions with 3 decimal places)
   *   velocity (in nm/ps (or km/s), x y z in 3 columns, each 8 positions with 4 decimal places)
   *
   * @throws Error when the sampled atom line contains no decimal number, so
   *         the column widths cannot be inferred.
   */
  function handleAtoms(state: State): Schema.Atoms {
    const { tokenizer, header, numberOfAtoms } = state;
    const lines = Tokenizer.readLines(tokenizer, numberOfAtoms);

    // Sample the text between the first two line indices (presumably the
    // first atom line) and skip the four 5-character identifier columns.
    const positionSample = tokenizer.data.substring(lines.indices[0], lines.indices[1]).substring(20);
    const precisions = positionSample.match(/\.\d+/g);
    if (precisions === null) {
      // Fail with a readable message instead of a TypeError on `null.length`.
      throw new Error('Unable to infer coordinate precision: no decimal number found in the first atom line.');
    }

    // Three positions plus three velocities give six decimal fields.
    const hasVelocities = precisions.length === 6;
    header.hasVelocities = hasVelocities;
    // Each match includes the leading '.', hence the -1.
    header.precision.position = precisions[0].length - 1;
    header.precision.velocity = hasVelocities ? precisions[3].length - 1 : 0;

    const positionOffset = 20;
    const positionWidth = header.precision.position + 5;
    const velocityOffset = positionOffset + 3 * positionWidth;
    const velocityWidth = header.precision.velocity + 4;

    const col = FixedColumn(lines);
    const undef = UndefinedColumn(numberOfAtoms, ColumnType.float);

    const ret = {
      count: numberOfAtoms,
      residueNumber: col(0, 5, ColumnType.int),
      residueName: col(5, 5, ColumnType.pooledStr),
      atomName: col(10, 5, ColumnType.pooledStr),
      atomNumber: col(15, 5, ColumnType.int),
      x: col(positionOffset, positionWidth, ColumnType.float),
      y: col(positionOffset + positionWidth, positionWidth, ColumnType.float),
      z: col(positionOffset + 2 * positionWidth, positionWidth, ColumnType.float),
      vx: hasVelocities ? col(velocityOffset, velocityWidth, ColumnType.float) : undef,
      vy: hasVelocities ? col(velocityOffset + velocityWidth, velocityWidth, ColumnType.float) : undef,
      vz: hasVelocities ? col(velocityOffset + 2 * velocityWidth, velocityWidth, ColumnType.float) : undef,
    };
    return ret;
  }
  /**
   * Reads the box vector line (free format, space separated reals). The file
   * format lists the values as
   *   v1(x) v2(y) v3(z) v1(y) v1(z) v2(x) v2(z) v3(x) v3(y),
   * where the last 6 values may be omitted (they are then taken as zero).
   * Gromacs only supports boxes with v1(y)=v1(z)=v2(z)=0.
   *
   * Only the first three values are stored, in `state.header.box`.
   */
  function handleBoxVectors(state: State) {
    const fields = Tokenizer.readLine(state.tokenizer).trim().split(/\s+/g);
    const [x, y, z] = fields;
    state.header.box = [Number(x), Number(y), Number(z)];
  }
  /** Returns true when `data` holds a non-whitespace character at or after `position`. */
  function hasRemainingContent(data: string, position: number): boolean {
    // A fresh global regex per call so that `lastIndex` is never shared.
    const nonWhitespace = /\S/g;
    nonWhitespace.lastIndex = position;
    return nonWhitespace.test(data);
  }

  /**
   * Parses every structure contained in `data`. Each structure consists of a
   * title line, an atom count line, the atom lines and a box line; they are
   * read in that order until the input is exhausted.
   *
   * @param data complete text of the file
   * @returns a successful result wrapping all parsed structures
   */
  function parseInternal(data: string): Result<Schema.File> {
    const tokenizer = Tokenizer(data);
    const structures: Schema.Structure[] = [];

    // Stop as soon as only whitespace is left: trailing blank lines after the
    // last box line must not be parsed as one more (bogus) structure.
    while (hasRemainingContent(data, tokenizer.position)) {
      const state = State(tokenizer);
      handleTitleString(state);
      handleNumberOfAtoms(state);
      const atoms = handleAtoms(state);
      handleBoxVectors(state);
      structures.push({ header: state.header, atoms });
    }

    const result: Schema.File = { structures };
    return Result.success(result);
  }
  /**
   * Public entry point: parses the text of a gro file.
   *
   * @param data complete text of the file
   * @returns whatever `parseInternal` produces for `data`
   */
  export function parse(data: string) {
    const parsed = parseInternal(data);
    return parsed;
  }

  export default parse;