parser.ts

/**
 * Copyright (c) 2020 mol* contributors, licensed under MIT, See LICENSE file for more info.
 *
 * Adapted from NGL.
 *
 * @author David Sehnal <david.sehnal@gmail.com>
 * @author Alexander Rose <alexander.rose@weirdbyte.de>
 */

import { Vec3 } from '../../../mol-math/linear-algebra';
import { Tokenizer } from '../common/text/tokenizer';
import { Column } from '../../../mol-data/db';
import { Task, chunkedSubtask, RuntimeContext } from '../../../mol-task';
import { ReaderResult as Result } from '../result';
import { parseFloat as fastParseFloat } from '../common/text/number-parser';

// https://h5cube-spec.readthedocs.io/en/latest/cubeformat.html
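// For reference, the layout described by the spec above (lengths in Bohr):
//   lines 1-2: free-text comments
//   line 3:    NAtoms originX originY originZ [NVal]   (NAtoms < 0 marks an orbital cube)
//   lines 4-6: Ni viX viY viZ                          (voxel count and basis vector per axis)
//   then NAtoms atom records, an optional orbital-id line, and the volumetric values.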
export interface CubeFile {
    name: string,
    header: CubeFile.Header,
    atoms: CubeFile.Atoms,
    values: Float64Array
}

export namespace CubeFile {
    export interface Header {
        orbitals: boolean,
        comment1: string,
        comment2: string,
        atomCount: number,
        origin: Vec3,
        dim: Vec3,
        basisX: Vec3,
        basisY: Vec3,
        basisZ: Vec3,
        dataSetIds: number[]
    }

    export interface Atoms {
        count: number,
        number: Column<number>,
        nuclearCharge: Column<number>,
        x: Column<number>,
        y: Column<number>,
        z: Column<number>
    }
}
// The Bohr radius in Angstrom; cube files specify lengths in Bohr.
const bohrToAngstromFactor = 0.529177210859;

function readHeader(tokenizer: Tokenizer) {
    const headerLines = Tokenizer.readLines(tokenizer, 6);
    // Field l of header line k (zero-based), or 0 when missing/unparsable.
    const h = (k: number, l: number) => {
        const field = +headerLines[k].trim().split(/\s+/g)[l];
        return Number.isNaN(field) ? 0 : field;
    };
    // Reads header line i + 2 as a (possibly signed) count followed by a vector,
    // converting Bohr to Angstrom.
    const basis = (i: number) => {
        const n = h(i + 2, 0);
        const s = bohrToAngstromFactor;
        return [Math.abs(n), Vec3.create(h(i + 2, 1) * s, h(i + 2, 2) * s, h(i + 2, 3) * s), n] as const;
    };

    const comment1 = headerLines[0].trim();
    const comment2 = headerLines[1].trim();

    const [atomCount, origin, rawAtomCount] = basis(0);
    const [NVX, basisX] = basis(1);
    const [NVY, basisY] = basis(2);
    const [NVZ, basisZ] = basis(3);

    const atoms = readAtoms(tokenizer, atomCount, bohrToAngstromFactor);

    const dataSetIds: number[] = [];
    if (rawAtomCount >= 0) {
        // Standard cube: NVal (if present) gives the number of data sets per grid point.
        let nVal = h(2, 4);
        if (nVal === 0) nVal = 1;
        for (let i = 0; i < nVal; i++) dataSetIds.push(i);
    } else {
        // Orbital cube (negative atom count): an extra line after the atoms
        // lists the number of orbitals followed by their ids.
        const counts = Tokenizer.readLine(tokenizer).trim().split(/\s+/g);
        for (let i = 0, _i = +counts[0]; i < _i; i++) dataSetIds.push(+counts[i + 1]);
    }

    const header: CubeFile.Header = { orbitals: rawAtomCount < 0, comment1, comment2, atomCount, origin, dim: Vec3.create(NVX, NVY, NVZ), basisX, basisY, basisZ, dataSetIds };

    return { header, atoms };
}
function readAtoms(tokenizer: Tokenizer, count: number, scaleFactor: number): CubeFile.Atoms {
    const number = new Int32Array(count);
    const value = new Float64Array(count);
    const x = new Float32Array(count);
    const y = new Float32Array(count);
    const z = new Float32Array(count);

    for (let i = 0; i < count; i++) {
        // Each atom record: atomic number, nuclear charge, then x, y, z in Bohr.
        const fields = Tokenizer.readLine(tokenizer).trim().split(/\s+/g);
        number[i] = +fields[0];
        value[i] = +fields[1];
        x[i] = +fields[2] * scaleFactor;
        y[i] = +fields[3] * scaleFactor;
        z[i] = +fields[4] * scaleFactor;
    }

    return {
        count,
        number: Column.ofArray({ array: number, schema: Column.Schema.int }),
        nuclearCharge: Column.ofArray({ array: value, schema: Column.Schema.float }),
        x: Column.ofArray({ array: x, schema: Column.Schema.float }),
        y: Column.ofArray({ array: y, schema: Column.Schema.float }),
        z: Column.ofArray({ array: z, schema: Column.Schema.float })
    };
}
function readValues(ctx: RuntimeContext, tokenizer: Tokenizer, header: CubeFile.Header) {
    const N = header.dim[0] * header.dim[1] * header.dim[2] * header.dataSetIds.length;
    const chunkSize = 100 * 100 * 100;
    const data = new Float64Array(N);
    let offset = 0;

    return chunkedSubtask(ctx, chunkSize, data, (count, data) => {
        const max = Math.min(N, offset + count);
        for (let i = offset; i < max; i++) {
            // Scan the next whitespace-delimited token and parse it in place,
            // avoiding substring allocations for the (potentially huge) data block.
            Tokenizer.skipWhitespace(tokenizer);
            tokenizer.tokenStart = tokenizer.position;
            Tokenizer.eatValue(tokenizer);
            data[i] = fastParseFloat(tokenizer.data, tokenizer.tokenStart, tokenizer.tokenEnd);
        }
        offset = max;
        // Returning 0 tells chunkedSubtask that all values have been consumed.
        return max === N ? 0 : chunkSize;
    }, (ctx, _, i) => ctx.update({ current: Math.min(i, N), max: N }));
}
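// Note (per the cube spec, not stated in the original file): values stream with
// z varying fastest and any multiple data sets interleaved per grid point, so a
// flat index into `data` works out to roughly:
//
//   i = ((x * dim[1] + y) * dim[2] + z) * dataSetIds.length + m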
export function parseCube(data: string, name: string) {
    return Task.create<Result<CubeFile>>('Parse Cube', async taskCtx => {
        await taskCtx.update('Reading header...');
        const tokenizer = Tokenizer(data);
        const { header, atoms } = readHeader(tokenizer);
        const values = await readValues(taskCtx, tokenizer, header);
        return Result.success({ header, atoms, values, name });
    });
}
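
// Usage sketch (an assumption, not part of the original file): mol-task Tasks
// are executed with `run()`, and a successful ReaderResult carries the parsed
// file on `result`:
//
//   const parsed = await parseCube(cubeText, 'my-density').run();
//   if (!parsed.isError) {
//       const { header, atoms, values } = parsed.result;
//   }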