parser.ts

/**
 * Copyright (c) 2020-2021 mol* contributors, licensed under MIT, See LICENSE file for more info.
 *
 * @author Sebastian Bittrich <sebastian.bittrich@rcsb.org>
 * @author Alexander Rose <alexander.rose@weirdbyte.de>
 */

import { Column } from '../../../mol-data/db';
import { MolFile, handleAtoms, handleBonds } from '../mol/parser';
import { Task } from '../../../mol-task';
import { ReaderResult as Result } from '../result';
import { Tokenizer, TokenBuilder } from '../common/text/tokenizer';
import { TokenColumnProvider as TokenColumn } from '../common/text/column/token';

/** http://c4.cabrillo.edu/404/ctfile.pdf - page 41 */
export interface SdfFileCompound {
    readonly molFile: MolFile,
    readonly dataItems: {
        readonly dataHeader: Column<string>,
        readonly data: Column<string>
    }
}

export interface SdfFile {
    readonly compounds: SdfFileCompound[]
}

const delimiter = '$$$$';
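
/**
 * Reads the data items that trail each molfile. Each item is a `> <name>`
 * header line followed by one or more value lines and a terminating blank
 * line; `$$$$` ends the whole record.
 */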
function handleDataItems(tokenizer: Tokenizer): { dataHeader: Column<string>, data: Column<string> } {
    const dataHeader = TokenBuilder.create(tokenizer.data, 32);
    const data = TokenBuilder.create(tokenizer.data, 32);

    while (tokenizer.position < tokenizer.length) {
        const line = Tokenizer.readLine(tokenizer);
        if (line.startsWith(delimiter)) break;
        if (!line) continue;

        if (line.startsWith('> ')) {
            // skip the '> ' prefix; the token covers the '<name>' part of the header
            TokenBuilder.add(dataHeader, tokenizer.tokenStart + 2, tokenizer.tokenEnd);

            // values may span multiple lines; extend the token until a blank
            // line, the record delimiter, or the next header is reached
            Tokenizer.markLine(tokenizer);
            const start = tokenizer.tokenStart;
            let end = tokenizer.tokenEnd;
            let added = false;
            while (tokenizer.position < tokenizer.length) {
                const line2 = Tokenizer.readLine(tokenizer);
                if (!line2 || line2.startsWith(delimiter) || line2.startsWith('> ')) {
                    TokenBuilder.add(data, start, end);
                    added = true;
                    break;
                }
                end = tokenizer.tokenEnd;
            }
            // data item ran to the end of the input without a terminator
            if (!added) {
                TokenBuilder.add(data, start, end);
            }
        }
    }

    return {
        dataHeader: TokenColumn(dataHeader)(Column.Schema.str),
        data: TokenColumn(data)(Column.Schema.str)
    };
}
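
/**
 * Parses a single record: the three molfile header lines, the counts line,
 * the atom and bond blocks, and any trailing data items. Returns `undefined`
 * (after skipping ahead to the next `$$$$`) when the counts line is malformed.
 */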
function handleMolFile(tokenizer: Tokenizer) {
    const title = Tokenizer.readLine(tokenizer).trim();
    const program = Tokenizer.readLine(tokenizer).trim();
    const comment = Tokenizer.readLine(tokenizer).trim();

    // counts line: atom and bond counts are fixed-width, 3-character columns
    const counts = Tokenizer.readLine(tokenizer);
    const atomCount = +counts.substring(0, 3), bondCount = +counts.substring(3, 6);

    if (Number.isNaN(atomCount) || Number.isNaN(bondCount)) {
        // try to skip to next molecule
        while (tokenizer.position < tokenizer.length) {
            const line = Tokenizer.readLine(tokenizer);
            if (line.startsWith(delimiter)) break;
        }
        return;
    }

    const atoms = handleAtoms(tokenizer, atomCount);
    const bonds = handleBonds(tokenizer, bondCount);
    const dataItems = handleDataItems(tokenizer);

    return {
        molFile: { title, program, comment, atoms, bonds },
        dataItems
    };
}
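
/**
 * Walks the whole input, collecting one compound per `$$$$`-delimited record.
 */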
function parseInternal(data: string): Result<SdfFile> {
    const tokenizer = Tokenizer(data);
    const compounds: SdfFile['compounds'] = [];
    while (tokenizer.position < tokenizer.length) {
        const c = handleMolFile(tokenizer);
        if (c) compounds.push(c);
    }
    return Result.success({ compounds });
}
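
/** Wraps parsing in a mol-task `Task` so callers can schedule and observe it. */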
export function parseSdf(data: string) {
    return Task.create<Result<SdfFile>>('Parse Sdf', async () => {
        return parseInternal(data);
    });
}
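
/**
 * Usage sketch (not part of the original file): this assumes the mol-task
 * `Task.run` API and the `isError`/`result` shape of `ReaderResult`, and
 * `sdfString` is a hypothetical variable holding the raw file contents.
 *
 *     const result = await parseSdf(sdfString).run();
 *     if (!result.isError) {
 *         for (const { molFile, dataItems } of result.result.compounds) {
 *             console.log(molFile.title, dataItems.dataHeader.rowCount);
 *         }
 *     }
 */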