parser.ts

/**
 * Copyright (c) 2020 mol* contributors, licensed under MIT, See LICENSE file for more info.
 *
 * @author Sebastian Bittrich <sebastian.bittrich@rcsb.org>
 */

import { Column } from '../../../mol-data/db';
import { MolFile, handleAtoms, handleBonds } from '../mol/parser';
import { Task } from '../../../mol-task';
import { ReaderResult as Result } from '../result';
import { Tokenizer, TokenBuilder } from '../common/text/tokenizer';
import TokenColumn from '../common/text/column/token';

/** http://c4.cabrillo.edu/404/ctfile.pdf - page 41 */
export interface SdfFile {
    readonly compounds: {
        readonly molFile: MolFile,
        readonly dataItems: {
            readonly dataHeader: Column<string>,
            readonly data: Column<string>
        }
    }[]
}
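
/**
 * Reads the data items that follow a compound's connection table: each
 * '> <NAME>' header line and the value line after it are recorded as token
 * ranges and exposed as string columns.
 */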
function handleDataItems(tokenizer: Tokenizer): { dataHeader: Column<string>, data: Column<string> } {
    const dataHeader = TokenBuilder.create(tokenizer.data, 32);
    const data = TokenBuilder.create(tokenizer.data, 32);

    let sawHeaderToken = false;
    while (tokenizer.position < tokenizer.length) {
        const line = Tokenizer.readLine(tokenizer);
        if (!!line) {
            if (line.startsWith('> <')) {
                TokenBuilder.add(dataHeader, tokenizer.tokenStart + 3, tokenizer.tokenEnd - 1);
                sawHeaderToken = true;
            } else if (sawHeaderToken) {
                TokenBuilder.add(data, tokenizer.tokenStart, tokenizer.tokenEnd);
                sawHeaderToken = false;
                // TODO can there be multiline values?
            }
        } else {
            sawHeaderToken = false;
        }
    }

    return {
        dataHeader: TokenColumn(dataHeader)(Column.Schema.str),
        data: TokenColumn(data)(Column.Schema.str)
    };
}
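
/**
 * Parses a single compound: the three molfile header lines, the counts line,
 * the atom and bond blocks (delegated to the mol parser), and any trailing
 * data items.
 */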
function handleMolFile(data: string) {
    const tokenizer = Tokenizer(data);

    const title = Tokenizer.readLine(tokenizer).trim();
    const program = Tokenizer.readLine(tokenizer).trim();
    const comment = Tokenizer.readLine(tokenizer).trim();

    const counts = Tokenizer.readLine(tokenizer);
    const atomCount = +counts.substr(0, 3), bondCount = +counts.substr(3, 3);

    const atoms = handleAtoms(tokenizer, atomCount);
    const bonds = handleBonds(tokenizer, bondCount);
    const dataItems = handleDataItems(tokenizer);

    return {
        molFile: { title, program, comment, atoms, bonds },
        dataItems
    };
}
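
// Compounds in an SDF file are separated by a '$$$$' record delimiter.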
const delimiter = '$$$$';
function parseInternal(data: string): Result<SdfFile> {
    const result: SdfFile = { compounds: data.split(delimiter).map(d => handleMolFile(d)) };
    return Result.success(result);
}
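
/** Wraps SDF parsing in a mol-task Task so callers can schedule and await it. */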
export function parseSdf(data: string) {
    return Task.create<Result<SdfFile>>('Parse Sdf', async () => {
        return parseInternal(data);
    });
}
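
A minimal usage sketch, assuming the mol-task Task exposes a run() method that resolves to the ReaderResult produced above (an isError discriminant, a message on failure, and a result payload on success); the caller function name is illustrative only.

async function exampleUsage(sdfString: string) {
    // Hypothetical caller; assumes Task#run() and the ReaderResult shape (isError, message, result).
    const parsed = await parseSdf(sdfString).run();
    if (parsed.isError) {
        console.error('SDF parsing failed:', parsed.message);
        return;
    }
    // On success, the result holds one entry per '$$$$'-delimited compound.
    console.log(`Parsed ${parsed.result.compounds.length} compound(s)`);
}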