Browse Source

basic MOL/SDF support

David Sehnal 5 years ago
parent
commit
1b26aa4b36

+ 73 - 0
src/mol-io/reader/_spec/mol.spec.ts

@@ -0,0 +1,73 @@
+
+import { parseMol } from '../mol/parser'
+
+const MolString = `2244
+  -OEChem-04072009073D
+
+ 21 21  0     0  0  0  0  0  0999 V2000
+    1.2333    0.5540    0.7792 O   0  0  0  0  0  0  0  0  0  0  0  0
+   -0.6952   -2.7148   -0.7502 O   0  0  0  0  0  0  0  0  0  0  0  0
+    0.7958   -2.1843    0.8685 O   0  0  0  0  0  0  0  0  0  0  0  0
+    1.7813    0.8105   -1.4821 O   0  0  0  0  0  0  0  0  0  0  0  0
+   -0.0857    0.6088    0.4403 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -0.7927   -0.5515    0.1244 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -0.7288    1.8464    0.4133 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -2.1426   -0.4741   -0.2184 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -2.0787    1.9238    0.0706 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -2.7855    0.7636   -0.2453 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -0.1409   -1.8536    0.1477 C   0  0  0  0  0  0  0  0  0  0  0  0
+    2.1094    0.6715   -0.3113 C   0  0  0  0  0  0  0  0  0  0  0  0
+    3.5305    0.5996    0.1635 C   0  0  0  0  0  0  0  0  0  0  0  0
+   -0.1851    2.7545    0.6593 H   0  0  0  0  0  0  0  0  0  0  0  0
+   -2.7247   -1.3605   -0.4564 H   0  0  0  0  0  0  0  0  0  0  0  0
+   -2.5797    2.8872    0.0506 H   0  0  0  0  0  0  0  0  0  0  0  0
+   -3.8374    0.8238   -0.5090 H   0  0  0  0  0  0  0  0  0  0  0  0
+    3.7290    1.4184    0.8593 H   0  0  0  0  0  0  0  0  0  0  0  0
+    4.2045    0.6969   -0.6924 H   0  0  0  0  0  0  0  0  0  0  0  0
+    3.7105   -0.3659    0.6426 H   0  0  0  0  0  0  0  0  0  0  0  0
+   -0.2555   -3.5916   -0.7337 H   0  0  0  0  0  0  0  0  0  0  0  0
+  1  5  1  0  0  0  0
+  1 12  1  0  0  0  0
+  2 11  1  0  0  0  0
+  2 21  1  0  0  0  0
+  3 11  2  0  0  0  0
+  4 12  2  0  0  0  0
+  5  6  1  0  0  0  0
+  5  7  2  0  0  0  0
+  6  8  2  0  0  0  0
+  6 11  1  0  0  0  0
+  7  9  1  0  0  0  0
+  7 14  1  0  0  0  0
+  8 10  1  0  0  0  0
+  8 15  1  0  0  0  0
+  9 10  2  0  0  0  0
+  9 16  1  0  0  0  0
+ 10 17  1  0  0  0  0
+ 12 13  1  0  0  0  0
+ 13 18  1  0  0  0  0
+ 13 19  1  0  0  0  0
+ 13 20  1  0  0  0  0
+M  END`
+
+describe('mol reader', () => {
+    it('basic', async () => {
+        const parsed = await parseMol(MolString).run();
+        if (parsed.isError) {
+            throw new Error(parsed.message);
+        }
+        const { atoms, bonds } = parsed.result;
+
+        // atom and bond counts as given on the counts line of the fixture
+        expect(atoms.count).toBe(21);
+        expect(bonds.count).toBe(21);
+
+        // toBeCloseTo takes the number of decimal digits to check, not a tolerance;
+        // the fixture coordinates are written with 4 decimals.
+        expect(atoms.x.value(0)).toBeCloseTo(1.2333, 4);
+        expect(atoms.y.value(0)).toBeCloseTo(0.5540, 4);
+        expect(atoms.z.value(0)).toBeCloseTo(0.7792, 4);
+        expect(atoms.type_symbol.value(0)).toBe('O');
+
+        // last bond line of the fixture: "13 20  1"
+        expect(bonds.atomIdxA.value(20)).toBe(13);
+        expect(bonds.atomIdxB.value(20)).toBe(20);
+        expect(bonds.order.value(20)).toBe(1);
+    });
+});

+ 115 - 0
src/mol-io/reader/mol/parser.ts

@@ -0,0 +1,115 @@
+/**
+ * Copyright (c) 2020 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import { Column } from '../../../mol-data/db';
+import { Task } from '../../../mol-task';
+import TokenColumn from '../common/text/column/token';
+import { TokenBuilder, Tokenizer } from '../common/text/tokenizer';
+import { ReaderResult as Result } from '../result';
+
+/**
+ * Parsed content of a MOL file. Only a subset of the MolFile V2000 format
+ * is represented.
+ */
+export interface MolFile {
+    /** First header line, trimmed. */
+    readonly title: string;
+    /** Second header line, trimmed. */
+    readonly program: string;
+    /** Third header line, trimmed. */
+    readonly comment: string;
+    /** Atom block columns; each column has `count` rows. */
+    readonly atoms: {
+        readonly count: number;
+        readonly x: Column<number>;
+        readonly y: Column<number>;
+        readonly z: Column<number>;
+        readonly type_symbol: Column<string>;
+    };
+    /** Bond block columns; atom indices are kept as written in the file. */
+    readonly bonds: {
+        readonly count: number;
+        readonly atomIdxA: Column<number>;
+        readonly atomIdxB: Column<number>;
+        readonly order: Column<number>;
+    };
+}
+
+/**
+ * Reads `count` atom lines starting at the tokenizer's current line.
+ *
+ * Each line is sliced at fixed offsets relative to its start: x in [0, 10),
+ * y in [10, 20), z in [20, 30) and the type symbol in [31, 34). Every slice
+ * goes through `Tokenizer.trim` before its token bounds are recorded.
+ *
+ * @param tokenizer Tokenizer positioned at the first atom line.
+ * @param count Number of atom lines to read.
+ * @returns Token-backed columns for the coordinates (float) and type symbols (str).
+ */
+function handleAtoms(tokenizer: Tokenizer, count: number): MolFile['atoms'] {
+    const capacity = count * 2;
+    const xTokens = TokenBuilder.create(tokenizer.data, capacity);
+    const yTokens = TokenBuilder.create(tokenizer.data, capacity);
+    const zTokens = TokenBuilder.create(tokenizer.data, capacity);
+    const symbolTokens = TokenBuilder.create(tokenizer.data, capacity);
+
+    let row = 0;
+    while (row < count) {
+        Tokenizer.markLine(tokenizer);
+        const lineStart = tokenizer.tokenStart;
+        // Remember where the tokenizer stands after marking the line so it can be
+        // put back once the fixed-width fields have been extracted.
+        const resumeAt = tokenizer.position;
+
+        Tokenizer.trim(tokenizer, lineStart, lineStart + 10);
+        TokenBuilder.addUnchecked(xTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
+
+        Tokenizer.trim(tokenizer, lineStart + 10, lineStart + 20);
+        TokenBuilder.addUnchecked(yTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
+
+        Tokenizer.trim(tokenizer, lineStart + 20, lineStart + 30);
+        TokenBuilder.addUnchecked(zTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
+
+        Tokenizer.trim(tokenizer, lineStart + 31, lineStart + 34);
+        TokenBuilder.addUnchecked(symbolTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
+
+        tokenizer.position = resumeAt;
+        row += 1;
+    }
+
+    const atoms: MolFile['atoms'] = {
+        count,
+        x: TokenColumn(xTokens)(Column.Schema.float),
+        y: TokenColumn(yTokens)(Column.Schema.float),
+        z: TokenColumn(zTokens)(Column.Schema.float),
+        type_symbol: TokenColumn(symbolTokens)(Column.Schema.str)
+    };
+    return atoms;
+}
+
+/**
+ * Reads `count` bond lines starting at the tokenizer's current line.
+ *
+ * Each line is sliced at fixed offsets relative to its start: first atom index
+ * in [0, 3), second atom index in [3, 6) and bond order in [6, 9). Every slice
+ * goes through `Tokenizer.trim` before its token bounds are recorded.
+ *
+ * @param tokenizer Tokenizer positioned at the first bond line.
+ * @param count Number of bond lines to read.
+ * @returns Token-backed integer columns for both atom indices and the bond order.
+ */
+function handleBonds(tokenizer: Tokenizer, count: number): MolFile['bonds'] {
+    const capacity = count * 2;
+    const firstTokens = TokenBuilder.create(tokenizer.data, capacity);
+    const secondTokens = TokenBuilder.create(tokenizer.data, capacity);
+    const orderTokens = TokenBuilder.create(tokenizer.data, capacity);
+
+    let row = 0;
+    while (row < count) {
+        Tokenizer.markLine(tokenizer);
+        const lineStart = tokenizer.tokenStart;
+        // Remember where the tokenizer stands after marking the line so it can be
+        // put back once the fixed-width fields have been extracted.
+        const resumeAt = tokenizer.position;
+
+        Tokenizer.trim(tokenizer, lineStart, lineStart + 3);
+        TokenBuilder.addUnchecked(firstTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
+
+        Tokenizer.trim(tokenizer, lineStart + 3, lineStart + 6);
+        TokenBuilder.addUnchecked(secondTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
+
+        Tokenizer.trim(tokenizer, lineStart + 6, lineStart + 9);
+        TokenBuilder.addUnchecked(orderTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
+
+        tokenizer.position = resumeAt;
+        row += 1;
+    }
+
+    const bonds: MolFile['bonds'] = {
+        count,
+        atomIdxA: TokenColumn(firstTokens)(Column.Schema.int),
+        atomIdxB: TokenColumn(secondTokens)(Column.Schema.int),
+        order: TokenColumn(orderTokens)(Column.Schema.int)
+    };
+    return bonds;
+}
+
+/**
+ * Parses a MOL string: three header lines, the counts line, then the atom
+ * and bond blocks. The counts are taken from the first two 3-character fields
+ * of the counts line and are not validated here.
+ *
+ * @param data Text content of the MOL file.
+ * @returns A success result wrapping the parsed file.
+ */
+function parseInternal(data: string): Result<MolFile> {
+    const tokenizer = Tokenizer(data);
+
+    // Header: the first three lines, in order.
+    const title = Tokenizer.readLine(tokenizer).trim();
+    const program = Tokenizer.readLine(tokenizer).trim();
+    const comment = Tokenizer.readLine(tokenizer).trim();
+
+    // Counts line: characters [0, 3) hold the atom count, [3, 6) the bond count.
+    const countsLine = Tokenizer.readLine(tokenizer);
+    const atomCount = Number(countsLine.slice(0, 3));
+    const bondCount = Number(countsLine.slice(3, 6));
+
+    // The atom block must be consumed before the bond block.
+    const atoms = handleAtoms(tokenizer, atomCount);
+    const bonds = handleBonds(tokenizer, bondCount);
+
+    const molFile: MolFile = { title, program, comment, atoms, bonds };
+    return Result.success(molFile);
+}
+
+/**
+ * Creates a task that parses `data` as a MOL file.
+ *
+ * @param data Text content of the MOL file.
+ * @returns A task resolving to the parse result for a `MolFile`.
+ */
+export function parseMol(data: string) {
+    // The task name identifies the format being parsed.
+    return Task.create<Result<MolFile>>('Parse MOL', async () => {
+        return parseInternal(data);
+    });
+}

+ 91 - 0
src/mol-model-formats/structure/mol.ts

@@ -0,0 +1,91 @@
+/**
+ * Copyright (c) 2019-2020 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ */
+
+import { Column, Table } from '../../mol-data/db';
+import { MolFile } from '../../mol-io/reader/mol/parser';
+import { Model } from '../../mol-model/structure/model';
+import { MoleculeType } from '../../mol-model/structure/model/types';
+import { RuntimeContext, Task } from '../../mol-task';
+import { createModels } from './basic/parser';
+import { BasicSchema, createBasic } from './basic/schema';
+import { ComponentBuilder } from './common/component';
+import { EntityBuilder } from './common/entity';
+import { ModelFormat } from './format';
+import { IndexPairBonds } from './property/bonds/index-pair';
+
+/**
+ * Builds models from a parsed MOL file.
+ *
+ * All atoms are placed in a single component 'UNK' of chain 'A' with sequence
+ * id 1, and the type symbol doubles as the atom name. When at least one model
+ * is created, the bonds of the file are attached to the first model as index
+ * pair bonds, with atom indices shifted down by one.
+ *
+ * @param mol Parsed MOL file.
+ * @param ctx Runtime context passed on to model creation.
+ * @returns The created models.
+ */
+async function getModels(mol: MolFile, ctx: RuntimeContext): Promise<Model[]> {
+    const { atoms, bonds } = mol;
+    const atomCount = atoms.count;
+
+    // Constant columns shared between the auth_* and label_* fields.
+    const compId = Column.ofConst('UNK', atomCount, Column.Schema.str);
+    const asymId = Column.ofConst('A', atomCount, Column.Schema.str);
+    const type_symbol = Column.asArrayColumn(atoms.type_symbol);
+    const seqId = Column.ofConst(1, atomCount, Column.Schema.int);
+
+    const atom_site = Table.ofPartialColumns(BasicSchema.atom_site, {
+        auth_asym_id: asymId,
+        auth_atom_id: type_symbol,
+        auth_comp_id: compId,
+        auth_seq_id: seqId,
+        Cartn_x: Column.asArrayColumn(atoms.x, Float32Array),
+        Cartn_y: Column.asArrayColumn(atoms.y, Float32Array),
+        Cartn_z: Column.asArrayColumn(atoms.z, Float32Array),
+        id: Column.range(0, atomCount - 1),
+
+        label_asym_id: asymId,
+        label_atom_id: type_symbol,
+        label_comp_id: compId,
+        label_seq_id: seqId,
+        label_entity_id: Column.ofConst('1', atomCount, Column.Schema.str),
+
+        occupancy: Column.ofConst(1, atomCount, Column.Schema.float),
+        type_symbol,
+
+        pdbx_PDB_model_num: Column.ofConst(1, atomCount, Column.Schema.int),
+    }, atomCount);
+
+    const entities = new EntityBuilder();
+    entities.getEntityId('UNK', MoleculeType.Unknown, 'A');
+
+    const components = new ComponentBuilder(seqId, type_symbol);
+    components.add('UNK', 0);
+
+    const basics = createBasic({
+        entity: entities.getEntityTable(),
+        chem_comp: components.getChemCompTable(),
+        atom_site
+    });
+
+    const models = await createModels(basics, MolFormat.create(mol), ctx);
+    if (models.length === 0) {
+        return models;
+    }
+
+    // Bond atom indices are shifted down by one before being stored as pairs.
+    const shiftDown = (idx: number) => idx - 1;
+    const indexA = Column.ofIntArray(Column.mapToArray(bonds.atomIdxA, shiftDown, Int32Array));
+    const indexB = Column.ofIntArray(Column.mapToArray(bonds.atomIdxB, shiftDown, Int32Array));
+    const order = Column.asArrayColumn(bonds.order, Int32Array);
+
+    const pairBonds = IndexPairBonds.fromData({ pairs: { indexA, indexB, order }, count: bonds.count });
+    IndexPairBonds.Provider.set(models[0], pairBonds);
+
+    return models;
+}
+
+//
+
+export { MolFormat };
+
+/** Model format descriptor whose data is a parsed MOL file. */
+type MolFormat = ModelFormat<MolFile>
+
+namespace MolFormat {
+    /** Type guard: true when the format's `kind` is 'mol'. */
+    export function is(x: ModelFormat): x is MolFormat {
+        const isMol = x.kind === 'mol';
+        return isMol;
+    }
+
+    /** Wraps a parsed MOL file, using its title as the format name. */
+    export function create(mol: MolFile): MolFormat {
+        const format: MolFormat = { kind: 'mol', name: mol.title, data: mol };
+        return format;
+    }
+}
+
+/**
+ * Creates a task that builds a model trajectory from a parsed MOL file.
+ *
+ * @param mol Parsed MOL file.
+ * @returns A task resolving to the trajectory.
+ */
+export function trajectoryFromMol(mol: MolFile): Task<Model.Trajectory> {
+    const build = (ctx: RuntimeContext) => getModels(mol, ctx);
+    return Task.create('Parse MOL', build);
+}

+ 2 - 1
src/mol-plugin-state/actions/data-format.ts

@@ -12,7 +12,7 @@ import { FileInfo, getFileInfo } from '../../mol-util/file-info';
 import { ParamDefinition as PD } from '../../mol-util/param-definition';
 import { PluginStateObject } from '../objects';
 import { PlyProvider } from './shape';
-import { DcdProvider, GroProvider, MmcifProvider, PdbProvider, Provider3dg, PsfProvider } from './structure';
+import { DcdProvider, GroProvider, MmcifProvider, PdbProvider, Provider3dg, PsfProvider, MolProvider } from './structure';
 import { Ccp4Provider, DscifProvider, Dsn6Provider } from './volume';
 
 export class DataFormatRegistry<D extends PluginStateObject.Data.Binary | PluginStateObject.Data.String> {
@@ -60,6 +60,7 @@ export class DataFormatRegistry<D extends PluginStateObject.Data.Binary | Plugin
         this.add('dscif', DscifProvider)
         this.add('dsn6', Dsn6Provider)
         this.add('gro', GroProvider)
+        this.add('mol', MolProvider)
         this.add('mmcif', MmcifProvider)
         this.add('pdb', PdbProvider)
         this.add('ply', PlyProvider)

+ 17 - 0
src/mol-plugin-state/actions/structure.ts

@@ -75,6 +75,23 @@ export const GroProvider: DataFormatProvider<any> = {
     }
 }
 
+/** Data format provider for MOL files; also registered for the `sdf` extension. */
+export const MolProvider: DataFormatProvider<any> = {
+    label: 'MOL',
+    description: 'MOL',
+    stringExtensions: ['mol', 'sdf'],
+    binaryExtensions: [],
+    // Applicability is decided from the file extension alone; `data` is not inspected.
+    isApplicable: (info: FileInfo, data: string) => info.ext === 'mol' || info.ext === 'sdf',
+    getDefaultBuilder: (ctx: PluginContext, data, options) => Task.create('MOL default builder', async () => {
+        const structureBuilder = ctx.builders.structure;
+        const trajectory = await structureBuilder.parseTrajectory(data, 'mol');
+        // Without visuals only the hierarchy is built, using the 'empty' representation preset.
+        const representationPreset = options.visuals ? 'atomic-detail' : 'empty';
+        await structureBuilder.hierarchy.applyPreset(trajectory, 'default', {
+            showUnitcell: options.visuals,
+            representationPreset
+        });
+    })
+};
+
 export const Provider3dg: DataFormatProvider<any> = {
     label: '3DG',
     description: '3DG',

+ 13 - 0
src/mol-plugin-state/formats/trajectory.ts

@@ -86,11 +86,24 @@ export const Provider3dg: TrajectoryFormatProvider = {
     parse: directTrajectory(StateTransforms.Model.TrajectoryFrom3DG)
 }
 
+/** Trajectory format provider for MOL files; also registered for the `sdf` extension. */
+export const MolProvider: TrajectoryFormatProvider = {
+    label: 'MOL',
+    description: 'MOL',
+    stringExtensions: ['mol', 'sdf'],
+    binaryExtensions: [],
+    // Applicability is decided from the file extension alone; `data` is not inspected.
+    isApplicable: (info: FileInfo, data: string) => info.ext === 'mol' || info.ext === 'sdf',
+    parse: directTrajectory(StateTransforms.Model.TrajectoryFromMOL)
+};
+
+
 export const BuildInTrajectoryFormats = [
     ['mmcif', MmcifProvider] as const,
     ['pdb', PdbProvider] as const,
     ['gro', GroProvider] as const,
     ['3dg', Provider3dg] as const,
+    ['mol', MolProvider] as const
 ] as const
 
 export type BuiltInTrajectoryFormat = (typeof BuildInTrajectoryFormats)[number][0]

+ 8 - 15
src/mol-plugin-state/objects.ts

@@ -5,25 +5,22 @@
  * @author Alexander Rose <alexander.rose@weirdbyte.de>
  */
 
+import { File3DG } from '../mol-io/reader/3dg/parser';
+import { Ccp4File } from '../mol-io/reader/ccp4/schema';
 import { CifFile } from '../mol-io/reader/cif';
+import { DcdFile } from '../mol-io/reader/dcd/parser';
+import { Dsn6File } from '../mol-io/reader/dsn6/schema';
 import { PlyFile } from '../mol-io/reader/ply/schema';
-import { Coordinates as _Coordinates } from '../mol-model/structure';
-import { Topology as _Topology } from '../mol-model/structure';
-import { Model as _Model, Structure as _Structure, StructureElement } from '../mol-model/structure';
+import { PsfFile } from '../mol-io/reader/psf/parser';
+import { ShapeProvider } from '../mol-model/shape/provider';
+import { Coordinates as _Coordinates, Model as _Model, Structure as _Structure, StructureElement, Topology as _Topology } from '../mol-model/structure';
 import { VolumeData } from '../mol-model/volume';
 import { PluginBehavior } from '../mol-plugin/behavior/behavior';
 import { Representation } from '../mol-repr/representation';
+import { ShapeRepresentation } from '../mol-repr/shape/representation';
 import { StructureRepresentation, StructureRepresentationState } from '../mol-repr/structure/representation';
 import { VolumeRepresentation } from '../mol-repr/volume/representation';
 import { StateObject, StateTransformer } from '../mol-state';
-import { Ccp4File } from '../mol-io/reader/ccp4/schema';
-import { Dsn6File } from '../mol-io/reader/dsn6/schema';
-import { ShapeRepresentation } from '../mol-repr/shape/representation';
-import { Shape as _Shape } from '../mol-model/shape';
-import { ShapeProvider } from '../mol-model/shape/provider';
-import { File3DG } from '../mol-io/reader/3dg/parser';
-import { DcdFile } from '../mol-io/reader/dcd/parser';
-import { PsfFile } from '../mol-io/reader/psf/parser';
 
 export type TypeClass = 'root' | 'data' | 'prop'
 
@@ -70,11 +67,7 @@ export namespace PluginStateObject {
     export namespace Format {
         export class Json extends Create<any>({ name: 'JSON Data', typeClass: 'Data' }) { }
         export class Cif extends Create<CifFile>({ name: 'CIF File', typeClass: 'Data' }) { }
-        export class Pdb extends Create<CifFile>({ name: 'PDB File', typeClass: 'Data' }) { }
-        export class Gro extends Create<CifFile>({ name: 'GRO File', typeClass: 'Data' }) { }
-        export class _3dg extends Create<CifFile>({ name: '3DG File', typeClass: 'Data' }) { }
         export class Psf extends Create<PsfFile>({ name: 'PSF File', typeClass: 'Data' }) { }
-        export class Dcd extends Create<CifFile>({ name: 'DCD File', typeClass: 'Data' }) { }
         export class Ply extends Create<PlyFile>({ name: 'PLY File', typeClass: 'Data' }) { }
         export class Ccp4 extends Create<Ccp4File>({ name: 'CCP4/MRC/MAP File', typeClass: 'Data' }) { }
         export class Dsn6 extends Create<Dsn6File>({ name: 'DSN6/BRIX File', typeClass: 'Data' }) { }

+ 21 - 0
src/mol-plugin-state/transforms/model.ts

@@ -32,6 +32,8 @@ import { createStructureComponent, StructureComponentParams, updateStructureComp
 import { StructureQueryHelper } from '../helpers/structure-query';
 import { StructureSelectionQueries } from '../helpers/structure-selection-query';
 import { PluginStateObject as SO, PluginStateTransform } from '../objects';
+import { parseMol } from '../../mol-io/reader/mol/parser';
+import { trajectoryFromMol } from '../../mol-model-formats/structure/mol';
 
 export { CoordinatesFromDcd };
 export { TopologyFromPsf };
@@ -40,6 +42,7 @@ export { TrajectoryFromBlob };
 export { TrajectoryFromMmCif };
 export { TrajectoryFromPDB };
 export { TrajectoryFromGRO };
+export { TrajectoryFromMOL };
 export { TrajectoryFrom3DG };
 export { ModelFromTrajectory };
 export { StructureFromTrajectory };
@@ -212,6 +215,24 @@ const TrajectoryFromGRO = PluginStateTransform.BuiltIn({
     }
 });
 
+/** State transform that parses MOL string data and creates a trajectory from it. */
+type TrajectoryFromMOL = typeof TrajectoryFromMOL
+const TrajectoryFromMOL = PluginStateTransform.BuiltIn({
+    name: 'trajectory-from-mol',
+    display: { name: 'Parse MOL', description: 'Parse MOL string and create trajectory.' },
+    from: [SO.Data.String],
+    to: SO.Molecule.Trajectory
+})({
+    apply({ a }) {
+        return Task.create('Parse MOL', async ctx => {
+            const parseResult = await parseMol(a.data).runInContext(ctx);
+            if (parseResult.isError) {
+                throw new Error(parseResult.message);
+            }
+
+            const models = await trajectoryFromMol(parseResult.result).runInContext(ctx);
+
+            // The label comes from the first model's entry; the description reports the model count.
+            const label = `${models[0].entry}`;
+            const plural = models.length === 1 ? '' : 's';
+            const description = `${models.length} model${plural}`;
+            return new SO.Molecule.Trajectory(models, { label, description });
+        });
+    }
+});
+
 type TrajectoryFrom3DG = typeof TrajectoryFrom3DG
 const TrajectoryFrom3DG = PluginStateTransform.BuiltIn({
     name: 'trajectory-from-3dg',