Browse Source

basic mol2 format support

Alexander Rose 5 years ago
parent
commit
b18b3be070

+ 4 - 4
src/mol-io/reader/_spec/mol2.spec.ts

@@ -1,5 +1,5 @@
 
-import Mol2 from '../mol2/parser';
+import { parseMol2 } from '../mol2/parser';
 
 const Mol2String = `@<TRIPOS>MOLECULE
 5816
@@ -246,7 +246,7 @@ GASTEIGER
 
 describe('mol2 reader', () => {
     it('basic', async () => {
-        const parsed =  await Mol2(Mol2String).run();
+        const parsed =  await parseMol2(Mol2String, '').run();
         if (parsed.isError) {
             throw new Error(parsed.message);
         }
@@ -297,7 +297,7 @@ describe('mol2 reader', () => {
     });
 
     it('multiblocks', async () => {
-        const parsed =  await Mol2(Mol2StringMultiBlocks).run();
+        const parsed =  await parseMol2(Mol2StringMultiBlocks, '').run();
         if (parsed.isError) {
             throw new Error(parsed.message);
         }
@@ -348,7 +348,7 @@ describe('mol2 reader', () => {
     });
 
     it('minimal', async () => {
-        const parsed =  await Mol2(Mol2StringMinimal).run();
+        const parsed =  await parseMol2(Mol2StringMinimal, '').run();
         if (parsed.isError) {
             throw new Error(parsed.message);
         }

+ 9 - 10
src/mol-io/reader/mol2/parser.ts

@@ -54,7 +54,7 @@ const reWhitespace = /\s+/g;
 function handleMolecule(state: State) {
     const { tokenizer, molecule } = state;
 
-    while (getTokenString(tokenizer) !== '@<TRIPOS>MOLECULE') {
+    while (getTokenString(tokenizer) !== '@<TRIPOS>MOLECULE' && tokenizer.position < tokenizer.data.length) {
         markLine(tokenizer);
     }
 
@@ -101,7 +101,7 @@ async function handleAtoms(state: State): Promise<Schema.Mol2Atoms> {
     let hasStatus_bit = false;
 
     // skip empty lines and '@<TRIPOS>ATOM'
-    while (getTokenString(tokenizer) !== '@<TRIPOS>ATOM') {
+    while (getTokenString(tokenizer) !== '@<TRIPOS>ATOM' && tokenizer.position < tokenizer.data.length) {
         markLine(tokenizer);
     }
 
@@ -243,7 +243,7 @@ async function handleBonds(state: State): Promise<Schema.Mol2Bonds> {
     const { tokenizer, molecule } = state;
     let hasStatus_bit = false;
 
-    while (getTokenString(tokenizer) !== '@<TRIPOS>BOND') {
+    while (getTokenString(tokenizer) !== '@<TRIPOS>BOND' && tokenizer.position < tokenizer.data.length) {
         markLine(tokenizer);
     }
 
@@ -324,7 +324,7 @@ async function handleBonds(state: State): Promise<Schema.Mol2Bonds> {
     return ret;
 }
 
-async function parseInternal(data: string, ctx: RuntimeContext): Promise<Result<Schema.Mol2File>> {
+async function parseInternal(ctx: RuntimeContext, data: string, name: string): Promise<Result<Schema.Mol2File>> {
     const tokenizer = Tokenizer(data);
 
     ctx.update({ message: 'Parsing...', current: 0, max: data.length });
@@ -335,16 +335,15 @@ async function parseInternal(data: string, ctx: RuntimeContext): Promise<Result<
         const atoms = await handleAtoms(state);
         const bonds = await handleBonds(state);
         structures.push({ molecule: state.molecule, atoms, bonds });
+        skipWhitespace(tokenizer);
     }
 
-    const result: Schema.Mol2File = { structures };
+    const result: Schema.Mol2File = { name, structures };
     return Result.success(result);
 }
 
-export function parse(data: string) {
+export function parseMol2(data: string, name: string) {
     return Task.create<Result<Schema.Mol2File>>('Parse MOL2', async ctx => {
-        return await parseInternal(data, ctx);
+        return await parseInternal(ctx, data, name);
     });
-}
-
-export default parse;
+}

+ 1 - 0
src/mol-io/reader/mol2/schema.d.ts

@@ -63,5 +63,6 @@ export interface Mol2Structure {
 }
 
 export interface Mol2File {
+    /** Name given to `parseMol2` by its caller; stored on the result unchanged */
+    name: string
     structures: Mol2Structure[]
 }

+ 98 - 0
src/mol-model-formats/structure/mol2.ts

@@ -0,0 +1,98 @@
+/**
+ * Copyright (c) 2020 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ */
+
+import { Column, Table } from '../../mol-data/db';
+import { Model } from '../../mol-model/structure/model';
+import { MoleculeType } from '../../mol-model/structure/model/types';
+import { RuntimeContext, Task } from '../../mol-task';
+import { createModels } from './basic/parser';
+import { BasicSchema, createBasic } from './basic/schema';
+import { ComponentBuilder } from './common/component';
+import { EntityBuilder } from './common/entity';
+import { ModelFormat } from '../format';
+import { IndexPairBonds } from './property/bonds/index-pair';
+import { Mol2File } from '../../mol-io/reader/mol2/schema';
+
+/**
+ * Converts a MOL2 bond type token into an integer bond order.
+ *
+ * Besides the numeric tokens, the Tripos format defines non-numeric bond
+ * types. Feeding those to `parseInt` yields NaN, which is stored as 0 in an
+ * `Int8Array`, so they are mapped explicitly here.
+ */
+function mol2BondOrder(bondType: string): number {
+    switch (bondType) {
+        case 'ar': // aromatic
+        case 'am': // amide
+        case 'un': // unknown
+            return 1;
+        case 'du': // dummy
+        case 'nc': // not connected
+            return 0;
+        default: {
+            const order = parseInt(bondType, 10);
+            // unrecognized tokens keep the order 0 they previously ended up with
+            return Number.isNaN(order) ? 0 : order;
+        }
+    }
+}
+
+/**
+ * Builds one `Model` per structure block of a parsed MOL2 file.
+ *
+ * For each structure the atom columns are mapped onto a partial `atom_site`
+ * table (constant chain id 'A', constant entity id '1', constant occupancy 1),
+ * models are created through `createModels`, and the MOL2 bond records are
+ * attached to the first created model as index-pair bonds.
+ *
+ * @param mol2 parsed MOL2 file
+ * @param ctx runtime context forwarded to `createModels`
+ * @returns the first model created for each structure, in file order;
+ *          structures for which `createModels` returns nothing are skipped
+ */
+async function getModels(mol2: Mol2File, ctx: RuntimeContext): Promise<Model[]> {
+    const models: Model[] = [];
+
+    for (let i = 0, il = mol2.structures.length; i < il; ++i) {
+        const { atoms, bonds } = mol2.structures[i];
+
+        const A = Column.ofConst('A', atoms.count, Column.Schema.str);
+
+        // NOTE(review): `atom_type` feeds the atom id columns and `atom_name` feeds
+        // `type_symbol`; if `atom_type` holds SYBYL types (e.g. 'C.3') and `atom_name`
+        // holds atom names, these two look swapped - confirm against the parser.
+        const atom_site = Table.ofPartialColumns(BasicSchema.atom_site, {
+            auth_asym_id: A,
+            auth_atom_id: Column.asArrayColumn(atoms.atom_type),
+            auth_comp_id: atoms.subst_name,
+            auth_seq_id: atoms.subst_id,
+            Cartn_x: Column.asArrayColumn(atoms.x, Float32Array),
+            Cartn_y: Column.asArrayColumn(atoms.y, Float32Array),
+            Cartn_z: Column.asArrayColumn(atoms.z, Float32Array),
+            id: Column.asArrayColumn(atoms.atom_id),
+
+            label_asym_id: A,
+            label_atom_id: Column.asArrayColumn(atoms.atom_type),
+            label_comp_id: atoms.subst_name,
+            label_seq_id: atoms.subst_id,
+            label_entity_id: Column.ofConst('1', atoms.count, Column.Schema.str),
+
+            occupancy: Column.ofConst(1, atoms.count, Column.Schema.float),
+            type_symbol: Column.asArrayColumn(atoms.atom_name),
+
+            // the index of the structure block within the file is used as model number
+            pdbx_PDB_model_num: Column.ofConst(i, atoms.count, Column.Schema.int),
+        }, atoms.count);
+
+        const entityBuilder = new EntityBuilder();
+        entityBuilder.setNames([['MOL', 'Unknown Entity']]);
+        entityBuilder.getEntityId('MOL', MoleculeType.Unknown, 'A');
+
+        const componentBuilder = new ComponentBuilder(atoms.subst_id, atoms.atom_name);
+        // separate index names so the structure loop's `i`/`il` are not shadowed
+        for (let j = 0, jl = atoms.subst_name.rowCount; j < jl; ++j) {
+            componentBuilder.add(atoms.subst_name.value(j), j);
+        }
+
+        const basics = createBasic({
+            entity: entityBuilder.getEntityTable(),
+            chem_comp: componentBuilder.getChemCompTable(),
+            atom_site
+        });
+
+        const _models = await createModels(basics, Mol2Format.create(mol2), ctx);
+
+        if (_models.length > 0) {
+            // bond records reference atom ids; subtracting 1 turns them into indices
+            // (assumes atom ids are 1-based and contiguous - TODO confirm)
+            const indexA = Column.ofIntArray(Column.mapToArray(bonds.origin_atom_id, x => x - 1, Int32Array));
+            const indexB = Column.ofIntArray(Column.mapToArray(bonds.target_atom_id, x => x - 1, Int32Array));
+            const order = Column.ofIntArray(Column.mapToArray(bonds.bond_type, x => mol2BondOrder(x), Int8Array));
+            // NOTE(review): `count` is the number of bonds here; verify whether
+            // `IndexPairBonds.fromData` expects the number of atoms instead.
+            const pairBonds = IndexPairBonds.fromData({ pairs: { indexA, indexB, order }, count: bonds.count });
+            IndexPairBonds.Provider.set(_models[0], pairBonds);
+
+            models.push(_models[0]);
+        }
+    }
+
+    return models;
+}
+
+//
+
+export { Mol2Format };
+
+/** Model format whose `data` payload is a parsed MOL2 file. */
+type Mol2Format = ModelFormat<Mol2File>
+
+namespace Mol2Format {
+    /** Type guard: true when the format's `kind` tag is 'mol2'. */
+    export function is(x: ModelFormat): x is Mol2Format {
+        const { kind } = x;
+        return kind === 'mol2';
+    }
+
+    /** Wraps a parsed MOL2 file as a model format, carrying over its `name`. */
+    export function create(mol2: Mol2File): Mol2Format {
+        const { name } = mol2;
+        return { kind: 'mol2', name, data: mol2 };
+    }
+}
+
+/**
+ * Creates a task named 'Parse MOL2' that, when run, converts the given parsed
+ * MOL2 file into models via `getModels`.
+ */
+export function trajectoryFromMol2(mol2: Mol2File): Task<Model.Trajectory> {
+    const buildModels = (ctx: RuntimeContext) => getModels(mol2, ctx);
+    return Task.create('Parse MOL2', buildModels);
+}

+ 10 - 1
src/mol-plugin-state/formats/trajectory.ts

@@ -122,6 +122,14 @@ export const MolProvider: TrajectoryFormatProvider = {
     visuals: defaultVisuals
 };
 
+/**
+ * Trajectory format provider for MOL2: claims the 'mol2' string (text) file
+ * extension and parses through the `TrajectoryFromMOL2` state transform.
+ */
+export const Mol2Provider: TrajectoryFormatProvider = {
+    label: 'MOL2',
+    description: 'MOL2',
+    category: Category,
+    stringExtensions: ['mol2'],
+    parse: directTrajectory(StateTransforms.Model.TrajectoryFromMOL2),
+    visuals: defaultVisuals
+};
 
 export const BuiltInTrajectoryFormats = [
     ['mmcif', MmcifProvider] as const,
@@ -129,7 +137,8 @@ export const BuiltInTrajectoryFormats = [
     ['pdb', PdbProvider] as const,
     ['gro', GroProvider] as const,
     ['3dg', Provider3dg] as const,
-    ['mol', MolProvider] as const
+    ['mol', MolProvider] as const,
+    ['mol2', Mol2Provider] as const,
 ] as const;
 
 export type BuiltInTrajectoryFormat = (typeof BuiltInTrajectoryFormats)[number][0]

+ 21 - 0
src/mol-plugin-state/transforms/model.ts

@@ -35,6 +35,8 @@ import { parseMol } from '../../mol-io/reader/mol/parser';
 import { trajectoryFromMol } from '../../mol-model-formats/structure/mol';
 import { trajectoryFromCifCore } from '../../mol-model-formats/structure/cif-core';
 import { trajectoryFromCube } from '../../mol-model-formats/structure/cube';
+import { parseMol2 } from '../../mol-io/reader/mol2/parser';
+import { trajectoryFromMol2 } from '../../mol-model-formats/structure/mol2';
 
 export { CoordinatesFromDcd };
 export { TopologyFromPsf };
@@ -44,6 +46,7 @@ export { TrajectoryFromMmCif };
 export { TrajectoryFromPDB };
 export { TrajectoryFromGRO };
 export { TrajectoryFromMOL };
+export { TrajectoryFromMOL2 };
 export { TrajectoryFromCube };
 export { TrajectoryFromCifCore };
 export { TrajectoryFrom3DG };
@@ -235,6 +238,24 @@ const TrajectoryFromMOL = PluginStateTransform.BuiltIn({
     }
 });
 
+/**
+ * State transform that parses the text of a string data object as MOL2 and
+ * wraps the resulting models in a trajectory state object.
+ *
+ * Throws an `Error` when parsing fails or when no model could be created.
+ */
+type TrajectoryFromMOL2 = typeof TrajectoryFromMOL2
+const TrajectoryFromMOL2 = PluginStateTransform.BuiltIn({
+    name: 'trajectory-from-mol2',
+    display: { name: 'Parse MOL2', description: 'Parse MOL2 string and create trajectory.' },
+    from: [SO.Data.String],
+    to: SO.Molecule.Trajectory
+})({
+    apply({ a }) {
+        return Task.create('Parse MOL2', async ctx => {
+            // the label of the source data object becomes the name of the parsed file
+            const parsed = await parseMol2(a.data, a.label).runInContext(ctx);
+            if (parsed.isError) throw new Error(parsed.message);
+            const models = await trajectoryFromMol2(parsed.result).runInContext(ctx);
+            // fail with a clear message instead of a TypeError on `models[0]` below
+            if (models.length === 0) throw new Error('No models found in MOL2 data.');
+            const props = { label: `${models[0].entry}`, description: `${models.length} model${models.length === 1 ? '' : 's'}` };
+            return new SO.Molecule.Trajectory(models, props);
+        });
+    }
+});
+
 type TrajectoryFromCube = typeof TrajectoryFromCube
 const TrajectoryFromCube = PluginStateTransform.BuiltIn({
     name: 'trajectory-from-cube',