Browse Source

SDF delimiter bugfix + multi-molecule SDF support in mol-plugin

dsehnal 4 years ago
parent
commit
5df55e6bf7

+ 24 - 3
src/mol-io/reader/_spec/sdf.spec.ts

@@ -112,7 +112,22 @@ Phosphate ion
 > <SYNONYMS>
 Orthophosphate; Phosphate
 
-$$$$`;
+$$$$
+
+Comp 2
+
+5  4  0  0  0  0            999 V2000
+  0.0000    0.8250    0.0000 O   0  5  0  0  0  0  0  0  0  0  0  0
+ -0.8250    0.0000    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+  0.0000   -0.8250    0.0000 O   0  5  0  0  0  0  0  0  0  0  0  0
+  0.0000    0.0000    0.0000 P   0  0  0  0  0  0  0  0  0  0  0  0
+  0.8250    0.0000    0.0000 O   0  5  0  0  0  0  0  0  0  0  0  0
+4  1  1  0  0  0  0
+4  2  2  0  0  0  0
+4  3  1  0  0  0  0
+4  5  1  0  0  0  0
+M  CHG  3   1  -1   3  -1   5  -1
+M  END`;
 
 describe('sdf reader', () => {
     it('basic', async () => {
@@ -120,14 +135,20 @@ describe('sdf reader', () => {
         if (parsed.isError) {
             throw new Error(parsed.message);
         }
-        const compound = parsed.result.compounds[0];
-        const { molFile, dataItems } = compound;
+        const compound1 = parsed.result.compounds[0];
+        const compound2 = parsed.result.compounds[1];
+        const { molFile, dataItems } = compound1;
         const { atoms, bonds } = molFile;
 
+        expect(parsed.result.compounds.length).toBe(2);
+
         // number of structures
         expect(atoms.count).toBe(5);
         expect(bonds.count).toBe(4);
 
+        expect(compound2.molFile.atoms.count).toBe(5);
+        expect(compound2.molFile.bonds.count).toBe(4);
+
         expect(atoms.x.value(0)).toBeCloseTo(0, 0.001);
         expect(atoms.y.value(0)).toBeCloseTo(0.8250, 0.0001);
         expect(atoms.z.value(0)).toBeCloseTo(0, 0.0001);

+ 21 - 6
src/mol-io/reader/sdf/parser.ts

@@ -22,6 +22,7 @@ export interface SdfFile {
     }[]
 }
 
+const delimiter = '$$$$'; // a line starting with this marker ends the current record (checked with startsWith while reading lines)
 function handleDataItems(tokenizer: Tokenizer): { dataHeader: Column<string>, data: Column<string> } {
     const dataHeader = TokenBuilder.create(tokenizer.data, 32);
     const data = TokenBuilder.create(tokenizer.data, 32);
@@ -29,6 +30,7 @@ function handleDataItems(tokenizer: Tokenizer): { dataHeader: Column<string>, da
     let sawHeaderToken = false;
     while (tokenizer.position < tokenizer.length) {
         const line = Tokenizer.readLine(tokenizer);
+        if (line.startsWith(delimiter)) break;
         if (!!line) {
             if (line.startsWith('> <')) {
                 TokenBuilder.add(dataHeader, tokenizer.tokenStart + 3, tokenizer.tokenEnd - 1);
@@ -49,9 +51,7 @@ function handleDataItems(tokenizer: Tokenizer): { dataHeader: Column<string>, da
     };
 }
 
-function handleMolFile(data: string) {
-    const tokenizer = Tokenizer(data);
-
+function handleMolFile(tokenizer: Tokenizer) {
     const title = Tokenizer.readLine(tokenizer).trim();
     const program = Tokenizer.readLine(tokenizer).trim();
     const comment = Tokenizer.readLine(tokenizer).trim();
@@ -60,6 +60,15 @@ function handleMolFile(data: string) {
 
     const atomCount = +counts.substr(0, 3), bondCount = +counts.substr(3, 3);
 
+    if (Number.isNaN(atomCount) || Number.isNaN(bondCount)) {
+        // try to skip to next molecule
+        while (tokenizer.position < tokenizer.length) {
+            const line = Tokenizer.readLine(tokenizer);
+            if (line.startsWith(delimiter)) break;
+        }
+        return;
+    }
+
     const atoms = handleAtoms(tokenizer, atomCount);
     const bonds = handleBonds(tokenizer, bondCount);
     const dataItems = handleDataItems(tokenizer);
@@ -70,10 +79,16 @@ function handleMolFile(data: string) {
     };
 }
 
-const delimiter = '$$$$';
 function parseInternal(data: string): Result<SdfFile> { // parses all records found in `data` and wraps them in a successful Result
-    const result: SdfFile = { compounds: data.split(delimiter).map(d => handleMolFile(d)) };
-    return Result.success(result);
+    const tokenizer = Tokenizer(data); // a single tokenizer is shared by every handleMolFile call
+
+    const compounds: SdfFile['compounds'] = [];
+    while (tokenizer.position < tokenizer.length) { // keep reading records until the whole input is consumed
+        const c = handleMolFile(tokenizer);
+        if (c) compounds.push(c); // handleMolFile returns nothing when the counts line is not numeric; such records are dropped
+    }
+
+    return Result.success({ compounds });
 }
 
 export function parseSdf(data: string) {

+ 13 - 3
src/mol-plugin-state/formats/trajectory.ts

@@ -123,14 +123,23 @@ export const GroProvider: TrajectoryFormatProvider = {
 };
 
 export const MolProvider: TrajectoryFormatProvider = { // trajectory format provider for MOL input
-    label: 'MOL/SDF',
-    description: 'MOL/SDF',
+    label: 'MOL',
+    description: 'MOL',
     category: TrajectoryFormatCategory,
-    stringExtensions: ['mol', 'sdf', 'sd'],
+    stringExtensions: ['mol'], // 'sdf' and 'sd' are listed on SdfProvider
     parse: directTrajectory(StateTransforms.Model.TrajectoryFromMOL), // parsing is delegated to the TrajectoryFromMOL transform
     visuals: defaultVisuals
 };
 
+export const SdfProvider: TrajectoryFormatProvider = { // trajectory format provider for SDF input
+    label: 'SDF',
+    description: 'SDF',
+    category: TrajectoryFormatCategory,
+    stringExtensions: ['sdf', 'sd'], // string-data file extensions associated with this provider
+    parse: directTrajectory(StateTransforms.Model.TrajectoryFromSDF), // parsing is delegated to the TrajectoryFromSDF transform
+    visuals: defaultVisuals
+};
+
 export const Mol2Provider: TrajectoryFormatProvider = {
     label: 'MOL2',
     description: 'MOL2',
@@ -148,6 +157,7 @@ export const BuiltInTrajectoryFormats = [
     ['gro', GroProvider] as const,
     ['xyz', XyzProvider] as const,
     ['mol', MolProvider] as const,
+    ['sdf', SdfProvider] as const,
     ['mol2', Mol2Provider] as const,
 ] as const;
 

+ 32 - 0
src/mol-plugin-state/transforms/model.ts

@@ -39,6 +39,7 @@ import { parseXtc } from '../../mol-io/reader/xtc/parser';
 import { coordinatesFromXtc } from '../../mol-model-formats/structure/xtc';
 import { parseXyz } from '../../mol-io/reader/xyz/parser';
 import { trajectoryFromXyz } from '../../mol-model-formats/structure/xyz';
+import { parseSdf } from '../../mol-io/reader/sdf/parser';
 
 export { CoordinatesFromDcd };
 export { CoordinatesFromXtc };
@@ -50,6 +51,7 @@ export { TrajectoryFromPDB };
 export { TrajectoryFromGRO };
 export { TrajectoryFromXYZ };
 export { TrajectoryFromMOL };
+export { TrajectoryFromSDF };
 export { TrajectoryFromMOL2 };
 export { TrajectoryFromCube };
 export { TrajectoryFromCifCore };
@@ -292,6 +294,36 @@ const TrajectoryFromMOL = PluginStateTransform.BuiltIn({
     }
 });
 
+type TrajectoryFromSDF = typeof TrajectoryFromSDF // type alias sharing its name with the transform value below
+const TrajectoryFromSDF = PluginStateTransform.BuiltIn({ // transform: SDF string data -> trajectory with the models of every compound
+    name: 'trajectory-from-sdf',
+    display: { name: 'Parse SDF', description: 'Parse SDF string and create trajectory.' },
+    from: [SO.Data.String],
+    to: SO.Molecule.Trajectory
+})({
+    apply({ a }) {
+        return Task.create('Parse SDF', async ctx => {
+            const parsed = await parseSdf(a.data).runInContext(ctx);
+            if (parsed.isError) throw new Error(parsed.message); // a parse failure is surfaced as a thrown Error
+
+            const models: Model[] = []; // frames collected from all compounds, in file order
+
+            for (const { molFile } of parsed.result.compounds) {
+                const traj = await trajectoryFromMol(molFile).runInContext(ctx); // each compound's MOL block is converted on its own
+                for (let i = 0; i < traj.frameCount; i++) {
+                    models.push(await Task.resolveInContext(traj.getFrameAtIndex(i), ctx)); // resolve every frame of the per-compound trajectory
+                }
+            }
+
+            const traj = new ArrayTrajectory(models); // NOTE(review): `models` is empty when the file yields no compounds - confirm ArrayTrajectory accepts that
+
+            const props = trajectoryProps(traj);
+            return new SO.Molecule.Trajectory(traj, props);
+        });
+    }
+});
+
+
 type TrajectoryFromMOL2 = typeof TrajectoryFromMOL2 // type alias sharing its name with the TrajectoryFromMOL2 transform value declared next
 const TrajectoryFromMOL2 = PluginStateTransform.BuiltIn({
     name: 'trajectory-from-mol2',