Browse Source

Merge pull request #71 from JonStargaryen/sdf

SDF/MOL/MOL2 export by ModelServer
David Sehnal 4 years ago
parent
commit
3384a8630b

+ 149 - 0
src/mol-io/reader/_spec/sdf.spec.ts

@@ -0,0 +1,149 @@
+
+import { parseSdf } from '../sdf/parser';
+
+const SdfString = `
+  Mrv1718007121815122D          
+
+  5  4  0  0  0  0            999 V2000
+    0.0000    0.8250    0.0000 O   0  5  0  0  0  0  0  0  0  0  0  0
+   -0.8250    0.0000    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+    0.0000   -0.8250    0.0000 O   0  5  0  0  0  0  0  0  0  0  0  0
+    0.0000    0.0000    0.0000 P   0  0  0  0  0  0  0  0  0  0  0  0
+    0.8250    0.0000    0.0000 O   0  5  0  0  0  0  0  0  0  0  0  0
+  4  1  1  0  0  0  0
+  4  2  2  0  0  0  0
+  4  3  1  0  0  0  0
+  4  5  1  0  0  0  0
+M  CHG  3   1  -1   3  -1   5  -1
+M  END
+> <DATABASE_ID>
+DB14523
+
+> <DATABASE_NAME>
+drugbank
+
+> <SMILES>
+[O-]P([O-])([O-])=O
+
+> <INCHI_IDENTIFIER>
+InChI=1S/H3O4P/c1-5(2,3)4/h(H3,1,2,3,4)/p-3
+
+> <INCHI_KEY>
+NBIIXXVUZAFLBC-UHFFFAOYSA-K
+
+> <FORMULA>
+O4P
+
+> <MOLECULAR_WEIGHT>
+94.9714
+
+> <EXACT_MASS>
+94.95342
+
+> <JCHEM_ACCEPTOR_COUNT>
+4
+
+> <JCHEM_ATOM_COUNT>
+5
+
+> <JCHEM_AVERAGE_POLARIZABILITY>
+4.932162910070488
+
+> <JCHEM_BIOAVAILABILITY>
+1
+
+> <JCHEM_DONOR_COUNT>
+0
+
+> <JCHEM_FORMAL_CHARGE>
+-3
+
+> <JCHEM_GHOSE_FILTER>
+0
+
+> <JCHEM_IUPAC>
+phosphate
+
+> <JCHEM_LOGP>
+-1.0201038226666665
+
+> <JCHEM_MDDR_LIKE_RULE>
+0
+
+> <JCHEM_NUMBER_OF_RINGS>
+0
+
+> <JCHEM_PHYSIOLOGICAL_CHARGE>
+-2
+
+> <JCHEM_PKA>
+6.951626889535468
+
+> <JCHEM_PKA_STRONGEST_ACIDIC>
+1.7961261340181292
+
+> <JCHEM_POLAR_SURFACE_AREA>
+86.25
+
+> <JCHEM_REFRACTIVITY>
+11.2868
+
+> <JCHEM_ROTATABLE_BOND_COUNT>
+0
+
+> <JCHEM_RULE_OF_FIVE>
+1
+
+> <JCHEM_TRADITIONAL_IUPAC>
+phosphate
+
+> <JCHEM_VEBER_RULE>
+0
+
+> <DRUGBANK_ID>
+DB14523
+
+> <DRUG_GROUPS>
+experimental
+
+> <GENERIC_NAME>
+Phosphate ion
+
+> <SYNONYMS>
+Orthophosphate; Phosphate
+
+$$$$`;
+
+// Parses the single-record fixture above and spot-checks atoms, bonds and data items.
+describe('sdf reader', () => {
+    it('basic', async () => {
+        const parsed = await parseSdf(SdfString).run();
+        if (parsed.isError) {
+            throw new Error(parsed.message);
+        }
+        const compound = parsed.result.compounds[0];
+        const { molFile, dataItems } = compound;
+        const { atoms, bonds } = molFile;
+
+        // atom and bond counts as declared on the counts line of the fixture
+        expect(atoms.count).toBe(5);
+        expect(bonds.count).toBe(4);
+
+        // the 2nd argument of toBeCloseTo is the number of decimal digits to check,
+        // not an absolute tolerance
+        expect(atoms.x.value(0)).toBeCloseTo(0, 3);
+        expect(atoms.y.value(0)).toBeCloseTo(0.8250, 4);
+        expect(atoms.z.value(0)).toBeCloseTo(0, 4);
+        expect(atoms.type_symbol.value(0)).toBe('O');
+
+        // last bond line of the fixture: '  4  5  1  0  0  0  0'
+        expect(bonds.atomIdxA.value(3)).toBe(4);
+        expect(bonds.atomIdxB.value(3)).toBe(5);
+        expect(bonds.order.value(3)).toBe(1);
+
+        expect(dataItems.dataHeader.value(0)).toBe('DATABASE_ID');
+        expect(dataItems.data.value(0)).toBe('DB14523');
+
+        expect(dataItems.dataHeader.value(1)).toBe('DATABASE_NAME');
+        expect(dataItems.data.value(1)).toBe('drugbank');
+
+        // the fixture holds 32 data items; SYNONYMS is the last one
+        expect(dataItems.dataHeader.value(31)).toBe('SYNONYMS');
+        expect(dataItems.data.value(31)).toBe('Orthophosphate; Phosphate');
+    });
+});

+ 2 - 2
src/mol-io/reader/mol/parser.ts

@@ -30,7 +30,7 @@ export interface MolFile {
     }
 }
 
-function handleAtoms(tokenizer: Tokenizer, count: number): MolFile['atoms'] {
+export function handleAtoms(tokenizer: Tokenizer, count: number): MolFile['atoms'] {
     const x = TokenBuilder.create(tokenizer.data, count * 2);
     const y = TokenBuilder.create(tokenizer.data, count * 2);
     const z = TokenBuilder.create(tokenizer.data, count * 2);
@@ -59,7 +59,7 @@ function handleAtoms(tokenizer: Tokenizer, count: number): MolFile['atoms'] {
     };
 }
 
-function handleBonds(tokenizer: Tokenizer, count: number): MolFile['bonds'] {
+export function handleBonds(tokenizer: Tokenizer, count: number): MolFile['bonds'] {
     const atomIdxA = TokenBuilder.create(tokenizer.data, count * 2);
     const atomIdxB = TokenBuilder.create(tokenizer.data, count * 2);
     const order = TokenBuilder.create(tokenizer.data, count * 2);

+ 83 - 0
src/mol-io/reader/sdf/parser.ts

@@ -0,0 +1,83 @@
+/**
+ * Copyright (c) 2020 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Sebastian Bittrich <sebastian.bittrich@rcsb.org>
+ */
+
+import { Column } from '../../../mol-data/db';
+import { MolFile, handleAtoms, handleBonds } from '../mol/parser';
+import { Task } from '../../../mol-task';
+import { ReaderResult as Result } from '../result';
+import { Tokenizer, TokenBuilder } from '../common/text/tokenizer';
+import TokenColumn from '../common/text/column/token';
+
+/**
+ * Parsed content of an SDF file.
+ * Format reference: http://c4.cabrillo.edu/404/ctfile.pdf - page 41
+ */
+export interface SdfFile {
+    /** One entry per `$$$$`-delimited record of the input. */
+    readonly compounds: {
+        /** The MOL part of the record (title, program, comment, atoms and bonds). */
+        readonly molFile: MolFile,
+        /** Data items of the record, stored as two parallel columns. */
+        readonly dataItems: {
+            /** Field names taken from the `> <NAME>` header lines. */
+            readonly dataHeader: Column<string>,
+            /** The line that follows the corresponding header line. */
+            readonly data: Column<string>
+        }
+    }[]
+}
+
+/**
+ * Reads the data items of a record (`> <NAME>` header line followed by a value line)
+ * from the remainder of the tokenizer input.
+ *
+ * Lines that are neither a data header nor the line directly after one (for example
+ * the `M  ...` property lines that follow the bond block) are ignored. Only the first
+ * line of a value is stored.
+ *
+ * A header that is not followed by a value line gets an empty value, so that
+ * `dataHeader` and `data` always have the same number of rows.
+ *
+ * @param tokenizer tokenizer positioned after the bond block
+ * @returns field names and values as two parallel string columns
+ */
+function handleDataItems(tokenizer: Tokenizer): { dataHeader: Column<string>, data: Column<string> } {
+    const dataHeader = TokenBuilder.create(tokenizer.data, 32);
+    const data = TokenBuilder.create(tokenizer.data, 32);
+
+    let awaitingValue = false;
+    while (tokenizer.position < tokenizer.length) {
+        const line = Tokenizer.readLine(tokenizer);
+        const isHeader = line.startsWith('> <');
+
+        if (awaitingValue && (!line || isHeader)) {
+            // the previous header has no value line: add an empty token to keep both columns aligned
+            TokenBuilder.add(data, tokenizer.tokenStart, tokenizer.tokenStart);
+            awaitingValue = false;
+        }
+
+        if (isHeader) {
+            // the name ends at the first '>' after the opening '> <'; anything after it on the
+            // header line is not part of the name
+            // (assumes `line` starts at tokenizer.tokenStart, like the '+ 3' offset does)
+            const close = line.indexOf('>', 3);
+            const nameEnd = close < 0 ? tokenizer.tokenEnd : tokenizer.tokenStart + close;
+            TokenBuilder.add(dataHeader, tokenizer.tokenStart + 3, nameEnd);
+            awaitingValue = true;
+        } else if (line && awaitingValue) {
+            TokenBuilder.add(data, tokenizer.tokenStart, tokenizer.tokenEnd);
+            awaitingValue = false;
+            // TODO can there be multiline values?
+        }
+    }
+
+    if (awaitingValue) {
+        // input ended right after a header line
+        TokenBuilder.add(data, tokenizer.tokenEnd, tokenizer.tokenEnd);
+    }
+
+    return {
+        dataHeader: TokenColumn(dataHeader)(Column.Schema.str),
+        data: TokenColumn(data)(Column.Schema.str)
+    };
+}
+
+/**
+ * Parses a single SDF record: the three header lines, the counts line, the atom and
+ * bond blocks and finally the data items.
+ *
+ * @param data text of one record (without the `$$$$` delimiter)
+ * @returns the MOL part of the record together with its data items
+ */
+function handleMolFile(data: string) {
+    const tokenizer = Tokenizer(data);
+
+    // header block: title, program and comment line
+    const title = Tokenizer.readLine(tokenizer).trim();
+    const program = Tokenizer.readLine(tokenizer).trim();
+    const comment = Tokenizer.readLine(tokenizer).trim();
+
+    // counts line: atom and bond count are the first two 3-character wide fields
+    const counts = Tokenizer.readLine(tokenizer);
+    const atomCount = +counts.slice(0, 3);
+    const bondCount = +counts.slice(3, 6);
+
+    const atoms = handleAtoms(tokenizer, atomCount);
+    const bonds = handleBonds(tokenizer, bondCount);
+    const dataItems = handleDataItems(tokenizer);
+
+    return {
+        molFile: { title, program, comment, atoms, bonds },
+        dataItems
+    };
+}
+
+// A record is terminated by a line that starts with '$$$$'. The rest of that line and its
+// line break are consumed too, so that the following record starts with its title line.
+const delimiter = /^\$\$\$\$.*(?:\r\n|\r|\n|$)/m;
+
+/**
+ * Splits the input into records and parses each of them.
+ * Chunks that contain only whitespace (e.g. the text after the final delimiter) are
+ * not records and are skipped.
+ */
+function parseInternal(data: string): Result<SdfFile> {
+    const records = data.split(delimiter).filter(record => record.trim().length > 0);
+    const result: SdfFile = { compounds: records.map(record => handleMolFile(record)) };
+    return Result.success(result);
+}
+
+/**
+ * Creates a task named 'Parse Sdf' that parses the given SDF text when run.
+ *
+ * @param data content of the SDF file
+ */
+export function parseSdf(data: string) {
+    const parse = async () => parseInternal(data);
+    return Task.create<Result<SdfFile>>('Parse Sdf', parse);
+}

+ 163 - 0
src/mol-io/writer/ligand-encoder.ts

@@ -0,0 +1,163 @@
+/**
+ * Copyright (c) 2020 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Sebastian Bittrich <sebastian.bittrich@rcsb.org>
+ */
+
+import { StringBuilder } from '../../mol-util';
+import Writer from './writer';
+import { Encoder, Category, Field } from './cif/encoder';
+import { ComponentBond } from '../../mol-model-formats/structure/property/bonds/comp';
+
+/** Per-atom values collected from the atom_site fields of the same names. */
+interface Atom {
+    label_atom_id: string,
+    type_symbol: string,
+    Cartn_x: number,
+    Cartn_y: number,
+    Cartn_z: number
+}
+
+/** Returns a shallow copy of `partial`, typed as an Atom (no validation is done). */
+function Atom(partial: any): Atom {
+    const copy = Object.assign({}, partial);
+    return copy;
+}
+
+/**
+ * Base class of the text encoders that export a ligand (MOL, SDF, MOL2).
+ *
+ * Only the 'atom_site' category is turned into output; the ModelServer meta categories
+ * are collected in a separate builder and all other categories are dropped.
+ */
+export abstract class LigandEncoder implements Encoder<string> {
+    /** Receives the actual file content. */
+    protected builder: StringBuilder;
+    /** Receives the ModelServer meta/error information. */
+    protected meta: StringBuilder;
+    /** Bond information, has to be provided via `setComponentBondData`. */
+    protected componentData: ComponentBond;
+    /** Set once a 'model_server_error' category was written. */
+    protected error = false;
+    /** Set by implementations in `encode`, no category can be written afterwards. */
+    protected encoded = false;
+    readonly isBinary = false;
+    binaryEncodingProvider = void 0;
+
+    abstract encode(): void;
+
+    /** Writes the 'atom_site' category in the target format. */
+    protected abstract _writeCategory<Ctx>(category: Category<Ctx>, context?: Ctx): void;
+
+    /** Writes all fields of the first row of a category to `sb`. */
+    protected abstract writeFullCategory<Ctx>(sb: StringBuilder, category: Category<Ctx>, context?: Ctx): void;
+
+    /**
+     * Dispatches a category: meta categories go to the meta builder (errors always, the others
+     * only if `metaInformation` is set), 'atom_site' is encoded, everything else is ignored.
+     *
+     * @throws Error if `encode` was already called
+     */
+    writeCategory<Ctx>(category: Category<Ctx>, context?: Ctx) {
+        if (this.encoded) {
+            throw new Error('The writer contents have already been encoded, no more writing.');
+        }
+
+        if (this.metaInformation && (category.name === 'model_server_result' || category.name === 'model_server_params' || category.name === 'model_server_stats')) {
+            this.writeFullCategory(this.meta, category, context);
+            return;
+        }
+
+        // if error: force writing of meta information
+        if (category.name === 'model_server_error') {
+            this.writeFullCategory(this.meta, category, context);
+            this.error = true;
+            return;
+        }
+
+        // only care about atom_site category when writing SDF
+        if (category.name !== 'atom_site') {
+            return;
+        }
+
+        this._writeCategory(category, context);
+    }
+
+    /** Provides the bond information that is needed when writing 'atom_site'. */
+    setComponentBondData(componentData: ComponentBond) {
+        this.componentData = componentData;
+    }
+
+    /** Writes all chunks of the content builder to the stream. */
+    writeTo(stream: Writer) {
+        const chunks = StringBuilder.getChunks(this.builder);
+        for (let i = 0, _i = chunks.length; i < _i; i++) {
+            stream.writeString(chunks[i]);
+        }
+    }
+
+    getSize() {
+        return StringBuilder.getSize(this.builder);
+    }
+
+    getData() {
+        return StringBuilder.getString(this.builder);
+    }
+
+    /**
+     * Collects the atoms of all sources, leaving out hydrogen atoms if `hydrogens` is false.
+     */
+    protected getAtoms<Ctx>(instance: Category.Instance<Ctx>, source: any): Atom[] {
+        const sortedFields = this.getSortedFields(instance, ['Cartn_x', 'Cartn_y', 'Cartn_z', 'type_symbol']);
+        const label_atom_id = this.getField(instance, 'label_atom_id');
+        return this._getAtoms(source, sortedFields, label_atom_id);
+    }
+
+    private _getAtoms(source: any, fields: Field<any, any>[], label_atom_id: Field<any, any>): Atom[] {
+        const atoms: Atom[] = [];
+        // row index over all sources, also counts atoms that are left out
+        let index = 0;
+
+        // is outer loop even needed?
+        for (let _c = 0; _c < source.length; _c++) {
+            const src = source[_c];
+            const data = src.data;
+
+            if (src.rowCount === 0) continue;
+
+            const it = src.keys();
+            while (it.hasNext) {
+                const key = it.move();
+
+                const lai = label_atom_id.value(key, data, index) as string;
+                const a: { [k: string]: (string | number) } = { 'label_atom_id': lai };
+
+                for (let _f = 0, _fl = fields.length; _f < _fl; _f++) {
+                    const f: Field<any, any> = fields[_f]!;
+                    a[f.name] = f.value(key, data, index);
+                }
+
+                if (!this.isOmittedHydrogen(a['type_symbol'], lai)) {
+                    atoms.push(Atom(a));
+                }
+                index++;
+            }
+        }
+
+        return atoms;
+    }
+
+    /**
+     * Decides whether an atom is a hydrogen that must not be written. The element symbol is
+     * used if there is one, because atom names of other elements (e.g. 'HG') start with 'H'
+     * as well; the atom name is only the fallback.
+     */
+    private isOmittedHydrogen(typeSymbol: string | number | undefined, labelAtomId: string) {
+        if (this.hydrogens) {
+            return false;
+        }
+        if (typeof typeSymbol === 'string' && typeSymbol.length > 0) {
+            return typeSymbol.toUpperCase() === 'H';
+        }
+        return this.skipHydrogen(this.getLabel(labelAtomId));
+    }
+
+    /**
+     * Name-based hydrogen test: true if hydrogens are not written and the label starts with 'H'.
+     *
+     * @param label atom name reduced by `getLabel`
+     */
+    protected skipHydrogen(label: string) {
+        if (this.hydrogens) {
+            return false;
+        }
+        return label.startsWith('H');
+    }
+
+    /** Removes everything but the upper-case letters A-Z from an atom name. */
+    protected getLabel(s: string) {
+        return s.replace(/[^A-Z]+/g, '');
+    }
+
+    private getSortedFields<Ctx>(instance: Category.Instance<Ctx>, names: string[]) {
+        return names.map(n => this.getField(instance, n));
+    }
+
+    private getField<Ctx>(instance: Category.Instance<Ctx>, name: string) {
+        return instance.fields.find(f => f.name === name)!;
+    }
+
+    /** Returns the `label_comp_id` of the first row of the first source. */
+    protected getName<Ctx>(instance: Category.Instance<Ctx>, source: any): string {
+        const label_comp_id = this.getField(instance, 'label_comp_id');
+        return label_comp_id.value(source[0].keys().move(), source[0].data, 0) as string;
+    }
+
+    // the remaining members of the Encoder interface have no effect for ligand formats
+
+    startDataBlock() {}
+
+    setFilter() {}
+
+    setFormatter() {}
+
+    isCategoryIncluded() {
+        return true;
+    }
+
+    /**
+     * @param encoder name of the program that creates the file
+     * @param metaInformation whether the ModelServer result/params/stats categories are kept
+     * @param hydrogens whether hydrogen atoms are written
+     */
+    constructor(readonly encoder: string, readonly metaInformation: boolean, readonly hydrogens: boolean) {
+        this.builder = StringBuilder.create();
+        this.meta = StringBuilder.create();
+    }
+}

+ 21 - 0
src/mol-io/writer/mol.ts

@@ -0,0 +1,21 @@
+/**
+ * Copyright (c) 2020 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Sebastian Bittrich <sebastian.bittrich@rcsb.org>
+ */
+
+import { MolEncoder } from './mol/encoder';
+import { Encoder } from './cif/encoder';
+
+/** Factory of encoders that write a single MOL record. */
+export namespace MolWriter {
+    export interface EncoderParams {
+        /** name of the creating program, 'mol*' if omitted */
+        encoderName?: string,
+        /** whether to write hydrogen atoms, true if omitted */
+        hydrogens?: boolean
+    }
+
+    /** Creates a MOL encoder; meta-information is never written for MOL files. */
+    export function createEncoder(params?: EncoderParams): Encoder {
+        const options: EncoderParams = params || {};
+        const encoderName = options.encoderName === void 0 ? 'mol*' : options.encoderName;
+        const hydrogens = options.hydrogens === void 0 ? true : options.hydrogens;
+        return new MolEncoder(encoderName, false, hydrogens);
+    }
+}

+ 106 - 0
src/mol-io/writer/mol/encoder.ts

@@ -0,0 +1,106 @@
+/**
+ * Copyright (c) 2020 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Sebastian Bittrich <sebastian.bittrich@rcsb.org>
+ */
+
+import { StringBuilder } from '../../../mol-util';
+import { Category } from '../cif/encoder';
+import { getCategoryInstanceData } from '../cif/encoder/util';
+import { LigandEncoder } from '../ligand-encoder';
+
+/**
+ * Encoder for MOL files and, if a terminator is given, SDF files.
+ *
+ * specification: http://c4.cabrillo.edu/404/ctfile.pdf
+ * SDF wraps MOL and allows for multiple molecules per file as well as additional properties
+ * TODO add support for stereo/chiral flags, add charges
+ */
+export class MolEncoder extends LigandEncoder {
+    /**
+     * Writes header block, counts line, atom block and bond block of the component.
+     * Atoms and components without bond records are written without bonds.
+     */
+    _writeCategory<Ctx>(category: Category<Ctx>, context?: Ctx) {
+        // use separate builders because the counts line has to be written before atoms and bonds
+        const ctab = StringBuilder.create();
+        const bonds = StringBuilder.create();
+        const { instance, source } = getCategoryInstanceData(category, context);
+
+        // write header
+        const name = this.getName(instance, source);
+        // the 3rd line must be present and can contain comments
+        StringBuilder.writeSafe(this.builder, `${name}\n  ${this.encoder}\n\n`);
+
+        // there is no entry for components that have no bond records, e.g. single atoms
+        const bondMap = this.componentData?.entries.get(name);
+        let bondCount = 0;
+
+        // traverse once to determine all actually present atoms
+        const atoms = this.getAtoms(instance, source);
+
+        // position of each atom name in the atom block (first occurrence wins)
+        const atomIndex = new Map<string, number>();
+        for (let i = 0, il = atoms.length; i < il; i++) {
+            const id = atoms[i].label_atom_id;
+            if (!atomIndex.has(id)) atomIndex.set(id, i);
+        }
+
+        for (let i1 = 0, il = atoms.length; i1 < il; i1++) {
+            const atom = atoms[i1];
+            StringBuilder.writePadLeft(ctab, atom.Cartn_x.toFixed(4), 10);
+            StringBuilder.writePadLeft(ctab, atom.Cartn_y.toFixed(4), 10);
+            StringBuilder.writePadLeft(ctab, atom.Cartn_z.toFixed(4), 10);
+            StringBuilder.whitespace1(ctab);
+            StringBuilder.writePadRight(ctab, atom.type_symbol, 2);
+            StringBuilder.writeSafe(ctab, '  0  0  0  0  0  0  0  0  0  0  0  0\n');
+
+            const partners = bondMap?.map.get(atom.label_atom_id);
+            if (!partners) continue;
+
+            partners.forEach((v, k) => {
+                // write each bond once (lower index first) and only if the partner is part of the
+                // atom block; atoms left out by getAtoms (hydrogens) are not in the index
+                const i2 = atomIndex.get(k);
+                if (i2 === undefined || i1 >= i2) return;
+
+                const { order } = v;
+                StringBuilder.writeIntegerPadLeft(bonds, i1 + 1, 3);
+                StringBuilder.writeIntegerPadLeft(bonds, i2 + 1, 3);
+                StringBuilder.writeIntegerPadLeft(bonds, order, 3);
+                StringBuilder.writeSafe(bonds, '  0  0  0  0\n');
+                bondCount++;
+            });
+        }
+
+        // write counts line
+        // NOTE(review): no version tag ('V2000') is written at the end of this line - confirm readers accept that
+        StringBuilder.writeIntegerPadLeft(this.builder, atoms.length, 3);
+        StringBuilder.writeIntegerPadLeft(this.builder, bondCount, 3);
+        StringBuilder.writeSafe(this.builder, '  0  0  0  0  0  0  0  0  0\n');
+
+        StringBuilder.writeSafe(this.builder, StringBuilder.getString(ctab));
+        StringBuilder.writeSafe(this.builder, StringBuilder.getString(bonds));
+
+        StringBuilder.writeSafe(this.builder, 'M  END\n');
+    }
+
+    /** Writes each field of the first row of the category as an SDF data item. */
+    protected writeFullCategory<Ctx>(sb: StringBuilder, category: Category<Ctx>, context?: Ctx) {
+        const { instance, source } = getCategoryInstanceData(category, context);
+        const fields = instance.fields;
+        const src = source[0];
+        const data = src.data;
+
+        const it = src.keys();
+        const key = it.move();
+        for (let _f = 0; _f < fields.length; _f++) {
+            const f = fields[_f]!;
+
+            StringBuilder.writeSafe(sb, `> <${category.name}.${f.name}>\n`);
+            const val = f.value(key, data, 0);
+            StringBuilder.writeSafe(sb, val as string);
+            StringBuilder.writeSafe(sb, '\n\n');
+        }
+    }
+
+    /** Appends meta-information and terminator (if any) and closes the encoder for writing. */
+    encode() {
+        // write meta-information, do so after ctab
+        if (this.error || this.metaInformation) {
+            StringBuilder.writeSafe(this.builder, StringBuilder.getString(this.meta));
+        }
+
+        // terminate file (needed for SDF only)
+        if (!!this.terminator) {
+            StringBuilder.writeSafe(this.builder, `${this.terminator}\n`);
+        }
+
+        this.encoded = true;
+    }
+
+    /**
+     * @param terminator record terminator, leave empty to write a plain MOL file
+     * @throws Error if meta-information is requested without a terminator
+     */
+    constructor(encoder: string, metaInformation: boolean, hydrogens: boolean, readonly terminator: string = '') {
+        super(encoder, metaInformation, hydrogens);
+
+        if (metaInformation && !terminator) {
+            throw new Error('meta-information cannot be written for MOL files');
+        }
+    }
+}

+ 23 - 0
src/mol-io/writer/mol2.ts

@@ -0,0 +1,23 @@
+/**
+ * Copyright (c) 2020 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Sebastian Bittrich <sebastian.bittrich@rcsb.org>
+ */
+
+import { Mol2Encoder } from './mol2/encoder';
+import { Encoder } from './cif/encoder';
+
+/** Factory of encoders that write MOL2 files. */
+export namespace Mol2Writer {
+    export interface EncoderParams {
+        /** name of the creating program, 'mol*' if omitted */
+        encoderName?: string,
+        /** whether to write ModelServer meta-information (query & params), true if omitted */
+        metaInformation?: boolean,
+        /** whether to write hydrogen atoms, true if omitted */
+        hydrogens?: boolean
+    }
+
+    /** Creates a MOL2 encoder, filling in the defaults of all omitted parameters. */
+    export function createEncoder(params?: EncoderParams): Encoder {
+        const options: EncoderParams = params || {};
+        const encoderName = options.encoderName === void 0 ? 'mol*' : options.encoderName;
+        const metaInformation = options.metaInformation === void 0 ? true : options.metaInformation;
+        const hydrogens = options.hydrogens === void 0 ? true : options.hydrogens;
+        return new Mol2Encoder(encoderName, metaInformation, hydrogens);
+    }
+}

+ 93 - 0
src/mol-io/writer/mol2/encoder.ts

@@ -0,0 +1,93 @@
+/**
+ * Copyright (c) 2020 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Sebastian Bittrich <sebastian.bittrich@rcsb.org>
+ */
+
+import { Category } from '../cif/encoder';
+import { LigandEncoder } from '../ligand-encoder';
+import { StringBuilder } from '../../../mol-util';
+import { getCategoryInstanceData } from '../cif/encoder/util';
+import { BondType } from '../../../mol-model/structure/model/types';
+
+/**
+ * Encoder for MOL2 files.
+ *
+ * specification: http://chemyang.ccnu.edu.cn/ccb/server/AIMMS/mol2.pdf
+ * TODO amide (and real sp/sp2/sp3) support for bonds and SYBYL atom types: see https://www.sdsc.edu/CCMS/Packages/cambridge/pluto/atom_types.html
+ * TODO support charges
+ */
+export class Mol2Encoder extends LigandEncoder {
+    /** Receives the MOLECULE, ATOM, BOND and SUBSTRUCTURE records; appended to the content in `encode`. */
+    private out: StringBuilder;
+
+    /**
+     * Writes the comment header to the content builder and the records of the component to `out`.
+     * Atoms and components without bond records are written without bonds.
+     */
+    _writeCategory<Ctx>(category: Category<Ctx>, context?: Ctx): void {
+        const a = StringBuilder.create();
+        const b = StringBuilder.create();
+        const { instance, source } = getCategoryInstanceData(category, context);
+
+        // write header
+        const name = this.getName(instance, source);
+        StringBuilder.writeSafe(this.builder, `# Name: ${name}\n# Created by ${this.encoder}\n\n`);
+
+        // there is no entry for components that have no bond records, e.g. single atoms
+        const bondMap = this.componentData?.entries.get(name);
+        let bondCount = 0;
+
+        const atoms = this.getAtoms(instance, source);
+
+        // position of each atom name in the ATOM record (first occurrence wins)
+        const atomIndex = new Map<string, number>();
+        for (let i = 0, il = atoms.length; i < il; i++) {
+            const id = atoms[i].label_atom_id;
+            if (!atomIndex.has(id)) atomIndex.set(id, i);
+        }
+
+        StringBuilder.writeSafe(a, '@<TRIPOS>ATOM\n');
+        StringBuilder.writeSafe(b, '@<TRIPOS>BOND\n');
+        for (let i1 = 0, il = atoms.length; i1 < il; i1++) {
+            const atom = atoms[i1];
+
+            let aromatic = false;
+            const partners = bondMap?.map.get(atom.label_atom_id);
+            if (partners) {
+                partners.forEach((v, k) => {
+                    // ignore bonds to atoms that are not written (e.g. left out hydrogens)
+                    const i2 = atomIndex.get(k);
+                    if (i2 === undefined) return;
+
+                    const { order, flags } = v;
+                    // test the aromatic bit instead of comparing the whole value
+                    // (assumes `flags` is a bit set that may carry further flags - TODO confirm)
+                    const ar = (flags & BondType.Flag.Aromatic) !== 0;
+                    // an aromatic bond in either direction makes the atom aromatic
+                    if (ar) aromatic = true;
+
+                    // write each bond once, lower index first
+                    if (i1 >= i2) return;
+                    StringBuilder.writeSafe(b, `${++bondCount} ${i1 + 1} ${i2 + 1} ${ar ? 'ar' : order}`);
+                    StringBuilder.newline(b);
+                });
+            }
+
+            const sub = aromatic ? '.ar' : '';
+            StringBuilder.writeSafe(a, `${i1 + 1} ${atom.type_symbol} ${atom.Cartn_x.toFixed(3)} ${atom.Cartn_y.toFixed(3)} ${atom.Cartn_z.toFixed(3)} ${atom.type_symbol}${sub} 1 ${name} 0.000\n`);
+        }
+
+        StringBuilder.writeSafe(this.out, `@<TRIPOS>MOLECULE\n${name}\n${atoms.length} ${bondCount} 0 0 0\nSMALL\nNO_CHARGES\n\n`);
+        StringBuilder.writeSafe(this.out, StringBuilder.getString(a));
+        StringBuilder.writeSafe(this.out, StringBuilder.getString(b));
+        StringBuilder.writeSafe(this.out, `@<TRIPOS>SUBSTRUCTURE\n${name} ${name} 1\n`);
+    }
+
+    /** Writes each field of the first row of the category as a '#' comment line. */
+    protected writeFullCategory<Ctx>(sb: StringBuilder, category: Category<Ctx>, context?: Ctx) {
+        const { instance, source } = getCategoryInstanceData(category, context);
+        const fields = instance.fields;
+        const src = source[0];
+        const data = src.data;
+
+        const it = src.keys();
+        const key = it.move();
+        for (let _f = 0; _f < fields.length; _f++) {
+            const f = fields[_f]!;
+
+            StringBuilder.writeSafe(sb, `# ${category.name}.${f.name}: `);
+            const val = f.value(key, data, 0);
+            StringBuilder.writeSafe(sb, val as string);
+            StringBuilder.newline(sb);
+        }
+        StringBuilder.newline(sb);
+    }
+
+    /** Appends meta-information and the collected records, then closes the encoder for writing. */
+    encode(): void {
+        // write meta-information, do so before the records
+        if (this.error || this.metaInformation) {
+            StringBuilder.writeSafe(this.builder, StringBuilder.getString(this.meta));
+        }
+        StringBuilder.writeSafe(this.builder, StringBuilder.getString(this.out));
+
+        this.encoded = true;
+    }
+
+    constructor(encoder: string, metaInformation: boolean, hydrogens: boolean) {
+        super(encoder, metaInformation, hydrogens);
+        this.out = StringBuilder.create();
+    }
+}

+ 23 - 0
src/mol-io/writer/sdf.ts

@@ -0,0 +1,23 @@
+/**
+ * Copyright (c) 2020 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Sebastian Bittrich <sebastian.bittrich@rcsb.org>
+ */
+
+import { MolEncoder } from './mol/encoder';
+import { Encoder } from './cif/encoder';
+
+/** Factory of encoders that write SDF files (MOL records terminated by '$$$$'). */
+export namespace SdfWriter {
+    export interface EncoderParams {
+        /** name of the creating program, 'mol*' if omitted */
+        encoderName?: string,
+        /** whether to write ModelServer meta-information (query & params), true if omitted */
+        metaInformation?: boolean,
+        /** whether to write hydrogen atoms, true if omitted */
+        hydrogens?: boolean
+    }
+
+    /** Creates an SDF encoder, filling in the defaults of all omitted parameters. */
+    export function createEncoder(params?: EncoderParams): Encoder {
+        const options: EncoderParams = params || {};
+        const encoderName = options.encoderName === void 0 ? 'mol*' : options.encoderName;
+        const metaInformation = options.metaInformation === void 0 ? true : options.metaInformation;
+        const hydrogens = options.hydrogens === void 0 ? true : options.hydrogens;
+        return new MolEncoder(encoderName, metaInformation, hydrogens, '$$$$');
+    }
+}

+ 2 - 2
src/servers/model/server/api-web-multiple.ts

@@ -4,7 +4,7 @@
  * @author David Sehnal <david.sehnal@gmail.com>
  */
 
-import { QueryName, QueryParams } from './api';
+import { QueryName, QueryParams, Encoding } from './api';
 
 export interface MultipleQueryEntry<Name extends QueryName = QueryName> {
     data_source?: string,
@@ -17,7 +17,7 @@ export interface MultipleQueryEntry<Name extends QueryName = QueryName> {
 
 export interface MultipleQuerySpec {
     queries: MultipleQueryEntry[],
-    encoding?: 'cif' | 'bcif',
+    encoding?: Encoding,
     asTarGz?: boolean
 }
 

+ 5 - 5
src/servers/model/server/api-web.ts

@@ -46,11 +46,11 @@ async function processNextJob() {
     }
 }
 
-export function createResultWriter(response: express.Response, isBinary: boolean, entryId?: string, queryName?: string) {
+export function createResultWriter(response: express.Response, encoding: string, entryId?: string, queryName?: string) {
     const filenameBase = entryId && queryName
         ? `${entryId}_${splitCamelCase(queryName.replace(/\s/g, '_'), '-').toLowerCase()}`
         : `result`;
-    return new SimpleResponseResultWriter(isBinary ? `${filenameBase}.bcif` : `${filenameBase}.cif`, response, isBinary);
+    return new SimpleResponseResultWriter(`${filenameBase}.${encoding}`, response, encoding === 'bcif');
 }
 
 function mapQuery(app: express.Express, queryName: string, queryDefinition: QueryDefinition) {
@@ -66,8 +66,8 @@ function mapQuery(app: express.Express, queryName: string, queryDefinition: Quer
                 modelNums: commonParams.model_nums,
                 copyAllCategories: !!commonParams.copy_all_categories
             })],
-            writer: createResultWriter(res, commonParams.encoding === 'bcif', entryId, queryName),
-            options: { binary: commonParams.encoding === 'bcif' }
+            writer: createResultWriter(res, commonParams.encoding!, entryId, queryName),
+            options: { binary: commonParams.encoding === 'bcif', encoding: commonParams.encoding }
         });
         responseMap.set(jobId, res);
         if (JobManager.size === 1) processNextJob();
@@ -122,7 +122,7 @@ function serveStatic(req: express.Request, res: express.Response) {
 function createMultiJob(spec: MultipleQuerySpec, res: express.Response) {
     const writer = spec.asTarGz
         ? new TarballResponseResultWriter(getMultiQuerySpecFilename(), res)
-        : createResultWriter(res, spec.encoding?.toLowerCase() === 'bcif');
+        : createResultWriter(res, spec.encoding!);
 
     if (spec.queries.length > ModelServerConfig.maxQueryManyQueries) {
         writer.doError(400, `query-many queries limit (${ModelServerConfig.maxQueryManyQueries}) exceeded.`);

+ 36 - 5
src/servers/model/server/api.ts

@@ -4,7 +4,7 @@
  * @author David Sehnal <david.sehnal@gmail.com>
  */
 
-import { Queries, Structure, StructureQuery, StructureSymmetry } from '../../../mol-model/structure';
+import { Queries, Structure, StructureQuery, StructureSymmetry, StructureProperties } from '../../../mol-model/structure';
 import { getAtomsTests } from '../query/atoms';
 import { CifWriter } from '../../../mol-io/writer/cif';
 import { QuerySchemas } from '../query/schemas';
@@ -33,7 +33,7 @@ export interface QueryDefinition<Params = any> {
     name: string,
     niceName: string,
     exampleId: string, // default is 1cbs
-    query: (params: Params, structure: Structure) => StructureQuery,
+    query: (params: Params, structure: Structure, numModels: number[]) => StructureQuery,
     description: string,
     jsonParams: QueryParamInfo[],
     restParams: QueryParamInfo[],
@@ -44,14 +44,15 @@ export interface QueryDefinition<Params = any> {
 
 export const CommonQueryParamsInfo: QueryParamInfo[] = [
     { name: 'model_nums', type: QueryParamType.String, description: `A comma-separated list of model ids (i.e. 1,2). If set, only include atoms with the corresponding '_atom_site.pdbx_PDB_model_num' field.` },
-    { name: 'encoding', type: QueryParamType.String, defaultValue: 'cif', description: `Determines the output encoding (text based 'CIF' or binary 'BCIF').`, supportedValues: ['cif', 'bcif'] },
+    { name: 'encoding', type: QueryParamType.String, defaultValue: 'cif', description: `Determines the output encoding (text based 'CIF' or binary 'BCIF'). Ligands can also be exported as 'SDF', 'MOL', or 'MOL2'.`, supportedValues: ['cif', 'bcif', 'sdf', 'mol', 'mol2'] },
     { name: 'copy_all_categories', type: QueryParamType.Boolean, defaultValue: false, description: 'If true, copy all categories from the input file.' },
     { name: 'data_source', type: QueryParamType.String, defaultValue: '', description: 'Allows to control how the provided data source ID maps to input file (as specified by the server instance config).' }
 ];
 
+export type Encoding = 'cif' | 'bcif' | 'sdf' | 'mol' | 'mol2';
 export interface CommonQueryParamsInfo {
     model_nums?: number[],
-    encoding?: 'cif' | 'bcif',
+    encoding?: Encoding,
     copy_all_categories?: boolean
     data_source?: string
 }
@@ -128,6 +129,21 @@ function Q<Params = any>(definition: Partial<QueryDefinition<Params>>) {
 
 const QueryMap = {
     'full': Q<{} | undefined>({ niceName: 'Full Structure', query: () => Queries.generators.all, description: 'The full structure.' }),
+    'ligand': Q<{ atom_site: AtomSiteSchema }>({
+        niceName: 'Ligand',
+        description: 'Coordinates of the first group satisfying the given criteria.',
+        query: (p, _s, numModels) => {
+            const tests = getAtomsTests(p.atom_site);
+            const ligands = Queries.combinators.merge(tests.map(test => Queries.generators.atoms({
+                ...test,
+                unitTest: ctx => StructureProperties.unit.model_num(ctx.element) === numModels[0],
+                groupBy: ctx => StructureProperties.residue.key(ctx.element)
+            })));
+            return Queries.filters.first(ligands);
+        },
+        jsonParams: [ AtomSiteTestJsonParam ],
+        restParams: AtomSiteTestRestParams
+    }),
     'atoms': Q<{ atom_site: AtomSiteSchema }>({
         niceName: 'Atoms',
         description: 'Atoms satisfying the given criteria.',
@@ -265,6 +281,21 @@ export function normalizeRestCommonParams(params: any): CommonQueryParamsInfo {
         model_nums: params.model_nums ? ('' + params.model_nums).split(',').map(n => n.trim()).filter(n => !!n).map(n => +n) : void 0,
         data_source: params.data_source,
         copy_all_categories: Boolean(params.copy_all_categories),
-        encoding: ('' + params.encoding).toLocaleLowerCase() === 'bcif' ? 'bcif' : 'cif'
+        encoding: mapEncoding(('' + params.encoding).toLocaleLowerCase())
     };
+}
+
+/**
+ * Maps an encoding name to one of the supported encodings. The comparison is exact;
+ * every value other than 'bcif', 'mol', 'mol2' and 'sdf' results in 'cif'.
+ */
+function mapEncoding(value: string) {
+    if (value === 'bcif' || value === 'mol' || value === 'mol2' || value === 'sdf') {
+        return value;
+    }
+    return 'cif';
 }

+ 7 - 4
src/servers/model/server/jobs.ts

@@ -5,13 +5,13 @@
  */
 
 import { UUID } from '../../../mol-util';
-import { getQueryByName, QueryDefinition, QueryName, QueryParams } from './api';
+import { getQueryByName, QueryDefinition, QueryName, QueryParams, Encoding } from './api';
 import { LinkedList } from '../../../mol-data/generic';
 import { ResultWriter } from '../utils/writer';
 
 export interface ResponseFormat {
     tarball: boolean,
-    isBinary: boolean
+    encoding: Encoding
 }
 
 export interface Job {
@@ -29,7 +29,7 @@ export interface Job {
 export interface JobDefinition {
     entries: JobEntry[],
     writer: ResultWriter,
-    options?: { outputFilename?: string, binary?: boolean, tarball?: boolean }
+    options?: { outputFilename?: string, binary?: boolean, tarball?: boolean, encoding?: Encoding }
 }
 
 export interface JobEntry {
@@ -78,7 +78,10 @@ export function createJob(definition: JobDefinition): Job {
         datetime_utc: `${new Date().toISOString().replace(/T/, ' ').replace(/\..+/, '')}`,
         entries: definition.entries,
         writer: definition.writer,
-        responseFormat: { isBinary: !!(definition.options && definition.options.binary), tarball: !!definition?.options?.tarball },
+        responseFormat: {
+            tarball: !!definition?.options?.tarball,
+            encoding: definition?.options?.encoding ? definition.options.encoding : !!(definition.options && definition.options.binary) ? 'bcif' : 'cif'
+        },
         outputFilename: definition.options && definition.options.outputFilename
     };
     definition.entries.forEach(e => e.job = job);

+ 67 - 17
src/servers/model/server/query.ts

@@ -7,7 +7,7 @@
 import * as path from 'path';
 import { Column } from '../../../mol-data/db';
 import { CifWriter } from '../../../mol-io/writer/cif';
-import { Structure, StructureQuery, StructureSelection } from '../../../mol-model/structure';
+import { Structure, StructureQuery, StructureSelection, Model } from '../../../mol-model/structure';
 import { encode_mmCIF_categories } from '../../../mol-model/structure/export/mmcif';
 import { Progress } from '../../../mol-task';
 import { ConsoleLogger } from '../../../mol-util/console-logger';
@@ -20,6 +20,14 @@ import { Job, JobEntry } from './jobs';
 import { createStructureWrapperFromJobEntry, resolveStructures, StructureWrapper } from './structure-wrapper';
 import CifField = CifWriter.Field
 import { splitCamelCase } from '../../../mol-util/string';
+import { Encoder } from '../../../mol-io/writer/cif/encoder';
+import { Encoding } from './api';
+import { ComponentBond } from '../../../mol-model-formats/structure/property/bonds/comp';
+import { SdfWriter } from '../../../mol-io/writer/sdf';
+import { MolWriter } from '../../../mol-io/writer/mol';
+import { Mol2Writer } from '../../../mol-io/writer/mol2';
+import { MolEncoder } from '../../../mol-io/writer/mol/encoder';
+import { Mol2Encoder } from '../../../mol-io/writer/mol2/encoder';
 
 export interface Stats {
     structure: StructureWrapper,
@@ -45,14 +53,59 @@ export async function resolveJob(job: Job) {
     }
 }
 
-async function resolveSingleFile(job: Job) {
-    ConsoleLogger.logId(job.id, 'Query', 'Starting.');
+/** Encoder options common to every output format. */
+const SharedParams = { encoderName: `ModelServer ${Version}` };
+
+/** Options passed to the sdf/mol/mol2 writers: the shared options plus `hydrogens: true`. */
+const SharedLigandWritingParams = { ...SharedParams, hydrogens: true };
+
+/**
+ * Creates the encoder matching the job's requested response encoding.
+ *
+ * For 'sdf', 'mol' and 'mol2' the job is first checked with
+ * ensureCompatibleQueryType, which throws if the job contains a non-'Ligand' query.
+ * Every other encoding produces a CIF encoder, binary only for 'bcif'.
+ */
+function createEncoder(job: Job): Encoder {
+    const { encoding } = job.responseFormat;
+
+    if (encoding === 'sdf' || encoding === 'mol' || encoding === 'mol2') {
+        ensureCompatibleQueryType(job);
+        const ligandParams = { ...SharedLigandWritingParams };
+        if (encoding === 'sdf') return SdfWriter.createEncoder(ligandParams);
+        if (encoding === 'mol') return MolWriter.createEncoder(ligandParams);
+        return Mol2Writer.createEncoder(ligandParams);
+    }
+
+    // 'bcif' and the 'cif' fallback differ only in the binary flag.
+    return CifWriter.createEncoder({
+        ...SharedParams,
+        binary: encoding === 'bcif',
+        binaryAutoClassifyEncoding: true
+    });
+}
 
-    const encoder = CifWriter.createEncoder({
-        binary: job.responseFormat.isBinary,
-        encoderName: `ModelServer ${Version}`,
-        binaryAutoClassifyEncoding: true
+/**
+ * Guard used before creating an sdf/mol/mol2 encoder: throws an Error as soon as
+ * any entry of the job has a query definition whose niceName is not 'Ligand'.
+ */
+function ensureCompatibleQueryType(job: Job) {
+    job.entries.forEach(e => {
+        // The query type is identified by its display name.
+        if (e.queryDefinition.niceName !== 'Ligand') {
+            throw Error("sdf, mol and mol2 encoding are only available for queries of type 'Ligand'");
+        }
     });
+}
+
+async function resolveSingleFile(job: Job) {
+    ConsoleLogger.logId(job.id, 'Query', `Starting (format: ${job.responseFormat.encoding}).`);
+
+    const encoder = createEncoder(job);
 
     const headerMap = new Map<string, number>();
 
@@ -60,7 +113,6 @@ async function resolveSingleFile(job: Job) {
         let hasDataBlock = false;
         try {
             const structure = await createStructureWrapperFromJobEntry(entry, propertyProvider());
-
             let header = structure.cifFrame.header.toUpperCase();
             if (headerMap.has(header)) {
                 const i = headerMap.get(header)! + 1;
@@ -91,8 +143,8 @@ async function resolveSingleFile(job: Job) {
     encoder.writeTo(job.writer);
 }
 
-function getFilename(i: number, entry: JobEntry, header: string, isBinary: boolean) {
-    return `${i}_${header.toLowerCase()}_${splitCamelCase(entry.queryDefinition.name.replace(/\s/g, '_'), '-').toLowerCase()}.${isBinary ? 'bcif' : 'cif'}`;
+function getFilename(i: number, entry: JobEntry, header: string, encoding: Encoding) {
+    return `${i}_${header.toLowerCase()}_${splitCamelCase(entry.queryDefinition.name.replace(/\s/g, '_'), '-').toLowerCase()}.${encoding}`;
 }
 
 async function resolveMultiFile(job: Job) {
@@ -101,11 +153,7 @@ async function resolveMultiFile(job: Job) {
     let i = 0;
     for (const entry of job.entries) {
 
-        const encoder = CifWriter.createEncoder({
-            binary: job.responseFormat.isBinary,
-            encoderName: `ModelServer ${Version}`,
-            binaryAutoClassifyEncoding: true
-        });
+        const encoder = createEncoder(job);
 
         let hasDataBlock = false;
         let header = '';
@@ -126,7 +174,7 @@ async function resolveMultiFile(job: Job) {
         ConsoleLogger.logId(job.id, 'Query', `Encoding ${entry.key}/${entry.queryDefinition.name}`);
         encoder.encode();
 
-        job.writer.beginEntry(getFilename(++i, entry, header, job.responseFormat.isBinary), encoder.getSize());
+        job.writer.beginEntry(getFilename(++i, entry, header, job.responseFormat.encoding), encoder.getSize());
         encoder.writeTo(job.writer);
         job.writer.endEntry();
         ConsoleLogger.logId(job.id, 'Query', `Written ${entry.key}/${entry.queryDefinition.name}`);
@@ -160,7 +208,8 @@ async function resolveJobEntry(entry: JobEntry, structure: StructureWrapper, enc
             }
         }
 
-        const queries = structures.map(s => entry.queryDefinition.query(entry.normalizedParams, s));
+        const modelNums = entry.modelNums || (structure.models as Model[]).map(m => m.modelNum);
+        const queries = structures.map(s => entry.queryDefinition.query(entry.normalizedParams, s, modelNums));
         const result: Structure[] = [];
         for (let i = 0; i < structures.length; i++) {
             const s = StructureSelection.unionStructure(StructureQuery.run(queries[i], structures[i], { timeoutMs: Config.queryTimeoutMs }));
@@ -178,6 +227,7 @@ async function resolveJobEntry(entry: JobEntry, structure: StructureWrapper, enc
         encoder.writeCategory(_model_server_result, entry);
         encoder.writeCategory(_model_server_params, entry);
 
+        if (encoder instanceof MolEncoder || encoder instanceof Mol2Encoder) encoder.setComponentBondData(ComponentBond.Provider.get(structure.models[0])!);
         if (!entry.copyAllCategories && entry.queryDefinition.filter) encoder.setFilter(entry.queryDefinition.filter);
         if (result.length > 0) encode_mmCIF_categories(encoder, result, { copyAllCategories: entry.copyAllCategories });
         if (!entry.copyAllCategories && entry.queryDefinition.filter) encoder.setFilter();