JonStargaryen 4 years ago
parent
commit
8e350617f2

+ 20 - 0
src/mol-io/writer/sdf.ts

@@ -0,0 +1,20 @@
+/**
+ * Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Sebastian Bittrich <sebastian.bittrich@rcsb.org>
+ */
+
+import { SdfEncoder } from './sdf/encoder';
+import { Encoder } from './cif/encoder';
+
+/** Entry points for producing SDF output. */
+export namespace SdfWriter {
+    /** Optional settings accepted by `createEncoder`. */
+    export interface EncoderParams {
+        /** Name handed to the encoder; 'mol*' is used when omitted. */
+        encoderName?: string,
+        /** Handed to the encoder as-is; `false` is used when omitted. */
+        hideMetaInformation?: boolean
+    }
+
+    /** Builds an `SdfEncoder`, substituting the defaults for every option left unset. */
+    export function createEncoder(params?: EncoderParams): Encoder {
+        const options: EncoderParams = params || {};
+        const name = options.encoderName === undefined ? 'mol*' : options.encoderName;
+        const hideMeta = options.hideMetaInformation === undefined ? false : options.hideMetaInformation;
+        return new SdfEncoder(name, hideMeta);
+    }
+}

+ 174 - 0
src/mol-io/writer/sdf/encoder.ts

@@ -0,0 +1,174 @@
+/**
+ * Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Sebastian Bittrich <sebastian.bittrich@rcsb.org>
+ */
+
+import { StringBuilder } from '../../../mol-util';
+import Writer from '../writer';
+import { Encoder, Category, Field } from '../cif/encoder';
+import { getCategoryInstanceData } from '../cif/encoder/util';
+
+// specification: http://c4.cabrillo.edu/404/ctfile.pdf
+/**
+ * Encoder that serializes the `atom_site` category as an SDF record (V2000 connection table).
+ *
+ * Per atom, only the Cartesian coordinates and the element symbol are written; the bond and
+ * charge sections are currently always empty. `model_server_*` categories are buffered as SDF
+ * data items and appended by `encode()`; every other category is ignored.
+ */
+export class SdfEncoder implements Encoder<string> {
+    /** Categories that carry server meta information rather than structure data. */
+    private static readonly metaCategoryNames = ['model_server_result', 'model_server_params', 'model_server_stats', 'model_server_error'];
+
+    private builder: StringBuilder;
+    private meta: StringBuilder;
+    private encoded = false;
+    private dataBlockCreated = false;
+    private error = false;
+
+    /** Writes the accumulated output to `stream`, one chunk at a time. */
+    writeTo(stream: Writer) {
+        const chunks = StringBuilder.getChunks(this.builder);
+        for (let i = 0, _i = chunks.length; i < _i; i++) {
+            stream.writeString(chunks[i]);
+        }
+    }
+
+    /** Size of the accumulated output as reported by the string builder. */
+    getSize() {
+        return StringBuilder.getSize(this.builder);
+    }
+
+    /** Returns the accumulated output as a single string. */
+    getData() {
+        return StringBuilder.getString(this.builder);
+    }
+
+    /** Writes the three header lines: the block name, a 'Created by' line, and an empty line. */
+    startDataBlock(name: string) {
+        this.dataBlockCreated = true;
+        StringBuilder.write(this.builder, `${name}\nCreated by ${this.encoder}\n\n`);
+    }
+
+    /**
+     * Consumes one category.
+     *
+     * - `model_server_*` categories are buffered as data items (see `encode()`).
+     * - `atom_site` is written as counts line, atom block and `M  END`.
+     * - Everything else is ignored.
+     *
+     * @throws Error when called after `encode()`, when no data block was started, when
+     *         `atom_site` lacks a required field, or when it holds more atoms than the
+     *         three-character count field can express.
+     */
+    writeCategory<Ctx>(category: Category<Ctx>, context?: Ctx) {
+        if (this.encoded) {
+            throw new Error('The writer contents have already been encoded, no more writing.');
+        }
+
+        if (!this.dataBlockCreated) {
+            throw new Error('No data block created.');
+        }
+
+        if (SdfEncoder.metaCategoryNames.indexOf(category.name) !== -1) {
+            // Always buffer meta categories, even when they are hidden: encode() decides whether
+            // to emit them, and a reported error must be able to force them into the output.
+            if (category.name === 'model_server_error') {
+                this.error = true;
+            }
+            this.writeFullCategory(this.meta, category, context);
+            return;
+        }
+
+        // ignore misc categories when writing SDF
+        if (category.name !== 'atom_site') {
+            return;
+        }
+
+        // use separate builders because the counts line has to be written before the atom block
+        const ctab = StringBuilder.create();
+        const bonds = StringBuilder.create();
+        const charges = StringBuilder.create();
+
+        // write Atom block
+        // 'Specifies the atomic symbol and any mass difference, charge, stereochemistry, and associated hydrogens for each atom.'
+        const { instance, source, rowCount: atomCount } = getCategoryInstanceData(category, context);
+        if (atomCount > 999) {
+            // the counts line reserves exactly three characters for the number of atoms
+            throw new Error(`Cannot write SDF: ${atomCount} atoms exceed the V2000 limit of 999.`);
+        }
+        const fields = this.getSortedFields(instance);
+        const fieldCount = fields.length;
+        // bonds are not written yet, so the bond count is always zero
+        const bondCount = 0;
+
+        let index = 0;
+        for (let _c = 0; _c < source.length; _c++) {
+            const src = source[_c];
+            const data = src.data;
+
+            if (src.rowCount === 0) continue;
+
+            const it = src.keys();
+            while (it.hasNext) {
+                const key = it.move();
+
+                for (let _f = 0; _f < fieldCount; _f++) {
+                    const f: Field<any, any> = fields[_f];
+                    const val = f.value(key, data, index);
+                    this.writeValue(ctab, val, f.type);
+                }
+
+                // remaining atom-line columns are all written as zero
+                StringBuilder.writeSafe(ctab, '  0  0  0  0  0\n');
+
+                index++;
+            }
+        }
+
+        // write counts line
+        // 'Important specifications here relate to the number of atoms, bonds, and atom lists, the chiral flag setting, and the Ctab version.'
+        StringBuilder.writeIntegerPadLeft(this.builder, atomCount, 3);
+        StringBuilder.writeIntegerPadLeft(this.builder, bondCount, 3);
+        StringBuilder.write(this.builder, '  0  0  0  0  0  0  0  0999 V2000\n');
+
+        StringBuilder.writeSafe(this.builder, StringBuilder.getString(ctab));
+        StringBuilder.writeSafe(this.builder, StringBuilder.getString(bonds));
+        StringBuilder.writeSafe(this.builder, StringBuilder.getString(charges));
+
+        StringBuilder.writeSafe(this.builder, 'M  END\n');
+    }
+
+    /**
+     * Appends every field of the first row of `category` to `sb` as an SDF data item
+     * (`> <category.field>` header, value, blank line). Does nothing when there is no row.
+     */
+    private writeFullCategory<Ctx>(sb: StringBuilder, category: Category<Ctx>, context?: Ctx) {
+        const { instance, source } = getCategoryInstanceData(category, context);
+        const fields = instance.fields;
+        const src = source[0];
+        if (!src || src.rowCount === 0) return;
+        const data = src.data;
+
+        const it = src.keys();
+        const key = it.move();
+        for (let _f = 0; _f < fields.length; _f++) {
+            const f = fields[_f]!;
+
+            StringBuilder.writeSafe(sb, `> <${category.name}.${f.name}>\n`);
+            const val = f.value(key, data, 0);
+            // values may be numeric; convert explicitly instead of asserting a string
+            StringBuilder.writeSafe(sb, String(val));
+            StringBuilder.writeSafe(sb, '\n\n');
+        }
+    }
+
+    /** Writes a single atom-line value using the column layout that matches its field type. */
+    private writeValue(sb: StringBuilder, val: string | number, t: Field.Type, floatPrecision: number = 4) {
+        if (t === Field.Type.Str) {
+            // type_symbol is the only string field - width 2, right-padded
+            StringBuilder.whitespace1(sb);
+            StringBuilder.writePadRight(sb, val as string, 2);
+        } else if (t === Field.Type.Int) {
+            StringBuilder.writeInteger(sb, val as number);
+        } else {
+            // coordinates have width 10 and are left-padded
+            StringBuilder.writePadLeft(sb, (val as number).toFixed(floatPrecision), 10);
+        }
+    }
+
+    /**
+     * Returns the fields of an atom line in output order: x, y, z, element symbol.
+     * @throws Error when one of these fields is not present in `instance`.
+     */
+    private getSortedFields<Ctx>(instance: Category.Instance<Ctx>) {
+        return ['Cartn_x', 'Cartn_y', 'Cartn_z', 'type_symbol'].map(n => {
+            const field = instance.fields.find(f => f.name === n);
+            if (!field) {
+                throw new Error(`Cannot write SDF: atom_site is missing the '${n}' field.`);
+            }
+            return field;
+        });
+    }
+
+    readonly isBinary = false;
+    binaryEncodingProvider = void 0;
+
+    /**
+     * Finalizes the output: appends the buffered meta information (when it is not hidden, or
+     * when an error was reported) followed by the `$$$$` terminator, then locks the encoder.
+     */
+    encode() {
+        // write meta-information, do so after ctab
+        if (this.error || !this.hideMetaInformation) {
+            StringBuilder.writeSafe(this.builder, StringBuilder.getString(this.meta));
+        }
+
+        // terminate file
+        StringBuilder.writeSafe(this.builder, '$$$$\n');
+
+        this.encoded = true;
+    }
+
+    /** No-op: this encoder applies its own fixed category selection. */
+    setFilter(filter?: Category.Filter) {}
+
+    /** No-op: values are formatted with the fixed-width SDF layout. */
+    setFormatter(formatter?: Category.Formatter) {}
+
+    /** Always reports `true`; the actual selection happens inside `writeCategory`. */
+    isCategoryIncluded(name: string) {
+        return true;
+    }
+
+    /**
+     * @param encoder name written into the 'Created by' header line
+     * @param hideMetaInformation when true, meta information is only emitted if an error was reported
+     */
+    constructor(readonly encoder: string, readonly hideMetaInformation: boolean) {
+        this.builder = StringBuilder.create();
+        this.meta = StringBuilder.create();
+    }
+}

+ 4 - 4
src/servers/model/config.ts

@@ -62,7 +62,7 @@ const DefaultModelServerConfig = {
         sources: [
             // 'pdbe',
             // 'rcsb',
-            // 'wwpdb'
+            'wwpdb'
         ],
         params: {
             // PDBe: {
@@ -81,9 +81,9 @@ const DefaultModelServerConfig = {
             //         assembly_symmetry: 'https://rest-staging.rcsb.org/graphql'
             //     }
             // },
-            // wwPDB: {
-            //     chemCompBondTablePath: ''
-            // }
+            wwPDB: {
+                chemCompBondTablePath: '/Users/sebastian/Downloads/ccb.bcif'
+            }
         }
     },
 

+ 2 - 2
src/servers/model/server/api-web-multiple.ts

@@ -4,7 +4,7 @@
  * @author David Sehnal <david.sehnal@gmail.com>
  */
 
-import { QueryName, QueryParams } from './api';
+import { QueryName, QueryParams, Encoding } from './api';
 
 export interface MultipleQueryEntry<Name extends QueryName = QueryName> {
     data_source?: string,
@@ -17,7 +17,7 @@ export interface MultipleQueryEntry<Name extends QueryName = QueryName> {
 
+/** Describes a multi-query job: the queries to run plus optional output settings. */
 export interface MultipleQuerySpec {
     queries: MultipleQueryEntry[],
-    encoding?: 'cif' | 'bcif',
+    // Optional; typed as the `Encoding` union imported from './api'.
+    encoding?: Encoding,
     asTarGz?: boolean
 }
 

+ 5 - 5
src/servers/model/server/api-web.ts

@@ -46,11 +46,11 @@ async function processNextJob() {
     }
 }
 
-export function createResultWriter(response: express.Response, isBinary: boolean, entryId?: string, queryName?: string) {
+/**
+ * Creates the writer for a single-file response.
+ *
+ * The file name is `<entryId>_<query-name>.<ext>` when both `entryId` and `queryName` are
+ * given and `result.<ext>` otherwise. The writer is marked binary only for 'bcif'.
+ *
+ * @param encoding requested encoding; matched case-insensitively, and anything that is not
+ *                 'cif', 'bcif', 'mol2' or 'sdf' (including a missing value) is treated as 'cif'
+ */
+export function createResultWriter(response: express.Response, encoding: string, entryId?: string, queryName?: string) {
+    // Callers may forward a raw value from the request (possibly undefined or upper-case),
+    // so normalize it here instead of trusting it as a file extension.
+    const normalized = ('' + encoding).toLowerCase();
+    const extension = ['cif', 'bcif', 'mol2', 'sdf'].indexOf(normalized) !== -1 ? normalized : 'cif';
     const filenameBase = entryId && queryName
         ? `${entryId}_${splitCamelCase(queryName.replace(/\s/g, '_'), '-').toLowerCase()}`
         : `result`;
-    return new SimpleResponseResultWriter(isBinary ? `${filenameBase}.bcif` : `${filenameBase}.cif`, response, isBinary);
+    return new SimpleResponseResultWriter(`${filenameBase}.${extension}`, response, extension === 'bcif');
 }
 
 function mapQuery(app: express.Express, queryName: string, queryDefinition: QueryDefinition) {
@@ -66,8 +66,8 @@ function mapQuery(app: express.Express, queryName: string, queryDefinition: Quer
                 modelNums: commonParams.model_nums,
                 copyAllCategories: !!commonParams.copy_all_categories
             })],
-            writer: createResultWriter(res, commonParams.encoding === 'bcif', entryId, queryName),
-            options: { binary: commonParams.encoding === 'bcif' }
+            writer: createResultWriter(res, commonParams.encoding!, entryId, queryName),
+            options: { binary: commonParams.encoding === 'bcif', encoding: commonParams.encoding }
         });
         responseMap.set(jobId, res);
         if (JobManager.size === 1) processNextJob();
@@ -122,7 +122,7 @@ function serveStatic(req: express.Request, res: express.Response) {
 function createMultiJob(spec: MultipleQuerySpec, res: express.Response) {
     const writer = spec.asTarGz
         ? new TarballResponseResultWriter(getMultiQuerySpecFilename(), res)
-        : createResultWriter(res, spec.encoding?.toLowerCase() === 'bcif');
+        : createResultWriter(res, spec.encoding!);
 
     if (spec.queries.length > ModelServerConfig.maxQueryManyQueries) {
         writer.doError(400, `query-many queries limit (${ModelServerConfig.maxQueryManyQueries}) exceeded.`);

+ 18 - 3
src/servers/model/server/api.ts

@@ -44,14 +44,15 @@ export interface QueryDefinition<Params = any> {
 
+// Descriptors (name, type, default value, description) of the parameters listed here:
+// model_nums, encoding, copy_all_categories and data_source.
+// NOTE(review): 'mol2' is advertised as a supported encoding below, but the encoder factory
+// in query.ts currently throws for it - confirm this is intended until MOL2 is implemented.
 export const CommonQueryParamsInfo: QueryParamInfo[] = [
     { name: 'model_nums', type: QueryParamType.String, description: `A comma-separated list of model ids (i.e. 1,2). If set, only include atoms with the corresponding '_atom_site.pdbx_PDB_model_num' field.` },
-    { name: 'encoding', type: QueryParamType.String, defaultValue: 'cif', description: `Determines the output encoding (text based 'CIF' or binary 'BCIF').`, supportedValues: ['cif', 'bcif'] },
+    { name: 'encoding', type: QueryParamType.String, defaultValue: 'cif', description: `Determines the output encoding (text based 'CIF' or binary 'BCIF'). Ligands can also be exported as 'SDF' or 'MOL2'.`, supportedValues: ['cif', 'bcif', 'mol2', 'sdf'] },
     { name: 'copy_all_categories', type: QueryParamType.Boolean, defaultValue: false, description: 'If true, copy all categories from the input file.' },
     { name: 'data_source', type: QueryParamType.String, defaultValue: '', description: 'Allows to control how the provided data source ID maps to input file (as specified by the server instance config).' }
 ];
 
+/** Output encodings a query can request. */
+export type Encoding = 'cif' | 'bcif' | 'mol2' | 'sdf';
+/** Normalized form of the common query parameters, as returned by normalizeRestCommonParams. */
 export interface CommonQueryParamsInfo {
     model_nums?: number[],
-    encoding?: 'cif' | 'bcif',
+    encoding?: Encoding,
     copy_all_categories?: boolean
     data_source?: string
 }
@@ -261,10 +262,24 @@ export function normalizeRestQueryParams(query: QueryDefinition, params: any) {
 }
 
 export function normalizeRestCommonParams(params: any): CommonQueryParamsInfo {
+    console.log(params);
     return {
         model_nums: params.model_nums ? ('' + params.model_nums).split(',').map(n => n.trim()).filter(n => !!n).map(n => +n) : void 0,
         data_source: params.data_source,
         copy_all_categories: Boolean(params.copy_all_categories),
-        encoding: ('' + params.encoding).toLocaleLowerCase() === 'bcif' ? 'bcif' : 'cif'
+        encoding: mapEncoding(('' + params.encoding).toLocaleLowerCase())
     };
+}
+
+/**
+ * Maps an already lower-cased encoding name to a supported `Encoding`.
+ * Any value other than 'bcif', 'mol2' or 'sdf' falls back to 'cif'.
+ */
+function mapEncoding(value: string): Encoding {
+    switch (value) {
+        case 'bcif':
+            return 'bcif';
+        case 'mol2':
+            return 'mol2';
+        case 'sdf':
+            return 'sdf';
+        default:
+            return 'cif';
+    }
 }

+ 7 - 4
src/servers/model/server/jobs.ts

@@ -5,13 +5,13 @@
  */
 
 import { UUID } from '../../../mol-util';
-import { getQueryByName, QueryDefinition, QueryName, QueryParams } from './api';
+import { getQueryByName, QueryDefinition, QueryName, QueryParams, Encoding } from './api';
 import { LinkedList } from '../../../mol-data/generic';
 import { ResultWriter } from '../utils/writer';
 
+/** Output format of a job; both members are derived from the job definition's options by createJob. */
 export interface ResponseFormat {
     tarball: boolean,
-    isBinary: boolean
+    encoding: Encoding
 }
 
 export interface Job {
@@ -29,7 +29,7 @@ export interface Job {
+/** Input for createJob: the entries to process, the writer to use, and optional output settings. */
 export interface JobDefinition {
     entries: JobEntry[],
     writer: ResultWriter,
-    options?: { outputFilename?: string, binary?: boolean, tarball?: boolean }
+    // When `encoding` is set, createJob uses it as-is; otherwise `binary` selects 'bcif' (true) or 'cif'.
+    options?: { outputFilename?: string, binary?: boolean, tarball?: boolean, encoding?: Encoding }
 }
 
 export interface JobEntry {
@@ -78,7 +78,10 @@ export function createJob(definition: JobDefinition): Job {
         datetime_utc: `${new Date().toISOString().replace(/T/, ' ').replace(/\..+/, '')}`,
         entries: definition.entries,
         writer: definition.writer,
-        responseFormat: { isBinary: !!(definition.options && definition.options.binary), tarball: !!definition?.options?.tarball },
+        responseFormat: { 
+            tarball: !!definition?.options?.tarball, 
+            encoding: definition?.options?.encoding ? definition.options.encoding : !!(definition.options && definition.options.binary) ? 'bcif' : 'cif'
+        },
         outputFilename: definition.options && definition.options.outputFilename
     };
     definition.entries.forEach(e => e.job = job);

+ 35 - 16
src/servers/model/server/query.ts

@@ -7,6 +7,7 @@
 import * as path from 'path';
 import { Column } from '../../../mol-data/db';
 import { CifWriter } from '../../../mol-io/writer/cif';
+import { SdfWriter } from '../../../mol-io/writer/sdf';
 import { Structure, StructureQuery, StructureSelection } from '../../../mol-model/structure';
 import { encode_mmCIF_categories } from '../../../mol-model/structure/export/mmcif';
 import { Progress } from '../../../mol-task';
@@ -16,10 +17,12 @@ import { PerformanceMonitor } from '../../../mol-util/performance-monitor';
 import { ModelServerConfig as Config } from '../config';
 import { createModelPropertiesProviderFromConfig, ModelPropertiesProvider } from '../property-provider';
 import Version from '../version';
-import { Job, JobEntry } from './jobs';
+import { Job, JobEntry, ResponseFormat } from './jobs';
 import { createStructureWrapperFromJobEntry, resolveStructures, StructureWrapper } from './structure-wrapper';
 import CifField = CifWriter.Field
 import { splitCamelCase } from '../../../mol-util/string';
+import { Encoder } from '../../../mol-io/writer/cif/encoder';
+import { Encoding } from './api';
 
 export interface Stats {
     structure: StructureWrapper,
@@ -45,14 +48,34 @@ export async function resolveJob(job: Job) {
     }
 }
 
+/**
+ * Creates the encoder for the encoding requested in `responseFormat`.
+ *
+ * 'bcif' yields a binary CIF encoder, 'sdf' an SDF encoder with meta information hidden, and
+ * any other value except 'mol2' a text CIF encoder.
+ *
+ * @throws Error for 'mol2', which is accepted as an encoding but has no encoder yet.
+ */
+function createEncoder(responseFormat: ResponseFormat): Encoder {
+    const encoderName = `ModelServer ${Version}`;
+    switch (responseFormat.encoding) {
+        case 'bcif':
+            return CifWriter.createEncoder({
+                binary: true,
+                encoderName,
+                binaryAutoClassifyEncoding: true
+            });
+        case 'sdf':
+            return SdfWriter.createEncoder({
+                encoderName,
+                hideMetaInformation: true
+            });
+        case 'mol2':
+            throw new Error(`Encoding 'mol2' is not supported yet.`);
+        default:
+            return CifWriter.createEncoder({
+                binary: false,
+                encoderName,
+                binaryAutoClassifyEncoding: true
+            });
+    }
+}
+
 async function resolveSingleFile(job: Job) {
-    ConsoleLogger.logId(job.id, 'Query', 'Starting.');
+    ConsoleLogger.logId(job.id, 'Query', `Starting (format: ${job.responseFormat.encoding}).`);
 
-    const encoder = CifWriter.createEncoder({
-        binary: job.responseFormat.isBinary,
-        encoderName: `ModelServer ${Version}`,
-        binaryAutoClassifyEncoding: true
-    });
+    const encoder = createEncoder(job.responseFormat);
 
     const headerMap = new Map<string, number>();
 
@@ -60,7 +83,6 @@ async function resolveSingleFile(job: Job) {
         let hasDataBlock = false;
         try {
             const structure = await createStructureWrapperFromJobEntry(entry, propertyProvider());
-
             let header = structure.cifFrame.header.toUpperCase();
             if (headerMap.has(header)) {
                 const i = headerMap.get(header)! + 1;
@@ -91,8 +113,8 @@ async function resolveSingleFile(job: Job) {
     encoder.writeTo(job.writer);
 }
 
-function getFilename(i: number, entry: JobEntry, header: string, isBinary: boolean) {
-    return `${i}_${header.toLowerCase()}_${splitCamelCase(entry.queryDefinition.name.replace(/\s/g, '_'), '-').toLowerCase()}.${isBinary ? 'bcif' : 'cif'}`;
+/** Builds a file name of the form `<i>_<header>_<query-name>.<encoding>`, with header and query name lower-cased. */
+function getFilename(i: number, entry: JobEntry, header: string, encoding: Encoding) {
+    const headerPart = header.toLowerCase();
+    const underscored = entry.queryDefinition.name.replace(/\s/g, '_');
+    const queryPart = splitCamelCase(underscored, '-').toLowerCase();
+    return [i, '_', headerPart, '_', queryPart, '.', encoding].join('');
 }
 
 async function resolveMultiFile(job: Job) {
@@ -101,11 +123,7 @@ async function resolveMultiFile(job: Job) {
     let i = 0;
     for (const entry of job.entries) {
 
-        const encoder = CifWriter.createEncoder({
-            binary: job.responseFormat.isBinary,
-            encoderName: `ModelServer ${Version}`,
-            binaryAutoClassifyEncoding: true
-        });
+        const encoder = createEncoder(job.responseFormat);
 
         let hasDataBlock = false;
         let header = '';
@@ -126,7 +144,7 @@ async function resolveMultiFile(job: Job) {
         ConsoleLogger.logId(job.id, 'Query', `Encoding ${entry.key}/${entry.queryDefinition.name}`);
         encoder.encode();
 
-        job.writer.beginEntry(getFilename(++i, entry, header, job.responseFormat.isBinary), encoder.getSize());
+        job.writer.beginEntry(getFilename(++i, entry, header, job.responseFormat.encoding), encoder.getSize());
         encoder.writeTo(job.writer);
         job.writer.endEntry();
         ConsoleLogger.logId(job.id, 'Query', `Written ${entry.key}/${entry.queryDefinition.name}`);
@@ -177,6 +195,7 @@ async function resolveJobEntry(entry: JobEntry, structure: StructureWrapper, enc
         // TODO: this actually needs to "reversible" in case of error.
         encoder.writeCategory(_model_server_result, entry);
         encoder.writeCategory(_model_server_params, entry);
+        console.log(structure.models[0]._staticPropertyData['chem_comp_data'].entries['THA']);
 
         if (!entry.copyAllCategories && entry.queryDefinition.filter) encoder.setFilter(entry.queryDefinition.filter);
         if (result.length > 0) encode_mmCIF_categories(encoder, result, { copyAllCategories: entry.copyAllCategories });