Browse Source

Basic model-server preprocessor to create "updated" (Binary)CIF

David Sehnal 6 years ago
parent
commit
20ee414967

+ 1 - 1
src/mol-io/reader/cif/data-model.ts

@@ -117,7 +117,7 @@ export function getCifFieldType(field: CifField): Column.Schema.Int | Column.Sch
     let floatCount = 0, hasString = false;
     for (let i = 0, _i = field.rowCount; i < _i; i++) {
         const k = field.valueKind(i);
-        if (k !== Column.ValueKind.Present) continue
+        if (k !== Column.ValueKind.Present) continue;
         const type = getNumberType(field.str(i));
         if (type === NumberType.Int) continue;
         else if (type === NumberType.Float) floatCount++;

+ 1 - 1
src/mol-io/reader/common/text/number-parser.ts

@@ -128,5 +128,5 @@ export function getNumberType(str: string): NumberType {
         }
         else break;
     }
-    return NumberType.Int;
+    return start === end ? NumberType.Int : NumberType.NaN;
 }

+ 12 - 2
src/mol-model/structure/export/mmcif.ts

@@ -20,6 +20,12 @@ export interface CifExportContext {
     cache: any
 }
 
+/** Helpers for building the context object that is passed along when writing mmCIF categories. */
+export namespace CifExportContext {
+    /**
+     * Builds an export context for the given `structure` and `model`.
+     * The `cache` starts out as an empty object without a prototype.
+     */
+    export function create(structure: Structure, model: Model): CifExportContext {
+        const cache = Object.create(null);
+        return { structure, model, cache };
+    }
+}
+
 function copy_mmCif_category(name: keyof mmCIF_Schema): CifCategory<CifExportContext> {
     return {
         name,
@@ -87,14 +93,17 @@ export const mmCIF_Export_Filters = {
 }
 
 /** Doesn't start a data block */
-export function encode_mmCIF_categories(encoder: CifWriter.Encoder, structure: Structure) {
+export function encode_mmCIF_categories(encoder: CifWriter.Encoder, structure: Structure, params?: { skipCategoryNames?: Set<string>, exportCtx?: CifExportContext }) {
     const models = structure.models;
     if (models.length !== 1) throw 'Can\'t export stucture composed from multiple models.';
     const model = models[0];
 
-    const ctx: CifExportContext[] = [{ structure, model, cache: Object.create(null) }];
+    const _params = params || { };
+
+    const ctx: CifExportContext[] = [_params.exportCtx ? _params.exportCtx : CifExportContext.create(structure, model)];
 
     for (const cat of Categories) {
+        if (_params.skipCategoryNames && _params.skipCategoryNames.has(cat.name)) continue;
         encoder.writeCategory(cat, ctx);
     }
     for (const customProp of model.customProperties.all) {
@@ -103,6 +112,7 @@ export function encode_mmCIF_categories(encoder: CifWriter.Encoder, structure: S
         const prefix = customProp.cifExport.prefix;
         const cats = customProp.cifExport.categories;
         for (const cat of cats) {
+            if (_params.skipCategoryNames && _params.skipCategoryNames.has(cat.name)) continue;
             if (cat.name.indexOf(prefix) !== 0) throw new Error(`Custom category '${cat.name}' name must start with prefix '${prefix}.'`);
             encoder.writeCategory(cat, ctx);
         }

+ 31 - 0
src/servers/model/preprocess.ts

@@ -0,0 +1,31 @@
+/**
+ * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import * as argparse from 'argparse'
+import { preprocessFile } from './preprocess/preprocess';
+
+// Command line definition: one required input file and two optional output files.
+const cmdParser = new argparse.ArgumentParser({
+    addHelp: true,
+    description: 'Preprocess CIF files to include custom properties and convert them to BinaryCIF format.'
+});
+cmdParser.addArgument(['--input', '-i'], { help: 'Input filename', required: true });
+cmdParser.addArgument(['--outCIF', '-oc'], { help: 'Output CIF filename', required: false });
+cmdParser.addArgument(['--outBCIF', '-ob'], { help: 'Output BinaryCIF filename', required: false });
+
+// TODO: "bulk" mode
+
+/** Shape of the parsed command line; the field names match the long option names above. */
+interface CmdArgs {
+    input: string,
+    outCIF?: string,
+    outBCIF?: string
+}
+
+const cmdArgs = cmdParser.parseArgs() as CmdArgs;
+
+// preprocessFile is async: without a rejection handler a failed read/encode would only surface
+// as an unhandled promise rejection. Report the error and mark the process as failed instead.
+if (cmdArgs.input) {
+    preprocessFile(cmdArgs.input, cmdArgs.outCIF, cmdArgs.outBCIF).catch(e => {
+        console.error(e);
+        process.exitCode = 1;
+    });
+}
+
+// example:
+// node build\node_modules\servers\model\preprocess -i e:\test\Quick\1cbs_updated.cif -oc e:\test\mol-star\model\1cbs.cif -ob e:\test\mol-star\model\1cbs.bcif

+ 51 - 0
src/servers/model/preprocess/converter.ts

@@ -0,0 +1,51 @@
+/**
+ * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import { CifCategory, CifField, CifFrame, getCifFieldType } from 'mol-io/reader/cif';
+import { CifWriter } from 'mol-io/writer/cif';
+import { Task } from 'mol-task';
+import { showProgress } from './util';
+
+/**
+ * Wraps a parsed CIF category as a writer category. Its instance uses the
+ * category itself as the row data, together with the supplied writer fields.
+ */
+function getCategoryInstanceProvider(cat: CifCategory, fields: CifWriter.Field[]): CifWriter.Category {
+    const { name } = cat;
+    return {
+        name,
+        instance() {
+            return { data: cat, fields, rowCount: cat.rowCount };
+        }
+    };
+}
+
+/**
+ * Builds the writer field for `field` according to the type reported by getCifFieldType:
+ * a string field, a float field (Float64Array) or an int field (Int32Array).
+ */
+function classify(name: string, field: CifField): CifWriter.Field {
+    switch (getCifFieldType(field)['@type']) {
+        case 'str':
+            return { name, type: CifWriter.Field.Type.Str, value: field.str, valueKind: field.valueKind };
+        case 'float':
+            return CifWriter.Field.float(name, field.float, { valueKind: field.valueKind, typedArray: Float64Array });
+        default:
+            // everything that is neither 'str' nor 'float' is written as an integer column
+            return CifWriter.Field.int(name, field.int, { valueKind: field.valueKind, typedArray: Int32Array });
+    }
+}
+
+/**
+ * Creates and runs a task that converts every category of `frame` into a writer category,
+ * classifying each of its fields with `classify`. Progress is reported via showProgress.
+ */
+export function classifyCif(frame: CifFrame) {
+    const task = Task.create('Classify CIF Data', async ctx => {
+        // total number of fields over all categories; used as the progress maximum
+        let max = 0;
+        frame.categoryNames.forEach(name => { max += frame.categories[name].fieldNames.length; });
+
+        const result: CifWriter.Category[] = [];
+        let done = 0;
+        for (const name of frame.categoryNames) {
+            const category = frame.categories[name];
+            const writerFields: CifWriter.Field[] = [];
+            for (const fieldName of category.fieldNames) {
+                writerFields.push(classify(fieldName, category.getField(fieldName)!));
+                done++;
+                if (ctx.shouldUpdate) await ctx.update({ message: 'Classifying...', current: done, max });
+            }
+            result.push(getCategoryInstanceProvider(category, writerFields));
+        }
+        return result;
+    });
+    return task.run(showProgress, 250);
+}

+ 63 - 0
src/servers/model/preprocess/preprocess.ts

@@ -0,0 +1,63 @@
+/**
+ * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import { readStructure } from '../server/structure-wrapper';
+import { classifyCif } from './converter';
+import { ConsoleLogger } from 'mol-util/console-logger';
+import { Structure } from 'mol-model/structure';
+import { CifWriter } from 'mol-io/writer/cif';
+import Writer from 'mol-io/writer/writer';
+import { wrapFileToWriter } from '../server/api-local';
+import { Task } from 'mol-task';
+import { showProgress, clearLine } from './util';
+import { encode_mmCIF_categories, CifExportContext } from 'mol-model/structure/export/mmcif';
+
+// TODO: error handling, bulk mode
+
+/**
+ * Reads the structure from `filename`, classifies its CIF categories and writes the result
+ * as text CIF to `outputCif` and/or as BinaryCIF to `outputBcif` (each only when provided).
+ */
+export async function preprocessFile(filename: string, outputCif?: string, outputBcif?: string) {
+    ConsoleLogger.log('ModelServer', `Reading ${filename}...`);
+    const input = await readStructure('entry', '_local_', filename);
+    ConsoleLogger.log('ModelServer', `Classifying CIF categories...`);
+    const categories = await classifyCif(input.cifFrame);
+    clearLine();
+
+    // one export context is created up front and handed to both output passes
+    const exportCtx = CifExportContext.create(input.structure, input.structure.models[0]);
+
+    // logs `message`, opens `target`, encodes with a freshly created encoder and closes the writer
+    const write = async (message: string, target: string, createEncoder: () => CifWriter.Encoder) => {
+        ConsoleLogger.log('ModelServer', message);
+        const writer = wrapFileToWriter(target);
+        const encoder = createEncoder();
+        await encode(input.structure, input.cifFrame.header, categories, encoder, exportCtx, writer);
+        clearLine();
+        writer.end();
+    };
+
+    if (outputCif) {
+        await write(`Encoding CIF...`, outputCif, () => CifWriter.createEncoder({ binary: false }));
+    }
+
+    if (outputBcif) {
+        await write(`Encoding BinaryCIF...`, outputBcif, () => CifWriter.createEncoder({ binary: true, binaryAutoClassifyEncoding: true }));
+    }
+    ConsoleLogger.log('ModelServer', `Done.`);
+}
+
+/**
+ * Creates and runs a task that starts a data block named `header`, writes all pre-classified
+ * `categories`, then calls encode_mmCIF_categories with the names of those categories as the
+ * skip list, and finally encodes the data and writes it to `writer`.
+ */
+function encode(structure: Structure, header: string, categories: CifWriter.Category[], encoder: CifWriter.Encoder, exportCtx: CifExportContext, writer: Writer) {
+    const task = Task.create('Encode', async ctx => {
+        // names of the categories written directly below; passed on so they are not written twice
+        const skipCategoryNames = new Set<string>();
+        for (const { name } of categories) skipCategoryNames.add(name);
+
+        encoder.startDataBlock(header);
+        const max = categories.length;
+        for (let i = 0; i < max; i++) {
+            encoder.writeCategory(categories[i]);
+            if (ctx.shouldUpdate) await ctx.update({ message: 'Encoding...', current: i + 1, max });
+        }
+        encode_mmCIF_categories(encoder, structure, { skipCategoryNames, exportCtx });
+        encoder.encode();
+        encoder.writeTo(writer);
+    });
+    return task.run(showProgress, 250);
+}

+ 17 - 0
src/servers/model/preprocess/util.ts

@@ -0,0 +1,17 @@
+/**
+ * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import { Progress } from 'mol-task';
+
+/** Overwrites the current console line with the formatted progress `p`. */
+export function showProgress(p: Progress) {
+    // joining 80 empty slots yields 79 spaces: blank the line, then print from its start
+    const blank = new Array(80).join(' ');
+    process.stdout.write('\r' + blank);
+    process.stdout.write(`\r${Progress.format(p)}`);
+}
+
+/** Blanks the current console line and ends with a carriage return. */
+export function clearLine() {
+    // joining 80 empty slots yields 79 spaces
+    const blank = new Array(80).join(' ');
+    process.stdout.write('\r' + blank);
+    process.stdout.write('\r');
+}

+ 3 - 3
src/servers/model/server/api-local.ts

@@ -39,7 +39,7 @@ export async function runLocal(input: LocalInput) {
     while (job) {
         try {
             const encoder = await resolveJob(job);
-            const writer = wrapFile(job.outputFilename!);
+            const writer = wrapFileToWriter(job.outputFilename!);
             encoder.writeTo(writer);
             writer.end();
             ConsoleLogger.logId(job.id, 'Query', 'Written.');
@@ -61,7 +61,7 @@ export async function runLocal(input: LocalInput) {
     StructureCache.expireAll();
 }
 
-function wrapFile(fn: string) {
+export function wrapFileToWriter(fn: string) {
     const w = {
         open(this: any) {
             if (this.opened) return;
@@ -71,7 +71,7 @@ function wrapFile(fn: string) {
         },
         writeBinary(this: any, data: Uint8Array) {
             this.open();
-            fs.writeSync(this.file, new Buffer(data));
+            fs.writeSync(this.file, new Buffer(data.buffer));
             return true;
         },
         writeString(this: any, data: string) {

+ 13 - 11
src/servers/model/server/structure-wrapper.ts

@@ -8,7 +8,7 @@ import { Structure, Model, Format } from 'mol-model/structure';
 import { PerformanceMonitor } from 'mol-util/performance-monitor';
 import { Cache } from './cache';
 import Config from '../config';
-import CIF from 'mol-io/reader/cif'
+import CIF, { CifFrame } from 'mol-io/reader/cif'
 import * as util from 'util'
 import * as fs from 'fs'
 import * as zlib from 'zlib'
@@ -34,21 +34,22 @@ export interface StructureInfo {
     entryId: string
 }
 
-export class StructureWrapper {
-    info: StructureInfo;
+export interface StructureWrapper {
+    info: StructureInfo,
 
-    key: string;
-    approximateSize: number;
-    structure: Structure;
+    key: string,
+    approximateSize: number,
+    structure: Structure,
+    cifFrame: CifFrame
 }
 
-export async function getStructure(job: Job): Promise<StructureWrapper> {
-    if (Config.cacheParams.useCache) {
+export async function getStructure(job: Job, allowCache = true): Promise<StructureWrapper> {
+    if (allowCache && Config.cacheParams.useCache) {
         const ret = StructureCache.get(job.key);
         if (ret) return ret;
     }
     const ret = await readStructure(job.key, job.sourceId, job.entryId);
-    if (Config.cacheParams.useCache) {
+    if (allowCache && Config.cacheParams.useCache) {
         StructureCache.add(ret);
     }
     return ret;
@@ -84,7 +85,7 @@ async function parseCif(data: string|Uint8Array) {
     return parsed.result;
 }
 
-async function readStructure(key: string, sourceId: string, entryId: string) {
+export async function readStructure(key: string, sourceId: string | '_local_', entryId: string) {
     const filename = sourceId === '_local_' ? entryId : Config.mapFile(sourceId, entryId);
     if (!filename) throw new Error(`Cound not map '${key}' to a valid filename.`);
     if (!fs.existsSync(filename)) throw new Error(`Could not find source file for '${key}'.`);
@@ -127,7 +128,8 @@ async function readStructure(key: string, sourceId: string, entryId: string) {
         },
         key,
         approximateSize: typeof data === 'string' ? 2 * data.length : data.length,
-        structure
+        structure,
+        cifFrame: frame
     };
 
     return ret;