Browse Source

CIF parser now uses mol-task

David Sehnal 7 years ago
parent
commit
a8070e4136

+ 2 - 1
src/apps/cif2bcif/converter.ts

@@ -9,10 +9,11 @@ import CIF, { Category } from 'mol-io/reader/cif'
 import * as Encoder from 'mol-io/writer/cif'
 import * as fs from 'fs'
 import classify from './field-classifier'
+import { Run } from 'mol-task'
 
 async function getCIF(path: string) {
     const str = fs.readFileSync(path, 'utf8');
-    const parsed = await CIF.parseText(str)();
+    const parsed = await Run(CIF.parseText(str));
     if (parsed.isError) {
         throw new Error(parsed.toString());
     }

+ 5 - 13
src/apps/combine-mmcif/index.ts

@@ -13,11 +13,11 @@ require('util.promisify').shim();
 const readFile = util.promisify(fs.readFile);
 const writeFile = util.promisify(fs.writeFile);
 
+import { Run, Progress } from 'mol-task'
 import { Database, Table, DatabaseCollection } from 'mol-data/db'
 import CIF from 'mol-io/reader/cif'
 // import { CCD_Schema } from 'mol-io/reader/cif/schema/ccd'
 import * as Encoder from 'mol-io/writer/cif'
-import Computation from 'mol-util/computation'
 import { mmCIF_Schema, mmCIF_Database } from 'mol-io/reader/cif/schema/mmcif';
 import { CCD_Schema } from 'mol-io/reader/cif/schema/ccd';
 import { BIRD_Schema } from 'mol-io/reader/cif/schema/bird';
@@ -48,10 +48,6 @@ export async function ensureDataAvailable() {
     await ensureAvailable(BIRD_PATH, BIRD_URL)
 }
 
-function showProgress(tag: string, p: Computation.Progress) {
-    console.log(`[${tag}] ${p.message} ${p.isIndeterminate ? '' : (p.current / p.max * 100).toFixed(2) + '% '}(${p.elapsedMs | 0}ms)`)
-}
-
 export async function readFileAsCollection<S extends Database.Schema>(path: string, schema: S) {
     const parsed = await parseCif(await readFile(path, 'utf8'))
     return CIF.toDatabaseCollection(schema, parsed.result)
@@ -80,14 +76,10 @@ export async function getBIRD() {
 }
 
 async function parseCif(data: string|Uint8Array) { // parses CIF input given either as text or as binary data
-    const comp = CIF.parse(data)
-    const ctx = Computation.observable({
-        updateRateMs: 250,
-        observer: p => showProgress(`cif parser ${typeof data === 'string' ? 'string' : 'binary'}`, p)
-    });
-    console.time('parse cif')
-    const parsed = await comp(ctx);
-    console.timeEnd('parse cif')
+    const comp = CIF.parse(data);
+    console.time('parse cif');
+    const parsed = await Run(comp, p => console.log(Progress.format(p)), 250); // logs formatted progress; 250 presumably is the update rate in ms (it replaces updateRateMs: 250) - confirm against Run's signature
+    console.timeEnd('parse cif');
     if (parsed.isError) throw parsed; // the failed result object itself is thrown, not wrapped in a new Error
     return parsed
 }

+ 2 - 1
src/apps/schema-generator/schema-from-mmcif-dic.ts

@@ -14,11 +14,12 @@ import CIF from 'mol-io/reader/cif'
 import { generateSchema } from './util/cif-dic'
 import { generate } from './util/generate'
 import { Filter, mergeFilters } from './util/json-schema'
+import { Run } from 'mol-task';
 
 async function runGenerateSchema(name: string, fieldNamesPath?: string, minCount = 0, typescript = false, out?: string) {
     await ensureMmcifDicAvailable()
     const comp = CIF.parseText(fs.readFileSync(MMCIF_DIC_PATH, 'utf8'))
-    const parsed = await comp();
+    const parsed = await Run(comp);
     if (parsed.isError) throw parsed
 
     // console.log(fieldNamesPath, minCount)

+ 3 - 11
src/apps/structure-info/index.ts

@@ -10,20 +10,12 @@ require('util.promisify').shim();
 
 // import { Table } from 'mol-data/db'
 import CIF from 'mol-io/reader/cif'
-import Computation from 'mol-util/computation'
 import { Model } from 'mol-model/structure'
-
-function showProgress(tag: string, p: Computation.Progress) {
-    console.log(`[${tag}] ${p.message} ${p.isIndeterminate ? '' : (p.current / p.max * 100).toFixed(2) + '% '}(${p.elapsedMs | 0}ms)`)
-}
+import { Run, Progress } from 'mol-task'
 
 async function parseCif(data: string|Uint8Array) { // parses CIF input given either as text or as binary data
-    const comp = CIF.parse(data)
-    const ctx = Computation.observable({
-        updateRateMs: 250,
-        observer: p => showProgress(`cif parser ${typeof data === 'string' ? 'string' : 'binary'}`, p)
-    });
-    const parsed = await comp(ctx);
+    const comp = CIF.parse(data);
+    const parsed = await Run(comp, p => console.log(Progress.format(p)), 250); // logs formatted progress; 250 presumably is the update rate in ms (it replaces updateRateMs: 250) - confirm against Run's signature
     if (parsed.isError) throw parsed; // the failed result object itself is thrown, not wrapped in a new Error
     return parsed
 }

+ 0 - 1
src/examples/computation.ts → src/examples/task.ts

@@ -23,7 +23,6 @@ function messageTree(root: Progress.Node, prefix = ''): string {
     const subTree = root.children.map(c => messageTree(c, newPrefix));
     if (p.isIndeterminate) return `${prefix}${p.taskName}: ${p.message}\n${subTree.join('\n')}`;
     return `${prefix}${p.taskName}: [${p.current}/${p.max}] ${p.message}\n${subTree.join('\n')}`;
-
 }
 
 function createTask<T>(delayMs: number, r: T): Task<T> {

+ 2 - 2
src/mol-io/reader/cif/binary/parser.ts

@@ -9,7 +9,7 @@ import { EncodedCategory, EncodedFile } from '../../../common/binary-cif'
 import Field from './field'
 import Result from '../../result'
 import decodeMsgPack from '../../../common/msgpack/decode'
-import Computation from 'mol-util/computation'
+import { Task } from 'mol-task'
 
 function checkVersions(min: number[], current: number[]) {
     for (let i = 0; i < 2; i++) {
@@ -37,7 +37,7 @@ function Category(data: EncodedCategory): Data.Category {
 }
 
 export default function parse(data: Uint8Array) {
-    return Computation.create<Result<Data.File>>(async ctx => {
+    return Task.create<Result<Data.File>>('Parse BinaryCIF', async ctx => {
         const minVersion = [0, 3];
 
         try {

+ 20 - 22
src/mol-io/reader/cif/text/parser.ts

@@ -26,7 +26,7 @@ import * as Data from '../data-model'
 import Field from './field'
 import { Tokens, TokenBuilder } from '../../common/text/tokenizer'
 import Result from '../../result'
-import Computation from 'mol-util/computation'
+import { Task, RuntimeContext, ChunkedSubtask } from 'mol-task'
 
 /**
  * Types of supported mmCIF tokens.
@@ -42,18 +42,18 @@ const enum CifTokenType {
 }
 
 interface TokenizerState {
-    data: string;
+    data: string, // input text handed to createTokenizer
 
-    position: number;
-    length: number;
-    isEscaped: boolean;
+    position: number, // reported as `current` in the parser's progress updates
+    length: number, // initialized to data.length by createTokenizer
+    isEscaped: boolean, // initialized to false by createTokenizer
 
-    lineNumber: number;
-    tokenType: CifTokenType;
-    tokenStart: number;
-    tokenEnd: number;
+    lineNumber: number, // initialized to 1 by createTokenizer
+    tokenType: CifTokenType, // type of the current token; moveNext skips Comment tokens
+    tokenStart: number,
+    tokenEnd: number,
 
-    chunker: Computation.Chunker
+    runtimeCtx: RuntimeContext // task runtime context; handleLoop passes it to readLoopChunks
 }
 
 /**
@@ -387,7 +387,7 @@ function moveNext(state: TokenizerState) {
     while (state.tokenType === CifTokenType.Comment) moveNextInternal(state);
 }
 
-function createTokenizer(data: string, ctx: Computation.Context): TokenizerState {
+function createTokenizer(data: string, runtimeCtx: RuntimeContext): TokenizerState {
     return {
         data,
         length: data.length,
@@ -398,7 +398,7 @@ function createTokenizer(data: string, ctx: Computation.Context): TokenizerState
         lineNumber: 1,
         isEscaped: false,
 
-        chunker: Computation.chunker(ctx, 1000000)
+        runtimeCtx
     };
 }
 
@@ -481,11 +481,9 @@ function readLoopChunk(state: LoopReadState, chunkSize: number) {
     return counter;
 }
 
-function readLoopChunks(state: LoopReadState) {
-    return state.tokenizer.chunker.process(
-        chunkSize => readLoopChunk(state, chunkSize),
-        update => update({ message: 'Parsing...', current: state.tokenizer.position, max: state.tokenizer.data.length }));
-}
+const readLoopChunks = ChunkedSubtask(1000000, // same chunk size the removed Computation.chunker(ctx, 1000000) used
+    (size, state: LoopReadState) => readLoopChunk(state, size), // per-chunk work: delegate to readLoopChunk with the requested size
+    (ctx, state) => ctx.update({ message: 'Parsing...', current: state.tokenizer.position, max: state.tokenizer.data.length })); // progress: tokenizer position out of total input length
 
 /**
  * Reads a loop.
@@ -514,7 +512,7 @@ async function handleLoop(tokenizer: TokenizerState, ctx: FrameContext): Promise
         tokens
     };
 
-    await readLoopChunks(state);
+    await readLoopChunks(tokenizer.runtimeCtx, state);
 
     if (state.tokenCount % fieldCount !== 0) {
         return {
@@ -560,9 +558,9 @@ function result(data: Data.File) {
  *
  * @returns CifParserResult wrapper of the result.
  */
-async function parseInternal(data: string, ctx: Computation.Context) {
+async function parseInternal(data: string, runtimeCtx: RuntimeContext) {
     const dataBlocks: Data.Block[] = [];
-    const tokenizer = createTokenizer(data, ctx);
+    const tokenizer = createTokenizer(data, runtimeCtx);
     let blockHeader = '';
 
     let blockCtx = FrameContext();
@@ -574,7 +572,7 @@ async function parseInternal(data: string, ctx: Computation.Context) {
     let saveCtx = FrameContext();
     let saveFrame: Data.Frame = Data.SafeFrame(saveCtx.categoryNames, saveCtx.categories, '');
 
-    ctx.update({ message: 'Parsing...', current: 0, max: data.length });
+    runtimeCtx.update({ message: 'Parsing...', current: 0, max: data.length });
 
     moveNext(tokenizer);
     while (tokenizer.tokenType !== CifTokenType.End) {
@@ -641,7 +639,7 @@ async function parseInternal(data: string, ctx: Computation.Context) {
 }
 
 export default function parse(data: string) { // builds (does not itself run) the text-CIF parsing task for `data`
-    return Computation.create<Result<Data.File>>(async ctx => {
+    return Task.create<Result<Data.File>>('Parse CIF', async ctx => { // 'Parse CIF' is presumably the task name shown in progress output - confirm against Task.create
         return await parseInternal(data, ctx); // ctx is received by parseInternal as its runtimeCtx parameter
     });
 }

+ 15 - 0
src/mol-task/execution/progress.ts

@@ -19,6 +19,21 @@ namespace Progress {
     }
 
     export interface Observer { (progress: Progress): void }
+
+    function _format(root: Progress.Node, prefix = ''): string { // recursively renders a progress node and its children as prefixed text lines
+        const p = root.progress;
+        if (!root.children.length) { // leaf node: a single line with no subtree appended
+            if (p.isIndeterminate) return `${prefix}${p.taskName}: ${p.message}`; // indeterminate progress omits the [current/max] counter
+            return `${prefix}${p.taskName}: [${p.current}/${p.max}] ${p.message}`;
+        }
+
+        const newPrefix = prefix + '  |_ '; // each nesting level extends the prefix by '  |_ '
+        const subTree = root.children.map(c => _format(c, newPrefix)); // one rendered string per child, in child order
+        if (p.isIndeterminate) return `${prefix}${p.taskName}: ${p.message}\n${subTree.join('\n')}`; // own line first, then children joined by newlines
+        return `${prefix}${p.taskName}: [${p.current}/${p.max}] ${p.message}\n${subTree.join('\n')}`;
+    }
+
+    export function format(p: Progress) { return _format(p.root); } // renders the whole progress tree starting at p.root, with an empty initial prefix
 }
 
 export { Progress }

+ 2 - 1
src/perf-tests/structure.ts

@@ -15,6 +15,7 @@ import { Structure, Model, Queries as Q, Atom, AtomGroup, AtomSet, Selection, Sy
 import { Segmentation } from 'mol-data/int'
 
 import to_mmCIF from 'mol-model/structure/export/mmcif'
+import { Run } from 'mol-task';
 
 require('util.promisify').shim();
 const readFileAsync = util.promisify(fs.readFile);
@@ -59,7 +60,7 @@ export async function readCIF(path: string) {
 
     console.time('parse');
     const comp = typeof input === 'string' ? CIF.parseText(input) : CIF.parseBinary(input);
-    const parsed = await comp();
+    const parsed = await Run(comp);
     console.timeEnd('parse');
     if (parsed.isError) {
         throw parsed;