Просмотр исходного кода

first attempt at cif dictionary parsing

Alexander Rose 7 лет назад
Родитель
Коммит
6d842b96a9
3 измененных файлов с 78 добавлено и 38 удалено
  1. 14 2
      src/reader/cif/data-model.ts
  2. 29 31
      src/reader/cif/text/parser.ts
  3. 35 5
      src/script.ts

+ 14 - 2
src/reader/cif/data-model.ts

@@ -17,16 +17,28 @@ export function File(blocks: ArrayLike<Block>, name?: string): File {
 
 export interface Block {
     readonly header: string,
-    readonly categories: { readonly [name: string]: Category }
+    readonly categories: Categories
+    readonly saveFrames: SafeFrame[]
 }
 
-export function Block(categories: { readonly [name: string]: Category }, header: string): Block {
+export function Block(categories: Categories, header: string, saveFrames: SafeFrame[] = []): Block {
     if (Object.keys(categories).some(k => k[0] !== '_')) {
         throw new Error(`Category names must start with '_'.`);
     }
+    return { header, categories, saveFrames };
+}
+
+export interface SafeFrame {
+    readonly header: string,
+    readonly categories: Categories
+}
+
+export function SafeFrame(categories: Categories, header: string): SafeFrame {
     return { header, categories };
 }
 
+export type Categories = { readonly [name: string]: Category }
+
 export interface Category {
     readonly rowCount: number,
     getField(name: string): Field | undefined

+ 29 - 31
src/reader/cif/text/parser.ts

@@ -551,9 +551,12 @@ async function parseInternal(data: string, ctx: Computation.Context) {
     let blockHeader: string = '';
     let blockCategories = Object.create(null);
 
-    //saveFrame = new DataBlock(data, "empty"),
-    //inSaveFrame = false,
-    //blockSaveFrames: any;
+    let inSaveFrame = false
+
+    // placeholders only: in a valid file these three are re-initialized (at `data_` / `save_<name>`) before they are read
+    let saveFrames: Data.SafeFrame[] = [];
+    let saveCategories = Object.create(null);
+    let saveFrame: Data.SafeFrame = Data.SafeFrame(saveCategories, '');
 
     ctx.update({ message: 'Parsing...', current: 0, max: data.length });
 
@@ -563,63 +566,58 @@ async function parseInternal(data: string, ctx: Computation.Context) {
 
         // Data block
         if (token === CifTokenType.Data) {
-            // if (inSaveFrame) {
-            //     return error(tokenizer.currentLineNumber, "Unexpected data block inside a save frame.");
-            // }
+            if (inSaveFrame) {
+                return error(tokenizer.lineNumber, "Unexpected data block inside a save frame.");
+            }
             if (Object.keys(blockCategories).length > 0) {
-                dataBlocks.push(Data.Block(blockCategories, blockHeader));
+                dataBlocks.push(Data.Block(blockCategories, blockHeader, saveFrames));
             }
             blockHeader = data.substring(tokenizer.tokenStart + 5, tokenizer.tokenEnd);
             blockCategories = Object.create(null);
+            saveFrames = []
             moveNext(tokenizer);
-        }
-         /*   // Save frame
+        // Save frame
         } else if (token === CifTokenType.Save) {
-            id = data.substring(tokenizer.currentTokenStart + 5, tokenizer.currentTokenEnd);
-
-            if (id.length === 0) {
-                if (saveFrame.categories.length > 0) {
-                    blockSaveFrames = blockCategories.additionalData["saveFrames"];
-                    if (!blockSaveFrames) {
-                        blockSaveFrames = [];
-                        blockCategories.additionalData["saveFrames"] = blockSaveFrames;
-                    }
-                    blockSaveFrames[blockSaveFrames.length] = saveFrame;
+            const saveHeader = data.substring(tokenizer.tokenStart + 5, tokenizer.tokenEnd);
+            if (saveHeader.length === 0) {
+                if (Object.keys(saveCategories).length > 0) {
+                    saveFrames[saveFrames.length] = saveFrame
                 }
                 inSaveFrame = false;
             } else {
                 if (inSaveFrame) {
-                    return error(tokenizer.currentLineNumber, "Save frames cannot be nested.");
+                    return error(tokenizer.lineNumber, "Save frames cannot be nested.");
                 }
                 inSaveFrame = true;
-                saveFrame = new DataBlock(data, id);
+                saveCategories = Object.create(null);
+                saveFrame = Data.SafeFrame(saveCategories, saveHeader);
             }
             moveNext(tokenizer);
-            // Loop
-        } */ else if (token === CifTokenType.Loop) {
-            const cat = await handleLoop(tokenizer, /*inSaveFrame ? saveFrame : */ blockCategories);
+        // Loop
+        } else if (token === CifTokenType.Loop) {
+            const cat = await handleLoop(tokenizer, inSaveFrame ? saveCategories : blockCategories);
             if (cat.hasError) {
                 return error(cat.errorLine, cat.errorMessage);
             }
-            // Single row
+        // Single row
         } else if (token === CifTokenType.ColumnName) {
-            const cat = handleSingle(tokenizer, /*inSaveFrame ? saveFrame :*/ blockCategories);
+            const cat = handleSingle(tokenizer, inSaveFrame ? saveCategories : blockCategories);
             if (cat.hasError) {
                 return error(cat.errorLine, cat.errorMessage);
             }
-            // Out of options
+        // Out of options
         } else {
             return error(tokenizer.lineNumber, 'Unexpected token. Expected data_, loop_, or data name.');
         }
     }
 
     // Check if the latest save frame was closed.
-    // if (inSaveFrame) {
-    //     return error(tokenizer.currentLineNumber, "Unfinished save frame (`" + saveFrame.header + "`).");
-    // }
+    if (inSaveFrame) {
+        return error(tokenizer.lineNumber, "Unfinished save frame (`" + saveFrame.header + "`).");
+    }
 
     if (Object.keys(blockCategories).length > 0) {
-        dataBlocks.push(Data.Block(blockCategories, blockHeader));
+        dataBlocks.push(Data.Block(blockCategories, blockHeader, saveFrames));
     }
 
     return result(Data.File(dataBlocks));

+ 35 - 5
src/script.ts

@@ -5,7 +5,7 @@
  * @author David Sehnal <david.sehnal@gmail.com>
  */
 
-// import * as util from 'util'
+import * as util from 'util'
 import * as fs from 'fs'
 
 import Gro from './reader/gro/parser'
@@ -74,7 +74,7 @@ async function runGro(input: string) {
     console.log(residueNumber.length, residueNumber[0], residueNumber[residueNumber.length - 1])
 }
 
-function _gro() {
+export function _gro() {
     fs.readFile(`./examples/${file}`, 'utf8', function (err, input) {
         if (err) {
             return console.log(err);
@@ -83,7 +83,7 @@ function _gro() {
     });
 }
 
-_gro()
+// _gro()
 
 async function runCIF(input: string | Uint8Array) {
     console.time('parseCIF');
@@ -110,7 +110,7 @@ async function runCIF(input: string | Uint8Array) {
 
 export function _cif() {
     let path = `./examples/1cbs_updated.cif`;
-    path = 'c:/test/quick/3j3q.cif';
+    path = '../test/3j3q.cif'  // let's use a relative path for big test files
     fs.readFile(path, 'utf8', function (err, input) {
         if (err) {
             return console.log(err);
@@ -134,7 +134,37 @@ export function _cif() {
     });
 }
 
-_cif();
+// _cif();
+
+async function runDic(input: string | Uint8Array) {
+    console.time('parseDic');
+    const comp = typeof input === 'string' ? CIF.parseText(input) : CIF.parseBinary(input);
+
+    const ctx = Computation.observable({ updateRateMs: 250, observer: p => showProgress('DIC', p) });
+    const parsed = await comp(ctx);
+    console.timeEnd('parseDic');
+    if (parsed.isError) {
+        console.log(parsed);
+        return;
+    }
+
+    const data = parsed.result.blocks[0];
+    console.log(util.inspect(data.saveFrames, {showHidden: false, depth: 3}))
+}
+
+export function _dic() {
+    let path = '../test/mmcif_pdbx_v50.dic'
+    fs.readFile(path, 'utf8', function (err, input) {
+        if (err) {
+            return console.log(err);
+        }
+        console.log('------------------');
+        console.log('Text DIC:');
+        runDic(input);
+    });
+}
+
+_dic();
 
 import Computation from './utils/computation'
 const comp = Computation.create(async ctx => {