Browse Source

CIF data model no longer includes '_' at start of category names

David Sehnal 7 years ago
parent
commit
4d2c7996da

+ 2 - 2
src/apps/cif2bcif/converter.ts

@@ -45,8 +45,8 @@ export default async function convert(path: string, asText = false) {
     const encoder = asText ? new TextCIFEncoder() : new BinaryCIFEncoder('mol* cif2bcif');
     for (const b of cif.blocks) {
         encoder.startDataBlock(b.header);
-        for (const _c of Object.keys(b.categories)) {
-            encoder.writeCategory(getCategoryInstanceProvider(b.categories[_c]));
+        for (const c of b.categoryNames) {
+            encoder.writeCategory(getCategoryInstanceProvider(b.categories[c]));
         }
     }
     return encoder.getData();

+ 2 - 2
src/mol-io/reader/_spec/cif.spec.ts

@@ -14,8 +14,8 @@ const columnData = `123abc`;
 const intField = TextField({ data: columnData, indices: [0, 1, 1, 2, 2, 3], count: 3 }, 3);
 const strField = TextField({ data: columnData, indices: [3, 4, 4, 5, 5, 6], count: 3 }, 3);
 
-const testBlock = Data.Block({
-    _atoms: Data.Category(3, {
+const testBlock = Data.Block(['atoms'], {
+    atoms: Data.Category('atoms', 3, ['x', 'name'], {
         x: intField,
         name: strField
     })

+ 3 - 3
src/mol-io/reader/cif/binary/parser.ts

@@ -24,7 +24,7 @@ function Category(data: EncodedCategory): Data.Category {
     for (const col of data.columns) map[col.name] = col;
     return {
         rowCount: data.rowCount,
-        name: data.name,
+        name: data.name.substr(1),
         fieldNames: data.columns.map(c => c.name),
         getField(name) {
             const col = map[name];
@@ -47,8 +47,8 @@ export default function parse(data: Uint8Array) {
             }
             const file = Data.File(unpacked.dataBlocks.map(block => {
                 const cats = Object.create(null);
-                for (const cat of block.categories) cats[cat.name] = Category(cat);
-                return Data.Block(cats, block.header);
+                for (const cat of block.categories) cats[cat.name.substr(1)] = Category(cat);
+                return Data.Block(block.categories.map(c => c.name.substr(1)), cats, block.header);
             }));
             return Result.success(file);
         } catch (e) {

+ 5 - 7
src/mol-io/reader/cif/data-model.ts

@@ -17,6 +17,7 @@ export function File(blocks: ArrayLike<Block>, name?: string): File {
 
 export interface Frame {
     readonly header: string,
+    readonly categoryNames: ReadonlyArray<string>,
     readonly categories: Categories
 }
 
@@ -24,15 +25,12 @@ export interface Block extends Frame {
     readonly saveFrames: Frame[]
 }
 
-export function Block(categories: Categories, header: string, saveFrames: Frame[] = []): Block {
-    if (Object.keys(categories).some(k => k[0] !== '_')) {
-        throw new Error(`Category names must start with '_'.`);
-    }
-    return { header, categories, saveFrames };
+export function Block(categoryNames: string[], categories: Categories, header: string, saveFrames: Frame[] = []): Block {
+    return { categoryNames, header, categories, saveFrames };
 }
 
-export function SafeFrame(categories: Categories, header: string): Frame {
-    return { header, categories };
+export function SafeFrame(categoryNames: string[], categories: Categories, header: string): Frame {
+    return { categoryNames, header, categories };
 }
 
 export type Categories = { readonly [name: string]: Category }

+ 1 - 1
src/mol-io/reader/cif/schema.ts

@@ -87,6 +87,6 @@ function createDatabase(schema: Database.Schema, frame: Data.Frame): Database<an
 }
 
 function createTable(key: string, schema: Table.Schema, frame: Data.Frame) {
-    const cat = frame.categories[key[0] === '_' ? key : '_' + key];
+    const cat = frame.categories[key];
     return new CategoryTable(cat || Data.Category.empty(key), schema, !!cat);
 }

+ 3 - 3
src/mol-io/reader/cif/schema/utils.ts

@@ -87,7 +87,7 @@ function getField ( category: string, field: string, d: Data.Frame, ctx: FrameDa
 }
 
 function getEnums (d: Data.Frame, ctx: FrameData): string[]|undefined {
-    const value = getField('_item_enumeration', 'value', d, ctx)
+    const value = getField('item_enumeration', 'value', d, ctx)
     if (value) {
         const enums: string[] = []
         for (let i = 0; i < value.rowCount; ++i) {
@@ -101,7 +101,7 @@ function getEnums (d: Data.Frame, ctx: FrameData): string[]|undefined {
 }
 
 function getCode (d: Data.Frame, ctx: FrameData): [string, string[]]|undefined {
-    const code = getField('_item_type', 'code', d, ctx)
+    const code = getField('item_type', 'code', d, ctx)
     if (code) {
         let c = code.str(0)
         let e = []
@@ -147,7 +147,7 @@ export function generateSchema (dic: Data.Block) {  // todo Block needs to be sp
     dic.saveFrames.forEach(d => {
         if (d.header[0] !== '_') return
         categories[d.header] = d
-        const item_linked = d.categories['_item_linked']
+        const item_linked = d.categories['item_linked']
         if (item_linked) {
             const child_name = item_linked.getField('child_name')
             const parent_name = item_linked.getField('parent_name')

+ 34 - 20
src/mol-io/reader/cif/text/parser.ts

@@ -411,10 +411,19 @@ interface CifCategoryResult {
     errorMessage: string;
 }
 
+type FrameContext = {
+    categoryNames: string[],
+    categories: { [name: string]: Data.Category }
+}
+
+function FrameContext(): FrameContext {
+    return { categoryNames: [], categories: Object.create(null) };
+}
+
 /**
  * Reads a category containing a single row.
  */
-function handleSingle(tokenizer: TokenizerState, categories: { [name: string]: Data.Category }): CifCategoryResult {
+function handleSingle(tokenizer: TokenizerState, ctx: FrameContext): CifCategoryResult {
     const nsStart = tokenizer.tokenStart, nsEnd = getNamespaceEnd(tokenizer);
     const name = getNamespace(tokenizer, nsEnd);
     const fields = Object.create(null);
@@ -441,7 +450,9 @@ function handleSingle(tokenizer: TokenizerState, categories: { [name: string]: D
         moveNext(tokenizer);
     }
 
-    categories[name] = Data.Category(name.substr(1), 1, fieldNames, fields);
+    const catName = name.substr(1);
+    ctx.categories[catName] = Data.Category(catName, 1, fieldNames, fields);
+    ctx.categoryNames.push(catName);
 
     return {
         hasError: false,
@@ -479,7 +490,7 @@ function readLoopChunks(state: LoopReadState) {
 /**
  * Reads a loop.
  */
-async function handleLoop(tokenizer: TokenizerState, categories: { [name: string]: Data.Category }): Promise<CifCategoryResult> {
+async function handleLoop(tokenizer: TokenizerState, ctx: FrameContext): Promise<CifCategoryResult> {
     const loopLine = tokenizer.lineNumber;
 
     moveNext(tokenizer);
@@ -491,7 +502,7 @@ async function handleLoop(tokenizer: TokenizerState, categories: { [name: string
         moveNext(tokenizer);
     }
 
-    const rowCountEstimate = name === '_atom_site' ? (tokenizer.data.length / 100) | 0 : 32;
+    const rowCountEstimate = name === '_atom_site' ? (tokenizer.data.length / 100) | 0 : 32; // `name` still has its leading '_' here; it is only stripped when the category is stored below
     const tokens: Tokens[] = [];
     const fieldCount = fieldNames.length;
     for (let i = 0; i < fieldCount; i++) tokens[i] = TokenBuilder.create(tokenizer, rowCountEstimate);
@@ -519,7 +530,9 @@ async function handleLoop(tokenizer: TokenizerState, categories: { [name: string
         fields[fieldNames[i]] = Field(tokens[i], rowCount);
     }
 
-    categories[name] = Data.Category(name.substr(1), rowCount, fieldNames, fields);
+    const catName = name.substr(1);
+    ctx.categories[catName] = Data.Category(catName, rowCount, fieldNames, fields);
+    ctx.categoryNames.push(catName);
 
     return {
         hasError: false,
@@ -551,14 +564,15 @@ async function parseInternal(data: string, ctx: Computation.Context) {
     const dataBlocks: Data.Block[] = [];
     const tokenizer = createTokenizer(data, ctx);
     let blockHeader: string = '';
-    let blockCategories = Object.create(null);
 
-    let inSaveFrame = false
+    let blockCtx = FrameContext();
+
+    let inSaveFrame = false;
 
     // the next three initial values are never used in valid files
     let saveFrames: Data.Frame[] = [];
-    let saveCategories = Object.create(null);
-    let saveFrame: Data.Frame = Data.SafeFrame(saveCategories, '');
+    let saveCtx = FrameContext();
+    let saveFrame: Data.Frame = Data.SafeFrame(saveCtx.categoryNames, saveCtx.categories, '');
 
     ctx.update({ message: 'Parsing...', current: 0, max: data.length });
 
@@ -571,19 +585,19 @@ async function parseInternal(data: string, ctx: Computation.Context) {
             if (inSaveFrame) {
                 return error(tokenizer.lineNumber, 'Unexpected data block inside a save frame.');
             }
-            if (Object.keys(blockCategories).length > 0) {
-                dataBlocks.push(Data.Block(blockCategories, blockHeader, saveFrames));
+            if (blockCtx.categoryNames.length > 0) {
+                dataBlocks.push(Data.Block(blockCtx.categoryNames, blockCtx.categories, blockHeader, saveFrames));
             }
             blockHeader = data.substring(tokenizer.tokenStart + 5, tokenizer.tokenEnd);
-            blockCategories = Object.create(null);
+            blockCtx = FrameContext();
             saveFrames = []
             moveNext(tokenizer);
         // Save frame
         } else if (token === CifTokenType.Save) {
             const saveHeader = data.substring(tokenizer.tokenStart + 5, tokenizer.tokenEnd);
             if (saveHeader.length === 0) {
-                if (Object.keys(saveCategories).length > 0) {
-                    saveFrames[saveFrames.length] = saveFrame
+                if (saveCtx.categoryNames.length > 0) {
+                    saveFrames[saveFrames.length] = saveFrame;
                 }
                 inSaveFrame = false;
             } else {
@@ -591,19 +605,19 @@ async function parseInternal(data: string, ctx: Computation.Context) {
                     return error(tokenizer.lineNumber, 'Save frames cannot be nested.');
                 }
                 inSaveFrame = true;
-                saveCategories = Object.create(null);
-                saveFrame = Data.SafeFrame(saveCategories, saveHeader);
+                saveCtx = FrameContext();
+                saveFrame = Data.SafeFrame(saveCtx.categoryNames, saveCtx.categories, saveHeader); // keep the frame's header: it is read by the unfinished-save-frame error and by schema generation
             }
             moveNext(tokenizer);
         // Loop
         } else if (token === CifTokenType.Loop) {
-            const cat = await handleLoop(tokenizer, inSaveFrame ? saveCategories : blockCategories);
+            const cat = await handleLoop(tokenizer, inSaveFrame ? saveCtx : blockCtx);
             if (cat.hasError) {
                 return error(cat.errorLine, cat.errorMessage);
             }
         // Single row
         } else if (token === CifTokenType.ColumnName) {
-            const cat = handleSingle(tokenizer, inSaveFrame ? saveCategories : blockCategories);
+            const cat = handleSingle(tokenizer, inSaveFrame ? saveCtx : blockCtx);
             if (cat.hasError) {
                 return error(cat.errorLine, cat.errorMessage);
             }
@@ -618,8 +632,8 @@ async function parseInternal(data: string, ctx: Computation.Context) {
         return error(tokenizer.lineNumber, 'Unfinished save frame (`' + saveFrame.header + '`).');
     }
 
-    if (Object.keys(blockCategories).length > 0) {
-        dataBlocks.push(Data.Block(blockCategories, blockHeader, saveFrames));
+    if (blockCtx.categoryNames.length > 0) {
+        dataBlocks.push(Data.Block(blockCtx.categoryNames, blockCtx.categories, blockHeader, saveFrames));
     }
 
     return result(Data.File(dataBlocks));

+ 3 - 2
src/perf-tests/structure.ts

@@ -45,6 +45,7 @@ export async function readCIF(path: string) {
     const data = parsed.result.blocks[0];
     console.time('schema')
     const mmcif = CIF.schema.mmCIF(data);
+
     console.timeEnd('schema')
     console.time('buildModels')
     const models = Model.create({ kind: 'mmCIF', data: mmcif });
@@ -237,9 +238,9 @@ export namespace PropertyAccess {
     // }
 
     export async function run() {
-        const { structures, models } = await readCIF('./examples/1cbs_full.bcif');
+        //const { structures, models } = await readCIF('./examples/1cbs_full.bcif');
         //const { structures, models } = await readCIF('e:/test/quick/3j3q_full.bcif');
-        //const { structures, models } = await readCIF('e:/test/quick/3j3q_updated.cif');
+        const { structures, models } = await readCIF('e:/test/quick/1cbs_updated.cif');
 
         //const { structures, models } = await readCIF('e:/test/molstar/3j3q.bcif');