Browse Source

Merge branch 'master' into mol2-2

Zepei Xu 7 years ago
parent
commit
3e6aa856b0

+ 85 - 0
docs/cif-schemas.md

@@ -0,0 +1,85 @@
+How CIF schemas work
+========
+
+CIF representation (simplified):
+
+```ts
+type Frame = (name: string) => Category | undefined // Frame is either a data block or a save frame
+type Category = (name: string) => Field | undefined
+type Field = { rowCount: number, getNumber: (row: number) => number, getString: (row: number) => string }
+```
+
+This is obviously not strongly typed and the "fields" don't know what type they are. To solve this, we create a type to describe what a field contains and how to map it to a "typed column":
+
+```ts
+type FieldSchema<T> = { T: T /* remember the type */, createColumn: (field: Field) => Column<T> }
+```
+
+where ``Column`` is just a simple interface that returns a value of type ``T`` for a given row:
+
+```ts
+type Column<T> = { rowCount: number, get: (row: number) => T }
+```
+
+A category schema is just an object whose properties are all instances of "field schemas"; its "shape" has the type:
+
+```ts
+type CategorySchema = { [fieldName: string]: FieldSchema<any> }
+```
+
+We can declare our first category "schema":
+
+```ts
+const my_category = {
+  num_field: { T: 0 as number, createColumn: f => ({ rowCount: f.rowCount, get: f.getNumber }) },
+  str_field: { T: '' as string, createColumn: f => ({ rowCount: f.rowCount, get: f.getString }) }
+}
+```
+
+Notice that the type of ``my_category`` is not specified. Assigning it explicitly would hide the actual property names, which we do not want. Moreover, the names of the properties must match the names of the fields in the actual category (optionally, a field ``alias`` can be added to the field schema).
+
+Given a category schema, we need to construct a type that defines the typed category itself:
+
+```ts
+type TypedCategory<Schema extends CategorySchema> = { [F in keyof Schema]: Column<Schema[F]['T']> }
+```
+
+In other words, the type ``TypedCategory`` has a property of type ``Column<_>`` for each property of the schema. ``Schema[F]['T']`` just says: extract the type of the property called ``T`` from property ``F`` in ``Schema`` (see [mapped types in TypeScript](https://www.typescriptlang.org/docs/handbook/advanced-types.html)). ``Schema extends CategorySchema`` says that all properties of ``Schema`` must be of type ``FieldSchema<any>``.
+
+Finally, we just define a mapping, ``toTypedCategory``:
+
+```ts
+function toTypedCategory<Schema extends CategorySchema>(schema: Schema, category: Category): TypedCategory<Schema> {
+    const typedCategory: any = {};
+    for (const key of Object.keys(schema)) {
+        // remember a category is just a function that assigns a Field to a name
+        const field = category(key);
+        typedCategory[key] = field 
+            ? schema[key].createColumn(field)
+            : UndefinedColumn(schema[key].T); // a column that always returns 0 or empty string depending on type
+    }
+    return typedCategory;
+}
+```
+
+This transforms the "untyped" ``Category`` to some typed category and gives us code-completion for CIF files:
+
+```ts
+const typed = toTypedCategory(my_category, ...);
+typed.n /* shows code completion for num_field */
+const num = typed.num_field.get(0); /* num has type number */
+```
+
+And that's all there is to it. Extending the types to the "frame" level is left as an exercise to the reader.
+
+The advantage of this approach is that the types are generated directly from the data. This means we only need to define them once (as opposed to defining the data interfaces separately) and, on top of that, the "schemas" also serve as a template for how to actually perform the transformation to the typed version of CIF (again without the need to do this "manually", except for the one-time definition of the schema).
+
+----------------
+
+
+**Note:** To create a type alias for a category defined this way we can do:
+
+```ts
+type MyCategory = TypedCategory<typeof my_category>
+function makeMyTypedCategory(c: Category): MyCategory { return toTypedCategory(my_category, c); }
+```

+ 7 - 4
package.json

@@ -13,7 +13,8 @@
     "test": "./node_modules/.bin/jest",
     "dist": "./node_modules/.bin/uglifyjs build/js/molio.dev.js -cm > dist/molio.js && cp build/js/molio.esm.js dist/molio.esm.js",
     "script": "./node_modules/.bin/rollup build/js/src/script.js -e fs -f cjs -o build/js/script.js",
-    "runscript": "npm run script && node build/js/script.js"
+    "runscript": "npm run script && node build/js/script.js",
+    "download-dics": "./node_modules/.bin/download -o build/dics http://mmcif.wwpdb.org/dictionaries/ascii/mmcif_pdbx_v50.dic && ./node_modules/.bin/download -o build/dics http://mmcif.wwpdb.org/dictionaries/ascii/mmcif_ddl.dic"
   },
   "jest": {
     "moduleFileExtensions": [
@@ -29,7 +30,8 @@
   "license": "MIT",
   "devDependencies": {
     "@types/jest": "^21.1.2",
-    "@types/node": "^8.0.32",
+    "@types/node": "^8.0.34",
+    "download-cli": "^1.0.5",
     "jest": "^21.2.1",
     "rollup": "^0.50.0",
     "rollup-plugin-buble": "^0.16.0",
@@ -37,10 +39,11 @@
     "rollup-plugin-json": "^2.3.0",
     "rollup-plugin-node-resolve": "^3.0.0",
     "rollup-watch": "^4.3.1",
-    "ts-jest": "^21.0.1",
+    "ts-jest": "^21.1.2",
     "tslint": "^5.7.0",
     "typescript": "^2.5.3",
-    "uglify-js": "^3.1.3"
+    "uglify-js": "^3.1.3",
+    "util.promisify": "^1.0.0"
   },
   "dependencies": {}
 }

+ 9 - 5
src/reader/cif/binary/field.ts

@@ -13,7 +13,7 @@ import { parseInt as fastParseInt, parseFloat as fastParseFloat } from '../../co
 export default function Field(column: EncodedColumn): Data.Field {
     const mask = column.mask ? decode(column.mask) as number[] : void 0;
     const data = decode(column.data);
-    const isNumeric = (data as any).buffer && (data as any).byteLength && (data as any).BYTES_PER_ELEMENT;
+    const isNumeric = Column.isTypedArray(data);
 
     const str: Data.Field['str'] = isNumeric
         ? mask
@@ -45,9 +45,13 @@ export default function Field(column: EncodedColumn): Data.Field {
         float,
         presence,
         areValuesEqual: (rowA, rowB) => data[rowA] === data[rowB],
-        stringEquals(row, v) { return str(row) === v; },
-        toStringArray(params) { return Column.createAndFillArray(rowCount, str, params); },
-        toIntArray(params) { return Column.createAndFillArray(rowCount, int, params); },
-        toFloatArray(params)  { return Column.createAndFillArray(rowCount, float, params); }
+        stringEquals: (row, v) => str(row) === v,
+        toStringArray: params => Column.createAndFillArray(rowCount, str, params),
+        toIntArray: isNumeric
+            ? params => Column.typedArrayWindow(data, params)
+            : params => Column.createAndFillArray(rowCount, int, params),
+        toFloatArray: isNumeric
+            ? params => Column.typedArrayWindow(data, params)
+            : params => Column.createAndFillArray(rowCount, float, params)
     };
 }

+ 7 - 9
src/reader/cif/data-model.ts

@@ -15,25 +15,23 @@ export function File(blocks: ArrayLike<Block>, name?: string): File {
     return { name, blocks: blocks as any };
 }
 
-export interface Block {
+export interface Frame {
     readonly header: string,
     readonly categories: Categories
-    readonly saveFrames: SafeFrame[]
 }
 
-export function Block(categories: Categories, header: string, saveFrames: SafeFrame[] = []): Block {
+export interface Block extends Frame {
+    readonly saveFrames: Frame[]
+}
+
+export function Block(categories: Categories, header: string, saveFrames: Frame[] = []): Block {
     if (Object.keys(categories).some(k => k[0] !== '_')) {
         throw new Error(`Category names must start with '_'.`);
     }
     return { header, categories, saveFrames };
 }
 
-export interface SafeFrame {
-    readonly header: string,
-    readonly categories: Categories
-}
-
-export function SafeFrame(categories: Categories, header: string): SafeFrame {
+export function SafeFrame(categories: Categories, header: string): Frame {
     return { header, categories };
 }
 

+ 1 - 1
src/reader/cif/index.ts

@@ -7,7 +7,7 @@
 import parseText from './text/parser'
 import parseBinary from './binary/parser'
 import { Block } from './data-model'
-import { apply as applySchema } from './schema'
+import { toTypedFrame as applySchema } from './schema'
 import mmCIF from './schema/mmcif'
 
 export default {

+ 28 - 34
src/reader/cif/schema.ts

@@ -25,35 +25,30 @@ import StringPool from '../../utils/short-string-pool'
 
 //////////////////////////////////////////////
 
-export function apply<Schema extends Block.Schema>(schema: Schema, block: Data.Block): Block.Instance<Schema> {
-    return createBlock(schema, block) as Block.Instance<Schema>;
+export function toTypedFrame<Schema extends FrameSchema>(schema: Schema, frame: Data.Frame): TypedFrame<Schema> {
+    return createTypedFrame(schema, frame) as TypedFrame<Schema>;
 }
 
-export type Block<Categories> = Categories & {
-    readonly _header?: string,
-    /** For accessing 'non-standard' categories */
-    _getCategory(name: string): Data.Category | undefined
+export function toTypedCategory<Schema extends CategorySchema>(schema: Schema, category: Data.Category): TypedCategory<Schema> {
+    return new _TypedCategory(category, schema, true) as TypedCategory<any>;
 }
 
-export namespace Block {
-    export type Schema = { [category: string]: Category.Schema }
-    export type Instance<T extends Schema> = Block<{ [C in keyof T]: Category.Instance<T[C]> }>
-}
+export type FrameSchema = { [category: string]: CategorySchema }
+export type TypedFrame<Schema extends FrameSchema> = {
+    readonly _header?: string,
+    readonly _frame: Data.Frame
+} & { [C in keyof Schema]: TypedCategory<Schema[C]> }
+
 
-export type Category<Fields> = Fields & {
+export type CategorySchema = { [field: string]: Field.Schema<any> }
+export type TypedCategory<Schema extends CategorySchema> = {
     readonly _rowCount: number,
     readonly _isDefined: boolean,
-    /** For accessing 'non-standard' fields */
-    _getField(name: string): Data.Field | undefined
-}
-
-export namespace Category {
-    export type Schema = { [field: string]: Field.Schema<any> }
-    export type Instance<T extends Schema> = Category<{ [F in keyof T]: Column.Column<T[F]['type']> }>
-}
+    readonly _category: Data.Category
+} & { [F in keyof Schema]: Column.Column<Schema[F]['T']> }
 
 export namespace Field {
-    export interface Schema<T> { type: T, ctor: (field: Data.Field, category: Data.Category, key: string) => Column.Column<T>, undefinedField: (c: number) => Data.Field, alias?: string };
+    export interface Schema<T> { T: T, ctor: (field: Data.Field, category: Data.Category, key: string) => Column.Column<T>, undefinedField: (c: number) => Data.Field, alias?: string };
     export interface Spec { undefinedField?: (c: number) => Data.Field, alias?: string }
 
     export function alias(name: string): Schema<any> { return { alias: name } as any; }
@@ -101,25 +96,24 @@ export namespace Field {
         }
     }
 
+    // spec argument is to allow for specialised implementation for undefined fields
     function createSchema<T>(spec: Spec | undefined, ctor: (field: Data.Field, category: Data.Category, key: string) => Column.Column<T>): Schema<T> {
-        return { type: 0 as any, ctor, undefinedField: (spec && spec.undefinedField) || Data.DefaultUndefinedField, alias: spec && spec.alias };
+        return { T: 0 as any, ctor, undefinedField: (spec && spec.undefinedField) || Data.DefaultUndefinedField, alias: spec && spec.alias };
     }
 }
 
-class _Block implements Block<any> { // tslint:disable-line:class-name
-    header = this._block.header;
-    getCategory(name: string) { return this._block.categories[name]; }
-    constructor(private _block: Data.Block, schema: Block.Schema) {
+class _TypedFrame implements TypedFrame<any> { // tslint:disable-line:class-name
+    header = this._frame.header;
+    constructor(public _frame: Data.Frame, schema: FrameSchema) {
         for (const k of Object.keys(schema)) {
-            Object.defineProperty(this, k, { value: createCategory(k, schema[k], _block), enumerable: true, writable: false, configurable: false });
+            Object.defineProperty(this, k, { value: createTypedCategory(k, schema[k], _frame), enumerable: true, writable: false, configurable: false });
         }
     }
 }
 
-class _Category implements Category<any> { // tslint:disable-line:class-name
+class _TypedCategory implements TypedCategory<any> { // tslint:disable-line:class-name
     _rowCount = this._category.rowCount;
-    _getField(name: string) { return this._category.getField(name); }
-    constructor(private _category: Data.Category, schema: Category.Schema, public _isDefined: boolean) {
+    constructor(public _category: Data.Category, schema: CategorySchema, public _isDefined: boolean) {
         const fieldKeys = Object.keys(schema).filter(k => k !== '@alias');
         const cache = Object.create(null);
         for (const k of fieldKeys) {
@@ -139,13 +133,13 @@ class _Category implements Category<any> { // tslint:disable-line:class-name
     }
 }
 
-function createBlock(schema: Block.Schema, block: Data.Block): any {
-    return new _Block(block, schema);
+function createTypedFrame(schema: FrameSchema, frame: Data.Frame): any {
+    return new _TypedFrame(frame, schema);
 }
 
-function createCategory(key: string, schema: Category.Schema, block: Data.Block) {
+function createTypedCategory(key: string, schema: CategorySchema, frame: Data.Frame) {
     const alias = (schema['@alias'] && schema['@alias'].alias) || key;
     const name = alias[0] === '_' ? alias : '_' + alias;
-    const cat = block.categories[name];
-    return new _Category(cat || Data.Category.Empty, schema, !!cat);
+    const cat = frame.categories[name];
+    return new _TypedCategory(cat || Data.Category.Empty, schema, !!cat);
 }

+ 2 - 0
src/reader/cif/schema/ddl.ts

@@ -0,0 +1,2 @@
+
+// TODO save frame schema for ddl http://mmcif.wwpdb.org/dictionaries/mmcif_ddl.dic/Index/

+ 73 - 0
src/reader/cif/schema/dic.ts

@@ -0,0 +1,73 @@
+/**
+ * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ */
+
+import { Field, TypedFrame } from '../schema'
+
+const str = Field.str()
+const float = Field.float()
+
+const datablock = {
+    id: str,
+    description: str
+}
+
+const dictionary = {
+    title: str,
+    datablock_id: str,
+    version: str
+}
+
+const dictionary_history = {
+    version: str,
+    update: str,
+    revision: str
+}
+
+const sub_category = {
+    id: str,
+    description: str
+}
+
+const category_group_list = {
+    id: str,
+    parent_id: str,
+    description: str
+}
+
+const item_type_list = {
+    code: str,
+    primitive_code: str,
+    construct: str,
+    detail: str
+}
+
+const item_units_list = {
+    code: str,
+    detail: str
+}
+
+const item_units_conversion = {
+    from_code: str,
+    to_code: str,
+    operator: str,
+    factor: float
+}
+
+// TODO save frame dic schema
+
+const dic = {
+    datablock,
+    dictionary,
+    dictionary_history,
+    sub_category,
+    category_group_list,
+    item_type_list,
+    item_units_list,
+    item_units_conversion
+}
+
+type dic = TypedFrame<typeof dic>
+export default dic

+ 2 - 2
src/reader/cif/schema/mmcif.ts

@@ -4,7 +4,7 @@
  * @author David Sehnal <david.sehnal@gmail.com>
  */
 
-import { Field, Block } from '../schema'
+import { Field, TypedFrame } from '../schema'
 
 const pooledStr = Field.pooledStr();
 const str = Field.str();
@@ -243,5 +243,5 @@ const mmCIF = {
     pdbx_struct_mod_residue,
     atom_site
 };
-type mmCIF = Block.Instance<typeof mmCIF>
+type mmCIF = TypedFrame<typeof mmCIF>
 export default mmCIF;

+ 203 - 0
src/reader/cif/schema/utils.ts

@@ -0,0 +1,203 @@
+
+// import dic from './dic'
+import * as Data from '../data-model'
+
+export function getFieldType (type: string, values?: string[]) {
+    switch (type) {
+        case 'code':
+        case 'ucode':
+            if (values && values.length) {
+                return `str as Field.Schema<'${values.join("'|'")}'>`
+            } else {
+                return 'str'
+            }
+        case 'line':
+        case 'uline':
+        case 'text':
+        case 'name':
+        case 'idname':
+        case 'any':
+        case 'atcode':
+        case 'fax':
+        case 'phone':
+        case 'email':
+        case 'code30':
+        case 'ec-type':
+        case 'seq-one-letter-code':
+        case 'author':
+        case 'orcid_id':
+        case 'sequence_dep':
+        case 'pdb_id':
+        case 'emd_id':
+        // todo, consider adding specialised fields
+        case 'yyyy-mm-dd':
+        case 'yyyy-mm-dd:hh:mm':
+        case 'yyyy-mm-dd:hh:mm-flex':
+        case 'int-range':
+        case 'float-range':
+        case 'binary':
+        case 'operation_expression':
+        case 'ucode-alphanum-csv':
+        case 'point_symmetry':
+        case 'id_list':
+        case '4x3_matrix':
+        case 'point_group':
+        case 'point_group_helical':
+        case 'boolean':
+        case 'symmetry_operation':
+        case 'date_dep':
+            return 'str'
+        case 'uchar3':
+        case 'uchar1':
+        case 'symop':
+            return 'pooledStr'
+        case 'int':
+        case 'non_negative_int':
+        case 'positive_int':
+            return 'int'
+        case 'float':
+            return 'float'
+    }
+    console.log(`unknown type '${type}'`)
+    return 'str'
+}
+
+type FrameCategories = { [category: string]: Data.Frame }
+type FrameLinks = { [k: string]: string }
+
+interface FrameData {
+    categories: FrameCategories
+    links: FrameLinks
+}
+
+// get field from given or linked category
+function getField ( category: string, field: string, d: Data.Frame, ctx: FrameData): Data.Field|undefined {
+    const { categories, links } = ctx
+
+    const cat = d.categories[category]
+    if (cat) {
+        return cat.getField(field)
+    } else {
+        if (d.header in links) {
+            return getField(category, field, categories[links[d.header]], ctx)
+        } else {
+            // console.log(`no links found for '${d.header}'`)
+        }
+    }
+}
+
+function getEnums (d: Data.Frame, ctx: FrameData): string[]|undefined {
+    const value = getField('_item_enumeration', 'value', d, ctx)
+    if (value) {
+        const enums: string[] = []
+        for (let i = 0; i < value.rowCount; ++i) {
+            enums.push(value.str(i))
+            // console.log(value.str(i))
+        }
+        return enums
+    } else {
+        // console.log(`item_enumeration.value not found for '${d.header}'`)
+    }
+}
+
+function getCode (d: Data.Frame, ctx: FrameData): [string, string[]]|undefined {
+    const code = getField('_item_type', 'code', d, ctx)
+    if (code) {
+        let c = code.str(0)
+        let e = []
+        if (c === 'ucode') {
+            const enums = getEnums(d, ctx)
+            if (enums) e.push(...enums)
+        }
+        return [c, e]
+    } else {
+        console.log(`item_type.code not found for '${d.header}'`)
+    }
+}
+
+const header = `/**
+ * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Your friendly code generator
+ */
+
+import { Field, TypedFrame } from '../schema'
+
+const pooledStr = Field.pooledStr();
+const str = Field.str();
+const int = Field.int();
+const float = Field.float();`
+
+const footer = `
+type mmCIF = TypedFrame<typeof mmCIF>
+export default mmCIF;`
+
+export function generateSchema (dic: Data.Block) {  // todo Block needs to be specialized with safe frames as well
+    // const schema: FrameSchema = {}  // { [category: string]: Category.Schema } = {}
+    const schema: { [category: string]: { [field: string]: string } } = {}
+
+    const codeLines: string[] = []
+
+    // TODO: for fields with finite allowed values, generate:
+    // type FieldValue = 'a' | 'b' | 'c'
+    // const category = { field: <type> as Field.Schema<FieldValue> }
+
+    const categories: FrameCategories = {}
+    const links: FrameLinks = {}
+    dic.saveFrames.forEach(d => {
+        if (d.header[0] !== '_') return
+        categories[d.header] = d
+        const item_linked = d.categories['_item_linked']
+        if (item_linked) {
+            const child_name = item_linked.getField('child_name')
+            const parent_name = item_linked.getField('parent_name')
+            if (child_name && parent_name) {
+                for (let i = 0; i < item_linked.rowCount; ++i) {
+                    const childName = child_name.str(i)
+                    const parentName = parent_name.str(i)
+                    if (childName in links && links[childName] !== parentName) {
+                        console.log(`${childName} linked to ${links[childName]}, ignoring link to ${parentName}`)
+                    }
+                    links[childName] = parentName
+                }
+            }
+        }
+    })
+
+    Object.keys(categories).forEach(fullName => {
+        const d = categories[fullName]
+        const categoryName = d.header.substring(1, d.header.indexOf('.'))
+        const itemName = d.header.substring(d.header.indexOf('.') + 1)
+        let fields
+        if (categoryName in schema) {
+            fields = schema[categoryName]
+        } else {
+            fields = {}
+            schema[categoryName] = fields
+        }
+
+        const code = getCode(d, { categories, links })
+        if (code) {
+            fields[itemName] = getFieldType(code[0], code[1])
+        } else {
+            console.log(`could not determine code for '${d.header}'`)
+        }
+    })
+
+    schema.entry = { id: 'str' }
+
+    codeLines.push(`const mmCIF = {`)
+    Object.keys(schema).forEach(category => {
+        codeLines.push(`\t${category}: {`)
+        const fields = schema[category]
+        Object.keys(fields).forEach(field => {
+            const type = fields[field]
+            // TODO: check if quoting is required
+            codeLines.push(`\t\t'${field}': ${type},`)
+        })
+        codeLines.push('\t},')
+    })
+    codeLines.push('}')
+
+    return `${header}\n\n${codeLines.join('\n')}\n${footer}`
+}

+ 4 - 4
src/reader/cif/text/field.ts

@@ -44,7 +44,7 @@ export default function CifTextField(tokens: Tokens, rowCount: number): Data.Fie
         float,
         presence,
         areValuesEqual: TokenColumn.areValuesEqualProvider(tokens),
-        stringEquals(row, v) {
+        stringEquals: (row, v) => {
             const s = indices[2 * row];
             const value = v || '';
             if (!value && presence(row) !== Data.ValuePresence.Present) return true;
@@ -55,8 +55,8 @@ export default function CifTextField(tokens: Tokens, rowCount: number): Data.Fie
             }
             return true;
         },
-        toStringArray(params) { return Column.createAndFillArray(rowCount, str, params); },
-        toIntArray(params) { return Column.createAndFillArray(rowCount, int, params); },
-        toFloatArray(params)  { return Column.createAndFillArray(rowCount, float, params); }
+        toStringArray: params => Column.createAndFillArray(rowCount, str, params),
+        toIntArray: params => Column.createAndFillArray(rowCount, int, params),
+        toFloatArray: params => Column.createAndFillArray(rowCount, float, params)
     }
 }

+ 2 - 2
src/reader/cif/text/parser.ts

@@ -554,9 +554,9 @@ async function parseInternal(data: string, ctx: Computation.Context) {
     let inSaveFrame = false
 
     // the next three initial values are never used in valid files
-    let saveFrames: Data.SafeFrame[] = [];
+    let saveFrames: Data.Frame[] = [];
     let saveCategories = Object.create(null);
-    let saveFrame: Data.SafeFrame = Data.SafeFrame(saveCategories, '');
+    let saveFrame: Data.Frame = Data.SafeFrame(saveCategories, '');
 
     ctx.update({ message: 'Parsing...', current: 0, max: data.length });
 

+ 5 - 0
src/reader/common/binary/column.ts

@@ -0,0 +1,5 @@
+/**
+ * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */

+ 51 - 9
src/reader/common/column.ts

@@ -37,24 +37,57 @@ export function UndefinedColumn<T extends ColumnType>(rowCount: number, type: T)
         isDefined: false,
         rowCount,
         value,
-        isValueDefined(row) { return false; },
-        toArray(params) {
+        isValueDefined: row => false,
+        toArray: params => {
             const { array } = createArray(rowCount, params);
             for (let i = 0, _i = array.length; i < _i; i++) array[i] = value(0)
             return array;
         },
-        stringEquals(row, value) { return !value; },
-        areValuesEqual(rowA, rowB) { return true; }
+        stringEquals: (row, value) => !value,
+        areValuesEqual: (rowA, rowB) => true
+    }
+}
+
+export function ArrayColumn<T>(array: ArrayLike<T>): Column<T> {
+    const rowCount = array.length;
+    const value: Column<T>['value'] = row => array[row];
+    const isTyped = isTypedArray(array);
+    return {
+        isDefined: false,
+        rowCount,
+        value,
+        isValueDefined: row => true,
+        toArray: isTyped
+            ? params => typedArrayWindow(array, params) as any as ReadonlyArray<T>
+            : params => {
+                const { start, end } = getArrayBounds(rowCount, params);
+                const ret = new Array(end - start);
+                for (let i = 0, _i = end - start; i < _i; i++) ret[i] = array[start + i];
+                return ret;
+            },
+        stringEquals: isTyped
+            ? (row, value) => (array as any)[row] === +value
+            : (row, value) => {
+                const v = array[row];
+                if (typeof v !== 'string') return '' + v === value;
+                return v === value;
+            },
+        areValuesEqual: (rowA, rowB) => array[rowA] === array[rowB]
     }
 }
 
+/** A helper function for Column.toArray */
+export function getArrayBounds(rowCount: number, params?: ToArrayParams) {
+    const start = params && typeof params.start !== 'undefined' ? Math.max(Math.min(params.start, rowCount - 1), 0) : 0;
+    const end = params && typeof params.end !== 'undefined' ? Math.min(params.end, rowCount) : rowCount;
+    return { start, end };
+}
+
 /** A helped function for Column.toArray */
 export function createArray(rowCount: number, params?: ToArrayParams) {
-    const { array, start, end } = params || ({} as ToArrayParams);
-    const c = typeof array !== 'undefined' ? array : Array;
-    const s = typeof start !== 'undefined' ? Math.max(Math.min(start, rowCount - 1), 0) : 0;
-    const e = typeof end !== 'undefined' ? Math.min(end, rowCount) : rowCount;
-    return { array: new c(e - s) as any[], start: s, end: e };
+    const c = params && typeof params.array !== 'undefined' ? params.array : Array;
+    const { start, end } = getArrayBounds(rowCount, params);
+    return { array: new c(end - start) as any[], start, end };
 }
 
 /** A helped function for Column.toArray */
@@ -69,4 +102,13 @@ export function createAndFillArray(rowCount: number, value: (row: number) => any
     return fillArrayValues(value, array, start);
 }
 
+export function isTypedArray(data: any) {
+    return data.buffer && typeof data.byteLength === 'number' && data.BYTES_PER_ELEMENT;
+}
 
+export function typedArrayWindow(data: any, params?: ToArrayParams): ReadonlyArray<number> {
+    const { constructor, buffer, length, byteOffset, BYTES_PER_ELEMENT } = data;
+    const { start, end } = getArrayBounds(length, params);
+    if (start === 0 && end === length) return data;
+    return new constructor(buffer, byteOffset + BYTES_PER_ELEMENT * start, Math.min(length, end - start));
+}

+ 4 - 6
src/reader/common/text/column/fixed.ts

@@ -45,11 +45,9 @@ export function FixedColumn<T extends ColumnType>(lines: Tokens, offset: number,
         isDefined: true,
         rowCount,
         value,
-        isValueDefined(row) { return true; },
-        toArray(params) { return createAndFillArray(rowCount, value, params); },
-        stringEquals(row, v) { return value(row) === v; },
-        areValuesEqual(rowA, rowB) {
-            return value(rowA) === value(rowB);
-        }
+        isValueDefined: row => true,
+        toArray: params => createAndFillArray(rowCount, value, params),
+        stringEquals: (row, v) => value(row) === v,
+        areValuesEqual: (rowA, rowB) => value(rowA) === value(rowB)
     };
 }

+ 3 - 3
src/reader/common/text/column/token.ts

@@ -33,9 +33,9 @@ export function TokenColumn<T extends ColumnType>(tokens: Tokens, type: T): Colu
         isDefined: true,
         rowCount,
         value,
-        isValueDefined(row) { return true; },
-        toArray(params) { return createAndFillArray(rowCount, value, params); },
-        stringEquals(row, v) {
+        isValueDefined: row => true,
+        toArray: params => createAndFillArray(rowCount, value, params),
+        stringEquals: (row, v) => {
             const s = indices[2 * row];
             const value = v || '';
             const len = value.length;

+ 1 - 1
src/reader/spec/cif.spec.ts

@@ -26,7 +26,7 @@ namespace TestSchema {
 }
 
 describe('schema', () => {
-    const data = Schema.apply(TestSchema.schema, testBlock);
+    const data = Schema.toTypedFrame(TestSchema.schema, testBlock);
     it('property access', () => {
         const { x, name } = data.atoms;
         expect(x.value(0)).toBe(1);

+ 15 - 1
src/reader/spec/text-column.spec.ts → src/reader/spec/column.spec.ts

@@ -7,7 +7,7 @@
 
 import FixedColumn from '../common/text/column/fixed'
 import TokenColumn from '../common/text/column/token'
-import { ColumnType } from '../common/column'
+import { ColumnType, typedArrayWindow } from '../common/column'
 
 const lines = [
     '1.123 abc',
@@ -60,3 +60,17 @@ describe('token text column', () => {
         expect(col1.value(2)).toBe(1);
     })
 });
+
+describe('binary column', () => {
+    it('window works', () => {
+        const xs = new Float64Array([1, 2, 3, 4]);
+        const w1 = typedArrayWindow(xs, { start: 1 });
+        const w2 = typedArrayWindow(xs, { start: 2, end: 4 });
+
+        expect(w1.length).toBe(3);
+        for (let i = 0; i < w1.length; i++) expect(w1[i]).toBe(xs[i + 1]);
+
+        expect(w2.length).toBe(2);
+        for (let i = 0; i < w2.length; i++) expect(w2[i]).toBe(xs[i + 2]);
+    });
+})

+ 47 - 40
src/script.ts

@@ -8,9 +8,16 @@
 import * as util from 'util'
 import * as fs from 'fs'
 
+require('util.promisify').shim();
+const readFileAsync = util.promisify(fs.readFile);
+const writeFileAsync = util.promisify(fs.writeFile);
+
 import Gro from './reader/gro/parser'
 import CIF from './reader/cif/index'
 
+// import { toTypedFrame as applySchema } from './reader/cif/schema'
+import { generateSchema } from './reader/cif/schema/utils'
+
 const file = '1crn.gro'
 // const file = 'water.gro'
 // const file = 'test.gro'
@@ -74,13 +81,9 @@ async function runGro(input: string) {
     console.log(residueNumber.length, residueNumber[0], residueNumber[residueNumber.length - 1])
 }
 
-export function _gro() {
-    fs.readFile(`./examples/${file}`, 'utf8', function (err, input) {
-        if (err) {
-            return console.log(err);
-        }
-        runGro(input)
-    });
+export async function _gro() {
+    const input = await readFileAsync(`./examples/${file}`, 'utf8')
+    runGro(input)
 }
 
 // _gro()
@@ -106,35 +109,38 @@ async function runCIF(input: string | Uint8Array) {
     console.log(mmcif.atom_site.Cartn_x.value(0));
     console.log(mmcif.entity.type.toArray());
     console.log(mmcif.pdbx_struct_oper_list.matrix.value(0));
+
+    // const schema = await _dic()
+    // if (schema) {
+    //     const mmcif2 = applySchema(schema, data)
+    //     // console.log(util.inspect(mmcif2.atom_site, {showHidden: false, depth: 3}))
+    //     console.log(mmcif2.atom_site.Cartn_x.value(0));
+    //     console.log(mmcif2.entity.type.toArray());
+    //     // console.log(mmcif2.pdbx_struct_oper_list.matrix.value(0)); // TODO
+    // } else {
+    //     console.log('error getting mmcif schema from dic')
+    // }
 }
 
-export function _cif() {
+export async function _cif() {
     let path = `./examples/1cbs_updated.cif`;
-    path = '../test/3j3q.cif'  // lets have a relative path for big test files
-    fs.readFile(path, 'utf8', function (err, input) {
-        if (err) {
-            return console.log(err);
-        }
-        console.log('------------------');
-        console.log('Text CIF:');
-        runCIF(input);
-    });
+    // path = '../test/3j3q.cif'  // lets have a relative path for big test files
+    const input = await readFileAsync(path, 'utf8')
+    console.log('------------------');
+    console.log('Text CIF:');
+    runCIF(input);
 
     path = `./examples/1cbs_full.bcif`;
     // const path = 'c:/test/quick/3j3q.cif';
-    fs.readFile(path, function (err, input) {
-        if (err) {
-            return console.log(err);
-        }
-        console.log('------------------');
-        console.log('BinaryCIF:');
-        const data = new Uint8Array(input.byteLength);
-        for (let i = 0; i < input.byteLength; i++) data[i] = input[i];
-        runCIF(input);
-    });
+    const input2 = await readFileAsync(path)
+    console.log('------------------');
+    console.log('BinaryCIF:');
+    const data = new Uint8Array(input2.byteLength);
+    for (let i = 0; i < input2.byteLength; i++) data[i] = input2[i];
+    runCIF(input2);
 }
 
-// _cif();
+_cif();
 
 async function runDic(input: string | Uint8Array) {
     console.time('parseDic');
@@ -148,20 +154,21 @@ async function runDic(input: string | Uint8Array) {
         return;
     }
 
-    const data = parsed.result.blocks[0];
-    console.log(util.inspect(data.saveFrames, {showHidden: false, depth: 3}))
+    const schema = generateSchema(parsed.result.blocks[0])
+    // console.log(schema)
+    // console.log(util.inspect(Object.keys(schema).length, {showHidden: false, depth: 1}))
+
+    await writeFileAsync('./src/reader/cif/schema/mmcif-gen.ts', schema, 'utf8')
+
+    return schema
 }
 
-export function _dic() {
-    let path = '../test/mmcif_pdbx_v50.dic'
-    fs.readFile(path, 'utf8', function (err, input) {
-        if (err) {
-            return console.log(err);
-        }
-        console.log('------------------');
-        console.log('Text DIC:');
-        runDic(input);
-    });
+export async function _dic() {
+    let path = './build/dics/mmcif_pdbx_v50.dic'
+    const input = await readFileAsync(path, 'utf8')
+    console.log('------------------');
+    console.log('Text DIC:');
+    return runDic(input);
 }
 
 _dic();