Sfoglia il codice sorgente

Merge branch 'master' into structure-data-model

David Sehnal 7 anni fa
parent
commit
0353b9f829

+ 85 - 0
docs/cif-schemas.md

@@ -0,0 +1,85 @@
+How CIF schemas work
+========
+
+CIF representation (simplified):
+
+```ts
+type Frame = (name: string) => Category | undefined // Frame is either a data block or a save frame
+type Category = (name: string) => Field | undefined
+type Field = { rowCount: number, getNumber: (row) => number, getString: (row) => string }
+```
+
+This is obviously not strongly typed and the "fields" don't know what type they are. To solve this, we create a type to describe what a field contains and how to map it to a "typed column":
+
+```ts
+type FieldSchema<T> = { T: T /* remember the type */, createColumn: (field: Field) => Column<T> }
+```
+
+where column is just a simple interface that returns a value of ``T`` for a given row:
+
+```ts
+type Column<T> = { rowCount: number, get: (row: number) => T }
+```
+
+A category schema is just an object whose properties are all "field schemas"; its "shape" has the type:
+
+```ts
+type CategorySchema = { [fieldName: string]: FieldSchema<any> }
+```
+
+We can declare our first category "schema":
+
+```ts
+const my_category = {
+  num_field: { T: 0 as number, createColumn: f => ({ rowCount: f.rowCount, get: f.getNumber }) },
+  str_field: { T: '' as string, createColumn: f => ({ rowCount: f.rowCount, get: f.getString }) }
+}
+```
+
+Notice that the type of ``my_category`` is not specified. Assigning it explicitly would hide the actual property names, which we do not want. Moreover, the names of the properties must match the names of the fields in the actual category (optionally, a field ``alias`` can be added to the field schema).
+
+Given a category schema, we need to construct a type that defines the typed category itself:
+
+```ts
+type TypedCategory<Schema extends CategorySchema> = { [F in keyof Schema]: Column<Schema[F]['T']> }
+```
+
+In other words, the type ``TypedCategory`` has a property of type ``Column<_>`` for each property of the schema. ``Schema[F]['T']`` just says: extract the type of property called ``T`` from property ``F`` in ``Schema`` (see [mapped types in TypeScript](https://www.typescriptlang.org/docs/handbook/advanced-types.html)). ``Schema extends CategorySchema`` says that all properties of ``Schema`` must be of type ``FieldSchema<any>``.
+
+Finally, we just define a mapping, ``toTypedCategory``:
+
+```ts
+function toTypedCategory<Schema extends CategorySchema>(schema: Schema, category: Category): TypedCategory<Schema> {
+    const typedCategory: any = {};
+    for (const key of Object.keys(schema)) {
+        // remember a category is just a function that assigns a Field to a name
+        const field = category(key);
+        typedCategory[key] = field 
+            ? schema[key].createColumn(field)
+            : UndefinedColumn(schema[key].T); // a column that always returns 0 or empty string depending on type
+    }
+    return typedCategory;
+}
+```
+
+This transforms the "untyped" ``Category`` to a typed category and gives us code-completion for CIF files:
+
+```ts
+const typed = toTypedCategory(my_category, ...);
+typed.n /* shows code completion for num_field */
+const num = typed.num_field.get(0); /* num has type number */
+```
+
+And that's all there is to it. Extending the types to the "frame" level is left as an exercise to the reader.
+
+The advantage of this approach is that the types are generated directly from the data. This means we only need to define them once (as opposed to defining the data interfaces separately) and, on top of that, the "schemas" also serve as a template for how to actually perform the transformation to the typed version of CIF (again without the need to do this "manually", except for the one-time definition of the schema).
+
+----------------
+
+
+**Note:** To create a type alias for a category defined this way we can do:
+
+```ts
+type MyCategory = TypedCategory<typeof my_category>
+function makeMyTypedCategory(c: Category): MyCategory { return toTypedCategory(my_category, c); }
+```

+ 7 - 9
src/reader/cif/data-model.ts

@@ -15,25 +15,23 @@ export function File(blocks: ArrayLike<Block>, name?: string): File {
     return { name, blocks: blocks as any };
 }
 
-export interface Block {
+export interface Frame {
     readonly header: string,
     readonly categories: Categories
-    readonly saveFrames: SafeFrame[]
 }
 
-export function Block(categories: Categories, header: string, saveFrames: SafeFrame[] = []): Block {
+export interface Block extends Frame {
+    readonly saveFrames: Frame[]
+}
+
+export function Block(categories: Categories, header: string, saveFrames: Frame[] = []): Block {
     if (Object.keys(categories).some(k => k[0] !== '_')) {
         throw new Error(`Category names must start with '_'.`);
     }
     return { header, categories, saveFrames };
 }
 
-export interface SafeFrame {
-    readonly header: string,
-    readonly categories: Categories
-}
-
-export function SafeFrame(categories: Categories, header: string): SafeFrame {
+export function SafeFrame(categories: Categories, header: string): Frame {
     return { header, categories };
 }
 

+ 1 - 1
src/reader/cif/index.ts

@@ -7,7 +7,7 @@
 import parseText from './text/parser'
 import parseBinary from './binary/parser'
 import { Block } from './data-model'
-import { apply as applySchema } from './schema'
+import { toTypedFrame as applySchema } from './schema'
 import mmCIF from './schema/mmcif'
 
 export default {

+ 27 - 34
src/reader/cif/schema.ts

@@ -25,35 +25,30 @@ import StringPool from '../../utils/short-string-pool'
 
 //////////////////////////////////////////////
 
-export function apply<Schema extends Block.Schema>(schema: Schema, block: Data.Block): Block.Instance<Schema> {
-    return createBlock(schema, block) as Block.Instance<Schema>;
+export function toTypedFrame<Schema extends FrameSchema>(schema: Schema, frame: Data.Frame): TypedFrame<Schema> {
+    return createTypedFrame(schema, frame) as TypedFrame<Schema>;
 }
 
-export type Block<Categories> = Categories & {
-    readonly _header?: string,
-    /** For accessing 'non-standard' categories */
-    _getCategory(name: string): Data.Category | undefined
+export function toTypedCategory<Schema extends CategorySchema>(schema: Schema, category: Data.Category): TypedCategory<Schema> {
+    return new _TypedCategory(category, schema, true) as TypedCategory<any>;
 }
 
-export namespace Block {
-    export type Schema = { [category: string]: Category.Schema }
-    export type Instance<T extends Schema> = Block<{ [C in keyof T]: Category.Instance<T[C]> }>
-}
+export type FrameSchema = { [category: string]: CategorySchema }
+export type TypedFrame<Schema extends FrameSchema> = {
+    readonly _header?: string,
+    readonly _frame: Data.Frame
+} & { [C in keyof Schema]: TypedCategory<Schema[C]> }
+
 
-export type Category<Fields> = Fields & {
+export type CategorySchema = { [field: string]: Field.Schema<any> }
+export type TypedCategory<Schema extends CategorySchema> = {
     readonly _rowCount: number,
     readonly _isDefined: boolean,
-    /** For accessing 'non-standard' fields */
-    _getField(name: string): Data.Field | undefined
-}
-
-export namespace Category {
-    export type Schema = { [field: string]: Field.Schema<any> }
-    export type Instance<T extends Schema> = Category<{ [F in keyof T]: Column.Column<T[F]['type']> }>
-}
+    readonly _category: Data.Category
+} & { [F in keyof Schema]: Column.Column<Schema[F]['T']> }
 
 export namespace Field {
-    export interface Schema<T> { type: T, ctor: (field: Data.Field, category: Data.Category, key: string) => Column.Column<T>, undefinedField: (c: number) => Data.Field, alias?: string };
+    export interface Schema<T> { T: T, ctor: (field: Data.Field, category: Data.Category, key: string) => Column.Column<T>, undefinedField: (c: number) => Data.Field, alias?: string };
     export interface Spec { undefinedField?: (c: number) => Data.Field, alias?: string }
 
     export function alias(name: string): Schema<any> { return { alias: name } as any; }
@@ -103,24 +98,22 @@ export namespace Field {
 
     // spec argument is to allow for specialised implementation for undefined fields
     function createSchema<T>(spec: Spec | undefined, ctor: (field: Data.Field, category: Data.Category, key: string) => Column.Column<T>): Schema<T> {
-        return { type: 0 as any, ctor, undefinedField: (spec && spec.undefinedField) || Data.DefaultUndefinedField, alias: spec && spec.alias };
+        return { T: 0 as any, ctor, undefinedField: (spec && spec.undefinedField) || Data.DefaultUndefinedField, alias: spec && spec.alias };
     }
 }
 
-class _Block implements Block<any> { // tslint:disable-line:class-name
-    header = this._block.header;
-    getCategory(name: string) { return this._block.categories[name]; }
-    constructor(private _block: Data.Block, schema: Block.Schema) {
+class _TypedFrame implements TypedFrame<any> { // tslint:disable-line:class-name
+    header = this._frame.header;
+    constructor(public _frame: Data.Frame, schema: FrameSchema) {
         for (const k of Object.keys(schema)) {
-            Object.defineProperty(this, k, { value: createCategory(k, schema[k], _block), enumerable: true, writable: false, configurable: false });
+            Object.defineProperty(this, k, { value: createTypedCategory(k, schema[k], _frame), enumerable: true, writable: false, configurable: false });
         }
     }
 }
 
-class _Category implements Category<any> { // tslint:disable-line:class-name
+class _TypedCategory implements TypedCategory<any> { // tslint:disable-line:class-name
     _rowCount = this._category.rowCount;
-    _getField(name: string) { return this._category.getField(name); }
-    constructor(private _category: Data.Category, schema: Category.Schema, public _isDefined: boolean) {
+    constructor(public _category: Data.Category, schema: CategorySchema, public _isDefined: boolean) {
         const fieldKeys = Object.keys(schema).filter(k => k !== '@alias');
         const cache = Object.create(null);
         for (const k of fieldKeys) {
@@ -140,13 +133,13 @@ class _Category implements Category<any> { // tslint:disable-line:class-name
     }
 }
 
-function createBlock(schema: Block.Schema, block: Data.Block): any {
-    return new _Block(block, schema);
+function createTypedFrame(schema: FrameSchema, frame: Data.Frame): any {
+    return new _TypedFrame(frame, schema);
 }
 
-function createCategory(key: string, schema: Category.Schema, block: Data.Block) {
+function createTypedCategory(key: string, schema: CategorySchema, frame: Data.Frame) {
     const alias = (schema['@alias'] && schema['@alias'].alias) || key;
     const name = alias[0] === '_' ? alias : '_' + alias;
-    const cat = block.categories[name];
-    return new _Category(cat || Data.Category.Empty, schema, !!cat);
+    const cat = frame.categories[name];
+    return new _TypedCategory(cat || Data.Category.Empty, schema, !!cat);
 }

+ 2 - 2
src/reader/cif/schema/dic.ts

@@ -4,7 +4,7 @@
  * @author Alexander Rose <alexander.rose@weirdbyte.de>
  */
 
-import { Field, Block } from '../schema'
+import { Field, TypedFrame } from '../schema'
 
 const str = Field.str()
 const float = Field.float()
@@ -69,5 +69,5 @@ const dic = {
     item_units_conversion
 }
 
-type dic = Block.Instance<typeof dic>
+type dic = TypedFrame<typeof dic>
 export default dic

+ 2 - 2
src/reader/cif/schema/mmcif.ts

@@ -4,7 +4,7 @@
  * @author David Sehnal <david.sehnal@gmail.com>
  */
 
-import { Field, Block } from '../schema'
+import { Field, TypedFrame } from '../schema'
 
 const pooledStr = Field.pooledStr();
 const str = Field.str();
@@ -243,5 +243,5 @@ const mmCIF = {
     pdbx_struct_mod_residue,
     atom_site
 };
-type mmCIF = Block.Instance<typeof mmCIF>
+type mmCIF = TypedFrame<typeof mmCIF>
 export default mmCIF;

+ 10 - 6
src/reader/cif/schema/utils.ts

@@ -1,6 +1,6 @@
 
 // import dic from './dic'
-import { Field, Block, Category } from '../schema'
+import { Field, FrameSchema } from '../schema'
 import * as Data from '../data-model'
 
 const pooledStr = Field.pooledStr()
@@ -63,7 +63,7 @@ export function getFieldType (type: string) {
     return str
 }
 
-type SafeFrameCategories = { [category: string]: Data.SafeFrame }
+type SafeFrameCategories = { [category: string]: Data.Frame }
 type SafeFrameLinks = { [k: string]: string }
 
 interface SafeFrameData {
@@ -72,7 +72,7 @@ interface SafeFrameData {
 }
 
 // get field from given or linked category
-function getField ( category: string, field: string, d: Data.SafeFrame, ctx: SafeFrameData): Data.Field|undefined {
+function getField ( category: string, field: string, d: Data.Frame, ctx: SafeFrameData): Data.Field|undefined {
     const { categories, links } = ctx
 
     const cat = d.categories[category]
@@ -101,7 +101,7 @@ function getField ( category: string, field: string, d: Data.SafeFrame, ctx: Saf
 //     }
 // }
 
-function getCode (d: Data.SafeFrame, ctx: SafeFrameData): string|undefined {
+function getCode (d: Data.Frame, ctx: SafeFrameData): string|undefined {
     const code = getField('_item_type', 'code', d, ctx)
     if (code) {
         let c = code.str(0)
@@ -116,7 +116,11 @@ function getCode (d: Data.SafeFrame, ctx: SafeFrameData): string|undefined {
 }
 
 export function getSchema (dic: Data.Block) {  // todo Block needs to be specialized with safe frames as well
-    const schema: Block.Schema = {}  // { [category: string]: Category.Schema } = {}
+    const schema: FrameSchema = {}  // { [category: string]: Category.Schema } = {}
+
+    // TODO: for fields with finite allowed values, generate:
+    // type FieldValue = 'a' | 'b' | 'c'
+    // const category = { field: <type> as Field.Schema<FieldValue> }
 
     const categories: SafeFrameCategories = {}
     const links: SafeFrameLinks = {}
@@ -160,5 +164,5 @@ export function getSchema (dic: Data.Block) {  // todo Block needs to be special
         }
     })
 
-    return schema as Block.Instance<any>
+    return schema;
 }

+ 2 - 2
src/reader/cif/text/parser.ts

@@ -554,9 +554,9 @@ async function parseInternal(data: string, ctx: Computation.Context) {
     let inSaveFrame = false
 
     // the next three initial values are never used in valid files
-    let saveFrames: Data.SafeFrame[] = [];
+    let saveFrames: Data.Frame[] = [];
     let saveCategories = Object.create(null);
-    let saveFrame: Data.SafeFrame = Data.SafeFrame(saveCategories, '');
+    let saveFrame: Data.Frame = Data.SafeFrame(saveCategories, '');
 
     ctx.update({ message: 'Parsing...', current: 0, max: data.length });
 

+ 1 - 1
src/reader/spec/cif.spec.ts

@@ -26,7 +26,7 @@ namespace TestSchema {
 }
 
 describe('schema', () => {
-    const data = Schema.apply(TestSchema.schema, testBlock);
+    const data = Schema.toTypedFrame(TestSchema.schema, testBlock);
     it('property access', () => {
         const { x, name } = data.atoms;
         expect(x.value(0)).toBe(1);

+ 1 - 1
src/script.ts

@@ -14,7 +14,7 @@ const readFileAsync = util.promisify(fs.readFile);
 import Gro from './reader/gro/parser'
 import CIF from './reader/cif/index'
 
-import { apply as applySchema } from './reader/cif/schema'
+import { toTypedFrame as applySchema } from './reader/cif/schema'
 import { getSchema } from './reader/cif/schema/utils'
 
 const file = '1crn.gro'