Browse Source

Updated to new data model

David Sehnal 7 years ago
parent
commit
87dbbe3323

+ 2 - 1
src/data/data.ts

@@ -65,5 +65,6 @@ export interface Field {
     stringEquals(row: number, value: string | null): boolean,
 
     toStringArray(startRow: number, endRowExclusive: number, ctor: (size: number) => FieldArray): ReadonlyArray<string>,
-    toNumberArray(startRow: number, endRowExclusive: number, ctor: (size: number) => FieldArray): ReadonlyArray<number>
+    toIntArray(startRow: number, endRowExclusive: number, ctor: (size: number) => FieldArray): ReadonlyArray<number>,
+    toFloatArray(startRow: number, endRowExclusive: number, ctor: (size: number) => FieldArray): ReadonlyArray<number>
 }

+ 10 - 6
src/data/schema.ts

@@ -40,6 +40,7 @@ export namespace Block {
 
 export type Category<Fields> = Fields & {
     readonly _rowCount: number,
+    readonly _isDefined: boolean,
     /** For accessing 'non-standard' fields */
     _getField(name: string): Data.Field | undefined
 }
@@ -66,14 +67,16 @@ export namespace Field {
     export function str(spec?: Spec) { return createSchema(spec, Str); }
     export function int(spec?: Spec) { return createSchema(spec, Int); }
     export function float(spec?: Spec) { return createSchema(spec, Float); }
+    export function value<T>(spec?: Spec): Schema<T> { return createSchema(spec, Value); }
 
     function create<T>(field: Data.Field, value: (row: number) => T, toArray: Field<T>['toArray']): Field<T> {
         return { isDefined: field.isDefined, value, presence: field.presence, areValuesEqual: field.areValuesEqual, stringEquals: field.stringEquals, toArray };
     }
 
     function Str(field: Data.Field) { return create(field, field.str, field.toStringArray); }
-    function Int(field: Data.Field) { return create(field, field.int, field.toNumberArray); }
-    function Float(field: Data.Field) { return create(field, field.float, field.toNumberArray); }
+    function Int(field: Data.Field) { return create(field, field.int, field.toIntArray); }
+    function Float(field: Data.Field) { return create(field, field.float, field.toFloatArray); }
+    function Value(field: Data.Field) { return create(field, field.value, () => { throw Error('not supported'); }); }
 
     const DefaultUndefined: Data.Field = {
         isDefined: false,
@@ -92,7 +95,8 @@ export namespace Field {
             for (let i = 0; i < count; i++) { ret[i] = null; }
             return ret;
         },
-        toNumberArray: (startRow, endRowExclusive, ctor) => new Uint8Array(endRowExclusive - startRow) as any
+        toIntArray: (startRow, endRowExclusive, ctor) => new Uint8Array(endRowExclusive - startRow) as any,
+        toFloatArray: (startRow, endRowExclusive, ctor) => new Float32Array(endRowExclusive - startRow) as any
     };
 
     function createSchema<T>(spec: Spec | undefined, ctor: (field: Data.Field) => Field<T>): Schema<T> {
@@ -113,7 +117,7 @@ class _Block implements Block<any> { // tslint:disable-line:class-name
 class _Category implements Category<any> { // tslint:disable-line:class-name
     _rowCount = this._category.rowCount;
     _getField(name: string) { return this._category.getField(name); }
-    constructor(private _category: Data.Category, schema: Category.Schema) {
+    constructor(private _category: Data.Category, schema: Category.Schema, public _isDefined: boolean) {
         const fieldKeys = Object.keys(schema).filter(k => k !== '@alias');
         const cache = Object.create(null);
         for (const k of fieldKeys) {
@@ -137,6 +141,6 @@ function createBlock(schema: Block.Schema, block: Data.Block): any {
 }
 
 function createCategory(key: string, schema: Category.Schema, block: Data.Block) {
-    const cat = block.categories[schema['@alias'] || key] || Data.Category.Empty;
-    return new _Category(cat, schema);
+    const cat = block.categories[schema['@alias'] || key];
+    return new _Category(cat || Data.Category.Empty, schema, !!cat);
 }

+ 7 - 1
src/data/spec/schema.spec.ts

@@ -25,7 +25,13 @@ function Field(values: any[]): Data.Field {
             for (let i = 0; i < count; i++) { ret[i] = values[startRow + i]; }
             return ret;
         },
-        toNumberArray: (startRow, endRowExclusive, ctor) => {
+        toIntArray: (startRow, endRowExclusive, ctor) => {
+            const count = endRowExclusive - startRow;
+            const ret = ctor(count) as any;
+            for (let i = 0; i < count; i++) { ret[i] = +values[startRow + i]; }
+            return ret;
+        },
+        toFloatArray: (startRow, endRowExclusive, ctor) => {
             const count = endRowExclusive - startRow;
             const ret = ctor(count) as any;
             for (let i = 0; i < count; i++) { ret[i] = +values[startRow + i]; }

+ 1 - 6
src/index.d.ts

@@ -4,9 +4,4 @@
  * @author Alexander Rose <alexander.rose@weirdbyte.de>
  */
 
-export { ParserResult, ParserError, ParserSuccess } from './parser'
-
-export { Category } from './relational/category'
-export { Column } from './relational/column'
-
-export { parse as groReader } from './reader/gro'
+// TODO: fix me

+ 1 - 4
src/index.ts

@@ -4,7 +4,4 @@
  * @author Alexander Rose <alexander.rose@weirdbyte.de>
  */
 
-export { Category } from './relational/category'
-export { Column } from './relational/column'
-
-export { parse as groReader } from './reader/gro'
+// TODO: fix me

+ 0 - 0
src/reader/common/data.ts


+ 115 - 0
src/reader/common/text/data.ts

@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import * as Data from '../../../data/data'
+import { parseInt as fastParseInt, parseFloat as fastParseFloat } from './number-parser'
+import { Tokens } from './tokenizer'
+import ShortStringPool from '../../../utils/short-string-pool'
+
+/**
+ * Exposes a flat token table as a `Data.Category` with `rowCount` rows.
+ * The i-th name in `fields` is bound to `TokenField(info, i)`, i.e. to column i of every row.
+ */
+export function createCategory(data: string, fields: string[], tokens: Tokens, rowCount: number) {
+    const info: TokenFieldInfo = { data, fieldCount: fields.length, tokens: tokens.indices };
+    const byName = Object.create(null);
+
+    for (const [column, name] of fields.entries()) {
+        byName[name] = TokenField(info, column);
+    }
+
+    return Data.Category(rowCount, byName);
+}
+
+export interface TokenFieldInfo { // description of a row-major token table shared by all fields of a category
+    data: string, // text that the token offsets index into
+    tokens: ArrayLike<number>, // flat [start, end) offset pairs; the pair for (row, field) starts at 2 * (row * fieldCount + field)
+    fieldCount: number, // number of fields (columns) per row
+    isCif?: boolean // when true the single-character tokens '.' and '?' are treated as missing values; TokenField defaults it to false
+}
+
+export function TokenField(info: TokenFieldInfo, index: number): Data.Field { // Data.Field view over column `index` of the token table; every accessor re-reads its token from `data`
+    const { data, tokens, fieldCount, isCif = false } = info;
+    const stringPool = ShortStringPool.create(); // one pool per field; str() routes every substring through it
+
+    const str: Data.Field['str'] = isCif ? row => { // CIF variant: the tokens '.' and '?' read as null
+        const i = (row * fieldCount + index) * 2; // offset of this cell's [start, end) pair in `tokens`
+        const ret = ShortStringPool.get(stringPool, data.substring(tokens[i], tokens[i + 1]));
+        if (ret === '.' || ret === '?') return null;
+        return ret;
+    } : row => { // plain-text variant: the token text is returned as is
+        const i = (row * fieldCount + index) * 2;
+        return ShortStringPool.get(stringPool, data.substring(tokens[i], tokens[i + 1]));
+    };
+
+    const int: Data.Field['int'] = row => {
+        const i = (row * fieldCount + index) * 2;
+        return fastParseInt(data, tokens[i], tokens[i + 1]) || 0; // any falsy parse result is reported as 0
+    };
+
+    const float: Data.Field['float'] = row => {
+        const i = (row * fieldCount + index) * 2;
+        return fastParseFloat(data, tokens[i], tokens[i + 1]) || 0; // any falsy parse result is reported as 0
+    };
+
+    const presence: Data.Field['presence'] = isCif ? row => {
+        const i = 2 * (row * fieldCount + index);
+        const s = tokens[i];
+        if (tokens[i + 1] - s !== 1) return Data.ValuePresence.Present; // only single-character tokens can be '.' or '?'
+        const v = data.charCodeAt(s);
+        if (v === 46 /* . */) return Data.ValuePresence.NotSpecified;
+        if (v === 63 /* ? */) return Data.ValuePresence.Unknown;
+        return Data.ValuePresence.Present;
+    } : row => { // plain-text variant: only an empty token counts as missing
+        const i = 2 * (row * fieldCount + index);
+        return tokens[i] === tokens[i + 1] ? Data.ValuePresence.NotSpecified : Data.ValuePresence.Present
+    };
+
+    return {
+        isDefined: true,
+        str,
+        int,
+        float,
+        value: str, // the generic accessor of a text-backed field is its string accessor
+        presence,
+        areValuesEqual: (rowA, rowB) => { // compares the raw token characters of two rows; nothing is parsed or pooled
+            const aI = (rowA * fieldCount + index) * 2, aS = tokens[aI];
+            const bI = (rowB * fieldCount + index) * 2, bS = tokens[bI];
+            const len = tokens[aI + 1] - aS;
+            if (len !== tokens[bI + 1] - bS) return false;
+            for (let i = 0; i < len; i++) {
+                if (data.charCodeAt(i + aS) !== data.charCodeAt(i + bS)) {
+                    return false;
+                }
+            }
+            return true;
+        },
+        stringEquals: (row, value) => { // compares the raw token against `value` without allocating a substring
+            const aI = (row * fieldCount + index) * 2;
+            const s = tokens[aI];
+            if (!value) return presence(row) !== Data.ValuePresence.Present; // null and '' both match any row that is not Present
+            const len = value.length;
+            if (len !== tokens[aI + 1] - s) return false;
+            for (let i = 0; i < len; i++) {
+                if (data.charCodeAt(i + s) !== value.charCodeAt(i)) return false;
+            }
+            return true;
+        },
+        toStringArray: (startRow, endRowExclusive, ctor) => { // the three toXArray helpers fill ctor(count) with str/int/float of rows [startRow, endRowExclusive)
+            const count = endRowExclusive - startRow;
+            const ret = ctor(count) as any;
+            for (let i = 0; i < count; i++) { ret[i] = str(startRow + i); }
+            return ret;
+        },
+        toIntArray: (startRow, endRowExclusive, ctor) => {
+            const count = endRowExclusive - startRow;
+            const ret = ctor(count) as any;
+            for (let i = 0; i < count; i++) { ret[i] = int(startRow + i); }
+            return ret;
+        },
+        toFloatArray: (startRow, endRowExclusive, ctor) => {
+            const count = endRowExclusive - startRow;
+            const ret = ctor(count) as any;
+            for (let i = 0; i < count; i++) { ret[i] = float(startRow + i); }
+            return ret;
+        }
+    }
+}

+ 0 - 0
src/utils/number-parser.ts → src/reader/common/text/number-parser.ts


+ 173 - 0
src/reader/common/text/tokenizer.ts

@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * mostly from https://github.com/dsehnal/CIFTools.js
+ * @author David Sehnal <david.sehnal@gmail.com>
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ */
+
+export interface State<Info = any, TokenType = any> { // mutable cursor shared by the tokenizer helpers in this module
+    data: string // text being tokenized
+
+    position: number // current read offset into data; advanced by the eat*/skip* helpers and by trim
+    length: number // data.length, captured when the state is created
+
+    currentLineNumber: number // starts at 1; incremented by eatLine and skipWhitespace at line breaks
+    currentTokenStart: number // start offset of the current token; set by trim, otherwise by the caller
+    currentTokenEnd: number // exclusive end offset of the current token; set by eatLine, eatValue and trim
+
+    currentTokenType: TokenType, // format-specific token kind; not touched by the helpers in this module
+
+    info: Info // format-specific parser payload; not touched by the helpers in this module
+}
+
+/**
+ * Creates a tokenizer state at the very beginning of `data`: offset 0, line 1, empty current token.
+ * `info` and `initialTokenType` are stored exactly as passed, so omitting them leaves `undefined` in the state.
+ */
+export function State<Info, TokenType>(data: string, info?: Info, initialTokenType?: TokenType): State<Info, TokenType> {
+    const state: State<Info, TokenType> = {
+        data,
+        position: 0,
+        length: data.length,
+        currentLineNumber: 1,
+        currentTokenStart: 0,
+        currentTokenEnd: 0,
+        currentTokenType: initialTokenType as TokenType,
+        info: info as Info
+    };
+    return state;
+}
+
+/**
+ * Consumes the rest of the current line, including its terminator ("\n", "\r" or "\r\n").
+ * currentTokenEnd is left on the terminator (or at the end of the data), the line counter is
+ * bumped once per terminator, and position ends up on the first character of the next line.
+ */
+export function eatLine(state: State) {
+    const { data, length } = state;
+    while (state.position < length) {
+        const code = data.charCodeAt(state.position);
+        if (code !== 10 && code !== 13) {
+            ++state.position;
+            continue;
+        }
+        // line terminator found
+        state.currentTokenEnd = state.position;
+        ++state.position;
+        ++state.currentLineNumber;
+        // swallow the \n of a \r\n pair
+        if (code === 13 && data.charCodeAt(state.position) === 10) {
+            ++state.position;
+        }
+        return;
+    }
+    state.currentTokenEnd = state.position;
+}
+
+/**
+ * Advances position to the next tab, newline, carriage return or space (or to the end of the
+ * data) and records that offset as currentTokenEnd. The whitespace itself is not consumed.
+ */
+export function eatValue(state: State) {
+    for (; state.position < state.length; ++state.position) {
+        const code = state.data.charCodeAt(state.position);
+        if (code === 9 || code === 10 || code === 13 || code === 32) break;
+    }
+    state.currentTokenEnd = state.position;
+}
+
+/**
+ * Skips all the whitespace - space, tab, newline, CR
+ * Handles incrementing line count.
+ *
+ * Advances state.position to the first non-whitespace character (or to state.length).
+ * A "\r\n" pair is counted as a single line break.
+ *
+ * @returns the char code of the last whitespace character consumed, or 10 (\n) when nothing was consumed
+ */
+export function skipWhitespace(state: State): number {
+    let prev = 10; // reported as-is when no whitespace is consumed
+    while (state.position < state.length) {
+        let c = state.data.charCodeAt(state.position);
+        switch (c) {
+            case 9: // '\t'
+            case 32: // ' '
+                prev = c;
+                ++state.position;
+                break;
+            case 10: // \n
+                // handle \r\n
+                if (prev !== 13) { // the preceding \r already counted this line break
+                    ++state.currentLineNumber;
+                }
+                prev = c;
+                ++state.position;
+                break;
+            case 13: // \r
+                prev = c;
+                ++state.position;
+                ++state.currentLineNumber;
+                break;
+            default:
+                return prev; // first non-whitespace character: leave it unread
+        }
+    }
+    return prev;
+}
+
+/**
+ * Trims spaces and tabs from the range [start, end) of state.data.
+ *
+ * Stores the trimmed range in state.currentTokenStart / state.currentTokenEnd (end exclusive)
+ * and moves state.position to `end`. Both scans are confined to the range, so a range that is
+ * empty or consists only of blanks yields the empty token [end, end) instead of offsets that
+ * run outside the range and cross each other.
+ */
+export function trim(state: State, start: number, end: number) {
+    const { data } = state;
+    let s = start, e = end - 1;
+
+    // leading blanks: never step past the last character of the range
+    let c = data.charCodeAt(s);
+    while (s <= e && (c === 9 || c === 32)) c = data.charCodeAt(++s);
+    // trailing blanks: never step back past the trimmed start
+    c = data.charCodeAt(e);
+    while (e >= s && (c === 9 || c === 32)) c = data.charCodeAt(--e);
+
+    state.currentTokenStart = s;
+    state.currentTokenEnd = e + 1;
+    state.position = end;
+}
+
+/**
+ * Growable flat buffer of token offsets: every token occupies two consecutive slots of
+ * `indices` (start, exclusive end) and `count` is the number of slots in use.
+ */
+export interface Tokens {
+    /** indices.length - 2, cached so that `add` can test for free space with one comparison */
+    indicesLenMinus2: number,
+    /** number of used slots in `indices` (two per token) */
+    count: number,
+    indices: Uint32Array
+}
+
+export namespace Tokens {
+    /** Replaces the buffer by a larger copy that has room for at least one more token. */
+    function resize(tokens: Tokens) {
+        const oldLength = tokens.indices.length;
+        // scale the size using golden ratio, because why not - but always grow by at least one
+        // start/end pair, otherwise buffers of length 0 or 1 would never get any bigger
+        const newBuffer = new Uint32Array(Math.max((1.61 * oldLength) | 0, oldLength + 2));
+        newBuffer.set(tokens.indices);
+        tokens.indices = newBuffer;
+        tokens.indicesLenMinus2 = (newBuffer.length - 2) | 0;
+    }
+
+    /** Appends the token [start, end), growing the buffer when it is full. */
+    export function add(tokens: Tokens, start: number, end: number) {
+        if (tokens.count > tokens.indicesLenMinus2) {
+            resize(tokens);
+        }
+        tokens.indices[tokens.count++] = start;
+        tokens.indices[tokens.count++] = end;
+    }
+
+    /**
+     * Appends the token [start, end) without a capacity check.
+     * Only for buffers that were created large enough up front: typed arrays ignore
+     * out-of-range writes, so an overflow would silently lose the token.
+     */
+    export function addUnchecked(tokens: Tokens, start: number, end: number) {
+        tokens.indices[tokens.count++] = start;
+        tokens.indices[tokens.count++] = end;
+    }
+
+    /** Creates an empty buffer with `size` slots, i.e. room for size / 2 tokens. */
+    export function create(size: number): Tokens {
+        const indices = new Uint32Array(size);
+        return {
+            // derived from the real buffer length so the two cannot disagree (e.g. for a NaN size)
+            indicesLenMinus2: (indices.length - 2) | 0,
+            count: 0,
+            indices
+        }
+    }
+}
+
+
+/**
+ * A helper for building a typed array of token indices.
+ */
+export default Tokens

+ 0 - 266
src/reader/gro.ts

@@ -1,266 +0,0 @@
-/*
- * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
- *
- * @author Alexander Rose <alexander.rose@weirdbyte.de>
- */
-
-import { parseInt } from '../utils/number-parser'
-import { eatLine, eatValue, skipWhitespace } from '../utils/helper'
-import { Tokens } from '../utils/tokens'
-import { TokenizerState } from '../utils/tokenizer-state'
-
-import { TextFile } from '../relational/text-file'
-import { TextBlock } from '../relational/text-block'
-import { TextCategory } from '../relational/text-category'
-
-import { ParserResult } from '../parser'
-
-/**
- * http://manual.gromacs.org/current/online/gro.html
- */
-
-export const GroCategories = {
-    'header': '',
-    'atoms': ''
-}
-
-// type GroCategories = keyof typeof GroCategories
-
-export const GroAtomBasicColumns = {
-    'residueNumber': '',
-    'residueName': '',
-    'atomName': '',
-    'atomNumber': '',
-    'x': '',
-    'y': '',
-    'z': ''
-}
-export type GroAtomBasicColumns = keyof typeof GroAtomBasicColumns
-
-export const GroAtomVelocityColumns = Object.assign({
-    'vx': '',
-    'vy': '',
-    'vz': ''
-}, GroAtomBasicColumns)
-export type GroAtomVelocityColumns = keyof typeof GroAtomVelocityColumns
-
-export const GroHeaderColumns = {
-    'title': '',
-    'timeInPs': '',
-    'numberOfAtoms': '',
-    'boxX': '',
-    'boxY': '',
-    'boxZ': ''
-}
-export type GroHeaderColumns = keyof typeof GroHeaderColumns
-
-export interface GroState extends TokenizerState {
-    numberOfAtoms: number
-    hasVelocities: boolean
-    numberOfDecimalPlaces: number
-}
-
-export function createTokenizer(data: string): GroState {
-    return {
-        data,
-
-        position: 0,
-        length: data.length,
-
-        currentLineNumber: 1,
-        currentTokenStart: 0,
-        currentTokenEnd: 0,
-
-        numberOfAtoms: 0,
-        hasVelocities: false,
-        numberOfDecimalPlaces: 3
-    };
-}
-
-/**
- * title string (free format string, optional time in ps after 't=')
- */
-function handleTitleString (state: GroState, tokens: Tokens) {
-    eatLine(state)
-    // console.log('title', state.data.substring(state.currentTokenStart, state.currentTokenEnd))
-    let start = state.currentTokenStart
-    let end = state.currentTokenEnd
-    let valueStart = state.currentTokenStart
-    let valueEnd = start
-
-    while (valueEnd < end && !isTime(state.data, valueEnd)) ++valueEnd;
-
-    if (isTime(state.data, valueEnd)) {
-        let timeStart = valueEnd + 2
-
-        while (valueEnd > start && isSpaceOrComma(state.data, valueEnd - 1)) --valueEnd;
-        Tokens.add(tokens, valueStart, valueEnd)  // title
-
-        while (timeStart < end && state.data.charCodeAt(timeStart) === 32) ++timeStart;
-        while (valueEnd > timeStart && state.data.charCodeAt(valueEnd - 1) === 32) --valueEnd;
-        Tokens.add(tokens, timeStart, end)  // time
-    } else {
-        Tokens.add(tokens, valueStart, valueEnd)  // title
-        Tokens.add(tokens, valueEnd, valueEnd)  // empty token for time
-    }
-}
-
-function isSpaceOrComma(data: string, position: number): boolean {
-    const c = data.charCodeAt(position);
-    return c === 32 || c === 44
-}
-
-function isTime(data: string, position: number): boolean {
-    // T/t
-    const c = data.charCodeAt(position);
-    if (c !== 84 && c !== 116) return false;
-    // =
-    if (data.charCodeAt(position + 1) !== 61) return false;
-
-    return true;
-}
-
-// function isDot(state: TokenizerState): boolean {
-//     // .
-//     if (state.data.charCodeAt(state.currentTokenStart) !== 46) return false;
-
-//     return true;
-// }
-
-// function numberOfDecimalPlaces (state: TokenizerState) {
-//     // var ndec = firstLines[ 2 ].length - firstLines[ 2 ].lastIndexOf('.') - 1
-//     const start = state.currentTokenStart
-//     const end = state.currentTokenEnd
-//     for (let i = end; start < i; --i) {
-//         // .
-//         if (state.data.charCodeAt(i) === 46) return end - start - i
-//     }
-//     throw new Error('Could not determine number of decimal places')
-// }
-
-/**
- * number of atoms (free format integer)
- */
-function handleNumberOfAtoms (state: GroState, tokens: Tokens) {
-    skipWhitespace(state)
-    state.currentTokenStart = state.position
-    eatValue(state)
-    state.numberOfAtoms = parseInt(state.data, state.currentTokenStart, state.currentTokenEnd)
-    Tokens.add(tokens, state.currentTokenStart, state.currentTokenEnd)
-    eatLine(state)
-}
-
-// function checkForVelocities (state: GroState) {
-
-// }
-
-/**
- * This format is fixed, ie. all columns are in a fixed position.
- * Optionally (for now only yet with trjconv) you can write gro files
- * with any number of decimal places, the format will then be n+5
- * positions with n decimal places (n+1 for velocities) in stead
- * of 8 with 3 (with 4 for velocities). Upon reading, the precision
- * will be inferred from the distance between the decimal points
- * (which will be n+5). Columns contain the following information
- * (from left to right):
- *     residue number (5 positions, integer)
- *     residue name (5 characters)
- *     atom name (5 characters)
- *     atom number (5 positions, integer)
- *     position (in nm, x y z in 3 columns, each 8 positions with 3 decimal places)
- *     velocity (in nm/ps (or km/s), x y z in 3 columns, each 8 positions with 4 decimal places)
- */
-function handleAtoms (state: GroState, block: TextBlock) {
-    console.log('MOINMOIN')
-    const name = 'atoms'
-
-    const columns = [ 'residueNumber', 'residueName', 'atomName', 'atomNumber', 'x', 'y', 'z' ]
-    if (state.hasVelocities) {
-        columns.push('vx', 'vy', 'vz')
-    }
-    const fieldSizes = [ 5, 5, 5, 5, 8, 8, 8, 8, 8, 8 ]
-
-    const columnCount = columns.length
-    const tokens = Tokens.create(state.numberOfAtoms * 2 * columnCount)
-
-    let start: number
-    let end: number
-    let valueStart: number
-    let valueEnd: number = state.position
-
-    for (let i = 0; i < state.numberOfAtoms; ++i) {
-        state.currentTokenStart = state.position
-        end = state.currentTokenStart
-        for (let j = 0; j < columnCount; ++j) {
-            start = end
-            end = start + fieldSizes[j]
-
-            // trim
-            valueStart = start
-            valueEnd = end
-            while (valueStart < valueEnd && state.data.charCodeAt(valueStart) === 32) ++valueStart;
-            while (valueEnd > valueStart && state.data.charCodeAt(valueEnd - 1) === 32) --valueEnd;
-
-            Tokens.addUnchecked(tokens, valueStart, valueEnd)
-        }
-        state.position = valueEnd
-        eatLine(state)
-    }
-
-    block.addCategory(new TextCategory(state.data, name, columns, tokens));
-}
-
-/**
- * box vectors (free format, space separated reals), values:
- * v1(x) v2(y) v3(z) v1(y) v1(z) v2(x) v2(z) v3(x) v3(y),
- * the last 6 values may be omitted (they will be set to zero).
- * Gromacs only supports boxes with v1(y)=v1(z)=v2(z)=0.
- */
-function handleBoxVectors (state: GroState, tokens: Tokens) {
-    // just read the first three values, ignore any remaining
-    for (let i = 0; i < 3; ++i) {
-        skipWhitespace(state)
-        state.currentTokenStart = state.position
-        eatValue(state)
-        Tokens.add(tokens, state.currentTokenStart, state.currentTokenEnd)
-    }
-}
-
-/**
- * Creates an error result.
- */
-// function error(line: number, message: string) {
-//     return ParserResult.error<TextFile>(message, line);
-// }
-
-/**
- * Creates a data result.
- */
-function result(data: TextFile) {
-    return ParserResult.success(data);
-}
-
-function parseInternal(data: string): ParserResult<TextFile> {
-    const state = createTokenizer(data)
-    const file = new TextFile(data)
-    file.blocks
-
-    let block = new TextBlock(data)
-    file.blocks.push(block)
-
-    const headerColumns = ['title', 'timeInPs', 'numberOfAtoms', 'boxX', 'boxY', 'boxZ']
-    const headerTokens = Tokens.create(2 * headerColumns.length)
-    let header = new TextCategory(state.data, 'header', headerColumns, headerTokens)
-    block.addCategory(header)
-
-    handleTitleString(state, headerTokens)
-    handleNumberOfAtoms(state, headerTokens)
-    handleAtoms(state, block)
-    handleBoxVectors(state, headerTokens)
-
-    return result(file);
-}
-
-export function parse(data: string) {
-    return parseInternal(data);
-}

+ 7 - 3
src/relational/file.ts → src/reader/gro/index.ts

@@ -2,9 +2,13 @@
  * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
  *
  * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ * @author David Sehnal <david.sehnal@gmail.com>
  */
 
-export interface File<T> {
-    blocks: T[];
-}
+import schema from './schema'
+import parse from './parser'
 
+export default { // GRO reader entry point
+    parse, // parses GRO text (default export of ./parser)
+    schema // applies the GRO schema to a parsed block (default export of ./schema)
+};

+ 163 - 0
src/reader/gro/parser.ts

@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import { State as TokenizerState, Tokens, eatLine, skipWhitespace, eatValue, trim } from '../common/text/tokenizer'
+import { parseInt } from '../common/text/number-parser'
+import { createCategory } from '../common/text/data'
+import * as Data from '../../data/data'
+import Result from '../result'
+
+interface StateInfo { // GRO-specific parser state, carried in the `info` slot of the tokenizer state
+    numberOfAtoms: number // filled in by handleNumberOfAtoms; bounds the atom-record loop in handleAtoms
+    hasVelocities: boolean // when true handleAtoms also tokenizes vx, vy, vz; NOTE(review): nothing in this file sets it, so it stays false
+    numberOfDecimalPlaces: number // initialised to 3 by createState; NOTE(review): not read anywhere in this file
+}
+
+type State = TokenizerState<StateInfo>
+
+/** Tokenizer state for a GRO file: no atoms counted yet, no velocities, 3 decimal places. */
+function createState(data: string): State {
+    const info: StateInfo = { numberOfAtoms: 0, hasVelocities: false, numberOfDecimalPlaces: 3 };
+    return TokenizerState(data, info);
+}
+
+/**
+ * title string (free format string, optional time in ps after 't=')
+ *
+ * Consumes the title line and appends exactly two tokens to `tokens`:
+ *  1. the title: everything before the first 't=' / 'T=' on the line, with trailing spaces
+ *     and commas removed (the whole line when there is no 't=');
+ *  2. the time: the text after 't=' with surrounding spaces removed, or an empty token when
+ *     the line has no 't='.
+ */
+function handleTitleString(state: State, tokens: Tokens) {
+    // eatLine only records where the line ends, so mark where it starts first
+    state.currentTokenStart = state.position;
+    eatLine(state);
+
+    const { data } = state;
+    const start = state.currentTokenStart;
+    const end = state.currentTokenEnd;
+
+    // look for the first 't=' on the line
+    let titleEnd = start;
+    while (titleEnd < end && !isTime(data, titleEnd)) ++titleEnd;
+
+    if (!isTime(data, titleEnd)) {
+        Tokens.add(tokens, start, titleEnd);     // title
+        Tokens.add(tokens, titleEnd, titleEnd);  // empty token for time
+        return;
+    }
+
+    let timeStart = titleEnd + 2;  // first character after 't='
+
+    while (titleEnd > start && isSpaceOrComma(data, titleEnd - 1)) --titleEnd;
+    Tokens.add(tokens, start, titleEnd);  // title
+
+    // strip the spaces around the time value; the trailing ones have to be taken off the
+    // end of the line, not off the end of the title
+    let timeEnd = end;
+    while (timeStart < timeEnd && data.charCodeAt(timeStart) === 32) ++timeStart;
+    while (timeEnd > timeStart && data.charCodeAt(timeEnd - 1) === 32) --timeEnd;
+    Tokens.add(tokens, timeStart, timeEnd);  // time
+}
+
+/** True when the character at `position` is a space (32) or a comma (44). */
+function isSpaceOrComma(data: string, position: number): boolean {
+    switch (data.charCodeAt(position)) {
+        case 32: // ' '
+        case 44: // ','
+            return true;
+        default:
+            return false;
+    }
+}
+
+/** True when `data` has 't=' or 'T=' starting at `position`. */
+function isTime(data: string, position: number): boolean {
+    const first = data.charCodeAt(position);
+    const startsWithT = first === 84 /* T */ || first === 116 /* t */;
+    return startsWithT && data.charCodeAt(position + 1) === 61 /* = */;
+}
+
+/**
+ * number of atoms (free format integer)
+ *
+ * Tokenizes the first value of the line, stores its parsed value in state.info.numberOfAtoms,
+ * appends the token to `tokens` and skips the remainder of the line.
+ */
+function handleNumberOfAtoms(state: State, tokens: Tokens) {
+    skipWhitespace(state);
+
+    const tokenStart = state.position;
+    state.currentTokenStart = tokenStart;
+    eatValue(state);
+    const tokenEnd = state.currentTokenEnd;
+
+    state.info.numberOfAtoms = parseInt(state.data, tokenStart, tokenEnd);
+    Tokens.add(tokens, tokenStart, tokenEnd);
+
+    eatLine(state);
+}
+
+/**
+ * Tokenizes state.info.numberOfAtoms atom records, one per line, by fixed column widths and
+ * returns them as a category with one row per atom.
+ *
+ * Format description:
+ *
+ * This format is fixed, ie. all columns are in a fixed position.
+ * Optionally (for now only yet with trjconv) you can write gro files
+ * with any number of decimal places, the format will then be n+5
+ * positions with n decimal places (n+1 for velocities) instead
+ * of 8 with 3 (with 4 for velocities). Upon reading, the precision
+ * will be inferred from the distance between the decimal points
+ * (which will be n+5). Columns contain the following information
+ * (from left to right):
+ *     residue number (5 positions, integer)
+ *     residue name (5 characters)
+ *     atom name (5 characters)
+ *     atom number (5 positions, integer)
+ *     position (in nm, x y z in 3 columns, each 8 positions with 3 decimal places)
+ *     velocity (in nm/ps (or km/s), x y z in 3 columns, each 8 positions with 4 decimal places)
+ *
+ * NOTE(review): the widths below are constant, so the precision inference described above is
+ * not performed by this function; state.info.numberOfDecimalPlaces is not consulted.
+ */
+function handleAtoms(state: State) {
+    const fieldSizes = [ 5, 5, 5, 5, 8, 8, 8, 8, 8, 8 ]; // column widths, in the order of `fields` (including vx, vy, vz)
+    const fields = [ 'residueNumber', 'residueName', 'atomName', 'atomNumber', 'x', 'y', 'z' ]
+    if (state.info.hasVelocities) {
+        fields.push('vx', 'vy', 'vz')
+    }
+
+    const fieldCount = fields.length
+    const tokens = Tokens.create(state.info.numberOfAtoms * 2 * fieldCount) // exact size: two offsets per field per atom, which is what makes addUnchecked safe below
+
+    let start: number;
+    let end: number;
+
+    for (let i = 0, _i = state.info.numberOfAtoms; i < _i; ++i) {
+        state.currentTokenStart = state.position;
+        end = state.currentTokenStart; // columns are measured from the first character of the line
+        for (let j = 0; j < fieldCount; ++j) {
+            start = end;
+            end = start + fieldSizes[j];
+
+            trim(state, start, end); // sets currentTokenStart/currentTokenEnd and moves position to `end`
+            Tokens.addUnchecked(tokens, state.currentTokenStart, state.currentTokenEnd);
+        }
+        eatLine(state) // skip whatever is left of the record, including the line break
+    }
+
+    return createCategory(state.data, fields, tokens, state.info.numberOfAtoms);
+}
+
+/**
+ * box vectors (free format, space separated reals), values:
+ * v1(x) v2(y) v3(z) v1(y) v1(z) v2(x) v2(z) v3(x) v3(y),
+ * the last 6 values may be omitted (they will be set to zero).
+ * Gromacs only supports boxes with v1(y)=v1(z)=v2(z)=0.
+ *
+ * Appends one token per value to `tokens` for the first three values only.
+ */
+function handleBoxVectors(state: State, tokens: Tokens) {
+    // only v1(x), v2(y) and v3(z) are tokenized; anything after them is left unread
+    let remaining = 3;
+    while (remaining-- > 0) {
+        skipWhitespace(state);
+        const valueStart = state.position;
+        state.currentTokenStart = valueStart;
+        eatValue(state);
+        Tokens.add(tokens, valueStart, state.currentTokenEnd);
+    }
+}
+
+function parseInternal(data: string): Result<Data.File> { // tokenizes `data` as one GRO frame and wraps it in a single-block Data.File
+    const state = createState(data);
+
+    const headerFields = ['title', 'timeInPs', 'numberOfAtoms', 'boxX', 'boxY', 'boxZ']; // must stay in the order in which the header tokens are added below
+    const headerTokens = Tokens.create(2 * headerFields.length); // two offsets per header field
+
+    handleTitleString(state, headerTokens); // adds title and timeInPs
+    handleNumberOfAtoms(state, headerTokens); // adds numberOfAtoms and records the count in state.info
+    const atoms = handleAtoms(state); // atom records go into their own token buffer
+    handleBoxVectors(state, headerTokens); // adds boxX, boxY, boxZ; read after the atom records
+
+    const block = Data.Block({
+        header: createCategory(data, headerFields, headerTokens, 1), // the header is a single row
+        atoms
+    });
+
+    return Result.success(Data.File([block])); // NOTE(review): no path in this function produces an error result, so malformed input is not reported
+}
+
+/** Parses the text of a GRO file; thin public wrapper around parseInternal. */
+export default function parse(data: string) {
+    const result = parseInternal(data);
+    return result;
+}

+ 40 - 0
src/reader/gro/schema.ts

@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import * as Schema from '../../data/schema'
+import * as Data from '../../data/data'
+
+const str = Schema.Field.str() // field schemas shared by every column of the same type
+const int = Schema.Field.int()
+const float = Schema.Field.float()
+
+const header = { // same names as the headerFields list of the GRO parser
+    'title': str,
+    'timeInPs': float,
+    'numberOfAtoms': int,
+    'boxX': float,
+    'boxY': float,
+    'boxZ': float
+}
+
+const atoms = { // same names as the per-atom fields tokenized by the GRO parser
+    'residueNumber': int,
+    'residueName': str,
+    'atomName': str,
+    'atomNumber': int,
+    'x': float,
+    'y': float,
+    'z': float,
+    'vx': float, // vx, vy, vz are only produced by the parser when its hasVelocities flag is set
+    'vy': float,
+    'vz': float
+}
+
+const schema = { header, atoms };
+export default function (block: Data.Block) { // applies the GRO schema to a parsed block via Schema.apply
+    return Schema.apply(schema, block);
+}

+ 11 - 9
src/parser.ts → src/reader/result.ts

@@ -5,19 +5,19 @@
  * @author David Sehnal <david.sehnal@gmail.com>
  */
 
-export type ParserResult<T> = ParserSuccess<T> | ParserError
+type ReaderResult<T> = Success<T> | Error
 
-export namespace ParserResult {
-    export function error<T>(message: string, line = -1): ParserResult<T> {
-        return new ParserError(message, line);
+namespace ReaderResult {
+    export function error<T>(message: string, line = -1): ReaderResult<T> {
+        return new Error(message, line);
     }
 
-    export function success<T>(result: T, warnings: string[] = []): ParserResult<T> {
-        return new ParserSuccess<T>(result, warnings);
+    export function success<T>(result: T, warnings: string[] = []): ReaderResult<T> {
+        return new Success<T>(result, warnings);
     }
 }
 
-export class ParserError {
+export class Error {
     isError: true = true;
 
     toString() {
@@ -33,8 +33,10 @@ export class ParserError {
     }
 }
 
-export class ParserSuccess<T> {
+export class Success<T> {
     isError: false = false;
 
     constructor(public result: T, public warnings: string[]) { }
-}
+}
+
+export default ReaderResult

+ 45 - 26
src/reader/spec/gro.spec.ts

@@ -2,10 +2,10 @@
  * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
  *
  * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ * @author David Sehnal <david.sehnal@gmail.com>
  */
 
-import { parse } from '../gro'
-// import { Category } from '../../relational/category'
+import Gro from '../gro/index'
 
 const groString = `MD of 2 waters, t= 4.2
     6
@@ -26,52 +26,71 @@ const groStringHighPrecision = `Generated by trjconv : 2168 system t=  15.00000
 
 describe('gro reader', () => {
     it('basic', () => {
-        const parsed = parse(groString)
+        const parsed = Gro.parse(groString)
 
         if (parsed.isError) {
             console.log(parsed)
         } else {
-            const groFile = parsed.result
+            const groFile = parsed.result;
+            const data = Gro.schema(groFile.blocks[0]);
 
-            const header = groFile.blocks[0].getCategory('header')
-            if (header) {
-                expect(header.columnNames).toEqual(['title', 'timeInPs', 'numberOfAtoms', 'boxX', 'boxY', 'boxZ'])
+            const { header, atoms } = data;
+            if (header._isDefined) {
+                expect(header.title.value(0)).toBe('MD of 2 waters')
+                expect(header.timeInPs.value(0)).toBeCloseTo(4.2)
+                expect(header.numberOfAtoms.value(0)).toBe(6)
 
-                expect(header.getColumn('title').getString(0)).toBe('MD of 2 waters')
-                expect(header.getColumn('timeInPs').getFloat(0)).toBeCloseTo(4.2)
-                expect(header.getColumn('numberOfAtoms').getInteger(0)).toBe(6)
-
-                expect(header.getColumn('boxX').getFloat(0)).toBeCloseTo(1.82060)
-                expect(header.getColumn('boxY').getFloat(0)).toBeCloseTo(1.82060)
-                expect(header.getColumn('boxZ').getFloat(0)).toBeCloseTo(1.82060)
+                expect(header.boxX.value(0)).toBeCloseTo(1.82060)
+                expect(header.boxY.value(0)).toBeCloseTo(1.82060)
+                expect(header.boxZ.value(0)).toBeCloseTo(1.82060)
             } else {
                 console.error('no header')
             }
+
+            if (atoms._rowCount === 6)  {
+                expect(atoms.x.value(0)).toBeCloseTo(0.126);
+                expect(atoms.y.value(0)).toBeCloseTo(1.624);
+                expect(atoms.z.value(0)).toBeCloseTo(1.679);
+
+                // TODO: check velocities when they are parsed.
+            } else {
+                console.error('no atoms');
+            }
         }
     })
 
     it('high precision', () => {
-        const parsed = parse(groStringHighPrecision)
+        const parsed = Gro.parse(groStringHighPrecision)
 
         if (parsed.isError) {
             console.log(parsed)
         } else {
-            const groFile = parsed.result
+            const groFile = parsed.result;
+            const data = Gro.schema(groFile.blocks[0]);
 
-            const header = groFile.blocks[0].getCategory('header')
-            if (header) {
-                expect(header.columnNames).toEqual(['title', 'timeInPs', 'numberOfAtoms', 'boxX', 'boxY', 'boxZ'])
+            const { header, atoms } = data;
+            if (header._isDefined) {
+                expect(header.title.value(0)).toBe('Generated by trjconv : 2168 system')
+                expect(header.timeInPs.value(0)).toBeCloseTo(15)
+                expect(header.numberOfAtoms.value(0)).toBe(3)
 
-                expect(header.getColumn('title').getString(0)).toBe('Generated by trjconv : 2168 system')
-                expect(header.getColumn('timeInPs').getFloat(0)).toBeCloseTo(15)
-                expect(header.getColumn('numberOfAtoms').getInteger(0)).toBe(3)
-
-                expect(header.getColumn('boxX').getFloat(0)).toBeCloseTo(1.82060)
-                expect(header.getColumn('boxY').getFloat(0)).toBeCloseTo(1.82060)
-                expect(header.getColumn('boxZ').getFloat(0)).toBeCloseTo(1.82060)
+                expect(header.boxX.value(0)).toBeCloseTo(1.82060)
+                expect(header.boxY.value(0)).toBeCloseTo(1.82060)
+                expect(header.boxZ.value(0)).toBeCloseTo(1.82060)
             } else {
                 console.error('no header')
             }
+
+            if (atoms._rowCount === 3)  {
+                // TODO: test when high-prec parser is available
+                // expect(atoms.x.value(1)).toBeCloseTo(0.015804, 5);
+                // expect(atoms.y.value(1)).toBeCloseTo(2.716597, 5);
+                // expect(atoms.z.value(1)).toBeCloseTo(1.460588, 5);
+
+                // TODO: check velocities when they are parsed.
+            } else {
+                console.error('no atoms');
+            }
         }
     })
 });

+ 0 - 24
src/relational/block.ts

@@ -1,24 +0,0 @@
-/*
- * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
- *
- * @author Alexander Rose <alexander.rose@weirdbyte.de>
- */
-
-import { Category, UndefinedCategory } from './category'
-
-export abstract class Block<T> {
-    abstract getCategory(name: string): T|undefined
-    abstract addCategory(category: T): void
-
-    getCategoriesFromSchema<T extends object> (schema: T) {
-        return BlockCategories(this, schema)
-    }
-}
-
-export type BlockCategories<Categories extends string> = { readonly [name in Categories]: Category }
-export function BlockCategories<T extends object>(block: Block<any> | undefined, categories: T): BlockCategories<keyof T> {
-    const ret = Object.create(null);
-    if (!block) for (const c of Object.keys(categories)) ret[c] = UndefinedCategory;
-    else for (const c of Object.keys(categories)) ret[c] = block.getCategory(c);
-    return ret;
-}

+ 0 - 61
src/relational/category.ts

@@ -1,61 +0,0 @@
-/*
- * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
- *
- * from https://github.com/dsehnal/CIFTools.js
- * @author David Sehnal <david.sehnal@gmail.com>
- */
-
-import { Column, UndefinedColumn } from './column'
-
-/**
- * Represents a tabular category with multiple fields represented as columns.
- *
- * Example:
- * _category.field1
- * _category.field2
- * ...
- */
-export abstract class Category {
-    name: string;
-    rowCount: number;
-    columnCount: number;
-    columnNames: string[];
-
-    /**
-     * If a field with the given name is not present, returns UndefinedColumn.
-     *
-     * Columns are accessed by their field name only, i.e.
-     * _category.field is accessed by
-     * category.getColumn('field')
-     *
-     * Note that columns are created on demand and there is some computational
-     * cost when creating a new column. Therefore, if you need to reuse a column,
-     * it is a good idea to cache it.
-     */
-    abstract getColumn(name: string): Column;
-
-    getColumnsFromSchema<T extends object> (schema: T) {
-        return CategoryColumns(this, schema)
-    }
-}
-
-/**
- * Represents a category that is not present.
- */
-class _UndefinedCategory extends Category {  // tslint:disable-line:class-name
-    name: ''
-    rowCount = 0
-    columnCount = 0
-    columnNames = []
-    getColumn(name: string) { return UndefinedColumn }
-}
-export const UndefinedCategory = new _UndefinedCategory() as Category;
-
-
-export type CategoryColumns<Columns extends string> = { readonly [name in Columns]: Column }
-export function CategoryColumns<T extends object>(category: Category | undefined, columns: T): CategoryColumns<keyof T> {
-    const ret = Object.create(null);
-    if (!category) for (const c of Object.keys(columns)) ret[c] = UndefinedColumn;
-    else for (const c of Object.keys(columns)) ret[c] = category.getColumn(c);
-    return ret;
-}

+ 0 - 38
src/relational/column.ts

@@ -1,38 +0,0 @@
-/*
- * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
- *
- * from https://github.com/dsehnal/CIFTools.js
- * @author David Sehnal <david.sehnal@gmail.com>
- */
-
-import { ValuePresence } from './constants'
-
-/**
- * A columns represents a single field of a CIF category.
- */
-export interface Column {
-    isDefined: boolean;
-
-    getString(row: number): string | null;
-    getInteger(row: number): number;
-    getFloat(row: number): number;
-
-    getValuePresence(row: number): ValuePresence;
-
-    areValuesEqual(rowA: number, rowB: number): boolean;
-    stringEquals(row: number, value: string): boolean;
-}
-
-/**
- * Represents a column that is not present.
- */
-class _UndefinedColumn implements Column {  // tslint:disable-line:class-name
-    isDefined = false;
-    getString(row: number): string | null { return null; };
-    getInteger(row: number): number { return 0; }
-    getFloat(row: number): number { return 0.0; }
-    getValuePresence(row: number): ValuePresence { return ValuePresence.NotSpecified; }
-    areValuesEqual(rowA: number, rowB: number): boolean { return true; }
-    stringEquals(row: number, value: string): boolean { return value === null; }
-}
-export const UndefinedColumn = new _UndefinedColumn() as Column;

+ 0 - 12
src/relational/constants.ts

@@ -1,12 +0,0 @@
-/*
- * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
- *
- * from https://github.com/dsehnal/CIFTools.js
- * @author David Sehnal <david.sehnal@gmail.com>
- */
-
-export const enum ValuePresence {
-    Present = 0,
-    NotSpecified = 1,
-    Unknown = 2
-}

+ 0 - 39
src/relational/text-block.ts

@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
- *
- * @author Alexander Rose <alexander.rose@weirdbyte.de>
- */
-
-import { Block } from './block'
-import { TextCategory } from './text-category'
-
-export class TextBlock extends Block<TextCategory> {
-    private categoryMap: Map<string, TextCategory>;
-    private categoryList: TextCategory[];
-
-    data: string;
-
-    /**
-     * Gets a category by its name.
-     */
-    getCategory(name: string) {
-        return this.categoryMap.get(name);
-    }
-
-    /**
-     * Adds a category.
-     */
-    addCategory(category: TextCategory) {
-        this.categoryList[this.categoryList.length] = category;
-        this.categoryMap.set(category.name, category);
-    }
-
-    constructor(data: string) {
-        super()
-
-        this.data = data;
-
-        this.categoryMap = new Map()
-        this.categoryList = []
-    }
-}

+ 0 - 99
src/relational/text-category.ts

@@ -1,99 +0,0 @@
-/*
- * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
- *
- * from https://github.com/dsehnal/CIFTools.js
- * @author David Sehnal <david.sehnal@gmail.com>
- * @author Alexander Rose <alexander.rose@weirdbyte.de>
- */
-
-import { Category } from './category'
-import { UndefinedColumn } from './column'
-import { TextColumn, CifColumn } from './text-column'
-
-import { Tokens } from '../utils/tokens'
-
-/**
- * Represents a category backed by a string.
- */
-export class TextCategory extends Category {
-    protected data: string;
-    protected columnNameList: string[];
-    protected columnIndices: Map<string, number>;
-
-    /**
-     * Name of the category.
-     */
-    name: string;
-
-    /**
-     * The array of columns.
-     */
-    get columnNames() {
-        return this.columnNameList;
-    }
-
-    /**
-     * Number of columns in the category.
-     */
-    columnCount: number;
-
-    /**
-     * Number of rows in the category.
-     */
-    rowCount: number;
-
-    /**
-     * Pairs of (start at index 2 * i, end at index 2 * i + 1) indices to the data string.
-     * The "end" character is not included (for it's iterated as for (i = start; i < end; i++)).
-     */
-    indices: Int32Array;
-
-    /**
-     * Get a column object that makes accessing data easier.
-     */
-    getColumn(name: string): TextColumn {
-        let i = this.columnIndices.get(name);
-        if (i !== void 0) return new TextColumn(this, this.data, name, i);
-        return UndefinedColumn as TextColumn;
-    }
-
-    initColumns(columns: string[]): void {
-        this.columnIndices = new Map<string, number>();
-        this.columnNameList = [];
-        for (let i = 0; i < columns.length; i++) {
-            this.columnIndices.set(columns[i], i);
-            this.columnNameList.push(columns[i]);
-        }
-    }
-
-    constructor(data: string, name: string, columns: string[], tokens: Tokens) {
-        super()
-
-        this.name = name;
-        this.indices = tokens.indices;
-        this.data = data;
-
-        this.columnCount = columns.length;
-        this.rowCount = (tokens.count / 2 / columns.length) | 0;
-
-        this.initColumns(columns)
-    }
-}
-
-export class CifCategory extends TextCategory {
-    getColumn(name: string): CifColumn {
-        let i = this.columnIndices.get(name);
-        if (i !== void 0) return new CifColumn(this, this.data, name, i);
-        return UndefinedColumn as CifColumn;
-    }
-
-    initColumns(columns: string[]): void {
-        this.columnIndices = new Map<string, number>();
-        this.columnNameList = [];
-        for (let i = 0; i < columns.length; i++) {
-            let colName = columns[i].substr(this.name.length + 1);
-            this.columnIndices.set(colName, i);
-            this.columnNameList.push(colName);
-        }
-    }
-}

+ 0 - 120
src/relational/text-column.ts

@@ -1,120 +0,0 @@
-/*
- * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
- *
- * from https://github.com/dsehnal/CIFTools.js
- * @author David Sehnal <david.sehnal@gmail.com>
- * @author Alexander Rose <alexander.rose@weirdbyte.de>
- */
-
-import { Column } from './column'
-import { ValuePresence } from './constants'
-import { TextCategory } from './text-category'
-
-import { parseInt as fastParseInt, parseFloat as fastParseFloat } from '../utils/number-parser'
-import { ShortStringPool } from '../utils/short-string-pool'
-
-/**
- * Represents a single column.
- */
-export class TextColumn implements Column {
-
-    protected indices: Int32Array;
-    protected columnCount: number;
-    protected rowCount: number;
-    protected stringPool = ShortStringPool.create();
-
-    isDefined = true;
-
-    /**
-     * Returns the string value at given row.
-     */
-    getString(row: number): string | null {
-        let i = (row * this.columnCount + this.index) * 2;
-        return ShortStringPool.get(this.stringPool, this.data.substring(this.indices[i], this.indices[i + 1]));
-    }
-
-    /**
-     * Returns the integer value at given row.
-     */
-    getInteger(row: number): number {
-        let i = (row * this.columnCount + this.index) * 2;
-        return fastParseInt(this.data, this.indices[i], this.indices[i + 1]);
-    }
-
-    /**
-     * Returns the float value at given row.
-     */
-    getFloat(row: number): number {
-        let i = (row * this.columnCount + this.index) * 2;
-        return fastParseFloat(this.data, this.indices[i], this.indices[i + 1]);
-    }
-
-    /**
-     * Returns true if the token has the specified string value.
-     */
-    stringEquals(row: number, value: string) {
-        let aIndex = (row * this.columnCount + this.index) * 2,
-            s = this.indices[aIndex],
-            len = value.length;
-        if (len !== this.indices[aIndex + 1] - s) return false;
-        for (let i = 0; i < len; i++) {
-            if (this.data.charCodeAt(i + s) !== value.charCodeAt(i)) return false;
-        }
-        return true;
-    }
-
-    /**
-     * Determines if values at the given rows are equal.
-     */
-    areValuesEqual(rowA: number, rowB: number): boolean {
-        const aIndex = (rowA * this.columnCount + this.index) * 2
-        const bIndex = (rowB * this.columnCount + this.index) * 2
-        const aS = this.indices[aIndex]
-        const bS = this.indices[bIndex]
-        const len = this.indices[aIndex + 1] - aS
-        if (len !== this.indices[bIndex + 1] - bS) return false;
-        for (let i = 0; i < len; i++) {
-            if (this.data.charCodeAt(i + aS) !== this.data.charCodeAt(i + bS)) {
-                return false;
-            }
-        }
-        return true;
-    }
-
-    getValuePresence(row: number): ValuePresence {
-        let index = 2 * (row * this.columnCount + this.index);
-        if (this.indices[index] === this.indices[index + 1]) {
-            return ValuePresence.NotSpecified
-        }
-        return ValuePresence.Present
-    }
-
-    constructor(table: TextCategory, protected data: string, public name: string, public index: number) {
-        this.indices = table.indices;
-        this.columnCount = table.columnCount;
-    }
-}
-
-export class CifColumn extends TextColumn {
-    /**
-     * Returns the string value at given row.
-     */
-    getString(row: number): string | null {
-        let ret = super.getString(row)
-        if (ret === '.' || ret === '?') return null;
-        return ret;
-    }
-
-    /**
-     * Returns true if the value is not defined (. or ? token).
-     */
-    getValuePresence(row: number): ValuePresence {
-        let index = 2 * (row * this.columnCount + this.index);
-        let s = this.indices[index];
-        if (this.indices[index + 1] - s !== 1) return ValuePresence.Present;
-        let v = this.data.charCodeAt(s);
-        if (v === 46 /* . */) return ValuePresence.NotSpecified;
-        if (v === 63 /* ? */) return ValuePresence.Unknown;
-        return ValuePresence.Present;
-    }
-}

+ 0 - 17
src/relational/text-file.ts

@@ -1,17 +0,0 @@
-/*
- * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
- *
- * @author Alexander Rose <alexander.rose@weirdbyte.de>
- */
-
-import { File } from './file'
-import { TextBlock } from './text-block'
-
-export class TextFile implements File<TextBlock> {
-    data: string;
-    blocks: TextBlock[] = [];
-
-    constructor(data: string) {
-        this.data = data;
-    }
-}

+ 24 - 52
src/script.ts

@@ -7,33 +7,12 @@
 // import * as util from 'util'
 import * as fs from 'fs'
 
-import { parse, GroCategories, GroAtomBasicColumns } from './reader/gro'
-import { Category } from './relational/category'
+import Gro from './reader/gro/index'
 
-const file = '1crn.gro'
+//const file = '1crn.gro'
 // const file = 'water.gro'
 // const file = 'test.gro'
-// const file = 'md_1u19_trj.gro'
-
-function getFloatArray(category: Category, name: string) {
-    const column = category.getColumn(name)
-    const n = category.rowCount
-    const array = new Float32Array(n)
-    for (let i = 0; i < n; ++i) {
-        array[i] = column.getFloat(i)
-    }
-    return array
-}
-
-function getIntArray(category: Category, name: string) {
-    const column = category.getColumn(name)
-    const n = category.rowCount
-    const array = new Int32Array(n)
-    for (let i = 0; i < n; ++i) {
-        array[i] = column.getInteger(i)
-    }
-    return array
-}
+const file = 'md_1u19_trj.gro'
 
 fs.readFile(`./examples/${file}`, 'utf8', function (err,data) {
     if (err) {
@@ -42,61 +21,54 @@ fs.readFile(`./examples/${file}`, 'utf8', function (err,data) {
     // console.log(data);
 
     console.time('parse')
-    const parsed = parse(data)
+    const parsed = Gro.parse(data)
     console.timeEnd('parse')
     if (parsed.isError) {
         console.log(parsed)
     } else {
         const groFile = parsed.result
-        const categories = groFile.blocks[0].getCategoriesFromSchema(GroCategories)
+        const data = Gro.schema(groFile.blocks[0])
 
         // const header = groFile.blocks[0].getCategory('header')
-        const header = categories.header
-        if (header) {
-            console.log(header.columnNames)
-
-            console.log('title', header.getColumn('title').getString(0))
-            console.log('timeInPs', header.getColumn('timeInPs').getFloat(0))
-            console.log('numberOfAtoms', header.getColumn('numberOfAtoms').getInteger(0))
-            console.log('boxX', header.getColumn('boxX').getFloat(0))
-            console.log('boxY', header.getColumn('boxY').getFloat(0))
-            console.log('boxZ', header.getColumn('boxZ').getFloat(0))
+        const { header, atoms } = data;
+        // Print the header values only when the header category is present.
+        // The previous check (`_rowCount !== 1`) was inverted: it printed the
+        // values when there was no single header row and reported 'no header'
+        // for a well-formed file. `_isDefined` is the presence flag exposed on
+        // every schema category.
+        if (header._isDefined) {
+            console.log('title', header.title.value(0))
+            console.log('timeInPs', header.timeInPs.value(0))
+            console.log('numberOfAtoms', header.numberOfAtoms.value(0))
+            console.log('boxX', header.boxX.value(0))
+            console.log('boxY', header.boxY.value(0))
+            console.log('boxZ', header.boxZ.value(0))
         } else {
             console.error('no header')
         }
 
-        const atoms = categories.atoms
-        if (atoms) {
-            console.log(atoms.columnNames)
-
-            const columns = atoms.getColumnsFromSchema(GroAtomBasicColumns)
-
-            console.log(`'${columns.residueNumber.getString(1)}'`)
-            console.log(`'${columns.residueName.getString(1)}'`)
-            console.log(`'${columns.atomName.getString(1)}'`)
-            console.log(columns.z.getFloat(1))
-            console.log(`'${columns.z.getString(1)}'`)
+        if (atoms._rowCount > 0) {
+            console.log(`'${atoms.residueNumber.value(1)}'`)
+            console.log(`'${atoms.residueName.value(1)}'`)
+            console.log(`'${atoms.atomName.value(1)}'`)
+            console.log(atoms.z.value(1))
+            console.log(`'${atoms.z.value(1)}'`)
 
-            const n = atoms.rowCount
+            const n = atoms._rowCount
             console.log('rowCount', n)
 
             console.time('getFloatArray x')
-            const x = getFloatArray(atoms, 'x')
+            const x = atoms.x.toArray(0, n, x => new Float32Array(x))!
             console.timeEnd('getFloatArray x')
             console.log(x.length, x[0], x[x.length-1])
 
             console.time('getFloatArray y')
-            const y = getFloatArray(atoms, 'y')
+            const y = atoms.y.toArray(0, n, x => new Float32Array(x))!
             console.timeEnd('getFloatArray y')
             console.log(y.length, y[0], y[y.length-1])
 
             console.time('getFloatArray z')
-            const z = getFloatArray(atoms, 'z')
+            const z = atoms.z.toArray(0, n, x => new Float32Array(x))!
             console.timeEnd('getFloatArray z')
             console.log(z.length, z[0], z[z.length-1])
 
             console.time('getIntArray residueNumber')
-            const residueNumber = getIntArray(atoms, 'residueNumber')
+            const residueNumber = atoms.residueNumber.toArray(0, n, x => new Int32Array(x))!
             console.timeEnd('getIntArray residueNumber')
             console.log(residueNumber.length, residueNumber[0], residueNumber[residueNumber.length-1])
         } else {

+ 0 - 28
src/utils/chunked-array.ts

@@ -113,34 +113,6 @@ export namespace ChunkedArray {
         return ret as any;
     }
 
-    export function forVertex3D(chunkVertexCount: number = 262144): ChunkedArray<number> {
-        return create<number>(size => new Float32Array(size) as any, chunkVertexCount, 3)
-    }
-
-    export function forIndexBuffer(chunkIndexCount: number = 262144): ChunkedArray<number> {
-        return create<number>(size => new Uint32Array(size) as any, chunkIndexCount, 3)
-    }
-
-    export function forTokenIndices(chunkTokenCount: number = 131072): ChunkedArray<number> {
-        return create<number>(size => new Int32Array(size) as any, chunkTokenCount, 2)
-    }
-
-    export function forIndices(chunkTokenCount: number = 131072): ChunkedArray<number> {
-        return create<number>(size => new Int32Array(size) as any, chunkTokenCount, 1)
-    }
-
-    export function forInt32(chunkSize: number = 131072): ChunkedArray<number> {
-        return create<number>(size => new Int32Array(size) as any, chunkSize, 1)
-    }
-
-    export function forFloat32(chunkSize: number = 131072): ChunkedArray<number> {
-        return create<number>(size => new Float32Array(size) as any, chunkSize, 1)
-    }
-
-    export function forArray<T>(chunkSize: number = 131072): ChunkedArray<T> {
-        return create<T>(size => [] as any, chunkSize, 1)
-    }
-
     export function create<T>(creator: (size: number) => any, chunkElementCount: number, elementSize: number): ChunkedArray<T> {
         chunkElementCount = chunkElementCount | 0;
         if (chunkElementCount <= 0) chunkElementCount = 1;

+ 0 - 88
src/utils/helper.ts

@@ -1,88 +0,0 @@
-/*
- * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
- *
- * from https://github.com/dsehnal/CIFTools.js
- * @author David Sehnal <david.sehnal@gmail.com>
- */
-
-import { TokenizerState } from './tokenizer-state'
-
-/**
- * Eat everything until a newline occurs.
- */
-export function eatLine(state: TokenizerState) {
-    while (state.position < state.length) {
-        switch (state.data.charCodeAt(state.position)) {
-            case 10: // \n
-                state.currentTokenEnd = state.position
-                ++state.position
-                ++state.currentLineNumber
-                return
-            case 13: // \r
-                state.currentTokenEnd = state.position
-                ++state.position
-                ++state.currentLineNumber
-                if (state.data.charCodeAt(state.position) === 10) {
-                    ++state.position
-                }
-                return
-            default:
-                ++state.position
-        }
-    }
-    state.currentTokenEnd = state.position;
-}
-
-/**
- * Eat everything until a whitespace/newline occurs.
- */
-export function eatValue(state: TokenizerState) {
-    while (state.position < state.length) {
-        switch (state.data.charCodeAt(state.position)) {
-            case 9:  // \t
-            case 10: // \n
-            case 13: // \r
-            case 32: // ' '
-                state.currentTokenEnd = state.position;
-                return;
-            default:
-                ++state.position;
-                break;
-        }
-    }
-    state.currentTokenEnd = state.position;
-}
-
-/**
- * Skips all the whitespace - space, tab, newline, CR
- * Handles incrementing line count.
- */
-export function skipWhitespace(state: TokenizerState): number {
-    let prev = 10;
-    while (state.position < state.length) {
-        let c = state.data.charCodeAt(state.position);
-        switch (c) {
-            case 9: // '\t'
-            case 32: // ' '
-                prev = c;
-                ++state.position;
-                break;
-            case 10: // \n
-                // handle \r\n
-                if (prev !== 13) {
-                    ++state.currentLineNumber;
-                }
-                prev = c;
-                ++state.position;
-                break;
-            case 13: // \r
-                prev = c;
-                ++state.position;
-                ++state.currentLineNumber;
-                break;
-            default:
-                return prev;
-        }
-    }
-    return prev;
-}

+ 4 - 2
src/utils/short-string-pool.ts

@@ -9,8 +9,8 @@
  * This ensures there is only 1 instance of a short string.
  * Also known as string interning, see https://en.wikipedia.org/wiki/String_interning
  */
-export type ShortStringPool = { [key: string]: string }
-export namespace ShortStringPool {
+interface ShortStringPool { [key: string]: string }
+namespace ShortStringPool {
     export function create(): ShortStringPool { return Object.create(null); }
     export function get(pool: ShortStringPool, str: string) {
         if (str.length > 6) return str;
@@ -20,3 +20,5 @@ export namespace ShortStringPool {
         return str;
     }
 }
+
+export default ShortStringPool;

+ 0 - 20
src/utils/tokenizer-state.ts

@@ -1,20 +0,0 @@
-/*
- * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
- *
- * from https://github.com/dsehnal/CIFTools.js
- * @author David Sehnal <david.sehnal@gmail.com>
- * @author Alexander Rose <alexander.rose@weirdbyte.de>
- */
-
-export interface TokenizerState {
-    data: string
-
-    position: number
-    length: number
-
-    currentLineNumber: number
-    currentTokenStart: number
-    currentTokenEnd: number
-
-    currentTokenType?: number
-}

+ 0 - 47
src/utils/tokens.ts

@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
- *
- * from https://github.com/dsehnal/CIFTools.js
- * @author David Sehnal <david.sehnal@gmail.com>
- * @author Alexander Rose <alexander.rose@weirdbyte.de>
- */
-
-/**
- * A helper for building a typed array of token indices.
- */
-export interface Tokens {
-    indicesLenMinus2: number,
-    count: number,
-    indices: Int32Array
-}
-
-export namespace Tokens {
-    function resize(tokens: Tokens) {
-        // scale the size using golden ratio, because why not.
-        const newBuffer = new Int32Array((1.61 * tokens.indices.length) | 0);
-        newBuffer.set(tokens.indices);
-        tokens.indices = newBuffer;
-        tokens.indicesLenMinus2 = (newBuffer.length - 2) | 0;
-    }
-
-    export function add(tokens: Tokens, start: number, end: number) {
-        if (tokens.count > tokens.indicesLenMinus2) {
-            resize(tokens);
-        }
-        tokens.indices[tokens.count++] = start;
-        tokens.indices[tokens.count++] = end;
-    }
-
-    export function addUnchecked(tokens: Tokens, start: number, end: number) {
-        tokens.indices[tokens.count++] = start;
-        tokens.indices[tokens.count++] = end;
-    }
-
-    export function create(size: number): Tokens {
-        return {
-            indicesLenMinus2: (size - 2) | 0,
-            count: 0,
-            indices: new Int32Array(size)
-        }
-    }
-}