Pārlūkot izejas kodu

started with gro reader

Alexander Rose 7 gadi atpakaļ
revīzija
07c3377ddb

+ 7 - 0
.gitignore

@@ -0,0 +1,7 @@
+build/
+
+node_modules/
+debug.log
+npm-debug.log
+
+*.sublime-workspace

+ 21 - 0
LICENSE

@@ -0,0 +1,21 @@
+The MIT License
+
+    Copyright (c) 2017, MolQL contributors
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.

+ 27 - 0
README.md

@@ -0,0 +1,27 @@
+
+[![License](http://img.shields.io/badge/license-MIT-blue.svg?style=flat)](https://github.com/arose/molio/blob/master/LICENSE)
+
+- general, non-opinionated library for reading and writing molecular structure related file formats
+- extending on the ideas of the CIFTools.js library
+
+
+## Building & Running
+
+### Build:
+
+    npm install
+    npm run build
+
+### Build automatically on file save:
+
+    npm run watch
+
+### Bundle with rollup (UMD and ES6)
+
+    npm run bundle
+
+
+TODO
+----
+
+-

+ 8 - 0
molio.sublime-project

@@ -0,0 +1,8 @@
+{
+	"folders":
+	[
+		{
+			"path": "."
+		}
+	]
+}

+ 41 - 0
package.json

@@ -0,0 +1,41 @@
+{
+  "name": "molio",
+  "version": "0.1.0",
+  "description": "Parsers for molecular data.",
+  "main": "dist/molio.js",
+  "module": "dist/molio.esm.js",
+  "types": "src/index.d.ts",
+  "scripts": {
+    "lint": "./node_modules/.bin/tslint src/**/*.ts",
+    "build": "./node_modules/.bin/tsc",
+    "watch": "./node_modules/.bin/tsc -watch",
+    "bundle": "./node_modules/.bin/rollup -c",
+    "test": "./node_modules/.bin/jest",
+    "dist": "./node_modules/.bin/uglifyjs build/js/molio.dev.js -cm > dist/molio.js && cp build/js/molio.esm.js dist/molio.esm.js",
+    "script": "./node_modules/.bin/rollup build/js/src/script.js -e fs -f cjs -o build/js/script.js"
+  },
+  "jest": {
+    "moduleFileExtensions": [ "ts", "js" ],
+    "transform": { "\\.ts$": "<rootDir>/node_modules/ts-jest/preprocessor.js" },
+    "testRegex": "\\.spec\\.ts$"
+  },
+  "author": "",
+  "license": "MIT",
+  "devDependencies": {
+    "@types/jest": "latest",
+    "@types/node": "^8.0.25",
+    "jest": "^20.0.4",
+    "rollup": "^0.49.2",
+    "rollup-plugin-buble": "^0.15.0",
+    "rollup-plugin-commonjs": "^8.2.0",
+    "rollup-plugin-json": "^2.3.0",
+    "rollup-plugin-node-resolve": "^3.0.0",
+    "rollup-watch": "^4.3.1",
+    "ts-jest": "^20.0.14",
+    "tslint": "^5.7.0",
+    "typescript": "^2.5.1",
+    "uglify-js": "^3.0.28",
+    "webpack": "^3.5.5"
+  },
+  "dependencies": {}
+}

+ 36 - 0
rollup.config.js

@@ -0,0 +1,36 @@
+import buble from 'rollup-plugin-buble';
+import json from 'rollup-plugin-json';
+import resolve from 'rollup-plugin-node-resolve';
+import commonjs from 'rollup-plugin-commonjs';
+
+var path = require('path');
+var pkg = require('./package.json');
+var external = Object.keys(pkg.dependencies);
+
+export default {
+  input: 'build/js/src/index.js',
+  plugins: [
+    resolve({
+      jsnext: true,
+      main: true
+    }),
+    commonjs(),
+    json(),
+    buble()
+  ],
+  output: [
+    {
+      file: "build/js/molio.dev.js",
+      format: 'umd',
+      name: 'MOLIO',
+      sourcemap: true
+    },
+    {
+      file: "build/js/molio.esm.js",
+      format: 'es',
+      sourcemap: true
+    }
+  ],
+  external: external,
+  sourcemap: true
+};

+ 4 - 0
src/index.d.ts

@@ -0,0 +1,4 @@
+
+export { ParserResult, ParserError, ParserSuccess } from './parser'
+
+export { parse as groReader } from './reader/gro'

+ 2 - 0
src/index.ts

@@ -0,0 +1,2 @@
+
+// Keep the runtime exports in sync with what src/index.d.ts declares.
+export { ParserResult, ParserError, ParserSuccess } from './parser'
+
+export { parse as groReader } from './reader/gro'

+ 40 - 0
src/parser.ts

@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * from https://github.com/dsehnal/CIFTools.js
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+/** Outcome of a parse: either a ParserSuccess carrying the result or a ParserError. */
+export type ParserResult<T> = ParserSuccess<T> | ParserError
+
+export namespace ParserResult {
+    /**
+     * Builds an error outcome. The line defaults to -1; ParserError.toString
+     * omits negative line numbers.
+     */
+    export function error<T>(message: string, line = -1): ParserResult<T> {
+        const failure = new ParserError(message, line);
+        return failure;
+    }
+
+    /**
+     * Builds a success outcome from the parsed value and its warnings
+     * (an empty list when none are given).
+     */
+    export function success<T>(result: T, warnings: string[] = []): ParserResult<T> {
+        const outcome = new ParserSuccess<T>(result, warnings);
+        return outcome;
+    }
+}
+
+/**
+ * Failed parse outcome: a message plus the line it refers to.
+ */
+export class ParserError {
+    isError: true = true;
+
+    constructor(
+        public message: string,
+        public line: number) {
+    }
+
+    /**
+     * Formats the error; the line prefix is only added for line numbers >= 0.
+     */
+    toString() {
+        return this.line >= 0
+            ? `[Line ${this.line}] ${this.message}`
+            : this.message;
+    }
+}
+
+/**
+ * Successful parse outcome: the parsed value plus any warnings.
+ */
+export class ParserSuccess<T> {
+    isError: false = false;
+
+    constructor(
+        public result: T,
+        public warnings: string[]) {
+    }
+}

+ 272 - 0
src/reader/gro.ts

@@ -0,0 +1,272 @@
+/*
+ * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ */
+
+import { parseInt } from '../utils/number-parser'
+import { eatLine, eatValue, skipWhitespace } from '../utils/helper'
+import { Tokens } from '../utils/tokens'
+import { TokenizerState } from '../utils/tokenizer-state'
+
+import { TextTable } from '../relational/text-table'
+
+import { ParserResult } from '../parser'
+
+/**
+ * http://manual.gromacs.org/current/online/gro.html
+ */
+
+/**
+ * Result of reading a gro string: the raw text and the blocks parsed from it.
+ */
+export interface GroFile {
+    data: string;
+    blocks: GroBlock[];
+}
+
+/**
+ * A container of text tables that are added and looked up by name.
+ */
+export interface GroBlock {
+    getTable(name: string): TextTable
+    addTable(table: TextTable): void
+}
+
+/**
+ * Class side of GroFile: stores the raw text and starts with no blocks.
+ */
+export class GroFile implements GroFile {
+    data: string;
+    blocks: GroBlock[] = [];
+
+    constructor(data: string) {
+        this.data = data;
+    }
+}
+
+/**
+ * Class side of GroBlock: keeps tables in insertion order and indexes
+ * them by name.
+ */
+export class GroBlock implements GroBlock {
+    private tableMap: Map<string, TextTable>;
+    private tableList: TextTable[];
+
+    data: string;
+
+    constructor(data: string) {
+        this.data = data;
+
+        this.tableMap = new Map()
+        this.tableList = []
+    }
+
+    /**
+     * Looks a table up by name; yields undefined when no table was added under it.
+     */
+    getTable(name: string) {
+        const found = this.tableMap.get(name);
+        return found;
+    }
+
+    /**
+     * Appends a table and registers it under its own name.
+     */
+    addTable(table: TextTable) {
+        this.tableList.push(table);
+        this.tableMap.set(table.name, table);
+    }
+}
+
+/**
+ * Tokenizer state extended with values specific to gro parsing.
+ */
+export interface GroState extends TokenizerState {
+    // written by handleNumberOfAtoms, read by handleAtoms
+    numberOfAtoms: number
+    // read by handleAtoms to decide whether velocity columns are added;
+    // NOTE(review): nothing visible in this file sets it to true
+    hasVelocities: boolean
+    // initialized to 3 by createTokenizer; not read anywhere in this file
+    numberOfDecimalPlaces: number
+}
+
+/**
+ * Creates the initial parser state for a gro string: positioned at the
+ * first character of line 1, with no atoms counted yet.
+ */
+export function createTokenizer(data: string): GroState {
+    const initial: GroState = {
+        data,
+
+        // cursor
+        position: 0,
+        length: data.length,
+
+        // current line / token bookkeeping
+        currentLineNumber: 1,
+        currentTokenStart: 0,
+        currentTokenEnd: 0,
+
+        // gro specific values
+        numberOfAtoms: 0,
+        hasVelocities: false,
+        numberOfDecimalPlaces: 3
+    };
+    return initial;
+}
+
+/**
+ * title string (free format string, optional time in ps after 't=')
+ *
+ * Consumes the current line and adds exactly two tokens: the title (with
+ * spaces and commas directly before 't=' removed) and the time value
+ * (trimmed of surrounding spaces). Without a 't=' marker the whole line is
+ * the title and the time token is empty.
+ */
+function handleTitleString (state: GroState, tokens: Tokens) {
+    // anchor the token at the line start rather than relying on the initial state
+    state.currentTokenStart = state.position
+    eatLine(state)
+    const start = state.currentTokenStart
+    const end = state.currentTokenEnd
+
+    // find the first 't=' / 'T=' on the line, if any
+    let titleEnd = start
+    while (titleEnd < end && !isTime(state.data, titleEnd)) ++titleEnd;
+
+    if (titleEnd < end) {
+        // the time value follows the two marker characters
+        let timeStart = titleEnd + 2
+        let timeEnd = end
+
+        while (titleEnd > start && isSpaceOrComma(state.data, titleEnd - 1)) --titleEnd;
+        Tokens.add(tokens, start, titleEnd)  // title
+
+        // trim spaces on both sides of the time value
+        while (timeStart < end && state.data.charCodeAt(timeStart) === 32) ++timeStart;
+        while (timeEnd > timeStart && state.data.charCodeAt(timeEnd - 1) === 32) --timeEnd;
+        Tokens.add(tokens, timeStart, timeEnd)  // time
+    } else {
+        Tokens.add(tokens, start, titleEnd)  // title
+        Tokens.add(tokens, titleEnd, titleEnd)  // empty token for time
+    }
+}
+
+/** True when the character at `position` is a space or a comma. */
+function isSpaceOrComma(data: string, position: number): boolean {
+    switch (data.charCodeAt(position)) {
+        case 32:  // ' '
+        case 44:  // ','
+            return true
+        default:
+            return false
+    }
+}
+
+/** True when `position` starts a 't=' or 'T=' marker. */
+function isTime(data: string, position: number): boolean {
+    const first = data.charCodeAt(position);
+    const startsWithT = first === 84 || first === 116;  // 'T' or 't'
+    return startsWithT && data.charCodeAt(position + 1) === 61;  // '='
+}
+
+// function isDot(state: TokenizerState): boolean {
+//     // .
+//     if (state.data.charCodeAt(state.currentTokenStart) !== 46) return false;
+
+//     return true;
+// }
+
+// function numberOfDecimalPlaces (state: TokenizerState) {
+//     // var ndec = firstLines[ 2 ].length - firstLines[ 2 ].lastIndexOf('.') - 1
+//     const start = state.currentTokenStart
+//     const end = state.currentTokenEnd
+//     for (let i = end; start < i; --i) {
+//         // .
+//         if (state.data.charCodeAt(i) === 46) return end - start - i
+//     }
+//     throw new Error('Could not determine number of decimal places')
+// }
+
+/**
+ * number of atoms (free format integer)
+ *
+ * Reads the first value on the line into state.numberOfAtoms, records it as
+ * a token and then skips the rest of the line.
+ */
+function handleNumberOfAtoms (state: GroState, tokens: Tokens) {
+    skipWhitespace(state)
+    state.currentTokenStart = state.position
+    eatValue(state)
+
+    const { currentTokenStart: from, currentTokenEnd: to } = state
+    state.numberOfAtoms = parseInt(state.data, from, to)
+    Tokens.add(tokens, from, to)
+
+    eatLine(state)
+}
+
+// function checkForVelocities (state: GroState) {
+
+// }
+
+/**
+ * This format is fixed, ie. all columns are in a fixed position.
+ * Optionally (for now only yet with trjconv) you can write gro files
+ * with any number of decimal places, the format will then be n+5
+ * positions with n decimal places (n+1 for velocities) in stead
+ * of 8 with 3 (with 4 for velocities). Upon reading, the precision
+ * will be inferred from the distance between the decimal points
+ * (which will be n+5). Columns contain the following information
+ * (from left to right):
+ *     residue number (5 positions, integer)
+ *     residue name (5 characters)
+ *     atom name (5 characters)
+ *     atom number (5 positions, integer)
+ *     position (in nm, x y z in 3 columns, each 8 positions with 3 decimal places)
+ *     velocity (in nm/ps (or km/s), x y z in 3 columns, each 8 positions with 4 decimal places)
+ *
+ * Reads state.numberOfAtoms lines, slices each into fixed-width fields,
+ * trims surrounding spaces and adds the resulting 'atoms' table to the block.
+ * Only the default field widths are handled here; the precision is not
+ * inferred. NOTE(review): nothing in this file sets state.hasVelocities, so
+ * the velocity columns are currently never added.
+ */
+function handleAtoms (state: GroState, block: GroBlock) {
+    const name = 'atoms'
+
+    const columns = [ 'residueNumber', 'residueName', 'atomName', 'atomNumber', 'x', 'y', 'z' ]
+    if (state.hasVelocities) {
+        columns.push('vx', 'vy', 'vz')
+    }
+    const fieldSizes = [ 5, 5, 5, 5, 8, 8, 8, 8, 8, 8 ]
+
+    const columnCount = columns.length
+    const tokens = Tokens.create(state.numberOfAtoms * 2 * columnCount)
+
+    for (let i = 0; i < state.numberOfAtoms; ++i) {
+        state.currentTokenStart = state.position
+        eatLine(state)
+        const lineEnd = state.currentTokenEnd
+
+        let start: number
+        let end = state.currentTokenStart
+        for (let j = 0; j < columnCount; ++j) {
+            start = end
+            // clamp to the line end so a short line yields empty fields
+            // instead of reading characters from the following line
+            end = Math.min(start + fieldSizes[j], lineEnd)
+
+            // trim
+            let valueStart = start
+            let valueEnd = end
+            while (valueStart < valueEnd && state.data.charCodeAt(valueStart) === 32) ++valueStart;
+            while (valueEnd > valueStart && state.data.charCodeAt(valueEnd - 1) === 32) --valueEnd;
+
+            Tokens.add(tokens, valueStart, valueEnd)
+        }
+    }
+
+    block.addTable(new TextTable(state.data, name, columns, tokens));
+}
+
+/**
+ * box vectors (free format, space separated reals), values:
+ * v1(x) v2(y) v3(z) v1(y) v1(z) v2(x) v2(z) v3(x) v3(y),
+ * the last 6 values may be omitted (they will be set to zero).
+ * Gromacs only supports boxes with v1(y)=v1(z)=v2(z)=0.
+ *
+ * Adds one token for each of the first three values.
+ */
+function handleBoxVectors (state: GroState, tokens: Tokens) {
+    // just read the first three values, ignore any remaining
+    let remaining = 3
+    while (remaining-- > 0) {
+        skipWhitespace(state)
+        const from = state.position
+        state.currentTokenStart = from
+        eatValue(state)
+        Tokens.add(tokens, from, state.currentTokenEnd)
+    }
+}
+
+/**
+ * Creates an error result.
+ */
+// function error(line: number, message: string) {
+//     return ParserResult.error<GroFile>(message, line);
+// }
+
+/**
+ * Wraps a parsed file in a success result (without warnings).
+ */
+function result(data: GroFile): ParserResult<GroFile> {
+    const wrapped = ParserResult.success(data);
+    return wrapped;
+}
+
+/**
+ * Parses a gro string into a GroFile with a single block that holds a
+ * 'header' table (title, time, atom count, box) and an 'atoms' table.
+ */
+function parseInternal(data: string): ParserResult<GroFile> {
+    const state = createTokenizer(data)
+    const file = new GroFile(data)
+
+    const block = new GroBlock(data)
+    file.blocks.push(block)
+
+    const headerColumns = ['title', 'timeInPs', 'numberOfAtoms', 'boxX', 'boxY', 'boxZ']
+    const headerTokens = Tokens.create(2 * headerColumns.length)
+
+    // the header values are spread over the first, second and last line
+    handleTitleString(state, headerTokens)
+    handleNumberOfAtoms(state, headerTokens)
+    handleAtoms(state, block)
+    handleBoxVectors(state, headerTokens)
+
+    // TextTable reads tokens.indices and derives rowCount from tokens.count
+    // in its constructor, so the header table is only built once all header
+    // tokens have been added.
+    block.addTable(new TextTable(state.data, 'header', headerColumns, headerTokens))
+
+    return result(file);
+}
+
+/**
+ * Public entry point of the gro reader; delegates to parseInternal.
+ */
+export function parse(data: string): ParserResult<GroFile> {
+    const outcome = parseInternal(data);
+    return outcome;
+}

+ 40 - 0
src/reader/spec/gro.spec.ts

@@ -0,0 +1,40 @@
+
+import { parse } from '../gro'
+// import { Table } from '../../relational/table'
+
+const groString = `MD of 2 waters, t= 4.2
+    6
+    1WATER  OW1    1   0.126   1.624   1.679  0.1227 -0.0580  0.0434
+    1WATER  HW2    2   0.190   1.661   1.747  0.8085  0.3191 -0.7791
+    1WATER  HW3    3   0.177   1.568   1.613 -0.9045 -2.6469  1.3180
+    2WATER  OW1    4   1.275   0.053   0.622  0.2519  0.3140 -0.1734
+    2WATER  HW2    5   1.337   0.002   0.680 -1.0641 -1.1349  0.0257
+    2WATER  HW3    6   1.326   0.120   0.568  1.9427 -0.8216 -0.0244
+   1.82060   1.82060   1.82060`
+
+describe('gro reader', () => {
+    it('basic', () => {
+        const parsed = parse(groString)
+
+        // Fail loudly: a parse error or a missing header must not let the
+        // test pass without running a single assertion.
+        if (parsed.isError) {
+            throw new Error(parsed.toString())
+        }
+        const groFile = parsed.result
+
+        const header = groFile.blocks[0].getTable('header')
+        if (!header) {
+            throw new Error('no header')
+        }
+
+        expect(header.columnNames).toEqual(['title', 'timeInPs', 'numberOfAtoms', 'boxX', 'boxY', 'boxZ'])
+
+        expect(header.getColumn('title').getString(0)).toBe('MD of 2 waters')
+        expect(header.getColumn('timeInPs').getFloat(0)).toBeCloseTo(4.2)
+        expect(header.getColumn('numberOfAtoms').getInteger(0)).toBe(6)
+
+        expect(header.getColumn('boxX').getFloat(0)).toBeCloseTo(1.82060)
+        expect(header.getColumn('boxY').getFloat(0)).toBeCloseTo(1.82060)
+        expect(header.getColumn('boxZ').getFloat(0)).toBeCloseTo(1.82060)
+    })
+});

+ 32 - 0
src/relational/column.ts

@@ -0,0 +1,32 @@
+
+import { ValuePresence } from './constants'
+
+/**
+ * A column represents a single field of a CIF category.
+ */
+export interface Column {
+    // false only for the UndefinedColumn placeholder defined in this file
+    isDefined: boolean;
+
+    // typed accessors for the value at the given row
+    getString(row: number): string | null;
+    getInteger(row: number): number;
+    getFloat(row: number): number;
+
+    getValuePresence(row: number): ValuePresence;
+
+    // comparisons between rows / against a string value
+    areValuesEqual(rowA: number, rowB: number): boolean;
+    stringEquals(row: number, value: string): boolean;
+}
+
+/**
+ * Placeholder for a column that is not present: every accessor returns a
+ * fixed neutral value regardless of the row.
+ */
+class _UndefinedColumn implements Column {  // tslint:disable-line:class-name
+    isDefined = false;
+
+    getString(row: number): string | null {
+        return null;
+    }
+
+    getInteger(row: number): number {
+        return 0;
+    }
+
+    getFloat(row: number): number {
+        return 0.0;
+    }
+
+    getValuePresence(row: number): ValuePresence {
+        return ValuePresence.NotSpecified;
+    }
+
+    areValuesEqual(rowA: number, rowB: number): boolean {
+        return true;
+    }
+
+    stringEquals(row: number, value: string): boolean {
+        return value === null;
+    }
+}
+export const UndefinedColumn: Column = new _UndefinedColumn();

+ 7 - 0
src/relational/constants.ts

@@ -0,0 +1,7 @@
+
+
+/**
+ * Tells whether a table cell carries a value.
+ */
+export const enum ValuePresence {
+    // a value is present
+    Present = 0,
+    // no value: an empty token in TextColumn, a '.' token in CifColumn
+    NotSpecified = 1,
+    // value unknown: a '?' token in CifColumn
+    Unknown = 2
+}

+ 30 - 0
src/relational/table.ts

@@ -0,0 +1,30 @@
+
+import { Column } from './column'
+
+/**
+ * Represents a CIF category with multiple fields represented as columns.
+ *
+ * Example:
+ * _category.field1
+ * _category.field2
+ * ...
+ */
+export interface Table {
+    name: string;
+    rowCount: number;
+    columnCount: number;
+    columnNames: string[];
+
+    /**
+     * If a field with the given name is not present, returns UndefinedColumn.
+     *
+     * Columns are accessed by their field name only, i.e.
+     * _category.field is accessed by
+     * category.getColumn('field')
+     *
+     * Note that columns are created on demand and there is some computational
+     * cost when creating a new column. Therefore, if you need to reuse a column,
+     * it is a good idea to cache it.
+     */
+    getColumn(name: string): Column;
+}

+ 113 - 0
src/relational/text-column.ts

@@ -0,0 +1,113 @@
+
+import { Column } from './column'
+import { ValuePresence } from './constants'
+import { TextTable } from './text-table'
+
+import { parseInt as fastParseInt, parseFloat as fastParseFloat } from '../utils/number-parser'
+import { ShortStringPool } from '../utils/short-string-pool'
+
+/**
+ * Represents a single column of a TextTable.
+ *
+ * Values are not copied: each accessor looks up the (start, end) pair of the
+ * requested cell in the table's token indices and reads from the data string.
+ * The pair of row `r` starts at `(r * columnCount + index) * 2`.
+ */
+export class TextColumn implements Column {
+
+    protected indices: Int32Array;
+    protected columnCount: number;
+    protected rowCount: number;
+    protected stringPool = ShortStringPool.create();
+
+    isDefined = true;
+
+    /**
+     * Returns the string value at given row.
+     */
+    getString(row: number): string | null {
+        let i = (row * this.columnCount + this.index) * 2;
+        return ShortStringPool.get(this.stringPool, this.data.substring(this.indices[i], this.indices[i + 1]));
+    }
+
+    /**
+     * Returns the integer value at given row.
+     */
+    getInteger(row: number): number {
+        let i = (row * this.columnCount + this.index) * 2;
+        return fastParseInt(this.data, this.indices[i], this.indices[i + 1]);
+    }
+
+    /**
+     * Returns the float value at given row.
+     */
+    getFloat(row: number): number {
+        let i = (row * this.columnCount + this.index) * 2;
+        return fastParseFloat(this.data, this.indices[i], this.indices[i + 1]);
+    }
+
+    /**
+     * Returns true if the token has the specified string value.
+     */
+    stringEquals(row: number, value: string): boolean {
+        let aIndex = (row * this.columnCount + this.index) * 2,
+            s = this.indices[aIndex],
+            len = value.length;
+        // different lengths can never match
+        if (len !== this.indices[aIndex + 1] - s) return false;
+        for (let i = 0; i < len; i++) {
+            if (this.data.charCodeAt(i + s) !== value.charCodeAt(i)) return false;
+        }
+        return true;
+    }
+
+    /**
+     * Determines if values at the given rows are equal.
+     */
+    areValuesEqual(rowA: number, rowB: number): boolean {
+        const aIndex = (rowA * this.columnCount + this.index) * 2
+        const bIndex = (rowB * this.columnCount + this.index) * 2
+        const aS = this.indices[aIndex]
+        const bS = this.indices[bIndex]
+        const len = this.indices[aIndex + 1] - aS
+        if (len !== this.indices[bIndex + 1] - bS) return false;
+        for (let i = 0; i < len; i++) {
+            if (this.data.charCodeAt(i + aS) !== this.data.charCodeAt(i + bS)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * An empty token (start equals end) counts as not specified.
+     */
+    getValuePresence(row: number): ValuePresence {
+        let index = 2 * (row * this.columnCount + this.index);
+        if (this.indices[index] === this.indices[index + 1]) {
+            return ValuePresence.NotSpecified
+        }
+        return ValuePresence.Present
+    }
+
+    /**
+     * @param table table the column belongs to; its indices and dimensions are shared
+     * @param data string the token indices point into
+     * @param name name of the column
+     * @param index position of the column within a row
+     */
+    constructor(table: TextTable, protected data: string, public name: string, public index: number) {
+        this.indices = table.indices;
+        this.columnCount = table.columnCount;
+        // rowCount was declared but never assigned before
+        this.rowCount = table.rowCount;
+    }
+}
+
+/**
+ * Text column that treats the single-character tokens '.' and '?' as
+ * missing values.
+ */
+export class CifColumn extends TextColumn {
+    /**
+     * Returns the string value at given row, or null for a '.' or '?' value.
+     */
+    getString(row: number): string | null {
+        const value = super.getString(row)
+        return (value === '.' || value === '?') ? null : value;
+    }
+
+    /**
+     * Reports '.' as not specified and '?' as unknown; any other token,
+     * including an empty one, counts as present.
+     */
+    getValuePresence(row: number): ValuePresence {
+        const offset = 2 * (row * this.columnCount + this.index);
+        const start = this.indices[offset];
+        const isSingleChar = this.indices[offset + 1] - start === 1;
+        if (isSingleChar) {
+            switch (this.data.charCodeAt(start)) {
+                case 46 /* . */: return ValuePresence.NotSpecified;
+                case 63 /* ? */: return ValuePresence.Unknown;
+            }
+        }
+        return ValuePresence.Present;
+    }
+}

+ 92 - 0
src/relational/text-table.ts

@@ -0,0 +1,92 @@
+
+
+import { Table } from './table'
+import { UndefinedColumn } from './column'
+import { TextColumn, CifColumn } from './text-column'
+
+import { Tokens } from '../utils/tokens'
+
+/**
+ * A table whose cells are (start, end) index pairs into a backing string.
+ */
+export class TextTable implements Table {
+    protected data: string;
+    protected columnNameList: string[];
+    protected columnIndices: Map<string, number>;
+
+    /**
+     * Name of the category.
+     */
+    name: string;
+
+    /**
+     * Names of the columns, in column order.
+     */
+    get columnNames() {
+        return this.columnNameList;
+    }
+
+    /**
+     * Number of columns in the category.
+     */
+    columnCount: number;
+
+    /**
+     * Number of rows in the category.
+     */
+    rowCount: number;
+
+    /**
+     * Pairs of (start at index 2 * i, end at index 2 * i + 1) indices to the data string.
+     * The "end" character is not included (for it's iterated as for (i = start; i < end; i++)).
+     */
+    indices: Int32Array;
+
+    /**
+     * Creates an accessor for the named column; unknown names yield UndefinedColumn.
+     */
+    getColumn(name: string): TextColumn {
+        const position = this.columnIndices.get(name);
+        if (position === undefined) {
+            return UndefinedColumn as TextColumn;
+        }
+        return new TextColumn(this, this.data, name, position);
+    }
+
+    /**
+     * (Re)builds the name list and the name-to-position lookup.
+     */
+    initColumns(columns: string[]): void {
+        this.columnIndices = new Map<string, number>();
+        this.columnNameList = [];
+        let position = 0;
+        while (position < columns.length) {
+            const columnName = columns[position];
+            this.columnIndices.set(columnName, position);
+            this.columnNameList.push(columnName);
+            ++position;
+        }
+    }
+
+    constructor(
+        data: string, name: string, columns: string[], tokens: Tokens) {
+        this.name = name;
+        this.indices = tokens.indices;
+        this.data = data;
+
+        // dimensions are fixed here, from the tokens present at construction time
+        const width = columns.length;
+        this.columnCount = width;
+        this.rowCount = (tokens.count / 2 / width) | 0;
+
+        this.initColumns(columns)
+    }
+}
+
+/**
+ * Text table whose columns are CifColumns and whose column names have the
+ * leading table name plus one separator character removed.
+ */
+export class CifTable extends TextTable {
+    getColumn(name: string): CifColumn {
+        const position = this.columnIndices.get(name);
+        if (position === undefined) {
+            return UndefinedColumn as CifColumn;
+        }
+        return new CifColumn(this, this.data, name, position);
+    }
+
+    initColumns(columns: string[]): void {
+        this.columnIndices = new Map<string, number>();
+        this.columnNameList = [];
+        // number of leading characters to drop: the table name and one more character
+        const prefixLength = this.name.length + 1;
+        let position = 0;
+        while (position < columns.length) {
+            const shortName = columns[position].substr(prefixLength);
+            this.columnIndices.set(shortName, position);
+            this.columnNameList.push(shortName);
+            ++position;
+        }
+    }
+}

+ 97 - 0
src/script.ts

@@ -0,0 +1,97 @@
+
+// import * as util from 'util'
+import * as fs from 'fs'
+
+import { parse } from './reader/gro'
+import { Table } from './relational/table'
+
+const file = '1crn.gro'
+// const file = 'water.gro'
+// const file = 'test.gro'
+// const file = 'md_1u19_trj.gro'
+
+/**
+ * Copies every row of the named column into a new Float32Array.
+ */
+function getFloatArray(table: Table, name: string) {
+    const source = table.getColumn(name)
+    const total = table.rowCount
+    const values = new Float32Array(total)
+    let row = 0
+    while (row < total) {
+        values[row] = source.getFloat(row)
+        ++row
+    }
+    return values
+}
+
+/**
+ * Copies every row of the named column into a new Int32Array.
+ */
+function getIntArray(table: Table, name: string) {
+    const source = table.getColumn(name)
+    const total = table.rowCount
+    const values = new Int32Array(total)
+    let row = 0
+    while (row < total) {
+        values[row] = source.getInteger(row)
+        ++row
+    }
+    return values
+}
+
+// Manual smoke test: reads the selected example file, parses it with the gro
+// reader, prints the header values and a few atom values, and times both the
+// parse and the extraction of whole columns into typed arrays.
+fs.readFile(`./examples/${file}`, 'utf8', function (err,data) {
+    if (err) {
+        return console.log(err);
+    }
+    // console.log(data);
+
+    console.time('parse')
+    const parsed = parse(data)
+    console.timeEnd('parse')
+    if (parsed.isError) {
+        console.log(parsed)
+    } else {
+        const groFile = parsed.result
+
+        // header table: one row with title, time, atom count and box size
+        const header = groFile.blocks[0].getTable('header')
+        if (header) {
+            console.log(header.columnNames)
+
+            console.log('title', header.getColumn('title').getString(0))
+            console.log('timeInPs', header.getColumn('timeInPs').getFloat(0))
+            console.log('numberOfAtoms', header.getColumn('numberOfAtoms').getInteger(0))
+            console.log('boxX', header.getColumn('boxX').getFloat(0))
+            console.log('boxY', header.getColumn('boxY').getFloat(0))
+            console.log('boxZ', header.getColumn('boxZ').getFloat(0))
+        } else {
+            console.error('no header')
+        }
+
+        // atoms table: one row per atom
+        const atoms = groFile.blocks[0].getTable('atoms')
+        if (atoms) {
+            console.log(atoms.columnNames)
+
+            // values of the second row; strings are quoted to make leftover whitespace visible
+            console.log(`'${atoms.getColumn('residueNumber').getString(1)}'`)
+            console.log(`'${atoms.getColumn('residueName').getString(1)}'`)
+            console.log(`'${atoms.getColumn('atomName').getString(1)}'`)
+            console.log(atoms.getColumn('z').getFloat(1))
+            console.log(`'${atoms.getColumn('z').getString(1)}'`)
+
+            const n = atoms.rowCount
+            console.log('rowCount', n)
+
+            // time the conversion of whole columns and print length, first and last value
+            console.time('getFloatArray x')
+            const x = getFloatArray(atoms, 'x')
+            console.timeEnd('getFloatArray x')
+            console.log(x.length, x[0], x[x.length-1])
+
+            console.time('getFloatArray y')
+            const y = getFloatArray(atoms, 'y')
+            console.timeEnd('getFloatArray y')
+            console.log(y.length, y[0], y[y.length-1])
+
+            console.time('getFloatArray z')
+            const z = getFloatArray(atoms, 'z')
+            console.timeEnd('getFloatArray z')
+            console.log(z.length, z[0], z[z.length-1])
+
+            console.time('getIntArray residueNumber')
+            const residueNumber = getIntArray(atoms, 'residueNumber')
+            console.timeEnd('getIntArray residueNumber')
+            console.log(residueNumber.length, residueNumber[0], residueNumber[residueNumber.length-1])
+        } else {
+            console.error('no atoms')
+        }
+    }
+});

+ 161 - 0
src/utils/chunked-array.ts

@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * from https://github.com/dsehnal/CIFTools.js
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+/**
+ * A generic chunked array builder.
+ *
+ * When adding elements, the array grows by a specified number
+ * of elements and no copying is done until ChunkedArray.compact
+ * is called.
+ */
+export interface ChunkedArray<T> {
+    /** Allocates the storage of one chunk and of the compacted result. */
+    creator: (size: number) => any;
+    /** Number of slots a single element occupies. */
+    elementSize: number;
+    /** Number of slots per chunk (elements per chunk times elementSize). */
+    chunkSize: number;
+    /** Chunk that is currently written to. */
+    current: any;
+    /** Next free slot in the current chunk. */
+    currentIndex: number;
+
+    /** All chunks allocated so far, the current one being the last. */
+    parts: any[];
+    /** Number of elements added so far. */
+    elementCount: number;
+}
+
+export namespace ChunkedArray {
+    /**
+     * Duck-type check for a ChunkedArray. Always returns a boolean and is
+     * safe to call with null or undefined.
+     */
+    export function is(x: any): x is ChunkedArray<any> {
+        return !!x && !!x.creator && !!x.chunkSize;
+    }
+
+    /** Starts a fresh chunk when the current one has no free slot left. */
+    function startNewChunkIfFull(array: ChunkedArray<any>) {
+        if (array.currentIndex >= array.chunkSize) {
+            array.currentIndex = 0;
+            array.current = array.creator(array.chunkSize);
+            array.parts[array.parts.length] = array.current;
+        }
+    }
+
+    /** Adds a 4-slot element; returns the index of the added element. */
+    export function add4<T>(array: ChunkedArray<T>, x: T, y: T, z: T, w: T) {
+        startNewChunkIfFull(array);
+
+        array.current[array.currentIndex++] = x;
+        array.current[array.currentIndex++] = y;
+        array.current[array.currentIndex++] = z;
+        array.current[array.currentIndex++] = w;
+        return array.elementCount++;
+    }
+
+    /** Adds a 3-slot element; returns the index of the added element. */
+    export function add3<T>(array: ChunkedArray<T>, x: T, y: T, z: T) {
+        startNewChunkIfFull(array);
+
+        array.current[array.currentIndex++] = x;
+        array.current[array.currentIndex++] = y;
+        array.current[array.currentIndex++] = z;
+        return array.elementCount++;
+    }
+
+    /** Adds a 2-slot element; returns the index of the added element. */
+    export function add2<T>(array: ChunkedArray<T>, x: T, y: T) {
+        startNewChunkIfFull(array);
+
+        array.current[array.currentIndex++] = x;
+        array.current[array.currentIndex++] = y;
+        return array.elementCount++;
+    }
+
+    /** Adds a 1-slot element; returns the index of the added element. */
+    export function add<T>(array: ChunkedArray<T>, x: T) {
+        startNewChunkIfFull(array);
+
+        array.current[array.currentIndex++] = x;
+        return array.elementCount++;
+    }
+
+
+    /**
+     * Copies all chunks into one contiguous array created by `creator`
+     * with room for elementSize * elementCount slots.
+     */
+    export function compact<T>(array: ChunkedArray<T>): T[] {
+        const ret = array.creator(array.elementSize * array.elementCount)
+        const fullPartCount = array.parts.length - 1
+        // slot at which the current (last) chunk starts in the result
+        const offset = fullPartCount * array.chunkSize
+
+        // all chunks but the last are completely filled
+        if (array.parts.length > 1) {
+            if (array.parts[0].buffer) {
+                // typed arrays: bulk copy
+                for (let i = 0; i < fullPartCount; i++) {
+                    ret.set(array.parts[i], array.chunkSize * i);
+                }
+            } else {
+                for (let i = 0; i < fullPartCount; i++) {
+                    const offsetInner = array.chunkSize * i;
+                    const part = array.parts[i];
+
+                    for (let j = 0; j < array.chunkSize; j++) {
+                        ret[offsetInner + j] = part[j];
+                    }
+                }
+            }
+        }
+
+        // the last chunk is bulk copied only when it is a full typed array,
+        // otherwise just its used slots are copied one by one
+        if (array.current.buffer && array.currentIndex >= array.chunkSize) {
+            ret.set(array.current, offset);
+        } else {
+            for (let i = 0; i < array.currentIndex; i++) {
+                ret[offset + i] = array.current[i];
+            }
+        }
+        return ret as any;
+    }
+
+    /** Float32 storage, 3 slots per element. */
+    export function forVertex3D(chunkVertexCount: number = 262144): ChunkedArray<number> {
+        return create<number>(size => new Float32Array(size) as any, chunkVertexCount, 3)
+    }
+
+    /** Uint32 storage, 3 slots per element. */
+    export function forIndexBuffer(chunkIndexCount: number = 262144): ChunkedArray<number> {
+        return create<number>(size => new Uint32Array(size) as any, chunkIndexCount, 3)
+    }
+
+    /** Int32 storage, 2 slots per element. */
+    export function forTokenIndices(chunkTokenCount: number = 131072): ChunkedArray<number> {
+        return create<number>(size => new Int32Array(size) as any, chunkTokenCount, 2)
+    }
+
+    /** Int32 storage, 1 slot per element. */
+    export function forIndices(chunkTokenCount: number = 131072): ChunkedArray<number> {
+        return create<number>(size => new Int32Array(size) as any, chunkTokenCount, 1)
+    }
+
+    /** Int32 storage, 1 slot per element. */
+    export function forInt32(chunkSize: number = 131072): ChunkedArray<number> {
+        return create<number>(size => new Int32Array(size) as any, chunkSize, 1)
+    }
+
+    /** Float32 storage, 1 slot per element. */
+    export function forFloat32(chunkSize: number = 131072): ChunkedArray<number> {
+        return create<number>(size => new Float32Array(size) as any, chunkSize, 1)
+    }
+
+    /** Plain JavaScript array storage, 1 slot per element. */
+    export function forArray<T>(chunkSize: number = 131072): ChunkedArray<T> {
+        return create<T>(size => [] as any, chunkSize, 1)
+    }
+
+    /**
+     * Creates a builder.
+     *
+     * @param creator allocates storage for the given number of slots
+     * @param chunkElementCount elements per chunk; truncated to an integer, values <= 0 become 1
+     * @param elementSize slots per element
+     */
+    export function create<T>(creator: (size: number) => any, chunkElementCount: number, elementSize: number): ChunkedArray<T> {
+        chunkElementCount = chunkElementCount | 0;
+        if (chunkElementCount <= 0) chunkElementCount = 1;
+
+        let chunkSize = chunkElementCount * elementSize;
+        let current = creator(chunkSize)
+
+        return {
+            elementSize,
+            chunkSize,
+            creator,
+            current,
+            parts: [current],
+            currentIndex: 0,
+            elementCount: 0
+        } as ChunkedArray<T>
+    }
+}

+ 82 - 0
src/utils/helper.ts

@@ -0,0 +1,82 @@
+
+import { TokenizerState } from './tokenizer-state'
+
+/**
+ * Advances `state.position` past the rest of the current line.
+ *
+ * `state.currentTokenEnd` is set to the index of the line terminator (or to the
+ * end of the input when none is found). The terminator itself - `\n`, `\r`
+ * or `\r\n` - is consumed and `state.currentLineNumber` is incremented once.
+ */
+export function eatLine(state: TokenizerState) {
+    for (; state.position < state.length; ++state.position) {
+        const code = state.data.charCodeAt(state.position);
+        if (code !== 10 && code !== 13) continue;
+
+        // line terminator found: the token ends right before it
+        state.currentTokenEnd = state.position;
+        ++state.position;
+        ++state.currentLineNumber;
+        // treat \r\n as a single line break
+        if (code === 13 && state.data.charCodeAt(state.position) === 10) {
+            ++state.position;
+        }
+        return;
+    }
+    state.currentTokenEnd = state.position;
+}
+
+/**
+ * Advances `state.position` to the next tab, newline, carriage return or space
+ * (or to the end of the input) and records that index in
+ * `state.currentTokenEnd`. The whitespace character itself is not consumed.
+ */
+export function eatValue(state: TokenizerState) {
+    while (state.position < state.length) {
+        const code = state.data.charCodeAt(state.position);
+        // \t, \n, \r and ' ' terminate the value
+        if (code === 9 || code === 10 || code === 13 || code === 32) break;
+        ++state.position;
+    }
+    state.currentTokenEnd = state.position;
+}
+
+/**
+ * Skips spaces, tabs, newlines and carriage returns starting at
+ * `state.position`, incrementing `state.currentLineNumber` once per line break
+ * (`\r\n` counts as one).
+ *
+ * @returns the char code of the last whitespace character skipped, or 10 (`\n`)
+ *          when nothing was skipped
+ */
+export function skipWhitespace(state: TokenizerState): number {
+    let last = 10;
+    for (; state.position < state.length; ++state.position) {
+        const code = state.data.charCodeAt(state.position);
+        if (code === 10) {
+            // a \n right after \r belongs to the line break already counted
+            if (last !== 13) ++state.currentLineNumber;
+        } else if (code === 13) {
+            ++state.currentLineNumber;
+        } else if (code !== 9 && code !== 32) {
+            // first non-whitespace character: leave it unconsumed
+            break;
+        }
+        last = code;
+    }
+    return last;
+}

+ 76 - 0
src/utils/number-parser.ts

@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * from https://github.com/dsehnal/CIFTools.js
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+/**
+ * Efficient integer and float parsers.
+ *
+ * For the purposes of parsing numbers from the mmCIF data representations,
+ * up to 4 times faster than JS parseInt/parseFloat.
+ */
+
+/**
+ * Parses an integer from `str[start, end)` after skipping leading space
+ * characters (char code 32 only).
+ */
+export function parseIntSkipLeadingWhitespace(str: string, start: number, end: number) {
+    let first = start;
+    for (; first < end; ++first) {
+        if (str.charCodeAt(first) !== 32) break;
+    }
+    return parseInt(str, first, end);
+}
+
+/**
+ * Parses a base-10 integer from `str[start, end)`.
+ *
+ * An optional leading `-` is honoured; parsing stops at the first non-digit
+ * character. The value is accumulated with 32-bit integer arithmetic, so
+ * results outside the int32 range wrap around.
+ *
+ * @param str string containing the number
+ * @param start index of the first character to read
+ * @param end index one past the last character to read
+ * @returns the parsed integer; 0 when no digits are present (never -0)
+ */
+export function parseInt(str: string, start: number, end: number) {
+    let ret = 0, neg = 1;
+    // only look at the sign when the range is non-empty
+    if (start < end && str.charCodeAt(start) === 45 /* - */) { neg = -1; start++; }
+    for (; start < end; start++) {
+        const c = str.charCodeAt(start) - 48;
+        if (c > 9 || c < 0) break;
+        ret = (10 * ret + c) | 0;
+    }
+    // `| 0` on every exit path keeps the result an int32 and turns -0 into 0
+    return (neg * ret) | 0;
+}
+
+/**
+ * Applies a base-10 exponent read from `str[start, end)` to `main`.
+ * A leading `+` on the exponent is skipped here; a leading `-` is handled by
+ * the integer parser.
+ */
+function parseScientific(main: number, str: string, start: number, end: number) {
+    const exponentStart = str.charCodeAt(start) === 43 /* + */ ? start + 1 : start;
+    const exponent = parseInt(str, exponentStart, end);
+    return main * Math.pow(10.0, exponent);
+}
+
+/**
+ * Parses a float from `str[start, end)` after skipping leading space
+ * characters (char code 32 only).
+ */
+export function parseFloatSkipLeadingWhitespace(str: string, start: number, end: number) {
+    let first = start;
+    for (; first < end; ++first) {
+        if (str.charCodeAt(first) !== 32) break;
+    }
+    return parseFloat(str, first, end);
+}
+
+/**
+ * Parses a decimal number from `str[start, end)`.
+ *
+ * Accepts an optional leading `-`, an integer part, an optional fraction
+ * introduced by `.`, and an optional exponent introduced by `e` or `E`.
+ * Parsing stops at the first character that does not fit this shape and the
+ * value read so far is returned.
+ */
+export function parseFloat(str: string, start: number, end: number) {
+    let sign = 1.0;
+    let pos = start;
+    if (str.charCodeAt(pos) === 45 /* - */) {
+        sign = -1.0;
+        ++pos;
+    }
+
+    // integer part
+    let whole = 0.0;
+    let code = 0;
+    for (; pos < end; ++pos) {
+        code = str.charCodeAt(pos) - 48;
+        if (!(code >= 0 && code < 10)) break;
+        whole = whole * 10 + code;
+    }
+    if (pos >= end) return sign * whole;
+    // 'e' (101 - 48) or 'E' (69 - 48) directly after the integer part
+    if (code === 53 || code === 21) return parseScientific(sign * whole, str, pos + 1, end);
+    // anything other than '.' (46 - 48) ends the number
+    if (code !== -2) return sign * whole;
+
+    // fractional part: collect the digits as an integer and track the divisor
+    let fraction = 0.0, divisor = 1.0;
+    for (++pos; pos < end; ++pos) {
+        code = str.charCodeAt(pos) - 48;
+        if (!(code >= 0 && code < 10)) break;
+        fraction = 10.0 * fraction + code;
+        divisor = 10.0 * divisor;
+    }
+    const value = sign * (whole + fraction / divisor);
+    if (pos < end && (code === 53 || code === 21)) {
+        return parseScientific(value, str, pos + 1, end);
+    }
+    return value;
+}

+ 22 - 0
src/utils/short-string-pool.ts

@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * from https://github.com/dsehnal/CIFTools.js
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+/**
+ * String interning for short strings: the pool hands back the first instance
+ * it saw for any given short string, so equal short strings share one object.
+ * See https://en.wikipedia.org/wiki/String_interning
+ */
+export type ShortStringPool = { [key: string]: string }
+export namespace ShortStringPool {
+    /** Creates an empty pool (an object without a prototype). */
+    export function create(): ShortStringPool {
+        return Object.create(null);
+    }
+
+    /**
+     * Returns the pooled instance of `str`, adding it to the pool on first use.
+     * Strings longer than 6 characters are returned as-is and never pooled.
+     */
+    export function get(pool: ShortStringPool, str: string) {
+        if (str.length > 6) return str;
+        const pooled = pool[str];
+        if (pooled === undefined) {
+            pool[str] = str;
+            return str;
+        }
+        return pooled;
+    }
+}

+ 13 - 0
src/utils/tokenizer-state.ts

@@ -0,0 +1,13 @@
+
+/**
+ * Mutable scanning state shared by the tokenizer helper functions
+ * (eatLine, eatValue, skipWhitespace).
+ */
+export interface TokenizerState {
+    // input text; the helpers read it one char code at a time
+    data: string
+
+    // index into `data` of the next character to examine; advanced by the helpers
+    position: number
+    // exclusive upper bound for `position`; the helpers stop scanning when it is reached
+    // (presumably `data.length` - confirm where the state is created)
+    length: number
+
+    // incremented by the helpers each time a line break is consumed
+    currentLineNumber: number
+    // not written by the helper functions; presumably the start index of the
+    // current token, maintained by the caller - TODO confirm
+    currentTokenStart: number
+    // set by eatLine/eatValue to the index just past the last character of the
+    // token they consumed (terminating whitespace/newline excluded)
+    currentTokenEnd: number
+
+    // optional; not used by the helper functions, meaning is defined by the
+    // individual tokenizer - TODO confirm
+    currentTokenType?: number
+}

+ 35 - 0
src/utils/tokens.ts

@@ -0,0 +1,35 @@
+
+/**
+ * A helper for building a typed array of token indices.
+ *
+ * Tokens are stored as consecutive (start, end) pairs in `indices`;
+ * `count` is the number of slots in use (two per token) and
+ * `indicesLenMinus2` caches `indices.length - 2` for the capacity check.
+ */
+export interface Tokens {
+    indicesLenMinus2: number,
+    count: number,
+    indices: Int32Array
+}
+
+export namespace Tokens {
+    /**
+     * Replaces `tokens.indices` with a larger copy.
+     *
+     * The buffer grows by a factor of 1.61, but never to less than what is
+     * needed for one more (start, end) pair. Without that lower bound, buffers
+     * of length 0, 1 or 2 would not grow enough and the following writes
+     * would be silently dropped by the typed array.
+     */
+    function resize(tokens: Tokens) {
+        const scaled = (1.61 * tokens.indices.length) | 0;
+        const required = tokens.count + 2;
+        const newBuffer = new Int32Array(Math.max(scaled, required));
+        newBuffer.set(tokens.indices);
+        tokens.indices = newBuffer;
+        tokens.indicesLenMinus2 = (newBuffer.length - 2) | 0;
+    }
+
+    /**
+     * Appends one token as a (start, end) pair, growing the buffer when fewer
+     * than two free slots remain.
+     */
+    export function add(tokens: Tokens, start: number, end: number) {
+        if (tokens.count > tokens.indicesLenMinus2) {
+            resize(tokens);
+        }
+        tokens.indices[tokens.count++] = start;
+        tokens.indices[tokens.count++] = end;
+    }
+
+    /**
+     * Creates an empty token buffer.
+     *
+     * @param size initial number of Int32 slots (two slots are used per token)
+     */
+    export function create(size: number): Tokens {
+        return {
+            indicesLenMinus2: (size - 2) | 0,
+            count: 0,
+            indices: new Int32Array(size)
+        }
+    }
+}

+ 14 - 0
tsconfig.json

@@ -0,0 +1,14 @@
+{
+    "compilerOptions": {
+        "target": "es6",
+        "alwaysStrict": true,
+        "noImplicitAny": true,
+        "noImplicitThis": true,
+        "sourceMap": false,
+        "noUnusedLocals": true,
+        "strictNullChecks": true,
+        "lib": [ "es6" ],
+        "outDir": "build/js/src"
+    },
+    "include": [ "src/**/*" ]
+}

+ 67 - 0
tslint.json

@@ -0,0 +1,67 @@
+{
+    "rules": {
+        "array-type": [
+            true,
+            "array"
+        ],
+        "arrow-parens": false,
+        "no-var-keyword": true,
+        "ordered-imports": [false],
+        "trailing-comma": [false],
+        "class-name": true,
+        "comment-format": [
+            true,
+            "check-space"
+        ],
+        "indent": [
+            true,
+            "spaces"
+        ],
+        "no-eval": true,
+        "no-internal-module": true,
+        "no-trailing-whitespace": true,
+        "no-unsafe-finally": true,
+        "one-line": [
+            true,
+            "check-open-brace",
+            "check-whitespace"
+        ],
+        "quotemark": [
+            true,
+            "single"
+        ],
+        "semicolon": [ false ],
+        "triple-equals": [
+            true,
+            "allow-null-check"
+        ],
+        "typedef-whitespace": [
+            true,
+            {
+                "call-signature": "nospace",
+                "index-signature": "nospace",
+                "parameter": "nospace",
+                "property-declaration": "nospace",
+                "variable-declaration": "nospace"
+            }
+        ],
+        "variable-name": [
+            true,
+            "ban-keywords"
+        ],
+        "whitespace": [
+            true,
+            "check-branch",
+            "check-decl",
+            "check-separator",
+            "check-type"
+        ]
+    },
+    "jsRules": {
+        "triple-equals": [
+            true,
+            "allow-null-check"
+        ]
+    },
+    "defaultSeverity": "warning"
+}