Przeglądaj źródła

Merge branch 'master' of https://github.com/molstar/molstar-proto

Alexander Rose 6 lat temu
rodzic
commit
1db00af703

+ 1 - 1
src/mol-io/reader/ccp4/parser.ts

@@ -6,7 +6,7 @@
 
 import { Task, RuntimeContext } from 'mol-task';
 import { Ccp4File, Ccp4Header } from './schema'
-import Result from '../result'
+import { ReaderResult as Result } from '../result'
 import { FileHandle } from '../../common/file-handle';
 
 async function parseInternal(file: FileHandle, ctx: RuntimeContext): Promise<Result<Ccp4File>> {

+ 1 - 1
src/mol-io/reader/cif/binary/parser.ts

@@ -7,7 +7,7 @@
 import * as Data from '../data-model'
 import { EncodedCategory, EncodedFile } from '../../../common/binary-cif'
 import Field from './field'
-import Result from '../../result'
+import { ReaderResult as Result } from '../../result'
 import decodeMsgPack from '../../../common/msgpack/decode'
 import { Task } from 'mol-task'
 

+ 48 - 2
src/mol-io/reader/cif/text/field.ts

@@ -29,8 +29,9 @@ export default function CifTextField(tokens: Tokens, rowCount: number): Data.Cif
     };
 
     const valueKind: Data.CifField['valueKind'] = row => {
-        const s = indices[2 * row];
-        if (indices[2 * row + 1] - s !== 1) return Column.ValueKind.Present;
+        const s = indices[2 * row], l = indices[2 * row + 1] - s;
+        if (l > 1) return Column.ValueKind.Present;
+        if (l === 0) return Column.ValueKind.NotPresent;
         const v = data.charCodeAt(s);
         if (v === 46 /* . */) return Column.ValueKind.NotPresent;
         if (v === 63 /* ? */) return Column.ValueKind.Unknown;
@@ -51,4 +52,49 @@ export default function CifTextField(tokens: Tokens, rowCount: number): Data.Cif
         toIntArray: params => ColumnHelpers.createAndFillArray(rowCount, int, params),
         toFloatArray: params => ColumnHelpers.createAndFillArray(rowCount, float, params)
     }
+}
+
+export function CifTextValueField(values: string[]): Data.CifField {
+    const rowCount = values.length;
+
+    const str: Data.CifField['str'] = row => {
+        const ret = values[row];
+        if (!ret || ret === '.' || ret === '?') return '';
+        return ret;
+    };
+
+    const int: Data.CifField['int'] = row => {
+        const v = values[row];
+        return fastParseInt(v, 0, v.length) || 0;
+    };
+
+    const float: Data.CifField['float'] = row => {
+        const v = values[row];
+        return fastParseFloat(v, 0, v.length) || 0;
+    };
+
+    const valueKind: Data.CifField['valueKind'] = row => {
+        const v = values[row], l = v.length;
+        if (l > 1) return Column.ValueKind.Present;
+        if (l === 0) return Column.ValueKind.NotPresent;
+        const c = v.charCodeAt(0);
+        if (c === 46 /* . */) return Column.ValueKind.NotPresent;
+        if (c === 63 /* ? */) return Column.ValueKind.Unknown;
+        return Column.ValueKind.Present;
+    };
+
+    return {
+        __array: void 0,
+        binaryEncoding: void 0,
+        isDefined: true,
+        rowCount,
+        str,
+        int,
+        float,
+        valueKind,
+        areValuesEqual: (rowA, rowB) => values[rowA] === values[rowB],
+        toStringArray: params => ColumnHelpers.createAndFillArray(rowCount, str, params),
+        toIntArray: params => ColumnHelpers.createAndFillArray(rowCount, int, params),
+        toFloatArray: params => ColumnHelpers.createAndFillArray(rowCount, float, params)
+    }
 }

+ 2 - 2
src/mol-io/reader/cif/text/parser.ts

@@ -25,7 +25,7 @@
 import * as Data from '../data-model'
 import Field from './field'
 import { Tokens, TokenBuilder } from '../../common/text/tokenizer'
-import Result from '../../result'
+import { ReaderResult as Result } from '../../result'
 import { Task, RuntimeContext, chunkedSubtask } from 'mol-task'
 
 /**
@@ -507,7 +507,7 @@ async function handleLoop(tokenizer: TokenizerState, ctx: FrameContext): Promise
     const rowCountEstimate = name === '_atom_site' ? (tokenizer.data.length / 100) | 0 : 32;
     const tokens: Tokens[] = [];
     const fieldCount = fieldNames.length;
-    for (let i = 0; i < fieldCount; i++) tokens[i] = TokenBuilder.create(tokenizer, rowCountEstimate);
+    for (let i = 0; i < fieldCount; i++) tokens[i] = TokenBuilder.create(tokenizer.data, rowCountEstimate);
 
     const state: LoopReadState = {
         fieldCount,

+ 50 - 15
src/mol-io/reader/common/text/tokenizer.ts

@@ -8,7 +8,9 @@
 
 import { chunkedSubtask, RuntimeContext } from 'mol-task'
 
-export interface Tokenizer {
+export { Tokenizer }
+
+interface Tokenizer {
     data: string,
 
     position: number,
@@ -25,7 +27,7 @@ export interface Tokens {
     indices: ArrayLike<number>
 }
 
-export function Tokenizer(data: string): Tokenizer {
+function Tokenizer(data: string): Tokenizer {
     return {
         data,
         position: 0,
@@ -36,7 +38,7 @@ export function Tokenizer(data: string): Tokenizer {
     };
 }
 
-export namespace Tokenizer {
+namespace Tokenizer {
     export function getTokenString(state: Tokenizer) {
         return state.data.substring(state.tokenStart, state.tokenEnd);
     }
@@ -52,7 +54,7 @@ export namespace Tokenizer {
     /**
      * Eat everything until a newline occurs.
      */
-    export function eatLine(state: Tokenizer) {
+    export function eatLine(state: Tokenizer): boolean {
         const { data } = state;
         while (state.position < state.length) {
             switch (data.charCodeAt(state.position)) {
@@ -60,7 +62,7 @@ export namespace Tokenizer {
                     state.tokenEnd = state.position;
                     ++state.position;
                     ++state.lineNumber;
-                    return;
+                    return true;
                 case 13: // \r
                     state.tokenEnd = state.position;
                     ++state.position;
@@ -68,13 +70,14 @@ export namespace Tokenizer {
                     if (data.charCodeAt(state.position) === 10) {
                         ++state.position;
                     }
-                    return;
+                    return true;
                 default:
                     ++state.position;
                     break;
             }
         }
         state.tokenEnd = state.position;
+        return state.tokenStart !== state.tokenEnd;
     }
 
     /** Sets the current token start to the current position */
@@ -85,7 +88,7 @@ export namespace Tokenizer {
     /** Sets the current token start to current position and moves to the next line. */
     export function markLine(state: Tokenizer) {
         state.tokenStart = state.position;
-        eatLine(state);
+        return eatLine(state);
     }
 
     /** Advance the state by the given number of lines and return line starts/ends as tokens. */
@@ -95,15 +98,18 @@ export namespace Tokenizer {
     }
 
     function readLinesChunk(state: Tokenizer, count: number, tokens: Tokens) {
+        let read = 0;
         for (let i = 0; i < count; i++) {
-            markLine(state);
+            if (!markLine(state)) return read;
             TokenBuilder.addUnchecked(tokens, state.tokenStart, state.tokenEnd);
+            read++;
         }
+        return read;
     }
 
     /** Advance the state by the given number of lines and return line starts/ends as tokens. */
     export function readLines(state: Tokenizer, count: number): Tokens {
-        const lineTokens = TokenBuilder.create(state, count * 2);
+        const lineTokens = TokenBuilder.create(state.data, count * 2);
         readLinesChunk(state, count, lineTokens);
         return lineTokens;
     }
@@ -111,7 +117,7 @@ export namespace Tokenizer {
     /** Advance the state by the given number of lines and return line starts/ends as tokens. */
     export async function readLinesAsync(state: Tokenizer, count: number, ctx: RuntimeContext, initialLineCount = 100000): Promise<Tokens> {
         const { length } = state;
-        const lineTokens = TokenBuilder.create(state, count * 2);
+        const lineTokens = TokenBuilder.create(state.data, count * 2);
 
         let linesAlreadyRead = 0;
         await chunkedSubtask(ctx, initialLineCount, state, (chunkSize, state) => {
@@ -124,6 +130,37 @@ export namespace Tokenizer {
         return lineTokens;
     }
 
+    export function readAllLines(data: string) {
+        const state = Tokenizer(data);
+        const tokens = TokenBuilder.create(state.data, Math.max(data.length / 80, 2))
+        while (markLine(state)) {
+            TokenBuilder.add(tokens, state.tokenStart, state.tokenEnd);
+        }
+        return tokens;
+    }
+
+    function readLinesChunkChecked(state: Tokenizer, count: number, tokens: Tokens) {
+        let read = 0;
+        for (let i = 0; i < count; i++) {
+            if (!markLine(state)) return read;
+            TokenBuilder.add(tokens, state.tokenStart, state.tokenEnd);
+            read++;
+        }
+        return read;
+    }
+
+    export async function readAllLinesAsync(data: string, ctx: RuntimeContext, chunkSize = 100000) {
+        const state = Tokenizer(data);
+        const tokens = TokenBuilder.create(state.data, Math.max(data.length / 80, 2));
+
+        await chunkedSubtask(ctx, chunkSize, state, (chunkSize, state) => {
+            readLinesChunkChecked(state, chunkSize, tokens);
+            return state.position < state.length ? chunkSize : 0;
+        }, (ctx, state) => ctx.update({ message: 'Parsing...', current: state.position, max: length }));
+
+        return tokens;
+    }
+
     /**
      * Eat everything until a whitespace/newline occurs.
      */
@@ -234,16 +271,14 @@ export namespace TokenBuilder {
         tokens.count++;
     }
 
-    export function create(tokenizer: Tokenizer, size: number): Tokens {
+    export function create(data: string, size: number): Tokens {
         size = Math.max(10, size)
         return <Builder>{
-            data: tokenizer.data,
+            data,
             indicesLenMinus2: (size - 2) | 0,
             count: 0,
             offset: 0,
             indices: new Uint32Array(size)
         }
     }
-}
-
-export default Tokenizer
+}

+ 2 - 2
src/mol-io/reader/csv/parser.ts

@@ -8,7 +8,7 @@
 import { Tokens, TokenBuilder, Tokenizer } from '../common/text/tokenizer'
 import * as Data from './data-model'
 import Field from './field'
-import Result from '../result'
+import { ReaderResult as Result } from '../result'
 import { Task, RuntimeContext, chunkedSubtask, } from 'mol-task'
 
 const enum CsvTokenType {
@@ -231,7 +231,7 @@ function readRecordsChunks(state: State) {
 
 function addColumn (state: State) {
     state.columnNames.push(Tokenizer.getTokenString(state.tokenizer))
-    state.tokens.push(TokenBuilder.create(state.tokenizer, state.data.length / 80))
+    state.tokens.push(TokenBuilder.create(state.tokenizer.data, state.data.length / 80))
 }
 
 function init(state: State) {

+ 1 - 1
src/mol-io/reader/dsn6/parser.ts

@@ -6,7 +6,7 @@
 
 import { Task, RuntimeContext } from 'mol-task';
 import { Dsn6File, Dsn6Header } from './schema'
-import Result from '../result'
+import { ReaderResult as Result } from '../result'
 import { FileHandle } from '../../common/file-handle';
 
 function parseBrixHeader(str: string): Dsn6Header {

+ 2 - 2
src/mol-io/reader/gro/parser.ts

@@ -6,10 +6,10 @@
  */
 
 import { Column } from 'mol-data/db'
-import Tokenizer from '../common/text/tokenizer'
+import { Tokenizer } from '../common/text/tokenizer'
 import FixedColumn from '../common/text/column/fixed'
 import * as Schema from './schema'
-import Result from '../result'
+import { ReaderResult as Result } from '../result'
 import { Task, RuntimeContext } from 'mol-task'
 
 interface State {

+ 16 - 16
src/mol-io/reader/mol2/parser.ts

@@ -15,7 +15,7 @@ import { Column } from 'mol-data/db'
 import { TokenBuilder, Tokenizer } from '../common/text/tokenizer'
 import TokenColumn from '../common/text/column/token'
 import * as Schema from './schema'
-import Result from '../result'
+import { ReaderResult as Result } from '../result'
 import { Task, RuntimeContext, chunkedSubtask } from 'mol-task'
 
 const { skipWhitespace, eatValue, markLine, getTokenString, readLine } = Tokenizer;
@@ -130,12 +130,12 @@ async function handleAtoms(state: State): Promise<Schema.Mol2Atoms> {
     }
 
     // required columns
-    const atom_idTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
-    const atom_nameTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
-    const xTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
-    const yTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
-    const zTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
-    const atom_typeTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
+    const atom_idTokens = TokenBuilder.create(tokenizer.data, molecule.num_atoms * 2);
+    const atom_nameTokens = TokenBuilder.create(tokenizer.data, molecule.num_atoms * 2);
+    const xTokens = TokenBuilder.create(tokenizer.data, molecule.num_atoms * 2);
+    const yTokens = TokenBuilder.create(tokenizer.data, molecule.num_atoms * 2);
+    const zTokens = TokenBuilder.create(tokenizer.data, molecule.num_atoms * 2);
+    const atom_typeTokens = TokenBuilder.create(tokenizer.data, molecule.num_atoms * 2);
 
     const atom_idTokenColumn = TokenColumn(atom_idTokens);
     const atom_nameTokenColumn = TokenColumn(atom_nameTokens);
@@ -145,10 +145,10 @@ async function handleAtoms(state: State): Promise<Schema.Mol2Atoms> {
     const atom_typeColumn = TokenColumn(atom_typeTokens);
 
     // optional columns
-    const subst_idTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
-    const subst_nameTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
-    const chargeTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
-    const status_bitTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
+    const subst_idTokens = TokenBuilder.create(tokenizer.data, molecule.num_atoms * 2);
+    const subst_nameTokens = TokenBuilder.create(tokenizer.data, molecule.num_atoms * 2);
+    const chargeTokens = TokenBuilder.create(tokenizer.data, molecule.num_atoms * 2);
+    const status_bitTokens = TokenBuilder.create(tokenizer.data, molecule.num_atoms * 2);
 
     const subst_idTokenColumn = TokenColumn(subst_idTokens);
     const subst_nameTokenColumn = TokenColumn(subst_nameTokens);
@@ -257,10 +257,10 @@ async function handleBonds(state: State): Promise<Schema.Mol2Bonds> {
     }
 
     // required columns
-    const bond_idTokens = TokenBuilder.create(tokenizer, molecule.num_bonds * 2);
-    const origin_bond_idTokens = TokenBuilder.create(tokenizer, molecule.num_bonds * 2);
-    const target_bond_idTokens = TokenBuilder.create(tokenizer, molecule.num_bonds * 2);
-    const bondTypeTokens = TokenBuilder.create(tokenizer, molecule.num_bonds * 2);
+    const bond_idTokens = TokenBuilder.create(tokenizer.data, molecule.num_bonds * 2);
+    const origin_bond_idTokens = TokenBuilder.create(tokenizer.data, molecule.num_bonds * 2);
+    const target_bond_idTokens = TokenBuilder.create(tokenizer.data, molecule.num_bonds * 2);
+    const bondTypeTokens = TokenBuilder.create(tokenizer.data, molecule.num_bonds * 2);
 
     const bond_idTokenColumn = TokenColumn(bond_idTokens);
     const origin_bond_idTokenColumn = TokenColumn(origin_bond_idTokens);
@@ -268,7 +268,7 @@ async function handleBonds(state: State): Promise<Schema.Mol2Bonds> {
     const bondTypeTokenColumn = TokenColumn(bondTypeTokens);
 
     // optional columns
-    const status_bitTokens = TokenBuilder.create(tokenizer, molecule.num_bonds * 2);
+    const status_bitTokens = TokenBuilder.create(tokenizer.data, molecule.num_bonds * 2);
     const status_bitTokenColumn = TokenColumn(status_bitTokens);
     const undefStr = Column.Undefined(molecule.num_bonds, Column.Schema.str);
 

+ 1 - 1
src/mol-io/reader/obj/parser.ts

@@ -4,7 +4,7 @@
  * @author Alexander Rose <alexander.rose@weirdbyte.de>
  */
 
-import Result from '../result'
+import { ReaderResult as Result } from '../result'
 import { Task, RuntimeContext } from 'mol-task'
 import { Mesh } from 'mol-geo/geometry/mesh/mesh';
 

+ 14 - 0
src/mol-io/reader/pdb/parser.ts

@@ -0,0 +1,14 @@
+/**
+ * Copyright (c) 2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import { PdbFile } from './schema';
+import { Task } from 'mol-task';
+import { ReaderResult } from '../result';
+import { Tokenizer } from '../common/text/tokenizer';
+
+export function parsePDB(data: string, id?: string): Task<ReaderResult<PdbFile>> {
+    return Task.create('Parse PDB', async ctx => ReaderResult.success({ id, lines: await Tokenizer.readAllLinesAsync(data, ctx) }));
+}

+ 12 - 0
src/mol-io/reader/pdb/schema.ts

@@ -0,0 +1,12 @@
+/**
+ * Copyright (c) 2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import { Tokens } from '../common/text/tokenizer';
+
+export interface PdbFile {
+    id?: string,
+    lines: Tokens
+}

+ 280 - 0
src/mol-io/reader/pdb/to-cif.ts

@@ -0,0 +1,280 @@
+/**
+ * Copyright (c) 2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import { CifField, CifCategory } from '../cif';
+import { mmCIF_Schema } from '../cif/schema/mmcif';
+import CifTextField, { CifTextValueField } from '../cif/text/field';
+import { TokenBuilder, Tokenizer } from '../common/text/tokenizer';
+import { PdbFile } from './schema';
+import { CifFile } from '../cif/data-model';
+import { substringStartsWith } from 'mol-util/string';
+import { Task } from 'mol-task';
+
+function toCategory(name: string, fields: { [name: string]: CifField | undefined }, rowCount: number): CifCategory {
+    return {
+        name,
+        fieldNames: Object.keys(fields),
+        rowCount,
+        getField(f: string) {
+            return fields[f];
+        }
+    }
+}
+
+function _entity(): { [K in keyof mmCIF_Schema['entity']]?: CifField } {
+    return {
+        id: CifTextValueField(['1', '2', '3']),
+        type: CifTextValueField(['polymer', 'non-polymer', 'water'])
+    }
+}
+
+function atom_site_template(data: string, count: number) {
+    const str = () => new Array(count) as string[];
+    const ts = () => TokenBuilder.create(data, 2 * count);
+    return {
+        index: 0,
+        count,
+        group_PDB: ts(),
+        id: str(),
+        auth_atom_id: ts(),
+        label_alt_id: ts(),
+        auth_comp_id: ts(),
+        auth_asym_id: ts(),
+        auth_seq_id: ts(),
+        pdbx_PDB_ins_code: ts(),
+        Cartn_x: ts(),
+        Cartn_y: ts(),
+        Cartn_z: ts(),
+        occupancy: ts(),
+        B_iso_or_equiv: ts(),
+        type_symbol: ts(),
+        pdbx_PDB_model_num: str(),
+        label_entity_id: str()
+    };
+}
+
+function _atom_site(sites: AtomSiteTemplate): { [K in keyof mmCIF_Schema['atom_site']]?: CifField } {
+    const auth_asym_id = CifTextField(sites.auth_asym_id, sites.count);
+    const auth_atom_id = CifTextField(sites.auth_atom_id, sites.count);
+    const auth_comp_id = CifTextField(sites.auth_comp_id, sites.count);
+    const auth_seq_id = CifTextField(sites.auth_seq_id, sites.count);
+
+    return {
+        auth_asym_id,
+        auth_atom_id,
+        auth_comp_id,
+        auth_seq_id,
+        B_iso_or_equiv: CifTextField(sites.B_iso_or_equiv, sites.count),
+        Cartn_x: CifTextField(sites.Cartn_x, sites.count),
+        Cartn_y: CifTextField(sites.Cartn_y, sites.count),
+        Cartn_z: CifTextField(sites.Cartn_z, sites.count),
+        group_PDB: CifTextField(sites.group_PDB, sites.count),
+        id: CifTextValueField(sites.id),
+
+        label_alt_id: CifTextField(sites.label_alt_id, sites.count),
+
+        label_asym_id: auth_asym_id,
+        label_atom_id: auth_atom_id,
+        label_comp_id: auth_comp_id,
+        label_seq_id: auth_seq_id,
+        label_entity_id: CifTextValueField(sites.label_entity_id),
+
+        occupancy: CifTextField(sites.occupancy, sites.count),
+        type_symbol: CifTextField(sites.type_symbol, sites.count),
+
+        pdbx_PDB_ins_code: CifTextField(sites.pdbx_PDB_ins_code, sites.count),
+        pdbx_PDB_model_num: CifTextValueField(sites.pdbx_PDB_model_num)
+    };
+}
+
+const WaterNames = new Set([ 'SOL', 'WAT', 'HOH', 'H2O', 'W', 'DOD', 'D3O', 'TIP3', 'TIP4', 'SPC' ]);
+
+function getEntityId(residueName: string, isHet: boolean) {
+    if (isHet) {
+        if (WaterNames.has(residueName)) return '3';
+        return '2';
+    }
+    return '1';
+}
+
+function addAtom(sites: AtomSiteTemplate, model: string, data: Tokenizer, s: number, e: number, isHet: boolean) {
+    const { data: str } = data;
+    let startPos = s;
+    let start = s;
+    const end = e;
+    const length = end - start;
+
+    // TODO: filter invalid atoms
+
+    // COLUMNS        DATA TYPE       CONTENTS
+    // --------------------------------------------------------------------------------
+    // 1 -  6        Record name     "ATOM  "
+    Tokenizer.trim(data, start, start + 6);
+    TokenBuilder.add(sites.group_PDB, data.tokenStart, data.tokenEnd);
+
+    // 7 - 11        Integer         Atom serial number.
+    // TODO: support HEX
+    start = startPos + 6;
+    Tokenizer.trim(data, start, start + 5);
+    sites.id[sites.index] = data.data.substring(data.tokenStart, data.tokenEnd);
+
+    // 13 - 16        Atom            Atom name.
+    start = startPos + 12;
+    Tokenizer.trim(data, start, start + 4);
+    TokenBuilder.add(sites.auth_atom_id, data.tokenStart, data.tokenEnd);
+
+    // 17             Character       Alternate location indicator.
+    if (str.charCodeAt(startPos + 16) === 32) { // ' '
+        TokenBuilder.add(sites.label_alt_id, 0, 0);
+    } else {
+        TokenBuilder.add(sites.label_alt_id, startPos + 16, startPos + 17);
+    }
+
+    // 18 - 20        Residue name    Residue name.
+    start = startPos + 17;
+    Tokenizer.trim(data, start, start + 3);
+    TokenBuilder.add(sites.auth_comp_id, data.tokenStart, data.tokenEnd);
+    const residueName = str.substring(data.tokenStart, data.tokenEnd);
+
+    // 22             Character       Chain identifier.
+    TokenBuilder.add(sites.auth_asym_id, startPos + 21, startPos + 22);
+
+    // 23 - 26        Integer         Residue sequence number.
+    // TODO: support HEX
+    start = startPos + 22;
+    Tokenizer.trim(data, start, start + 4);
+    TokenBuilder.add(sites.auth_seq_id, data.tokenStart, data.tokenEnd);
+
+    // 27             AChar           Code for insertion of residues.
+    if (str.charCodeAt(startPos + 26) === 32) { // ' '
+        TokenBuilder.add(sites.label_alt_id, 0, 0);
+    } else {
+        TokenBuilder.add(sites.label_alt_id, startPos + 26, startPos + 27);
+    }
+
+    // 31 - 38        Real(8.3)       Orthogonal coordinates for X in Angstroms.
+    start = startPos + 30;
+    Tokenizer.trim(data, start, start + 8);
+    TokenBuilder.add(sites.Cartn_x, data.tokenStart, data.tokenEnd);
+
+    // 39 - 46        Real(8.3)       Orthogonal coordinates for Y in Angstroms.
+    start = startPos + 38;
+    Tokenizer.trim(data, start, start + 8);
+    TokenBuilder.add(sites.Cartn_y, data.tokenStart, data.tokenEnd);
+
+    // 47 - 54        Real(8.3)       Orthogonal coordinates for Z in Angstroms.
+    start = startPos + 46;
+    Tokenizer.trim(data, start, start + 8);
+    TokenBuilder.add(sites.Cartn_z, data.tokenStart, data.tokenEnd);
+
+    // 55 - 60        Real(6.2)       Occupancy.
+    start = startPos + 54;
+    Tokenizer.trim(data, start, start + 6);
+    TokenBuilder.add(sites.occupancy, data.tokenStart, data.tokenEnd);
+
+    // 61 - 66        Real(6.2)       Temperature factor (Default = 0.0).
+    if (length >= 66) {
+        start = startPos + 60;
+        Tokenizer.trim(data, start, start + 6);
+        TokenBuilder.add(sites.B_iso_or_equiv, data.tokenStart, data.tokenEnd);
+    } else {
+        TokenBuilder.add(sites.label_alt_id, 0, 0);
+    }
+
+    // 73 - 76        LString(4)      Segment identifier, left-justified.
+    // ignored
+
+    // 77 - 78        LString(2)      Element symbol, right-justified.
+    if (length >= 78) {
+        start = startPos + 76;
+        Tokenizer.trim(data, start, start + 2);
+
+        if (data.tokenStart < data.tokenEnd) {
+            TokenBuilder.add(sites.type_symbol, data.tokenStart, data.tokenEnd);
+        } else {
+            // "guess" the symbol
+            TokenBuilder.add(sites.type_symbol, startPos + 12, startPos + 13);
+        }
+    } else {
+        TokenBuilder.add(sites.type_symbol, startPos + 12, startPos + 13);
+    }
+
+    sites.label_entity_id[sites.index] = getEntityId(residueName, isHet);
+    sites.pdbx_PDB_model_num[sites.index] = model;
+
+    sites.index++;
+}
+
+type AtomSiteTemplate = typeof atom_site_template extends (...args: any) => infer T ? T : never
+
+async function pdbToMmCIF(pdb: PdbFile): Promise<CifFile> {
+    const { lines } = pdb;
+    const { data, indices } = lines;
+    const tokenizer = Tokenizer(data);
+
+    // Count the atoms
+    let atomCount = 0;
+    for (let i = 0, _i = lines.count; i < _i; i++) {
+        const s = indices[2 * i], e = indices[2 * i + 1];
+        switch (data[s]) {
+            case 'A':
+                if (substringStartsWith(data, s, e, 'ATOM  ')) atomCount++;
+                break;
+            case 'H':
+                if (substringStartsWith(data, s, e, 'HETATM')) atomCount++;
+                break;
+        }
+    }
+
+    const atom_site = atom_site_template(data, atomCount);
+
+    let modelNum = 0, modelStr = '';
+
+    for (let i = 0, _i = lines.count; i < _i; i++) {
+        const s = indices[2 * i], e = indices[2 * i + 1];
+        switch (data[s]) {
+            case 'A':
+                if (!substringStartsWith(data, s, e, 'ATOM  ')) continue;
+                if (!modelNum) { modelNum++; modelStr = '' + modelNum; }
+                addAtom(atom_site, modelStr, tokenizer, s, e, false);
+                break;
+            case 'H':
+                if (!substringStartsWith(data, s, e, 'HETATM')) continue;
+                if (!modelNum) { modelNum++; modelStr = '' + modelNum; }
+                addAtom(atom_site, modelStr, tokenizer, s, e, true);
+                break;
+            case 'M':
+                if (substringStartsWith(data, s, e, 'MODEL ')) {
+                    modelNum++;
+                    modelStr = '' + modelNum;
+                }
+                break;
+
+        }
+    }
+
+    const categories = {
+        entity: toCategory('entity', _entity(), 3),
+        atom_site: toCategory('atom_site', _atom_site(atom_site), atomCount)
+    }
+
+    return {
+        name: pdb.id,
+        blocks: [{
+            saveFrames: [],
+            header: pdb.id || 'PDB',
+            categoryNames: Object.keys(categories),
+            categories
+        }]
+    };
+}
+
+export function convertPDBtoMmCif(pdb: PdbFile): Task<CifFile> {
+    return Task.create('Convert PDB to mmCIF', async ctx => {
+        await ctx.update('Converting to mmCIF...');
+        return pdbToMmCIF(pdb);
+    });
+}

+ 17 - 17
src/mol-io/reader/result.ts

@@ -5,7 +5,7 @@
  * @author David Sehnal <david.sehnal@gmail.com>
  */
 
-type ReaderResult<T> = Success<T> | Error
+type ReaderResult<T> = ReaderResult.Success<T> | ReaderResult.Error
 
 namespace ReaderResult {
     export function error<T>(message: string, line = -1): ReaderResult<T> {
@@ -15,28 +15,28 @@ namespace ReaderResult {
     export function success<T>(result: T, warnings: string[] = []): ReaderResult<T> {
         return new Success<T>(result, warnings);
     }
-}
 
-export class Error {
-    isError: true = true;
+    export class Error {
+        isError: true = true;
 
-    toString() {
-        if (this.line >= 0) {
-            return `[Line ${this.line}] ${this.message}`;
+        toString() {
+            if (this.line >= 0) {
+                return `[Line ${this.line}] ${this.message}`;
+            }
+            return this.message;
         }
-        return this.message;
-    }
 
-    constructor(
-        public message: string,
-        public line: number) {
+        constructor(
+            public message: string,
+            public line: number) {
+        }
     }
-}
 
-export class Success<T> {
-    isError: false = false;
+    export class Success<T> {
+        isError: false = false;
 
-    constructor(public result: T, public warnings: string[]) { }
+        constructor(public result: T, public warnings: string[]) { }
+    }
 }
 
-export default ReaderResult
+export { ReaderResult }

+ 4 - 3
src/mol-model/structure/model/format.ts

@@ -7,6 +7,7 @@
 // import { File as GroFile } from 'mol-io/reader/gro/schema'
 import { mmCIF_Database } from 'mol-io/reader/cif/schema/mmcif'
 import CIF, { CifFrame } from 'mol-io/reader/cif';
+import { PdbFile } from 'mol-io/reader/pdb/schema';
 
 type Format =
     // | Format.gro
@@ -15,10 +16,10 @@ type Format =
 namespace Format {
     // export interface gro { kind: 'gro', data: GroFile }
     export interface mmCIF { kind: 'mmCIF', data: mmCIF_Database, frame: CifFrame }
+    export function mmCIF(frame: CifFrame, data?: mmCIF_Database): mmCIF { return { kind: 'mmCIF', data: data || CIF.schema.mmCIF(frame), frame }; }
 
-    export function mmCIF(frame: CifFrame, data?: mmCIF_Database): mmCIF {
-        return { kind: 'mmCIF', data: data || CIF.schema.mmCIF(frame), frame };
-    }
+    export interface PDB { kind: 'PDB', data: PdbFile }
+    export function PDB(data: PdbFile) { return { kind: 'PDB', data }; }
 }
 
 export default Format

+ 269 - 0
src/mol-model/structure/model/formats/pdb.ts

@@ -0,0 +1,269 @@
+/**
+ * Copyright (c) 2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import Format from '../format';
+import { Model } from '../model';
+import { Task } from 'mol-task';
+import { PdbFile } from 'mol-io/reader/pdb/schema';
+import from_mmCIF from './mmcif';
+import { mmCIF_Schema } from 'mol-io/reader/cif/schema/mmcif';
+import { substringStartsWith } from 'mol-util/string';
+import { TokenBuilder, Tokenizer } from 'mol-io/reader/common/text/tokenizer';
+import { CifField, CifCategory } from 'mol-io/reader/cif';
+import CifTextField, { CifTextValueField } from 'mol-io/reader/cif/text/field';
+
+/**
+ * Wraps a plain field map as a CifCategory: category name, the field names in
+ * insertion order, the row count, and a lookup that returns undefined for
+ * fields absent from `fields`.
+ */
+function toCategory(name: string, fields: { [name: string]: CifField | undefined }, rowCount: number): CifCategory {
+    return {
+        name,
+        fieldNames: Object.keys(fields),
+        rowCount,
+        getField(f: string) {
+            return fields[f];
+        }
+    }
+}
+
+// Minimal fixed `entity` category: id '1' = polymer, '2' = non-polymer, '3' = water.
+// NOTE(review): addAtom currently assigns every atom label_entity_id '1', so only
+// the first entity is actually referenced — TODO confirm intended.
+function _entity(): { [K in keyof mmCIF_Schema['entity']]?: CifField } {
+    return {
+        id: CifTextValueField(['1', '2', '3']),
+        type: CifTextValueField(['polymer', 'non-polymer', 'water'])
+    }
+}
+
+/**
+ * Creates the mutable column builders for one mmCIF `atom_site` category with
+ * `count` rows. Token columns (`ts`) record [start, end) offsets into `data`;
+ * plain string columns (`str`) are filled by appending in `addAtom`.
+ */
+function atom_site_template(data: string, count: number) {
+    // FIX: was `new Array(count) as string[]`. addAtom appends to these arrays
+    // (`arr[arr.length] = ...` and `push`), so a pre-sized array left `count`
+    // holes in front of the real values; the columns must start empty.
+    const str = () => [] as string[];
+    const ts = () => TokenBuilder.create(data, 2 * count);
+    return {
+        count,
+        group_PDB: ts(),
+        id: str(),
+        auth_atom_id: ts(),
+        label_alt_id: ts(),
+        auth_comp_id: ts(),
+        auth_asym_id: ts(),
+        auth_seq_id: ts(),
+        pdbx_PDB_ins_code: ts(),
+        Cartn_x: ts(),
+        Cartn_y: ts(),
+        Cartn_z: ts(),
+        occupancy: ts(),
+        B_iso_or_equiv: ts(),
+        type_symbol: ts(),
+        pdbx_PDB_model_num: str(),
+        label_entity_id: str()
+    };
+}
+
+/**
+ * Converts the filled atom_site template into mmCIF `atom_site` CifFields.
+ * PDB files carry only "author" identifiers, so the label_* fields alias the
+ * corresponding auth_* fields rather than being parsed separately.
+ */
+function _atom_site(sites: AtomSiteTemplate): { [K in keyof mmCIF_Schema['atom_site']]?: CifField } {
+    const auth_asym_id = CifTextField(sites.auth_asym_id, sites.count);
+    const auth_atom_id = CifTextField(sites.auth_atom_id, sites.count);
+    const auth_comp_id = CifTextField(sites.auth_comp_id, sites.count);
+    const auth_seq_id = CifTextField(sites.auth_seq_id, sites.count);
+
+    return {
+        auth_asym_id,
+        auth_atom_id,
+        auth_comp_id,
+        auth_seq_id,
+        B_iso_or_equiv: CifTextField(sites.B_iso_or_equiv, sites.count),
+        Cartn_x: CifTextField(sites.Cartn_x, sites.count),
+        Cartn_y: CifTextField(sites.Cartn_y, sites.count),
+        Cartn_z: CifTextField(sites.Cartn_z, sites.count),
+        group_PDB: CifTextField(sites.group_PDB, sites.count),
+        id: CifTextValueField(sites.id),
+
+        label_alt_id: CifTextField(sites.label_alt_id, sites.count),
+
+        // label_* identifiers are not present in PDB records; reuse auth_*.
+        label_asym_id: auth_asym_id,
+        label_atom_id: auth_atom_id,
+        label_comp_id: auth_comp_id,
+        label_seq_id: auth_seq_id,
+        label_entity_id: CifTextValueField(sites.label_entity_id),
+
+        occupancy: CifTextField(sites.occupancy, sites.count),
+        type_symbol: CifTextField(sites.type_symbol, sites.count),
+
+        pdbx_PDB_ins_code: CifTextField(sites.pdbx_PDB_ins_code, sites.count),
+        pdbx_PDB_model_num: CifTextValueField(sites.pdbx_PDB_model_num)
+    };
+}
+
+/**
+ * Parses one ATOM/HETATM record occupying data[s..e) (fixed-column PDB layout)
+ * and appends one row to every column of `sites`. `model` is the current
+ * pdbx_PDB_model_num string. Tokenizer.trim sets data.tokenStart/tokenEnd to the
+ * whitespace-trimmed sub-range, which is then recorded via TokenBuilder.add.
+ */
+function addAtom(sites: AtomSiteTemplate, model: string, data: Tokenizer, s: number, e: number) {
+    const { data: str } = data;
+    let startPos = s;
+    let start = s;
+    const end = e;
+    const length = end - start;
+
+    // TODO: filter invalid atoms
+
+    // COLUMNS        DATA TYPE       CONTENTS
+    // --------------------------------------------------------------------------------
+    // 1 -  6        Record name     "ATOM  "
+    Tokenizer.trim(data, start, start + 6);
+    TokenBuilder.add(sites.group_PDB, data.tokenStart, data.tokenEnd);
+
+    // 7 - 11        Integer         Atom serial number.
+    // TODO: support HEX
+    start = startPos + 6;
+    Tokenizer.trim(data, start, start + 5);
+    sites.id[sites.id.length] = data.data.substring(data.tokenStart, data.tokenEnd);
+
+    // 13 - 16        Atom            Atom name.
+    start = startPos + 12;
+    Tokenizer.trim(data, start, start + 4);
+    TokenBuilder.add(sites.auth_atom_id, data.tokenStart, data.tokenEnd);
+
+    // 17             Character       Alternate location indicator.
+    if (str.charCodeAt(startPos + 16) === 32) { // ' '
+        TokenBuilder.add(sites.label_alt_id, 0, 0);
+    } else {
+        TokenBuilder.add(sites.label_alt_id, startPos + 16, startPos + 17);
+    }
+
+    // 18 - 20        Residue name    Residue name.
+    start = startPos + 17;
+    Tokenizer.trim(data, start, start + 3);
+    TokenBuilder.add(sites.auth_comp_id, data.tokenStart, data.tokenEnd);
+
+    // 22             Character       Chain identifier.
+    TokenBuilder.add(sites.auth_asym_id, startPos + 21, startPos + 22);
+
+    // 23 - 26        Integer         Residue sequence number.
+    // TODO: support HEX
+    start = startPos + 22;
+    Tokenizer.trim(data, start, start + 4);
+    TokenBuilder.add(sites.auth_seq_id, data.tokenStart, data.tokenEnd);
+
+    // 27             AChar           Code for insertion of residues.
+    // FIX: was appending to sites.label_alt_id (copy-paste from column 17),
+    // which both duplicated alt-loc tokens and left pdbx_PDB_ins_code empty.
+    if (str.charCodeAt(startPos + 26) === 32) { // ' '
+        TokenBuilder.add(sites.pdbx_PDB_ins_code, 0, 0);
+    } else {
+        TokenBuilder.add(sites.pdbx_PDB_ins_code, startPos + 26, startPos + 27);
+    }
+
+    // 31 - 38        Real(8.3)       Orthogonal coordinates for X in Angstroms.
+    start = startPos + 30;
+    Tokenizer.trim(data, start, start + 8);
+    TokenBuilder.add(sites.Cartn_x, data.tokenStart, data.tokenEnd);
+
+    // 39 - 46        Real(8.3)       Orthogonal coordinates for Y in Angstroms.
+    start = startPos + 38;
+    Tokenizer.trim(data, start, start + 8);
+    TokenBuilder.add(sites.Cartn_y, data.tokenStart, data.tokenEnd);
+
+    // 47 - 54        Real(8.3)       Orthogonal coordinates for Z in Angstroms.
+    start = startPos + 46;
+    Tokenizer.trim(data, start, start + 8);
+    TokenBuilder.add(sites.Cartn_z, data.tokenStart, data.tokenEnd);
+
+    // 55 - 60        Real(6.2)       Occupancy.
+    start = startPos + 54;
+    Tokenizer.trim(data, start, start + 6);
+    TokenBuilder.add(sites.occupancy, data.tokenStart, data.tokenEnd);
+
+    // 61 - 66        Real(6.2)       Temperature factor (Default = 0.0).
+    if (length >= 66) {
+        start = startPos + 60;
+        Tokenizer.trim(data, start, start + 6);
+        TokenBuilder.add(sites.B_iso_or_equiv, data.tokenStart, data.tokenEnd);
+    } else {
+        // FIX: the empty placeholder token was appended to sites.label_alt_id,
+        // leaving B_iso_or_equiv one row short for truncated records.
+        TokenBuilder.add(sites.B_iso_or_equiv, 0, 0);
+    }
+
+    // 73 - 76        LString(4)      Segment identifier, left-justified.
+    // ignored
+
+    // 77 - 78        LString(2)      Element symbol, right-justified.
+    if (length >= 78) {
+        start = startPos + 76;
+        Tokenizer.trim(data, start, start + 2);
+
+        if (data.tokenStart < data.tokenEnd) {
+            TokenBuilder.add(sites.type_symbol, data.tokenStart, data.tokenEnd);
+        } else {
+            // "guess" the symbol from the first character of the atom name
+            TokenBuilder.add(sites.type_symbol, startPos + 12, startPos + 13);
+        }
+    } else {
+        TokenBuilder.add(sites.type_symbol, startPos + 12, startPos + 13);
+    }
+
+    // TODO
+    sites.label_entity_id.push('1');
+    sites.pdbx_PDB_model_num.push(model);
+
+}
+
+type AtomSiteTemplate = typeof atom_site_template extends (...args: any) => infer T ? T : never
+
+/**
+ * Converts a tokenized PDB file into an mmCIF-style Format.mmCIF by first
+ * counting ATOM/HETATM records (to size the column builders) and then filling
+ * the atom_site category in a second pass. MODEL records advance
+ * pdbx_PDB_model_num; the first atom seen without a MODEL record implies model 1.
+ */
+async function pdbToMmCIF(pdb: PdbFile): Promise<Format.mmCIF> {
+    const { lines } = pdb;
+    const { data, indices } = lines;
+    const tokenizer = Tokenizer(data);
+
+    // Count the atoms
+    let atomCount = 0;
+    for (let i = 0, _i = lines.count; i < _i; i++) {
+        const s = indices[2 * i], e = indices[2 * i + 1];
+        switch (data[s]) {
+            case 'A':
+                if (substringStartsWith(data, s, e, 'ATOM  ')) atomCount++;
+                break;
+            case 'H':
+                // FIX: was `!substringStartsWith(...)` — that counted every
+                // 'H' line that is NOT a HETATM record (e.g. HEADER, HELIX)
+                // and skipped real HETATM atoms, so atomCount disagreed with
+                // the fill loop below.
+                if (substringStartsWith(data, s, e, 'HETATM')) atomCount++;
+                break;
+        }
+    }
+
+    const atom_site = atom_site_template(data, atomCount);
+
+    let modelNum = 0, modelStr = '';
+
+    for (let i = 0, _i = lines.count; i < _i; i++) {
+        const s = indices[2 * i], e = indices[2 * i + 1];
+        switch (data[s]) {
+            case 'A':
+                if (!substringStartsWith(data, s, e, 'ATOM  ')) continue;
+                if (!modelNum) { modelNum++; modelStr = '' + modelNum; }
+                addAtom(atom_site, modelStr, tokenizer, s, e);
+                break;
+            case 'H':
+                if (!substringStartsWith(data, s, e, 'HETATM')) continue;
+                if (!modelNum) { modelNum++; modelStr = '' + modelNum; }
+                addAtom(atom_site, modelStr, tokenizer, s, e);
+                break;
+            case 'M':
+                if (substringStartsWith(data, s, e, 'MODEL ')) {
+                    modelNum++;
+                    modelStr = '' + modelNum;
+                }
+                break;
+
+        }
+    }
+
+    const categories = {
+        entity: toCategory('entity', _entity(), 3),
+        atom_site: toCategory('atom_site', _atom_site(atom_site), atomCount)
+    }
+
+    return Format.mmCIF({
+        header: pdb.id || 'PDB',
+        categoryNames: Object.keys(categories),
+        categories
+    });
+}
+
+/**
+ * Builds Model(s) from a parsed PDB file by converting it to mmCIF and then
+ * reusing the existing mmCIF model-building pipeline.
+ */
+function buildModels(format: Format.PDB): Task<ReadonlyArray<Model>> {
+    return Task.create('Create PDB Model', async ctx => {
+        await ctx.update('Converting to mmCIF...');
+        const cif = await pdbToMmCIF(format.data);
+        return from_mmCIF(cif).runInContext(ctx);
+    });
+}
+
+export default buildModels;

+ 15 - 8
src/mol-plugin/state/actions/basic.ts

@@ -41,6 +41,7 @@ const DownloadStructure = StateAction.build({
             }, { isFlat: true }),
             'url': PD.Group({
                 url: PD.Text(''),
+                format: PD.Select('cif', [['cif', 'CIF'], ['pdb', 'PDB']]),
                 isBinary: PD.Boolean(false),
                 supportProps: PD.Boolean(false)
             }, { isFlat: true })
@@ -60,7 +61,7 @@ const DownloadStructure = StateAction.build({
 
     switch (src.name) {
         case 'url':
-            downloadParams = src.params;
+            downloadParams = { url: src.params.url, isBinary: src.params.isBinary };
             break;
         case 'pdbe-updated':
             downloadParams = { url: `https://www.ebi.ac.uk/pdbe/static/entry/${src.params.id.toLowerCase()}_updated.cif`, isBinary: false, label: `PDBe: ${src.params.id}` };
@@ -75,7 +76,8 @@ const DownloadStructure = StateAction.build({
     }
 
     const data = b.toRoot().apply(StateTransforms.Data.Download, downloadParams);
-    return state.updateTree(createStructureTree(ctx, data, params.source.params.supportProps));
+    const traj = createModelTree(data, src.name === 'url' ? src.params.format : 'cif');
+    return state.updateTree(createStructureTree(ctx, traj, params.source.params.supportProps));
 });
 
 export const OpenStructure = StateAction.build({
@@ -85,15 +87,20 @@ export const OpenStructure = StateAction.build({
 })(({ params, state }, ctx: PluginContext) => {
     const b = state.build();
     const data = b.toRoot().apply(StateTransforms.Data.ReadFile, { file: params.file, isBinary: /\.bcif$/i.test(params.file.name) });
-    return state.updateTree(createStructureTree(ctx, data, false));
+    const traj = createModelTree(data, 'cif');
+    return state.updateTree(createStructureTree(ctx, traj, false));
 });
 
-function createStructureTree(ctx: PluginContext, b: StateTreeBuilder.To<PluginStateObject.Data.Binary | PluginStateObject.Data.String>, supportProps: boolean): StateTree {
-    let root = b
-        .apply(StateTransforms.Data.ParseCif)
-        .apply(StateTransforms.Model.TrajectoryFromMmCif)
-        .apply(StateTransforms.Model.ModelFromTrajectory, { modelIndex: 0 });
+// Builds the parse sub-tree for downloaded/opened structure data and selects the
+// first model. For 'pdb' input the string is first converted to mmCIF so both
+// formats share the TrajectoryFromMmCif pipeline.
+function createModelTree(b: StateTreeBuilder.To<PluginStateObject.Data.Binary | PluginStateObject.Data.String>, format: 'pdb' | 'cif' = 'cif') {
+    const parsed = format === 'cif'
+        ? b.apply(StateTransforms.Data.ParseCif).apply(StateTransforms.Model.TrajectoryFromMmCif)
+        : b.apply(StateTransforms.Data.ConvertPDBtoMmCif).apply(StateTransforms.Model.TrajectoryFromMmCif);
+
+    return parsed.apply(StateTransforms.Model.ModelFromTrajectory, { modelIndex: 0 });
+}
 
+function createStructureTree(ctx: PluginContext, b: StateTreeBuilder.To<PluginStateObject.Molecule.Model>, supportProps: boolean): StateTree {
+    let root = b;
     if (supportProps) {
         root = root.apply(StateTransforms.Model.CustomModelProperties);
     }

+ 20 - 0
src/mol-plugin/state/transforms/data.ts

@@ -15,6 +15,8 @@ import { Transformer } from 'mol-state';
 import { readFromFile } from 'mol-util/data-source';
 import * as CCP4 from 'mol-io/reader/ccp4/parser'
 import * as DSN6 from 'mol-io/reader/dsn6/parser'
+import { parsePDB } from 'mol-io/reader/pdb/parser';
+import { convertPDBtoMmCif } from 'mol-io/reader/pdb/to-cif';
 
 export { Download }
 type Download = typeof Download
@@ -95,6 +97,24 @@ const ParseCif = PluginStateTransform.BuiltIn({
     }
 });
 
+export { ConvertPDBtoMmCif }
+type ConvertPDBtoMmCif = typeof ConvertPDBtoMmCif
+/** Parses a PDB string and converts it into a CIF state object via the mmCIF mapping. */
+const ConvertPDBtoMmCif = PluginStateTransform.BuiltIn({
+    name: 'convert-pdb-to-mmcif',
+    display: { name: 'Convert PDB string to mmCIF' },
+    from: [SO.Data.String],
+    to: SO.Format.Cif
+})({
+    apply({ a }) {
+        // FIX: task was mislabeled 'Parse CIF' (copied from the ParseCif transform).
+        return Task.create('Convert PDB to mmCIF', async ctx => {
+            const parsed = await parsePDB(a.data).runInContext(ctx);
+            if (parsed.isError) throw new Error(parsed.message);
+            const cif = await convertPDBtoMmCif(parsed.result).runInContext(ctx);
+            return new SO.Format.Cif(cif);
+        });
+    }
+});
+
 export { ParseCcp4 }
 type ParseCcp4 = typeof ParseCcp4
 const ParseCcp4 = PluginStateTransform.BuiltIn({

+ 9 - 0
src/mol-util/string.ts

@@ -37,4 +37,13 @@ export function snakeCaseToWords(str: string) {
 
 export function stringToWords(str: string) {
     return capitalize(splitCamelCase(splitSnakeCase(str)))
+}
+
+/**
+ * Tests whether str.substring(start, end) begins with `target`, comparing
+ * char codes in place so no substring is allocated. Returns false when the
+ * window [start, end) is shorter than `target`.
+ */
+export function substringStartsWith(str: string, start: number, end: number, target: string) {
+    let len = target.length;
+    if (len > end - start) return false;
+    for (let i = 0; i < len; i++) {
+        if (str.charCodeAt(start + i) !== target.charCodeAt(i)) return false;
+    }
+    return true;
}