Browse Source

wip pdb parser

David Sehnal 6 years ago
parent
commit
dc35fa2d5f

+ 17 - 0
src/mol-io/reader/cif/data-model.ts

@@ -57,6 +57,19 @@ export namespace CifCategory {
     /** Create a category with the given name that has no rows and no fields. */
     export function empty(name: string): CifCategory {
         // No fields exist, so every lookup yields undefined.
         const getField = (_fieldName: string) => undefined;
         return { rowCount: 0, name, fieldNames: [], getField };
     };
+
+    /** For each key of S, an optional CifField. */
+    export type SomeFields<S> = { [K in keyof S]?: CifField };
+    /** For each key of S, a CifField. */
+    export type Fields<S> = { [K in keyof S]: CifField };
+
+    /**
+     * Build a category from a map of named fields.
+     *
+     * The row count is taken from the first field in key order (0 when the map
+     * is empty); the fields are not checked for having equal row counts.
+     *
+     * @param name category name
+     * @param fields map from field name to field
+     * @returns a category whose getField resolves only keys present in `fields`
+     */
+    export function ofFields(name: string, fields: { [name: string]: CifField }): CifCategory {
+        const fieldNames = Object.keys(fields);
+        const hasField = (field: string) => Object.prototype.hasOwnProperty.call(fields, field);
+        return {
+            rowCount: fieldNames.length > 0 ? fields[fieldNames[0]].rowCount : 0,
+            name,
+            fieldNames,
+            // Only own keys count as fields; without this guard a lookup of
+            // 'toString' or 'constructor' would return an inherited member.
+            getField(name) { return hasField(name) ? fields[name] : void 0; }
+        };
+    }
 }
 
 /**
@@ -84,6 +97,10 @@ export interface CifField {
 }
 
 export namespace CifField {
+    /**
+     * Create a single-row field holding the given string value.
+     *
+     * @param value the value of the only row
+     * @returns the field produced by ofStrings for a one-element array
+     */
+    export function ofString(value: string): CifField {
+        return ofStrings([value]);
+    }
+
     export function ofStrings(values: string[]): CifField {
         const rowCount = values.length;
         const str: CifField['str'] = row => { const ret = values[row]; if (!ret || ret === '.' || ret === '?') return ''; return ret; };

+ 5 - 0
src/mol-io/reader/common/text/tokenizer.ts

@@ -228,6 +228,7 @@ namespace Tokenizer {
         state.tokenStart = s;
         state.tokenEnd = e + 1;
         state.position = end;
+        return state;
     }
 }
 
@@ -265,6 +266,10 @@ export namespace TokenBuilder {
         tokens.count++;
     }
 
+    /** Append the tokenizer's current token range (tokenStart, tokenEnd) to the tokens. */
+    export function addToken(tokens: Tokens, tokenizer: Tokenizer) {
+        const { tokenStart, tokenEnd } = tokenizer;
+        add(tokens, tokenStart, tokenEnd);
+    }
+
     export function addUnchecked(tokens: Tokens, start: number, end: number) {
         (tokens as Builder).indices[(tokens as Builder).offset++] = start;
         (tokens as Builder).indices[(tokens as Builder).offset++] = end;

+ 44 - 0
src/mol-model-formats/structure/pdb/assembly.ts

@@ -0,0 +1,44 @@
+/**
+ * Copyright (c) 2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import { CifCategory, CifField } from 'mol-io/reader/cif';
+import { mmCIF_Schema } from 'mol-io/reader/cif/schema/mmcif';
+
+/**
+ * Convert a PDB CRYST1 record into `cell` and `symmetry` categories.
+ *
+ * Values are sliced from fixed columns, trimmed and stored as single-row
+ * string fields; values the record does not provide are stored as '?'.
+ *
+ * @param id value used for `entry_id` in both categories
+ * @param record the CRYST1 line
+ * @returns the `cell` category followed by the `symmetry` category
+ */
+export function parseCryst1(id: string, record: string): CifCategory[] {
+    // COLUMNS       DATA TYPE      CONTENTS
+    // --------------------------------------------------------------------------------
+    //  1 -  6       Record name    "CRYST1"
+    //  7 - 15       Real(9.3)      a (Angstroms)
+    // 16 - 24       Real(9.3)      b (Angstroms)
+    // 25 - 33       Real(9.3)      c (Angstroms)
+    // 34 - 40       Real(7.2)      alpha (degrees)
+    // 41 - 47       Real(7.2)      beta (degrees)
+    // 48 - 54       Real(7.2)      gamma (degrees)
+    // 56 - 66       LString        Space group
+    // 67 - 70       Integer        Z value
+
+    // Offsets are 0-based (column - 1). A record shorter than a column range
+    // yields an empty string for that value.
+    const get = (start: number, length: number) => record.substring(start, start + length).trim();
+
+    const cell: CifCategory.Fields<mmCIF_Schema['cell']> = {
+        entry_id: CifField.ofString(id),
+        length_a: CifField.ofString(get(6, 9)),
+        length_b: CifField.ofString(get(15, 9)),
+        length_c: CifField.ofString(get(24, 9)),
+        angle_alpha: CifField.ofString(get(33, 7)),
+        angle_beta: CifField.ofString(get(40, 7)),
+        angle_gamma: CifField.ofString(get(47, 7)),
+        Z_PDB: CifField.ofString(get(66, 4)),
+        pdbx_unique_axis: CifField.ofString('?')
+    };
+    const symmetry: CifCategory.Fields<mmCIF_Schema['symmetry']> = {
+        entry_id: CifField.ofString(id),
+        'space_group_name_H-M': CifField.ofString(get(55, 11)),
+        Int_Tables_number: CifField.ofString('?'),
+        cell_setting: CifField.ofString('?'),
+        space_group_name_Hall: CifField.ofString('?')
+    };
+    return [CifCategory.ofFields('cell', cell), CifCategory.ofFields('symmetry', symmetry)];
+}

+ 20 - 42
src/mol-model-formats/structure/pdb/to-cif.ts

@@ -100,84 +100,63 @@ function getEntityId(residueName: string, isHet: boolean) {
 
 function addAtom(sites: AtomSiteTemplate, model: string, data: Tokenizer, s: number, e: number, isHet: boolean) {
     const { data: str } = data;
-    let startPos = s;
-    let start = s;
-    const end = e;
-    const length = end - start;
+    const length = e - s;
 
     // TODO: filter invalid atoms
 
     // COLUMNS        DATA TYPE       CONTENTS
     // --------------------------------------------------------------------------------
     // 1 -  6        Record name     "ATOM  "
-    Tokenizer.trim(data, start, start + 6);
-    TokenBuilder.add(sites.group_PDB, data.tokenStart, data.tokenEnd);
+    TokenBuilder.addToken(sites.group_PDB, Tokenizer.trim(data, s, s + 6));
 
     // 7 - 11        Integer         Atom serial number.
     // TODO: support HEX
-    start = startPos + 6;
-    Tokenizer.trim(data, start, start + 5);
+    Tokenizer.trim(data, s + 6, s + 11);
     sites.id[sites.index] = data.data.substring(data.tokenStart, data.tokenEnd);
 
     // 13 - 16        Atom            Atom name.
-    start = startPos + 12;
-    Tokenizer.trim(data, start, start + 4);
-    TokenBuilder.add(sites.auth_atom_id, data.tokenStart, data.tokenEnd);
+    TokenBuilder.addToken(sites.auth_atom_id, Tokenizer.trim(data, s + 12, s + 16));
 
     // 17             Character       Alternate location indicator.
-    if (str.charCodeAt(startPos + 16) === 32) { // ' '
+    if (str.charCodeAt(s + 16) === 32) { // ' '
         TokenBuilder.add(sites.label_alt_id, 0, 0);
     } else {
-        TokenBuilder.add(sites.label_alt_id, startPos + 16, startPos + 17);
+        TokenBuilder.add(sites.label_alt_id, s + 16, s + 17);
     }
 
     // 18 - 20        Residue name    Residue name.
-    start = startPos + 17;
-    Tokenizer.trim(data, start, start + 3);
-    TokenBuilder.add(sites.auth_comp_id, data.tokenStart, data.tokenEnd);
+    TokenBuilder.addToken(sites.auth_comp_id, Tokenizer.trim(data, s + 17, s + 20));
     const residueName = str.substring(data.tokenStart, data.tokenEnd);
 
     // 22             Character       Chain identifier.
-    TokenBuilder.add(sites.auth_asym_id, startPos + 21, startPos + 22);
+    TokenBuilder.add(sites.auth_asym_id, s + 21, s + 22);
 
     // 23 - 26        Integer         Residue sequence number.
     // TODO: support HEX
-    start = startPos + 22;
-    Tokenizer.trim(data, start, start + 4);
-    TokenBuilder.add(sites.auth_seq_id, data.tokenStart, data.tokenEnd);
+    TokenBuilder.addToken(sites.auth_seq_id, Tokenizer.trim(data, s + 22, s + 26));
 
     // 27             AChar           Code for insertion of residues.
-    if (str.charCodeAt(startPos + 26) === 32) { // ' '
+    if (str.charCodeAt(s + 26) === 32) { // ' '
         TokenBuilder.add(sites.label_alt_id, 0, 0);
     } else {
-        TokenBuilder.add(sites.label_alt_id, startPos + 26, startPos + 27);
+        TokenBuilder.add(sites.label_alt_id, s + 26, s + 27);
     }
 
     // 31 - 38        Real(8.3)       Orthogonal coordinates for X in Angstroms.
-    start = startPos + 30;
-    Tokenizer.trim(data, start, start + 8);
-    TokenBuilder.add(sites.Cartn_x, data.tokenStart, data.tokenEnd);
+    TokenBuilder.addToken(sites.Cartn_x, Tokenizer.trim(data, s + 30, s + 38));
 
     // 39 - 46        Real(8.3)       Orthogonal coordinates for Y in Angstroms.
-    start = startPos + 38;
-    Tokenizer.trim(data, start, start + 8);
-    TokenBuilder.add(sites.Cartn_y, data.tokenStart, data.tokenEnd);
+    TokenBuilder.addToken(sites.Cartn_y, Tokenizer.trim(data, s + 38, s + 46));
 
     // 47 - 54        Real(8.3)       Orthogonal coordinates for Z in Angstroms.
-    start = startPos + 46;
-    Tokenizer.trim(data, start, start + 8);
-    TokenBuilder.add(sites.Cartn_z, data.tokenStart, data.tokenEnd);
+    TokenBuilder.addToken(sites.Cartn_z, Tokenizer.trim(data, s + 46, s + 54));
 
     // 55 - 60        Real(6.2)       Occupancy.
-    start = startPos + 54;
-    Tokenizer.trim(data, start, start + 6);
-    TokenBuilder.add(sites.occupancy, data.tokenStart, data.tokenEnd);
+    TokenBuilder.addToken(sites.occupancy, Tokenizer.trim(data, s + 54, s + 60));
 
     // 61 - 66        Real(6.2)       Temperature factor (Default = 0.0).
     if (length >= 66) {
-        start = startPos + 60;
-        Tokenizer.trim(data, start, start + 6);
-        TokenBuilder.add(sites.B_iso_or_equiv, data.tokenStart, data.tokenEnd);
+        TokenBuilder.addToken(sites.B_iso_or_equiv, Tokenizer.trim(data, s + 60, s + 66));
     } else {
         TokenBuilder.add(sites.label_alt_id, 0, 0);
     }
@@ -187,17 +166,16 @@ function addAtom(sites: AtomSiteTemplate, model: string, data: Tokenizer, s: num
 
     // 77 - 78        LString(2)      Element symbol, right-justified.
     if (length >= 78) {
-        start = startPos + 76;
-        Tokenizer.trim(data, start, start + 2);
+        Tokenizer.trim(data, s + 76, s + 78);
 
         if (data.tokenStart < data.tokenEnd) {
-            TokenBuilder.add(sites.type_symbol, data.tokenStart, data.tokenEnd);
+            TokenBuilder.addToken(sites.type_symbol, data);
         } else {
             // "guess" the symbol
-            TokenBuilder.add(sites.type_symbol, startPos + 12, startPos + 13);
+            TokenBuilder.add(sites.type_symbol, s + 12, s + 13);
         }
     } else {
-        TokenBuilder.add(sites.type_symbol, startPos + 12, startPos + 13);
+        TokenBuilder.add(sites.type_symbol, s + 12, s + 13);
     }
 
     sites.label_entity_id[sites.index] = getEntityId(residueName, isHet);