Browse Source

pdb-parser, move atom-site methods to separate file

Alexander Rose 5 years ago
parent
commit
5d18643374

+ 154 - 0
src/mol-model-formats/structure/pdb/atom-site.ts

@@ -0,0 +1,154 @@
+/**
+ * Copyright (c) 2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ */
+
+import { CifField } from '../../../mol-io/reader/cif';
+import { mmCIF_Schema } from '../../../mol-io/reader/cif/schema/mmcif';
+import { TokenBuilder, Tokenizer } from '../../../mol-io/reader/common/text/tokenizer';
+import { guessElementSymbolTokens } from '../util';
+
+/** Shape of the per-column accumulator object returned by `getAtomSiteTemplate`. */
+type AtomSiteTemplate = ReturnType<typeof getAtomSiteTemplate>
+
+/**
+ * Creates an empty set of `atom_site` column accumulators.
+ *
+ * Most columns are token builders created with `TokenBuilder.create(data, 2 * count)`;
+ * `id`, `pdbx_PDB_model_num` and `label_entity_id` are plain string arrays.
+ * `index` starts at 0 and `count` is stored unchanged.
+ *
+ * @param data text the token builders refer to
+ * @param count expected number of atoms (presumably two offsets are reserved per token - confirm against `TokenBuilder.create`)
+ */
+export function getAtomSiteTemplate(data: string, count: number) {
+    // every call yields a fresh, independent column
+    const stringColumn = (): string[] => [];
+    const tokenColumn = () => TokenBuilder.create(data, 2 * count);
+
+    return {
+        index: 0,
+        count,
+        group_PDB: tokenColumn(),
+        id: stringColumn(),
+        auth_atom_id: tokenColumn(),
+        label_alt_id: tokenColumn(),
+        auth_comp_id: tokenColumn(),
+        auth_asym_id: tokenColumn(),
+        auth_seq_id: tokenColumn(),
+        pdbx_PDB_ins_code: tokenColumn(),
+        Cartn_x: tokenColumn(),
+        Cartn_y: tokenColumn(),
+        Cartn_z: tokenColumn(),
+        occupancy: tokenColumn(),
+        B_iso_or_equiv: tokenColumn(),
+        type_symbol: tokenColumn(),
+        pdbx_PDB_model_num: stringColumn(),
+        label_entity_id: stringColumn()
+    };
+}
+
+/**
+ * Wraps the accumulated columns of `sites` as CIF fields keyed by `atom_site` column name.
+ *
+ * Token-backed columns go through `CifField.ofTokens`, string-array columns through
+ * `CifField.ofStrings`. The four `label_*` identifiers (asym, atom, comp, seq) reuse the
+ * very same field objects as their `auth_*` counterparts.
+ *
+ * @param sites accumulator filled by `addAtom`
+ * @returns a partial `atom_site` field map
+ */
+export function getAtomSite(sites: AtomSiteTemplate): { [K in keyof mmCIF_Schema['atom_site']]?: CifField } {
+    const {
+        auth_asym_id: asymTokens,
+        auth_atom_id: atomTokens,
+        auth_comp_id: compTokens,
+        auth_seq_id: seqTokens
+    } = sites;
+
+    // built once, shared between the auth_* and label_* columns
+    const asymField = CifField.ofTokens(asymTokens);
+    const atomField = CifField.ofTokens(atomTokens);
+    const compField = CifField.ofTokens(compTokens);
+    const seqField = CifField.ofTokens(seqTokens);
+
+    const fields: { [K in keyof mmCIF_Schema['atom_site']]?: CifField } = {};
+
+    fields.auth_asym_id = asymField;
+    fields.auth_atom_id = atomField;
+    fields.auth_comp_id = compField;
+    fields.auth_seq_id = seqField;
+    fields.B_iso_or_equiv = CifField.ofTokens(sites.B_iso_or_equiv);
+    fields.Cartn_x = CifField.ofTokens(sites.Cartn_x);
+    fields.Cartn_y = CifField.ofTokens(sites.Cartn_y);
+    fields.Cartn_z = CifField.ofTokens(sites.Cartn_z);
+    fields.group_PDB = CifField.ofTokens(sites.group_PDB);
+    fields.id = CifField.ofStrings(sites.id);
+
+    fields.label_alt_id = CifField.ofTokens(sites.label_alt_id);
+
+    fields.label_asym_id = asymField;
+    fields.label_atom_id = atomField;
+    fields.label_comp_id = compField;
+    fields.label_seq_id = seqField;
+    fields.label_entity_id = CifField.ofStrings(sites.label_entity_id);
+
+    fields.occupancy = CifField.ofTokens(sites.occupancy);
+    fields.type_symbol = CifField.ofTokens(sites.type_symbol);
+
+    fields.pdbx_PDB_ins_code = CifField.ofTokens(sites.pdbx_PDB_ins_code);
+    fields.pdbx_PDB_model_num = CifField.ofStrings(sites.pdbx_PDB_model_num);
+
+    return fields;
+}
+
+/**
+ * Parses one fixed-column PDB coordinate record and appends its fields to `sites`.
+ *
+ * Every column written here receives one entry per call so that the columns stay
+ * aligned row by row; fields that are blank or missing get a zero-length token `(0, 0)`.
+ * `label_entity_id` is not written by this function.
+ *
+ * @param sites accumulator created by `getAtomSiteTemplate`; `sites.index` is incremented by one
+ * @param model value stored in `pdbx_PDB_model_num` for this atom
+ * @param data tokenizer over the file text; its `tokenStart`/`tokenEnd` are overwritten by the `Tokenizer.trim` calls
+ * @param s offset of the first character of the record within `data.data`
+ * @param e end offset of the record; only used to compute the record length
+ */
+export function addAtom(sites: AtomSiteTemplate, model: string, data: Tokenizer, s: number, e: number) {
+    const { data: str } = data;
+    const length = e - s;
+
+    // TODO: filter invalid atoms
+
+    // COLUMNS        DATA TYPE       CONTENTS
+    // --------------------------------------------------------------------------------
+    // 1 -  6        Record name     "ATOM  "
+    TokenBuilder.addToken(sites.group_PDB, Tokenizer.trim(data, s, s + 6));
+
+    // 7 - 11        Integer         Atom serial number.
+    // TODO: support HEX
+    Tokenizer.trim(data, s + 6, s + 11);
+    sites.id[sites.index] = str.substring(data.tokenStart, data.tokenEnd);
+
+    // 13 - 16        Atom            Atom name.
+    TokenBuilder.addToken(sites.auth_atom_id, Tokenizer.trim(data, s + 12, s + 16));
+
+    // 17             Character       Alternate location indicator.
+    if (str.charCodeAt(s + 16) === 32) { // ' '
+        TokenBuilder.add(sites.label_alt_id, 0, 0);
+    } else {
+        TokenBuilder.add(sites.label_alt_id, s + 16, s + 17);
+    }
+
+    // 18 - 20        Residue name    Residue name.
+    TokenBuilder.addToken(sites.auth_comp_id, Tokenizer.trim(data, s + 17, s + 20));
+
+    // 22             Character       Chain identifier.
+    TokenBuilder.add(sites.auth_asym_id, s + 21, s + 22);
+
+    // 23 - 26        Integer         Residue sequence number.
+    // TODO: support HEX
+    TokenBuilder.addToken(sites.auth_seq_id, Tokenizer.trim(data, s + 22, s + 26));
+
+    // 27             AChar           Code for insertion of residues.
+    // Must go into pdbx_PDB_ins_code; writing it to label_alt_id would add a second
+    // alt-id token per atom and leave the insertion-code column empty.
+    if (str.charCodeAt(s + 26) === 32) { // ' '
+        TokenBuilder.add(sites.pdbx_PDB_ins_code, 0, 0);
+    } else {
+        TokenBuilder.add(sites.pdbx_PDB_ins_code, s + 26, s + 27);
+    }
+
+    // 31 - 38        Real(8.3)       Orthogonal coordinates for X in Angstroms.
+    TokenBuilder.addToken(sites.Cartn_x, Tokenizer.trim(data, s + 30, s + 38));
+
+    // 39 - 46        Real(8.3)       Orthogonal coordinates for Y in Angstroms.
+    TokenBuilder.addToken(sites.Cartn_y, Tokenizer.trim(data, s + 38, s + 46));
+
+    // 47 - 54        Real(8.3)       Orthogonal coordinates for Z in Angstroms.
+    TokenBuilder.addToken(sites.Cartn_z, Tokenizer.trim(data, s + 46, s + 54));
+
+    // 55 - 60        Real(6.2)       Occupancy.
+    TokenBuilder.addToken(sites.occupancy, Tokenizer.trim(data, s + 54, s + 60));
+
+    // 61 - 66        Real(6.2)       Temperature factor (Default = 0.0).
+    if (length >= 66) {
+        TokenBuilder.addToken(sites.B_iso_or_equiv, Tokenizer.trim(data, s + 60, s + 66));
+    } else {
+        // record too short to carry a B-factor: keep the column aligned with an empty token
+        TokenBuilder.add(sites.B_iso_or_equiv, 0, 0);
+    }
+
+    // 73 - 76        LString(4)      Segment identifier, left-justified.
+    // ignored
+
+    // 77 - 78        LString(2)      Element symbol, right-justified.
+    let hasElementSymbol = false;
+    if (length >= 78) {
+        Tokenizer.trim(data, s + 76, s + 78);
+        // a non-empty trimmed range means the element columns are filled in
+        hasElementSymbol = data.tokenStart < data.tokenEnd;
+    }
+
+    if (hasElementSymbol) {
+        TokenBuilder.addToken(sites.type_symbol, data);
+    } else {
+        // element columns absent or blank: derive the symbol from the atom name columns (13 - 16)
+        guessElementSymbolTokens(sites.type_symbol, str, s + 12, s + 16);
+    }
+
+    sites.pdbx_PDB_model_num[sites.index] = model;
+
+    sites.index++;
+}

+ 4 - 148
src/mol-model-formats/structure/pdb/to-cif.ts

@@ -6,161 +6,17 @@
  */
 
 import { substringStartsWith } from '../../../mol-util/string';
-import { CifField, CifCategory, CifFrame } from '../../../mol-io/reader/cif';
-import { mmCIF_Schema } from '../../../mol-io/reader/cif/schema/mmcif';
-import { TokenBuilder, Tokenizer } from '../../../mol-io/reader/common/text/tokenizer';
+import { CifCategory, CifFrame } from '../../../mol-io/reader/cif';
+import { Tokenizer } from '../../../mol-io/reader/common/text/tokenizer';
 import { PdbFile } from '../../../mol-io/reader/pdb/schema';
 import { parseCryst1, parseRemark350, parseMtrix } from './assembly';
 import { parseHelix, parseSheet } from './secondary-structure';
-import { guessElementSymbolTokens } from '../util';
 import { parseCmpnd, parseHetnam } from './entity';
 import { ComponentBuilder } from '../common/component';
 import { EntityBuilder } from '../common/entity';
 import { Column } from '../../../mol-data/db';
 import { getMoleculeType } from '../../../mol-model/structure/model/types';
-
-type AtomSiteTemplate = typeof getAtomSiteTemplate extends (...args: any) => infer T ? T : never
-function getAtomSiteTemplate(data: string, count: number) {
-    const str = () => [] as string[];
-    const ts = () => TokenBuilder.create(data, 2 * count);
-    return {
-        index: 0,
-        count,
-        group_PDB: ts(),
-        id: str(),
-        auth_atom_id: ts(),
-        label_alt_id: ts(),
-        auth_comp_id: ts(),
-        auth_asym_id: ts(),
-        auth_seq_id: ts(),
-        pdbx_PDB_ins_code: ts(),
-        Cartn_x: ts(),
-        Cartn_y: ts(),
-        Cartn_z: ts(),
-        occupancy: ts(),
-        B_iso_or_equiv: ts(),
-        type_symbol: ts(),
-        pdbx_PDB_model_num: str(),
-        label_entity_id: str()
-    };
-}
-
-function getAomSite(sites: AtomSiteTemplate): { [K in keyof mmCIF_Schema['atom_site']]?: CifField } {
-    const auth_asym_id = CifField.ofTokens(sites.auth_asym_id);
-    const auth_atom_id = CifField.ofTokens(sites.auth_atom_id);
-    const auth_comp_id = CifField.ofTokens(sites.auth_comp_id);
-    const auth_seq_id = CifField.ofTokens(sites.auth_seq_id);
-
-    return {
-        auth_asym_id,
-        auth_atom_id,
-        auth_comp_id,
-        auth_seq_id,
-        B_iso_or_equiv: CifField.ofTokens(sites.B_iso_or_equiv),
-        Cartn_x: CifField.ofTokens(sites.Cartn_x),
-        Cartn_y: CifField.ofTokens(sites.Cartn_y),
-        Cartn_z: CifField.ofTokens(sites.Cartn_z),
-        group_PDB: CifField.ofTokens(sites.group_PDB),
-        id: CifField.ofStrings(sites.id),
-
-        label_alt_id: CifField.ofTokens(sites.label_alt_id),
-
-        label_asym_id: auth_asym_id,
-        label_atom_id: auth_atom_id,
-        label_comp_id: auth_comp_id,
-        label_seq_id: auth_seq_id,
-        label_entity_id: CifField.ofStrings(sites.label_entity_id),
-
-        occupancy: CifField.ofTokens(sites.occupancy),
-        type_symbol: CifField.ofTokens(sites.type_symbol),
-
-        pdbx_PDB_ins_code: CifField.ofTokens(sites.pdbx_PDB_ins_code),
-        pdbx_PDB_model_num: CifField.ofStrings(sites.pdbx_PDB_model_num)
-    };
-}
-
-function addAtom(sites: AtomSiteTemplate, model: string, data: Tokenizer, s: number, e: number) {
-    const { data: str } = data;
-    const length = e - s;
-
-    // TODO: filter invalid atoms
-
-    // COLUMNS        DATA TYPE       CONTENTS
-    // --------------------------------------------------------------------------------
-    // 1 -  6        Record name     "ATOM  "
-    TokenBuilder.addToken(sites.group_PDB, Tokenizer.trim(data, s, s + 6));
-
-    // 7 - 11        Integer         Atom serial number.
-    // TODO: support HEX
-    Tokenizer.trim(data, s + 6, s + 11);
-    sites.id[sites.index] = data.data.substring(data.tokenStart, data.tokenEnd);
-
-    // 13 - 16        Atom            Atom name.
-    TokenBuilder.addToken(sites.auth_atom_id, Tokenizer.trim(data, s + 12, s + 16));
-
-    // 17             Character       Alternate location indicator.
-    if (str.charCodeAt(s + 16) === 32) { // ' '
-        TokenBuilder.add(sites.label_alt_id, 0, 0);
-    } else {
-        TokenBuilder.add(sites.label_alt_id, s + 16, s + 17);
-    }
-
-    // 18 - 20        Residue name    Residue name.
-    TokenBuilder.addToken(sites.auth_comp_id, Tokenizer.trim(data, s + 17, s + 20));
-
-    // 22             Character       Chain identifier.
-    TokenBuilder.add(sites.auth_asym_id, s + 21, s + 22);
-
-    // 23 - 26        Integer         Residue sequence number.
-    // TODO: support HEX
-    TokenBuilder.addToken(sites.auth_seq_id, Tokenizer.trim(data, s + 22, s + 26));
-
-    // 27             AChar           Code for insertion of residues.
-    if (str.charCodeAt(s + 26) === 32) { // ' '
-        TokenBuilder.add(sites.label_alt_id, 0, 0);
-    } else {
-        TokenBuilder.add(sites.label_alt_id, s + 26, s + 27);
-    }
-
-    // 31 - 38        Real(8.3)       Orthogonal coordinates for X in Angstroms.
-    TokenBuilder.addToken(sites.Cartn_x, Tokenizer.trim(data, s + 30, s + 38));
-
-    // 39 - 46        Real(8.3)       Orthogonal coordinates for Y in Angstroms.
-    TokenBuilder.addToken(sites.Cartn_y, Tokenizer.trim(data, s + 38, s + 46));
-
-    // 47 - 54        Real(8.3)       Orthogonal coordinates for Z in Angstroms.
-    TokenBuilder.addToken(sites.Cartn_z, Tokenizer.trim(data, s + 46, s + 54));
-
-    // 55 - 60        Real(6.2)       Occupancy.
-    TokenBuilder.addToken(sites.occupancy, Tokenizer.trim(data, s + 54, s + 60));
-
-    // 61 - 66        Real(6.2)       Temperature factor (Default = 0.0).
-    if (length >= 66) {
-        TokenBuilder.addToken(sites.B_iso_or_equiv, Tokenizer.trim(data, s + 60, s + 66));
-    } else {
-        TokenBuilder.add(sites.label_alt_id, 0, 0);
-    }
-
-    // 73 - 76        LString(4)      Segment identifier, left-justified.
-    // ignored
-
-    // 77 - 78        LString(2)      Element symbol, right-justified.
-    if (length >= 78) {
-        Tokenizer.trim(data, s + 76, s + 78);
-
-        if (data.tokenStart < data.tokenEnd) {
-            TokenBuilder.addToken(sites.type_symbol, data);
-        } else {
-            guessElementSymbolTokens(sites.type_symbol, str, s + 12, s + 16)
-        }
-    } else {
-        guessElementSymbolTokens(sites.type_symbol, str, s + 12, s + 16)
-    }
-
-    sites.pdbx_PDB_model_num[sites.index] = model;
-
-    sites.index++;
-}
+import { getAtomSiteTemplate, addAtom, getAtomSite } from './atom-site';
 
 export async function pdbToMmCif(pdb: PdbFile): Promise<CifFrame> {
     const { lines } = pdb;
@@ -301,7 +157,7 @@ export async function pdbToMmCif(pdb: PdbFile): Promise<CifFrame> {
     const categories = {
         entity: entityBuilder.getEntityCategory(),
         chem_comp: componentBuilder.getChemCompCategory(),
-        atom_site: CifCategory.ofFields('atom_site', getAomSite(atomSite))
+        atom_site: CifCategory.ofFields('atom_site', getAtomSite(atomSite))
     } as any;
 
     for (const c of helperCategories) {