Browse Source

Merge pull request #812 from valasatava/pdb-to-cif-header

Parse HEADER record when reading PDB file
Alexander Rose 1 year ago
parent
commit
adab6b0a6a

+ 1 - 0
CHANGELOG.md

@@ -14,6 +14,7 @@ Note that since we don't clearly distinguish between a public and private interf
 - Fix an edge case in the `mol-state`'s `State` when trying to apply a transform to an existing Null object
 - Add `SbNcbrPartialCharges` extension for coloring and labeling atoms and residues by partial atomic charges
   - uses custom mmcif categories `_sb_ncbr_partial_atomic_charges_meta` and `_sb_ncbr_partial_atomic_charges` (more info in [README.md](./src/extensions/sb-ncbr/README.md))
+- Parse HEADER record when reading PDB file
 
 ## [v3.34.0] - 2023-04-16
 

+ 2 - 1
package.json

@@ -98,7 +98,8 @@
     "David Williams <dwilliams@nobiastx.com>",
     "Zhenyu Zhang <jump2cn@gmail.com>",
     "Russell Parker <russell@benchling.com>",
-    "Dominik Tichy <tichydominik451@gmail.com>"
+    "Dominik Tichy <tichydominik451@gmail.com>",
+    "Yana Rose <yana.v.rose@gmail.com>"
   ],
   "license": "MIT",
   "devDependencies": {

+ 31 - 0
src/mol-model-formats/structure/pdb/header.ts

@@ -0,0 +1,31 @@
+/**
+ * Copyright (c) 2023 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Yana Rose <yana.v.rose@gmail.com>
+ */
+export type PdbHeaderData = { // values read from a PDB HEADER record by addHeader; a field stays undefined when its columns are blank
+    id_code?: string, // idCode, record columns 63-66
+    dep_date?: string, // depDate, record columns 51-59, kept as the trimmed text exactly as it appears in the record
+    classification?: string // classification, record columns 11-50
+};
+
+export function addHeader(data: string, s: number, e: number, header: PdbHeaderData) {
+    // Reads the fixed-column fields from the record text data.substring(s, e) and assigns all three fields of `header`; the record name itself is not checked here.
+    //     COLUMNS       DATA  TYPE     FIELD             DEFINITION
+    // ------------------------------------------------------------------------------------
+    //  1 -  6       Record name    "HEADER"
+    // 11 - 50       String(40)     classification    Classifies the molecule(s).
+    // 51 - 59       Date           depDate           Deposition date. This is the date the
+    //                                                coordinates were received at the PDB.
+    // 63 - 66       IDcode         idCode            This identifier is unique within the PDB.
+
+    // PDB to PDBx/mmCIF Data Item Correspondences
+    // classification    _struct_keywords.pdbx_keywords
+    // depDate           _pdbx_database_status.recvd_initial_deposition_date
+    // idCode            _entry.id
+
+    const line = data.substring(s, e); // offsets below are 0-based and end-exclusive, so columns A-B become substring(A - 1, B)
+    header.id_code = line.substring(62, 66).trim() || undefined; // columns 63-66; an empty result is stored as undefined
+    header.dep_date = line.substring(50, 59).trim() || undefined; // columns 51-59; stored as text, no date parsing or reformatting
+    header.classification = line.substring(10, 50).trim() || undefined; // columns 11-50; an empty result is stored as undefined
+}

+ 28 - 3
src/mol-model-formats/structure/pdb/to-cif.ts

@@ -3,10 +3,11 @@
  *
  * @author David Sehnal <david.sehnal@gmail.com>
  * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ * @author Yana Rose <yana.v.rose@gmail.com>
  */
 
 import { substringStartsWith } from '../../../mol-util/string';
-import { CifCategory, CifFrame } from '../../../mol-io/reader/cif';
+import { CifCategory, CifField, CifFrame } from '../../../mol-io/reader/cif';
 import { Tokenizer } from '../../../mol-io/reader/common/text/tokenizer';
 import { PdbFile } from '../../../mol-io/reader/pdb/schema';
 import { parseCryst1, parseRemark350, parseMtrix } from './assembly';
@@ -20,6 +21,8 @@ import { getAtomSiteTemplate, addAtom, getAtomSite } from './atom-site';
 import { addAnisotropic, getAnisotropicTemplate, getAnisotropic } from './anisotropic';
 import { parseConect } from './conect';
 import { isDebugMode } from '../../../mol-util/debug';
+import { PdbHeaderData, addHeader } from './header';
+import { mmCIF_Schema } from '../../../mol-io/reader/cif/schema/mmcif';
 
 export async function pdbToMmCif(pdb: PdbFile): Promise<CifFrame> {
     const { lines } = pdb;
@@ -42,7 +45,7 @@ export async function pdbToMmCif(pdb: PdbFile): Promise<CifFrame> {
                 break;
         }
     }
-
+    const header: PdbHeaderData = {};
     const atomSite = getAtomSiteTemplate(data, atomCount);
     const anisotropic = getAnisotropicTemplate(data, anisotropicCount);
     const entityBuilder = new EntityBuilder();
@@ -94,7 +97,9 @@ export async function pdbToMmCif(pdb: PdbFile): Promise<CifFrame> {
                 }
                 break;
             case 'H':
-                if (substringStartsWith(data, s, e, 'HETATM')) {
+                if (substringStartsWith(data, s, e, 'HEADER')) {
+                    addHeader(data, s, e, header);
+                } else if (substringStartsWith(data, s, e, 'HETATM')) {
                     if (!modelNum) { modelNum++; modelStr = '' + modelNum; }
                     addAtom(atomSite, modelStr, tokenizer, s, e, isPdbqt);
                 } else if (substringStartsWith(data, s, e, 'HELIX')) {
@@ -169,6 +174,26 @@ export async function pdbToMmCif(pdb: PdbFile): Promise<CifFrame> {
         }
     }
 
+    // build entry, struct_keywords and pdbx_database_status
+    if (header.id_code) {
+        const entry: CifCategory.SomeFields<mmCIF_Schema['entry']> = {
+            id: CifField.ofString(header.id_code)
+        };
+        helperCategories.push(CifCategory.ofFields('entry', entry));
+    }
+    if (header.classification) {
+        const struct_keywords: CifCategory.SomeFields<mmCIF_Schema['struct_keywords']> = {
+            pdbx_keywords: CifField.ofString(header.classification)
+        };
+        helperCategories.push(CifCategory.ofFields('struct_keywords', struct_keywords));
+    }
+    if (header.dep_date) {
+        const pdbx_database_status: CifCategory.SomeFields<mmCIF_Schema['pdbx_database_status']> = {
+            recvd_initial_deposition_date: CifField.ofString(header.dep_date)
+        };
+        helperCategories.push(CifCategory.ofFields('pdbx_database_status', pdbx_database_status));
+    }
+
     // build entity and chem_comp categories
     const seqIds = Column.ofIntTokens(atomSite.auth_seq_id);
     const atomIds = Column.ofStringTokens(atomSite.auth_atom_id);