ソースを参照

Expose explicit bond orders from struct_conn in mmcif (#999)

* Expose explicit bond orders from struct_conn in mmcif

StructConn was referencing the wrong column name: it was using auth_seq_id instead of label_seq_id.
The latter is mandatory according to https://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Categories/struct_conn.html
As a result, `getEntriesFromStructConn` found no matches
when building the bond lookup.

* update CHANGELOG and file headers

* Prefer auth_seq_id, fallback to label_seq_id

* case on presence instead of

* clarify changes in CHANGELOG

---------

Co-authored-by: David Sehnal <dsehnal@users.noreply.github.com>
Yakov Pechersky 1 年間 前
コミット
0199afd5f3

+ 1 - 0
CHANGELOG.md

@@ -6,6 +6,7 @@ Note that since we don't clearly distinguish between a public and private interf
 
 ## [Unreleased]
 
+- Use bonds from `_struct_conn` in mmCIF files that use `label_seq_id`
 - Fix measurement label `offsetZ` default: not needed when `scaleByRadius` is enabled
 - Support for label rendering in HeadlessPluginContext
 - MolViewSpec extension

+ 7 - 0
src/mol-model-formats/structure/pdb/conect.ts

@@ -2,6 +2,7 @@
  * Copyright (c) 2021-2023 mol* contributors, licensed under MIT, See LICENSE file for more info.
  *
  * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ * @author Yakov Pechersky <ffxen158@gmail.com>
  */
 
 import { CifCategory, CifField } from '../../../mol-io/reader/cif';
@@ -20,12 +21,14 @@ export function parseConect(lines: Tokens, lineStart: number, lineEnd: number, s
     const conn_type_id: string[] = [];
 
     const ptnr1_label_asym_id: string[] = [];
+    const ptnr1_label_seq_id: number[] = [];
     const ptnr1_auth_seq_id: number[] = [];
     const ptnr1_label_atom_id: string[] = [];
     const ptnr1_label_alt_id: string[] = [];
     const ptnr1_PDB_ins_code: string[] = [];
 
     const ptnr2_label_asym_id: string[] = [];
+    const ptnr2_label_seq_id: number[] = [];
     const ptnr2_auth_seq_id: number[] = [];
     const ptnr2_label_atom_id: string[] = [];
     const ptnr2_label_alt_id: string[] = [];
@@ -59,12 +62,14 @@ export function parseConect(lines: Tokens, lineStart: number, lineEnd: number, s
             conn_type_id.push('covale');
 
             ptnr1_label_asym_id.push(sites.label_asym_id!.str(idxA));
+            ptnr1_label_seq_id.push(sites.label_seq_id!.int(idxA));
             ptnr1_auth_seq_id.push(sites.auth_seq_id!.int(idxA));
             ptnr1_label_atom_id.push(sites.label_atom_id!.str(idxA));
             ptnr1_label_alt_id.push(sites.label_alt_id!.str(idxA));
             ptnr1_PDB_ins_code.push(sites.pdbx_PDB_ins_code!.str(idxA));
 
             ptnr2_label_asym_id.push(sites.label_asym_id!.str(idxB));
+            ptnr2_label_seq_id.push(sites.label_seq_id!.int(idxB));
             ptnr2_auth_seq_id.push(sites.auth_seq_id!.int(idxB));
             ptnr2_label_atom_id.push(sites.label_atom_id!.str(idxB));
             ptnr2_label_alt_id.push(sites.label_alt_id!.str(idxB));
@@ -79,12 +84,14 @@ export function parseConect(lines: Tokens, lineStart: number, lineEnd: number, s
         conn_type_id: CifField.ofStrings(conn_type_id),
 
         ptnr1_label_asym_id: CifField.ofStrings(ptnr1_label_asym_id),
+        ptnr1_label_seq_id: CifField.ofNumbers(ptnr1_label_seq_id),
         ptnr1_auth_seq_id: CifField.ofNumbers(ptnr1_auth_seq_id),
         ptnr1_label_atom_id: CifField.ofStrings(ptnr1_label_atom_id),
         pdbx_ptnr1_label_alt_id: CifField.ofStrings(ptnr1_label_alt_id),
         pdbx_ptnr1_PDB_ins_code: CifField.ofStrings(ptnr1_PDB_ins_code),
 
         ptnr2_label_asym_id: CifField.ofStrings(ptnr2_label_asym_id),
+        ptnr2_label_seq_id: CifField.ofNumbers(ptnr2_label_seq_id),
         ptnr2_auth_seq_id: CifField.ofNumbers(ptnr2_auth_seq_id),
         ptnr2_label_atom_id: CifField.ofStrings(ptnr2_label_atom_id),
         pdbx_ptnr2_label_alt_id: CifField.ofStrings(ptnr2_label_alt_id),

+ 10 - 2
src/mol-model-formats/structure/property/bonds/struct_conn.ts

@@ -3,6 +3,7 @@
  *
  * @author David Sehnal <david.sehnal@gmail.com>
  * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ * @author Yakov Pechersky <ffxen158@gmail.com>
  */
 
 import { Model } from '../../../../mol-model/structure/model/model';
@@ -94,6 +95,7 @@ export namespace StructConn {
         const { conn_type_id, pdbx_dist_value, pdbx_value_order } = struct_conn;
         const p1 = {
             label_asym_id: struct_conn.ptnr1_label_asym_id,
+            label_seq_id: struct_conn.ptnr1_label_seq_id,
             auth_seq_id: struct_conn.ptnr1_auth_seq_id,
             label_atom_id: struct_conn.ptnr1_label_atom_id,
             label_alt_id: struct_conn.pdbx_ptnr1_label_alt_id,
@@ -102,6 +104,7 @@ export namespace StructConn {
         };
         const p2: typeof p1 = {
             label_asym_id: struct_conn.ptnr2_label_asym_id,
+            label_seq_id: struct_conn.ptnr2_label_seq_id,
             auth_seq_id: struct_conn.ptnr2_auth_seq_id,
             label_atom_id: struct_conn.ptnr2_label_atom_id,
             label_alt_id: struct_conn.pdbx_ptnr2_label_alt_id,
@@ -117,13 +120,18 @@ export namespace StructConn {
             // turns out "mismat" records might not have atom name value
             if (!atomName) return undefined;
 
+            // prefer auth_seq_id, but if it is absent, then fall back to label_seq_id
+            const resId = (ps.auth_seq_id.valueKind(row) === Column.ValueKind.Present) ?
+                ps.auth_seq_id.value(row) :
+                ps.label_seq_id.value(row);
+            const resInsCode = ps.ins_code.value(row);
             const altId = ps.label_alt_id.value(row);
             for (const eId of entityIds) {
                 const residueIndex = model.atomicHierarchy.index.findResidue(
                     eId,
                     asymId,
-                    ps.auth_seq_id.value(row),
-                    ps.ins_code.value(row)
+                    resId,
+                    resInsCode
                 );
                 if (residueIndex < 0) continue;
                 const atomIndex = model.atomicHierarchy.index.findAtomOnResidue(residueIndex, atomName, altId);