Browse Source

normalize atom_site early

Alexander Rose 3 years ago
parent
commit
d3b2c20c26

+ 2 - 9
src/mol-io/reader/cif/schema.ts

@@ -1,5 +1,5 @@
 /**
- * Copyright (c) 2017-2022 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ * Copyright (c) 2017-2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
  *
  * @author David Sehnal <david.sehnal@gmail.com>
  * @author Alexander Rose <alexander.rose@weirdbyte.de>
@@ -53,18 +53,11 @@ function getColumnCtor(t: Column.Schema): ColumnCtor {
     }
 }
 
-function hasPresentValues(rowCount: number, valueKind: (row: number) => Column.ValueKind) {
-    for (let i = 0, il = rowCount; i < il; i++) {
-        if (valueKind(i) === Column.ValueKind.Present) return true;
-    }
-    return false;
-}
-
 function createColumn<T>(schema: Column.Schema, field: Data.CifField, value: (row: number) => T, toArray: Column<T>['toArray']): Column<T> {
     return {
         schema,
         __array: field.__array,
-        isDefined: field.isDefined && hasPresentValues(field.rowCount, field.valueKind),
+        isDefined: field.isDefined,
         rowCount: field.rowCount,
         value,
         valueKind: field.valueKind,

+ 2 - 14
src/mol-model-formats/structure/basic/atomic.ts

@@ -29,11 +29,10 @@ function findHierarchyOffsets(atom_site: AtomSite) {
     const start = 0, end = atom_site._rowCount;
     const residues = [start as ElementIndex], chains = [start as ElementIndex];
 
-    const { label_entity_id, label_asym_id, auth_asym_id, label_seq_id, auth_seq_id, pdbx_PDB_ins_code } = atom_site;
-    const asym_id = label_asym_id.isDefined ? label_asym_id : auth_asym_id;
+    const { label_entity_id, label_asym_id, label_seq_id, auth_seq_id, pdbx_PDB_ins_code } = atom_site;
 
     for (let i = start + 1 as ElementIndex; i < end; i++) {
-        const newChain = !label_entity_id.areValuesEqual(i - 1, i) || !asym_id.areValuesEqual(i - 1, i);
+        const newChain = !label_entity_id.areValuesEqual(i - 1, i) || !label_asym_id.areValuesEqual(i - 1, i);
         const newResidue = newChain
             || !label_seq_id.areValuesEqual(i - 1, i)
             || !auth_seq_id.areValuesEqual(i - 1, i)
@@ -46,11 +45,6 @@ function findHierarchyOffsets(atom_site: AtomSite) {
     return { residues, chains };
 }
 
-function substUndefinedColumn<T extends Table<any>>(table: T, a: keyof T, b: keyof T) {
-    if (!table[a].isDefined) table[a] = table[b];
-    if (!table[b].isDefined) table[b] = table[a];
-}
-
 function createHierarchyData(atom_site: AtomSite, sourceIndex: Column<number>, offsets: { residues: ArrayLike<number>, chains: ArrayLike<number> }): AtomicData {
     const atoms = Table.ofColumns(AtomsSchema, {
         type_symbol: Column.ofArray({ array: Column.mapToArray(atom_site.type_symbol, ElementSymbol), schema: Column.Schema.Aliased<ElementSymbol>(Column.Schema.str) }),
@@ -84,12 +78,6 @@ function createHierarchyData(atom_site: AtomSite, sourceIndex: Column<number>, o
     Table.columnToArray(residues, 'label_seq_id', Int32Array);
     Table.columnToArray(residues, 'auth_seq_id', Int32Array);
 
-    // Fix possibly missing auth_/label_ columns
-    substUndefinedColumn(atoms, 'label_atom_id', 'auth_atom_id');
-    substUndefinedColumn(atoms, 'label_comp_id', 'auth_comp_id');
-    substUndefinedColumn(residues, 'label_seq_id', 'auth_seq_id');
-    substUndefinedColumn(chains, 'label_asym_id', 'auth_asym_id');
-
     return { atoms, residues, chains, atomSourceIndex: sourceIndex };
 }
 

+ 30 - 8
src/mol-model-formats/structure/basic/parser.ts

@@ -1,5 +1,5 @@
 /**
- * Copyright (c) 2017-2020 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ * Copyright (c) 2017-2022 mol* contributors, licensed under MIT, See LICENSE file for more info.
  *
  * @author David Sehnal <david.sehnal@gmail.com>
  * @author Alexander Rose <alexander.rose@weirdbyte.de>
@@ -150,17 +150,40 @@ function findModelEnd(num: Column<number>, startIndex: number) {
     return endIndex;
 }
 
+function hasPresentValues(column: Column<any>) { // true if at least one row of `column` has value kind Present
+    for (let i = 0, il = column.rowCount; i < il; i++) { // linear scan over all rows, row count read once up front
+        if (column.valueKind(i) === Column.ValueKind.Present) return true; // stop at the first present value
+    }
+    return false; // no present value found; also the result for a column with zero rows
+}
+
+function substUndefinedColumn<T extends Table<any>>(table: T, a: keyof T, b: keyof T) { // mutates `table` in place: an unusable column `a` or `b` is replaced by its counterpart
+    if (!table[a].isDefined || !hasPresentValues(table[a])) table[a] = table[b]; // `a` is undefined or has no present values: use the column of `b` for `a`
+    if (!table[b].isDefined || !hasPresentValues(table[b])) table[b] = table[a]; // same check in the other direction; table[a] may already have been replaced by the line above
+}
+
+/** Fix possibly missing auth_/label_ columns: returns a table built from `atom_site` in which each listed label_/auth_ column pair falls back to its counterpart when one side is undefined or has no present values */
+function getNormalizeAtomSite(atom_site: AtomSite) {
+    const normalized = Table.ofColumns(atom_site._schema, atom_site); // new table over the same schema; presumably leaves the caller's `atom_site` untouched when columns are swapped below - TODO confirm against Table.ofColumns
+    substUndefinedColumn(normalized, 'label_atom_id', 'auth_atom_id'); // atom name pair
+    substUndefinedColumn(normalized, 'label_comp_id', 'auth_comp_id'); // component (residue name) pair
+    substUndefinedColumn(normalized, 'label_seq_id', 'auth_seq_id'); // sequence number pair
+    substUndefinedColumn(normalized, 'label_asym_id', 'auth_asym_id'); // chain identifier pair
+    return normalized;
+}
+
 async function readStandard(ctx: RuntimeContext, data: BasicData, properties: CommonProperties, format: ModelFormat) {
     const models: Model[] = [];
 
     if (data.atom_site) {
-        const atomCount = data.atom_site.id.rowCount;
+        const normalizedAtomSite = getNormalizeAtomSite(data.atom_site);
+        const atomCount = normalizedAtomSite.id.rowCount;
         const entities = getEntityData(data);
 
         let modelStart = 0;
         while (modelStart < atomCount) {
-            const modelEnd = findModelEnd(data.atom_site.pdbx_PDB_model_num, modelStart);
-            const { atom_site, sourceIndex } = await sortAtomSite(ctx, data.atom_site, modelStart, modelEnd);
+            const modelEnd = findModelEnd(normalizedAtomSite.pdbx_PDB_model_num, modelStart);
+            const { atom_site, sourceIndex } = await sortAtomSite(ctx, normalizedAtomSite, modelStart, modelEnd);
             const model = createStandardModel(data, atom_site, sourceIndex, entities, properties, format, models.length > 0 ? models[models.length - 1] : void 0);
             models.push(model);
             modelStart = modelEnd;
@@ -186,8 +209,6 @@ function splitTable<T extends Table<any>>(table: T, col: Column<number>) {
     return ret;
 }
 
-
-
 async function readIntegrative(ctx: RuntimeContext, data: BasicData, properties: CommonProperties, format: ModelFormat) {
     const entities = getEntityData(data);
     // when `atom_site.ihm_model_id` is undefined fall back to `atom_site.pdbx_PDB_model_num`
@@ -208,15 +229,16 @@ async function readIntegrative(ctx: RuntimeContext, data: BasicData, properties:
         for (let i = 0; i < data.ihm_model_list._rowCount; i++) {
             const id = model_id.value(i);
 
+            const normalizedAtomSite = getNormalizeAtomSite(data.atom_site);
             let atom_site, atom_site_sourceIndex;
             if (atom_sites.has(id)) {
                 const e = atom_sites.get(id)!;
                 // need to sort `data.atom_site` as `e.start` and `e.end` are indices into that
-                const { atom_site: sorted, sourceIndex } = await sortAtomSite(ctx, data.atom_site, e.start, e.end);
+                const { atom_site: sorted, sourceIndex } = await sortAtomSite(ctx, normalizedAtomSite, e.start, e.end);
                 atom_site = sorted;
                 atom_site_sourceIndex = sourceIndex;
             } else {
-                atom_site = Table.window(data.atom_site, data.atom_site._schema, 0, 0);
+                atom_site = Table.window(normalizedAtomSite, normalizedAtomSite._schema, 0, 0);
                 atom_site_sourceIndex = Column.ofIntArray([]);
             }
 

+ 3 - 4
src/mol-model-formats/structure/basic/sort.ts

@@ -1,5 +1,5 @@
 /**
- * Copyright (c) 2018-2022 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
  *
  * @author David Sehnal <david.sehnal@gmail.com>
  */
@@ -18,12 +18,11 @@ export type SortedAtomSite = {
 export async function sortAtomSite(ctx: RuntimeContext, atom_site: AtomSite, start: number, end: number): Promise<SortedAtomSite> {
     const indices = createRangeArray(start, end - 1);
 
-    const { label_entity_id, label_asym_id, auth_asym_id, label_seq_id } = atom_site;
-    const asym_id = label_asym_id.isDefined ? label_asym_id : auth_asym_id;
+    const { label_entity_id, label_asym_id, label_seq_id } = atom_site;
     const entityBuckets = makeBuckets(indices, label_entity_id.value);
     if (ctx.shouldUpdate) await ctx.update();
     for (let ei = 0, _eI = entityBuckets.length - 1; ei < _eI; ei++) {
-        const chainBuckets = makeBuckets(indices, asym_id.value, { start: entityBuckets[ei], end: entityBuckets[ei + 1] });
+        const chainBuckets = makeBuckets(indices, label_asym_id.value, { start: entityBuckets[ei], end: entityBuckets[ei + 1] });
         for (let cI = 0, _cI = chainBuckets.length - 1; cI < _cI; cI++) {
             const aI = chainBuckets[cI];
             // are we in HETATM territory?