Browse Source

sort mmCIF atom_site

David Sehnal 6 years ago
parent
commit
a8d4302839

+ 10 - 2
src/mol-data/util/_spec/buckets.spec.ts

@@ -18,16 +18,24 @@ describe('buckets', () => {
     it('full range', () => {
         const xs = [1, 1, 2, 2, 3, 1];
         const range = createRangeArray(0, xs.length - 1);
-        const bs = makeBuckets(range, i => xs[i]);
+        const bs = makeBuckets(range, i => xs[i], false);
 
         expect(reorder(range, xs)).toEqual([1, 1, 1, 2, 2, 3]);
         expect(Array.from(bs)).toEqual([0, 3, 5, 6]);
     });
 
+    it('sort', () => {
+        const xs = [3, 1, 2, 1, 2, 3];
+        const range = createRangeArray(0, xs.length - 1);
+        makeBuckets(range, i => xs[i], true);
+
+        expect(reorder(range, xs)).toEqual([1, 1, 2, 2, 3, 3]);
+    });
+
     it('subrange', () => {
         const xs = [2, 1, 2, 1, 2, 3, 1];
         const range = createRangeArray(0, xs.length - 1);
-        const bs = makeBuckets(range, i => xs[i], 1, 5);
+        const bs = makeBuckets(range, i => xs[i], false, 1, 5);
 
         expect(reorder(range, xs)).toEqual([2, 1, 1, 2, 2, 3, 1]);
         expect(Array.from(bs)).toEqual([1, 3, 5]);

+ 20 - 5
src/mol-data/util/buckets.ts

@@ -5,11 +5,12 @@
  */
 
 type Bucket = {
+    key: any,
     count: number,
     offset: number
 }
 
-function _makeBuckets(indices: Helpers.ArrayLike<number>, getKey: (i: number) => any, start: number, end: number) {
+function _makeBuckets(indices: Helpers.ArrayLike<number>, getKey: (i: number) => any, sort: boolean, start: number, end: number) {
     const buckets = new Map<any, Bucket>();
     const bucketList: Bucket[] = [];
 
@@ -21,7 +22,7 @@ function _makeBuckets(indices: Helpers.ArrayLike<number>, getKey: (i: number) =>
             buckets.get(key)!.count++;
             if (prevKey !== key) isBucketed = false;
         } else {
-            const bucket: Bucket = { count: 1, offset: i };
+            const bucket: Bucket = { key, count: 1, offset: i };
             buckets.set(key, bucket);
             bucketList[bucketList.length] = bucket;
         }
@@ -31,11 +32,25 @@ function _makeBuckets(indices: Helpers.ArrayLike<number>, getKey: (i: number) =>
     const bucketOffsets = new Int32Array(bucketList.length + 1);
     bucketOffsets[bucketList.length] = end;
 
-    if (isBucketed) {
+    let sorted = true;
+    if (sort) {
+        for (let i = 1, _i = bucketList.length; i < _i; i++) {
+            if (bucketList[i - 1].key > bucketList[i].key) {
+                sorted = false;
+                break;
+            }
+        }
+    }
+
+    if (isBucketed && sorted) {
         for (let i = 0; i < bucketList.length; i++) bucketOffsets[i] = bucketList[i].offset;
         return bucketOffsets;
     }
 
+    if (sort && !sorted) {
+        bucketList.sort((x, y) => x.key <= y.key ? -1 : 1);
+    }
+
     let offset = 0;
     for (let i = 0; i < bucketList.length; i++) {
         const b = bucketList[i];
@@ -64,11 +79,11 @@ function _makeBuckets(indices: Helpers.ArrayLike<number>, getKey: (i: number) =>
  * Reorders indices in [start, end) so that the same keys are next to each other.
  * If sort is true, the buckets are additionally ordered by ascending key.
  * Returns the offsets of buckets, so that [offsets[i], offsets[i + 1]) determines the range of the i-th bucket.
  */
-export function makeBuckets<T>(indices: Helpers.ArrayLike<number>, getKey: (i: number) => string | number, start?: number, end?: number): ArrayLike<number> {
+export function makeBuckets<T>(indices: Helpers.ArrayLike<number>, getKey: (i: number) => string | number, sort: boolean, start?: number, end?: number): ArrayLike<number> {
     const s = start || 0;
     const e = typeof end === 'undefined' ? indices.length : end;
 
     if (e - s <= 0) throw new Error('Can only bucket non-empty collections.');
 
-    return _makeBuckets(indices, getKey, s, e);
+    return _makeBuckets(indices, getKey, sort, s, e);
 }

+ 3 - 0
src/mol-model/structure/model/formats/mmcif.ts

@@ -23,6 +23,7 @@ import { getSequence } from './mmcif/sequence';
 import mmCIF_Format = Format.mmCIF
 import { Task } from 'mol-task';
 import { getSecondaryStructureMmCif } from './mmcif/secondary-structure';
+import { sortAtomSite } from './mmcif/sort';
 
 function findModelBounds({ data }: mmCIF_Format, startIndex: number) {
     const num = data.atom_site.pdbx_PDB_model_num;
@@ -196,6 +197,8 @@ function buildModels(format: mmCIF_Format): Task<ReadonlyArray<Model>> {
         let modelStart = 0;
         while (modelStart < atomCount) {
             const bounds = findModelBounds(format, modelStart);
+
+            // const indices = await sortAtomSite(ctx, format.data.atom_site, 0, Interval.end(bounds));
             const model = createModel(format, bounds, models.length > 0 ? models[models.length - 1] : void 0);
             models.push(model);
             modelStart = Interval.end(bounds);

+ 32 - 0
src/mol-model/structure/model/formats/mmcif/sort.ts

@@ -0,0 +1,32 @@
+/**
+ * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import { mmCIF_Database } from 'mol-io/reader/cif/schema/mmcif';
+import { createRangeArray, makeBuckets } from 'mol-data/util';
+import { Column } from 'mol-data/db';
+import { RuntimeContext } from 'mol-task';
+
+/**
+ * Computes an ordering of atom_site rows in which rows are grouped by
+ * label_entity_id, then by label_asym_id inside each entity, and finally
+ * grouped and sorted by ascending label_seq_id inside each chain.
+ *
+ * Chains whose first row has no present label_seq_id (HETATM-like records)
+ * keep the order produced by the chain bucketing step.
+ *
+ * @param ctx runtime context; it is given a chance to update between bucketing steps
+ * @param atom_site the mmCIF atom_site category
+ * @param start first atom_site row to include
+ * @param end one past the last atom_site row to include
+ * @returns the array created by createRangeArray(start, end - 1), reordered in place
+ */
+export async function sortAtomSite(ctx: RuntimeContext, atom_site: mmCIF_Database['atom_site'], start: number, end: number) {
+    const indices = createRangeArray(start, end - 1);
+
+    const { label_entity_id, label_asym_id, label_seq_id } = atom_site;
+    const entityBuckets = makeBuckets(indices, label_entity_id.value, false);
+    if (ctx.shouldUpdate) await ctx.update();
+    for (let ei = 0, _eI = entityBuckets.length - 1; ei < _eI; ei++) {
+        const chainBuckets = makeBuckets(indices, label_asym_id.value, false, entityBuckets[ei], entityBuckets[ei + 1]);
+        for (let cI = 0, _cI = chainBuckets.length - 1; cI < _cI; cI++) {
+            // Bucket offsets are positions inside `indices`, not atom_site rows.
+            const aI = chainBuckets[cI];
+            // are we in HETATM territory?
+            // Translate the position to the actual row before querying the column;
+            // the two differ when start > 0 or after indices have been reordered.
+            if (label_seq_id.valueKind(indices[aI]) !== Column.ValueKind.Present) continue;
+
+            makeBuckets(indices, label_seq_id.value, true, aI, chainBuckets[cI + 1]);
+            if (ctx.shouldUpdate) await ctx.update();
+        }
+        if (ctx.shouldUpdate) await ctx.update();
+    }
+
+    return indices;
+}