Explorar el Código

mol-model: filter sequence-related categories in CIF export by present entity ids

David Sehnal hace 6 años
padre
commit
2fa188dcfe

+ 7 - 19
src/mol-model/structure/export/categories/misc.ts

@@ -5,21 +5,18 @@
  */
 
 import { Column } from 'mol-data/db';
-import { mmCIF_Database, mmCIF_Schema } from 'mol-io/reader/cif/schema/mmcif';
 import { CifWriter } from 'mol-io/writer/cif';
-import { unionMany } from 'mol-util/set';
-import { Model } from '../../model';
 import { CifExportContext } from '../mmcif';
+import { getModelMmCifCategory, getUniqueResidueNamesFromStructures } from './utils';
 import CifCategory = CifWriter.Category
-import { Structure } from '../../structure';
 
 export const _chem_comp: CifCategory<CifExportContext> = {
     name: 'chem_comp',
+    // Exports only the source chem_comp rows whose id is a residue name present in the
+    // exported structures; the name set is memoized on cache.uniqueResidueNames.
-    instance({ structures, cache }) {
-        const chem_comp = getCifCategory(structures[0].model, 'chem_comp');
+    instance({ firstModel, structures, cache }) {
+        const chem_comp = getModelMmCifCategory(firstModel, 'chem_comp');
         if (!chem_comp) return CifCategory.Empty;
         const { id } = chem_comp;
-        const names = cache.uniqueResidueNames || (cache.uniqueResidueNames = getUniqueResidueNames(structures));
+        const names = cache.uniqueResidueNames || (cache.uniqueResidueNames = getUniqueResidueNamesFromStructures(structures));
         const indices = Column.indicesOf(id, id => names.has(id));
         return CifCategory.ofTable(chem_comp, indices);
     }
@@ -27,21 +24,12 @@ export const _chem_comp: CifCategory<CifExportContext> = {
 
 export const _pdbx_chem_comp_identifier: CifCategory<CifExportContext> = {
     name: 'pdbx_chem_comp_identifier',
+    // Exports only the source pdbx_chem_comp_identifier rows whose comp_id is a residue name
+    // present in the exported structures; the name set is memoized on cache.uniqueResidueNames.
-    instance({ structures, cache }) {
-        const pdbx_chem_comp_identifier = getCifCategory(structures[0].model, 'pdbx_chem_comp_identifier');
+    instance({ firstModel, structures, cache }) {
+        const pdbx_chem_comp_identifier = getModelMmCifCategory(firstModel, 'pdbx_chem_comp_identifier');
         if (!pdbx_chem_comp_identifier) return CifCategory.Empty;
         const { comp_id } = pdbx_chem_comp_identifier;
-        const names = cache.uniqueResidueNames || (cache.uniqueResidueNames = getUniqueResidueNames(structures));
+        const names = cache.uniqueResidueNames || (cache.uniqueResidueNames = getUniqueResidueNamesFromStructures(structures));
         const indices = Column.indicesOf(comp_id, id => names.has(id));
         return CifCategory.ofTable(pdbx_chem_comp_identifier, indices);
     }
-}
-
-function getCifCategory<K extends keyof mmCIF_Schema>(model: Model, name: K): mmCIF_Database[K] | undefined {
-    if (model.sourceData.kind !== 'mmCIF') return;
-    return model.sourceData.data[name];
-}
-
-function getUniqueResidueNames(structures: Structure[]) {
-    return unionMany(structures.map(s => s.uniqueResidueNames));
 }

+ 36 - 0
src/mol-model/structure/export/categories/sequence.ts

@@ -0,0 +1,36 @@
+/**
+ * Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import { Column } from 'mol-data/db';
+import { CifWriter } from 'mol-io/writer/cif';
+import { Structure } from '../../structure';
+import { CifExportContext } from '../mmcif';
+import { getModelMmCifCategory, getUniqueEntityIdsFromStructures } from './utils';
+import CifCategory = CifWriter.Category
+
+/**
+ * Builds the export category descriptor for one sequence-related mmCIF table.
+ * The rows are resolved lazily, per export context, by getCategoryInstance.
+ */
+function createCategory(categoryName: 'struct_asym' | 'entity_poly' | 'entity_poly_seq'): CifCategory<CifExportContext> {
+    return {
+        name: categoryName,
+        instance: ctx => getCategoryInstance(ctx.structures, categoryName, ctx.cache)
+    };
+}
+
+export const _struct_asym: CifCategory<CifExportContext> = createCategory('struct_asym');
+export const _entity_poly: CifCategory<CifExportContext> = createCategory('entity_poly');
+export const _entity_poly_seq: CifCategory<CifExportContext> = createCategory('entity_poly_seq');
+
+/**
+ * Produces the rows of the given source mmCIF category whose entity_id belongs to an
+ * entity present in the exported structures.
+ *
+ * Returns CifCategory.Empty when there are no structures or when the first structure's
+ * model does not provide the category. The set of present entity ids is computed once
+ * and memoized on `cache.uniqueEntityIds`.
+ */
+function getCategoryInstance(structures: Structure[], categoryName: 'struct_asym' | 'entity_poly' | 'entity_poly_seq', cache: any) {
+    // Guard: structures[0] below would throw on an empty export.
+    if (structures.length === 0) return CifCategory.Empty;
+
+    const category = getModelMmCifCategory(structures[0].model, categoryName);
+    if (!category) return CifCategory.Empty;
+
+    const { entity_id } = category;
+    const entityIds: Set<string> = cache.uniqueEntityIds || (cache.uniqueEntityIds = getUniqueEntityIdsFromStructures(structures));
+    const indices = Column.indicesOf(entity_id, id => entityIds.has(id));
+    return CifCategory.ofTable(category, indices);
+}
+

+ 42 - 0
src/mol-model/structure/export/categories/utils.ts

@@ -0,0 +1,42 @@
+/**
+ * Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import { mmCIF_Database, mmCIF_Schema } from 'mol-io/reader/cif/schema/mmcif';
+import { unionMany } from 'mol-util/set';
+import { Model } from '../../model';
+import { Structure } from '../../structure';
+import { EntityIndex } from '../../model/indexing';
+import { UniqueArray } from 'mol-data/generic';
+import { sortArray } from 'mol-data/util';
+
+/**
+ * Looks up a category by name in the model's source data.
+ * Yields undefined when the source data is not of kind 'mmCIF'.
+ */
+export function getModelMmCifCategory<K extends keyof mmCIF_Schema>(model: Model, name: K): mmCIF_Database[K] | undefined {
+    const { sourceData } = model;
+    return sourceData.kind === 'mmCIF' ? sourceData.data[name] : undefined;
+}
+
+/** Merges the uniqueResidueNames sets of all given structures into a single set. */
+export function getUniqueResidueNamesFromStructures(structures: Structure[]) {
+    const perStructure = structures.map(structure => structure.uniqueResidueNames);
+    return unionMany(perStructure);
+}
+
+/**
+ * Collects the ids of all entities referenced by the given structures.
+ * The ids are read from the entity table of the first structure's model.
+ */
+export function getUniqueEntityIdsFromStructures(structures: Structure[]): Set<string> {
+    const ids = new Set<string>();
+    if (structures.length === 0) return ids;
+
+    const idColumn = structures[0].model.entities.data.id;
+    for (const index of getUniqueEntityIndicesFromStructures(structures)) {
+        ids.add(idColumn.value(index));
+    }
+    return ids;
+}
+
+/**
+ * Collects the entity indices referenced by the given structures without duplicates.
+ * A single structure's entityIndices are returned as they are; for several structures
+ * the merged indices are passed through sortArray before being returned.
+ */
+export function getUniqueEntityIndicesFromStructures(structures: Structure[]): ReadonlyArray<EntityIndex> {
+    switch (structures.length) {
+        case 0: return [];
+        case 1: return structures[0].entityIndices;
+    }
+
+    const unique = UniqueArray.create<EntityIndex, EntityIndex>();
+    for (const structure of structures) {
+        for (const entityIndex of structure.entityIndices) {
+            UniqueArray.add(unique, entityIndex, entityIndex);
+        }
+    }
+    sortArray(unique.array);
+    return unique.array;
+}

+ 6 - 4
src/mol-model/structure/export/mmcif.ts

@@ -14,6 +14,8 @@ import { _struct_conf, _struct_sheet_range } from './categories/secondary-struct
 import { _pdbx_struct_mod_residue } from './categories/modified-residues';
 import { _chem_comp, _pdbx_chem_comp_identifier } from './categories/misc';
 import { Model } from '../model';
+import { getUniqueEntityIndicesFromStructures } from './categories/utils';
+import { _struct_asym, _entity_poly, _entity_poly_seq } from './categories/sequence';
 
 export interface CifExportContext {
     structures: Structure[],
@@ -48,7 +50,7 @@ function copy_mmCif_category(name: keyof mmCIF_Schema): CifCategory<CifExportCon
 const _entity: CifCategory<CifExportContext> = {
     name: 'entity',
     instance({ structures }) {
+        // Gather the entity indices of every exported structure, not just the first one.
+        // NOTE(review): the rows are still read from the first structure's model - presumably
+        // all exported structures share that model's entity table; confirm against callers.
-        const indices = structures[0].entityIndices;
+        const indices = getUniqueEntityIndicesFromStructures(structures);
         return CifCategory.ofTable(structures[0].model.entities.data, indices);
     }
 }
@@ -73,9 +75,9 @@ const Categories = [
     _struct_sheet_range,
 
     // Sequence
-    copy_mmCif_category('struct_asym'), // TODO: filter only present entities?
-    copy_mmCif_category('entity_poly'), // TODO: filter only present entities?
-    copy_mmCif_category('entity_poly_seq'), // TODO: filter only present entities?
+    _struct_asym,
+    _entity_poly,
+    _entity_poly_seq,
 
     // Branch
     copy_mmCif_category('pdbx_entity_branch'),

+ 9 - 0
src/mol-util/set.ts

@@ -31,6 +31,15 @@ export function unionMany<T>(sets: Set<T>[]) {
     return union;
 }
 
+/** Create set containing every element that occurs in at least one of the given arrays. */
+export function unionManyArrays<T>(arrays: T[][]) {
+    const union = new Set<T>();
+    for (const array of arrays) {
+        for (const elem of array) union.add(elem);
+    }
+    return union;
+}
+
 /** Create set containing elements of set a that are also in set b. */
 export function intersection<T>(setA: Set<T>, setB: Set<T>): Set<T> {
     const intersection = new Set();