Browse Source

surroundingLigands query wip

dsehnal 4 years ago
parent
commit
73f6793bd8

+ 40 - 24
src/mol-model/structure/model/properties/utils/residue-set.ts

@@ -11,7 +11,7 @@ export interface ResidueSetEntry {
     label_asym_id: string,
     label_comp_id: string,
     label_seq_id: number,
-    alt_id: string,
+    label_alt_id: string,
     ins_code: string,
     // 1_555 by default
     operator_name?: string
@@ -19,7 +19,7 @@ export interface ResidueSetEntry {
 
 export class ResidueSet {
     private index = new Map<string, Map<number, ResidueSetEntry[]>>();
-    private list: ResidueSetEntry[] = [];
+    private checkOperator: boolean = false;
 
     add(entry: ResidueSetEntry) {
         let root = this.index.get(entry.label_asym_id);
@@ -37,46 +37,55 @@ export class ResidueSet {
         const exists = this._find(entry, entries);
         if (!exists) {
             entries.push(entry);
-            this.list.push(entry);
             return true;
         }
 
         return false;
     }
 
-    private _asym_id = StructureProperties.chain.label_asym_id;
-    private _seq_id = StructureProperties.residue.label_seq_id;
-    private _comp_id = StructureProperties.atom.label_comp_id;
-    private _alt_id = StructureProperties.atom.label_alt_id;
-    private _ins_code = StructureProperties.residue.pdbx_PDB_ins_code;
-    private _op_name = StructureProperties.unit.operator_name;
+    getLabel(entry: ResidueSetEntry) {
+        return `${entry.label_asym_id} ${entry.label_comp_id} ${entry.label_seq_id}:${entry.ins_code}:${entry.label_alt_id}${this.checkOperator ? ' ' + (entry.operator_name ?? '1_555') : ''}`;
+    }
+
+    hasLabelAsymId(asym_id: string) {
+        return this.index.has(asym_id);
+    }
 
     has(loc: StructureElement.Location) {
-        const asym_id = this._asym_id(loc);
-        if (!this.index.has(asym_id)) return false;
+        const asym_id = _asym_id(loc);
+        if (!this.index.has(asym_id)) return;
         const root = this.index.get(asym_id)!;
-        const seq_id = this._seq_id(loc);
-        if (!root.has(seq_id)) return false;
+        const seq_id = _seq_id(loc);
+        if (!root.has(seq_id)) return;
         const entries = root.get(seq_id)!;
 
-        const comp_id = this._comp_id(loc);
-        const alt_id = this._alt_id(loc);
-        const ins_code = this._ins_code(loc);
-        const op_name = this._op_name(loc) ?? '1_555';
+        const comp_id = _comp_id(loc);
+        const alt_id = _alt_id(loc);
+        const ins_code = _ins_code(loc);
+        const op_name = _op_name(loc) ?? '1_555';
 
         for (const e of entries) {
-            if (e.label_comp_id !== comp_id || e.alt_id !== alt_id || e.ins_code !== ins_code) continue;
+            if (e.label_comp_id !== comp_id || e.label_alt_id !== alt_id || e.ins_code !== ins_code) continue;
             if (this.checkOperator && (e.operator_name ?? '1_555') !== op_name) continue;
 
-            return true;
+            return e;
         }
+    }
 
-        return false;
+    static getEntryFromLocation(loc: StructureElement.Location): ResidueSetEntry {
+        return {
+            label_asym_id: _asym_id(loc),
+            label_comp_id: _comp_id(loc),
+            label_seq_id: _seq_id(loc),
+            label_alt_id: _alt_id(loc),
+            ins_code: _ins_code(loc),
+            operator_name: _op_name(loc) ?? '1_555'
+        };
     }
 
     private _find(entry: ResidueSetEntry, xs: ResidueSetEntry[]) {
         for (const e of xs) {
-            if (e.label_comp_id !== entry.label_comp_id || e.alt_id !== entry.alt_id || e.ins_code !== entry.ins_code) continue;
+            if (e.label_comp_id !== entry.label_comp_id || e.label_alt_id !== entry.label_alt_id || e.ins_code !== entry.ins_code) continue;
             if (this.checkOperator && (e.operator_name ?? '1_555') !== (entry.operator_name ?? '1_555')) continue;
 
             return true;
@@ -85,7 +94,14 @@ export class ResidueSet {
         return false;
     }
 
-    constructor(private checkOperator = false) {
-
+    constructor(options?: { checkOperator?: boolean }) {
+        this.checkOperator = options?.checkOperator ?? false;
     }
-}
+}
+
+const _asym_id = StructureProperties.chain.label_asym_id;
+const _seq_id = StructureProperties.residue.label_seq_id;
+const _comp_id = StructureProperties.atom.label_comp_id;
+const _alt_id = StructureProperties.atom.label_alt_id;
+const _ins_code = StructureProperties.residue.pdbx_PDB_ins_code;
+const _op_name = StructureProperties.unit.operator_name;

+ 162 - 23
src/mol-model/structure/query/queries/modifiers.ts

@@ -17,6 +17,9 @@ import { StructureSubsetBuilder } from '../../structure/util/subset-builder';
 import { StructureElement } from '../../structure/element';
 import { MmcifFormat } from '../../../../mol-model-formats/structure/mmcif';
 import { UndirectedGraph } from '../../../../mol-math/graph/undirected-graph';
+import { ResidueSet, ResidueSetEntry } from '../../model/properties/utils/residue-set';
+import { StructureProperties } from '../../structure/properties';
+import { arraySetAdd } from '../../../../mol-util/array';
 
 function getWholeResidues(ctx: QueryContext, source: Structure, structure: Structure) {
     const builder = source.subsetBuilder(true);
@@ -440,7 +443,7 @@ function expandConnected(ctx: QueryContext, structure: Structure) {
 export interface SurroundingLigandsParams {
     query: StructureQuery,
     radius: number,
-    computedExpand: boolean
+    includeWater: boolean
 }
 
 /**
@@ -452,20 +455,141 @@ export function surroundingLigands({ query, radius }: SurroundingLigandsParams):
         const source = StructureSelection.unionStructure(query(ctx));
         const surroundings = getWholeResidues(ctx, ctx.inputStructure, getIncludeSurroundings(ctx, ctx.inputStructure, source, { radius }));
 
-        // find ligand component pivots
-        //   - keep non-polymers & non-waters
+        const prd = getPrdAsymIdx(ctx.inputStructure);
+        const { residues, graph } = getStructConnInfo(ctx.inputStructure);
 
-        // expand ligand components
-        //   - expand PRD chains
-        //   - expand components based on struct_conn
+        const l = StructureElement.Location.create(surroundings);
 
-        // add waters
+        const includedPrdChains = new Map<string, string[]>();
+        const includedComponents = new Set<string>();
 
-        return 0 as any;
+        const componentResidues = new ResidueSet({ checkOperator: true });
+
+        for (const unit of surroundings.units) {
+            if (unit.kind !== Unit.Kind.Atomic) continue;
+
+            l.unit = unit;
+
+            const { elements } = unit;
+            const chainsIt = Segmentation.transientSegments(unit.model.atomicHierarchy.chainAtomSegments, elements);
+            const residuesIt = Segmentation.transientSegments(unit.model.atomicHierarchy.residueAtomSegments, elements);
+
+            while (chainsIt.hasNext) {
+                const chainSegment = chainsIt.move();
+                l.element = elements[chainSegment.start];
+
+                const asym_id = StructureProperties.chain.label_asym_id(l);
+                const op_name = StructureProperties.unit.operator_name(l);
+
+                // check for PRD molecules
+                if (prd.has(asym_id)) {
+                    if (includedPrdChains.has(asym_id)) {
+                        arraySetAdd(includedPrdChains.get(asym_id)!, op_name);
+                    } else {
+                        includedPrdChains.set(asym_id, [op_name]);
+                    }
+                    continue;
+                }
+
+                const entityType = StructureProperties.entity.type(l);
+
+                // test entity and chain
+                if (entityType === 'water' || entityType === 'polymer') continue;
+
+                residuesIt.setSegment(chainSegment);
+                while (residuesIt.hasNext) {
+                    const residueSegment = residuesIt.move();
+                    l.element = elements[residueSegment.start];
+
+                    const connEntry = residues.has(l);
+                    if (connEntry) {
+                        const { pivot, component } = graph.getComponent(residues.getLabel(connEntry));
+
+                        if (includedComponents.has(pivot)) continue;
+
+                        includedComponents.add(pivot);
+
+                        for (const v of component) {
+                            const entry = graph.vertices.get(v)!;
+                            if (!prd.has(entry?.label_asym_id)) {
+                                componentResidues.add(entry);
+                            }
+                        }
+                    } else {
+                        const entry = ResidueSet.getEntryFromLocation(l);
+                        componentResidues.add(entry);
+                    }
+                }
+            }
+
+            ctx.throwIfTimedOut();
+        }
+
+        // assemble the core structure
+
+        const builder = ctx.inputStructure.subsetBuilder(true);
+        for (const unit of ctx.inputStructure.units) {
+            if (unit.kind !== Unit.Kind.Atomic) continue;
+
+            l.unit = unit;
+            const { elements } = unit;
+            const chainsIt = Segmentation.transientSegments(unit.model.atomicHierarchy.chainAtomSegments, elements);
+            const residuesIt = Segmentation.transientSegments(unit.model.atomicHierarchy.residueAtomSegments, elements);
+
+            builder.beginUnit(unit.id);
+
+            while (chainsIt.hasNext) {
+                const chainSegment = chainsIt.move();
+                l.element = elements[chainSegment.start];
+
+                const asym_id = StructureProperties.chain.label_asym_id(l);
+                const op_name = StructureProperties.unit.operator_name(l);
+
+                if (includedPrdChains.has(asym_id) && includedPrdChains.get(asym_id)!.indexOf(op_name) >= 0) {
+                    builder.addElementRange(elements, chainSegment.start, chainSegment.end);
+                    continue;
+                }
+
+                if (!componentResidues.hasLabelAsymId(asym_id)) {
+                    continue;
+                }
+
+                residuesIt.setSegment(chainSegment);
+                while (residuesIt.hasNext) {
+                    const residueSegment = residuesIt.move();
+                    l.element = elements[residueSegment.start];
+
+                    if (!componentResidues.has(l)) continue;
+                    builder.addElementRange(elements, residueSegment.start, residueSegment.end);
+                }
+            }
+            builder.commitUnit();
+
+            ctx.throwIfTimedOut();
+        }
+
+        const components = builder.getStructure();
+
+        // const builder = new StructureUniqueSubsetBuilder(source);
+        // const lookup = source.lookup3d;
+        // const r = params.radius;
+
+        // for (const unit of structure.units) {
+        //     const { x, y, z } = unit.conformation;
+        //     const elements = unit.elements;
+        //     for (let i = 0, _i = elements.length; i < _i; i++) {
+        //         const e = elements[i];
+        //         lookup.findIntoBuilder(x(e), y(e), z(e), r, builder);
+        //     }
+
+        //     ctx.throwIfTimedOut();
+        // }
+
+        return StructureSelection.Sequence(ctx.inputStructure, [components]);
     };
 }
 
-function getPrdAsymcIdx(structure: Structure) {
+function getPrdAsymIdx(structure: Structure) {
     const model = structure.models[0];
     const ids = new Set<string>();
     if (!MmcifFormat.is(model.sourceData)) return ids;
@@ -476,35 +600,50 @@ function getPrdAsymcIdx(structure: Structure) {
     return ids;
 }
 
-type SurroudingResidue = [asym_id:string, comp_id:string, seq_id:number, alt_id: string, ins_code: string, symmetry:string]
-
-function getSurroundingResidueLabel(r: SurroudingResidue) {
-    return `${r[0]} ${r[1]} ${r[2]} ${r[3]} ${r[4]} ${r[5]}`;
-}
-
-function structConnGraph(structure: Structure): UndirectedGraph<string, SurroudingResidue> {
+function getStructConnInfo(structure: Structure) {
     const model = structure.models[0];
-    const graph = new UndirectedGraph<string, SurroudingResidue>();
-    if (!MmcifFormat.is(model.sourceData)) return graph;
+    const graph = new UndirectedGraph<string, ResidueSetEntry>();
+    const residues = new ResidueSet({ checkOperator: true });
+
+    if (!MmcifFormat.is(model.sourceData)) return { graph, residues };
 
     const struct_conn = model.sourceData.data.db.struct_conn;
     const { conn_type_id } = struct_conn;
     const { ptnr1_label_asym_id, ptnr1_label_comp_id, ptnr1_label_seq_id, ptnr1_symmetry, pdbx_ptnr1_label_alt_id, pdbx_ptnr1_PDB_ins_code } = struct_conn;
     const { ptnr2_label_asym_id, ptnr2_label_comp_id, ptnr2_label_seq_id, ptnr2_symmetry, pdbx_ptnr2_label_alt_id, pdbx_ptnr2_PDB_ins_code } = struct_conn;
+
     for (let i = 0; i < struct_conn._rowCount; i++) {
         const bondType = conn_type_id.value(i);
         if (bondType !== 'covale' && bondType !== 'metalc') continue;
 
-        const a: SurroudingResidue = [ptnr1_label_asym_id.value(i), ptnr1_label_comp_id.value(i), ptnr1_label_seq_id.value(i), pdbx_ptnr1_label_alt_id.value(i), pdbx_ptnr1_PDB_ins_code.value(i), ptnr1_symmetry.value(i) ?? '1_555'];
-        const b: SurroudingResidue = [ptnr2_label_asym_id.value(i), ptnr2_label_comp_id.value(i), ptnr2_label_seq_id.value(i), pdbx_ptnr2_label_alt_id.value(i), pdbx_ptnr2_PDB_ins_code.value(i), ptnr2_symmetry.value(i) ?? '1_555'];
-
-        const la = getSurroundingResidueLabel(a), lb = getSurroundingResidueLabel(b);
+        const a: ResidueSetEntry = {
+            label_asym_id: ptnr1_label_asym_id.value(i),
+            label_comp_id: ptnr1_label_comp_id.value(i),
+            label_seq_id: ptnr1_label_seq_id.value(i),
+            label_alt_id: pdbx_ptnr1_label_alt_id.value(i),
+            ins_code: pdbx_ptnr1_PDB_ins_code.value(i),
+            operator_name: ptnr1_symmetry.value(i) ?? '1_555'
+        };
+
+        const b: ResidueSetEntry = {
+            label_asym_id: ptnr2_label_asym_id.value(i),
+            label_comp_id: ptnr2_label_comp_id.value(i),
+            label_seq_id: ptnr2_label_seq_id.value(i),
+            label_alt_id: pdbx_ptnr2_label_alt_id.value(i),
+            ins_code: pdbx_ptnr2_PDB_ins_code.value(i),
+            operator_name: ptnr2_symmetry.value(i) ?? '1_555'
+        };
+
+        residues.add(a);
+        residues.add(b);
+
+        const la = residues.getLabel(a), lb = residues.getLabel(b);
 
         graph.addVertex(la, a);
         graph.addVertex(lb, b);
         graph.addEdge(la, lb);
     }
-    return graph;
+    return { graph, residues };
 }
 
 // TODO: unionBy (skip this one?), cluster

+ 13 - 0
src/mol-plugin-state/helpers/structure-selection-query.ts

@@ -425,6 +425,18 @@ const surroundings = StructureSelectionQuery('Surrounding Residues (5 \u212B) of
     referencesCurrent: true
 });
 
+const surroundingLigands = StructureSelectionQuery('Surrounding Ligands (5 \u212B) of Selection', MS.struct.modifier.union([
+    MS.struct.modifier.surroundingLigands({
+        0: MS.internal.generator.current(),
+        radius: 5,
+        'include-water': true
+    })
+]), {
+    description: 'Select ligand components within 5 \u212B of the current selection.',
+    category: StructureSelectionCategory.Manipulate,
+    referencesCurrent: true
+});
+
 const complement = StructureSelectionQuery('Inverse / Complement of Selection', MS.struct.modifier.union([
     MS.struct.modifier.exceptBy({
         0: MS.struct.generator.all(),
@@ -645,6 +657,7 @@ export const StructureSelectionQueries = {
     ring,
     aromaticRing,
     surroundings,
+    surroundingLigands,
     complement,
     covalentlyBonded,
     covalentlyOrMetallicBonded,

+ 6 - 0
src/mol-script/language/symbol-table/structure-query.ts

@@ -155,6 +155,12 @@ const modifier = {
         'as-whole-residues': Argument(Type.Bool, { isOptional: true })
     }), Types.ElementSelectionQuery, 'For each atom set in the selection, include all surrouding atoms/residues that are within the specified radius.'),
 
+    surroundingLigands: symbol(Arguments.Dictionary({
+        0: Argument(Types.ElementSelectionQuery),
+        radius: Argument(Type.Num),
+        'include-water': Argument(Type.Bool, { isOptional: true, defaultValue: true })
+    }), Types.ElementSelectionQuery, 'Find all ligands components around the source query.'),
+
     includeConnected: symbol(Arguments.Dictionary({
         0: Argument(Types.ElementSelectionQuery),
         'bond-test': Argument(Type.Bool, { isOptional: true, defaultValue: 'true for covalent bonds' as any }),

+ 7 - 0
src/mol-script/runtime/query/table.ts

@@ -258,6 +258,13 @@ const symbols = [
             elementRadius: xs['atom-radius']
         })(ctx);
     }),
+    D(MolScript.structureQuery.modifier.surroundingLigands, function structureQuery_modifier_includeSurroundingLigands(ctx, xs) {
+        return Queries.modifiers.surroundingLigands({
+            query: xs[0] as any,
+            radius: xs['radius'](ctx),
+            includeWater: !!(xs['include-water'] && xs['include-water'](ctx)),
+        })(ctx);
+    }),
     D(MolScript.structureQuery.modifier.wholeResidues, function structureQuery_modifier_wholeResidues(ctx, xs) { return Queries.modifiers.wholeResidues(xs[0] as any)(ctx); }),
     D(MolScript.structureQuery.modifier.union, function structureQuery_modifier_union(ctx, xs) { return Queries.modifiers.union(xs[0] as any)(ctx); }),
     D(MolScript.structureQuery.modifier.expandProperty, function structureQuery_modifier_expandProperty(ctx, xs) { return Queries.modifiers.expandProperty(xs[0] as any, xs['property'])(ctx); }),

+ 1 - 0
src/mol-script/script/mol-script/symbols.ts

@@ -138,6 +138,7 @@ export const SymbolTable = [
             Alias(MolScript.structureQuery.modifier.union, 'sel.atom.union'),
             Alias(MolScript.structureQuery.modifier.cluster, 'sel.atom.cluster'),
             Alias(MolScript.structureQuery.modifier.includeSurroundings, 'sel.atom.include-surroundings'),
+            Alias(MolScript.structureQuery.modifier.surroundingLigands, 'sel.atom.surrounding-ligands'),
             Alias(MolScript.structureQuery.modifier.includeConnected, 'sel.atom.include-connected'),
             Alias(MolScript.structureQuery.modifier.expandProperty, 'sel.atom.expand-property'),