Bladeren bron

Merge pull request #141 from molstar/surrounding-ligands

Surrounding Ligands query
David Sehnal 4 jaren geleden
bovenliggende
commit
7f355ae501

+ 107 - 0
src/mol-model/structure/model/properties/utils/residue-set.ts

@@ -0,0 +1,107 @@
+/**
+ * Copyright (c) 2021 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import { StructureElement } from '../../../structure/element';
+import { StructureProperties } from '../../../structure/properties';
+
+export interface ResidueSetEntry {
+    label_asym_id: string,
+    label_comp_id: string,
+    label_seq_id: number,
+    label_alt_id: string,
+    ins_code: string,
+    // 1_555 by default
+    operator_name?: string
+}
+
+export class ResidueSet {
+    private index = new Map<string, Map<number, ResidueSetEntry[]>>();
+    private checkOperator: boolean = false;
+
+    add(entry: ResidueSetEntry) {
+        let root = this.index.get(entry.label_asym_id);
+        if (!root) {
+            root = new Map();
+            this.index.set(entry.label_asym_id, root);
+        }
+
+        let entries = root.get(entry.label_seq_id);
+        if (!entries) {
+            entries = [];
+            root.set(entry.label_seq_id, entries);
+        }
+
+        const exists = this._find(entry, entries);
+        if (!exists) {
+            entries.push(entry);
+            return true;
+        }
+
+        return false;
+    }
+
+    hasLabelAsymId(asym_id: string) {
+        return this.index.has(asym_id);
+    }
+
+    has(loc: StructureElement.Location) {
+        const asym_id = _asym_id(loc);
+        if (!this.index.has(asym_id)) return;
+        const root = this.index.get(asym_id)!;
+        const seq_id = _seq_id(loc);
+        if (!root.has(seq_id)) return;
+        const entries = root.get(seq_id)!;
+
+        const comp_id = _comp_id(loc);
+        const alt_id = _alt_id(loc);
+        const ins_code = _ins_code(loc);
+        const op_name = _op_name(loc) ?? '1_555';
+
+        for (const e of entries) {
+            if (e.label_comp_id !== comp_id || e.label_alt_id !== alt_id || e.ins_code !== ins_code) continue;
+            if (this.checkOperator && (e.operator_name ?? '1_555') !== op_name) continue;
+
+            return e;
+        }
+    }
+
+    static getLabel(entry: ResidueSetEntry, checkOperator = false) {
+        return `${entry.label_asym_id} ${entry.label_comp_id} ${entry.label_seq_id}:${entry.ins_code}:${entry.label_alt_id}${checkOperator ? ' ' + (entry.operator_name ?? '1_555') : ''}`;
+    }
+
+    static getEntryFromLocation(loc: StructureElement.Location): ResidueSetEntry {
+        return {
+            label_asym_id: _asym_id(loc),
+            label_comp_id: _comp_id(loc),
+            label_seq_id: _seq_id(loc),
+            label_alt_id: _alt_id(loc),
+            ins_code: _ins_code(loc),
+            operator_name: _op_name(loc) ?? '1_555'
+        };
+    }
+
+    private _find(entry: ResidueSetEntry, xs: ResidueSetEntry[]) {
+        for (const e of xs) {
+            if (e.label_comp_id !== entry.label_comp_id || e.label_alt_id !== entry.label_alt_id || e.ins_code !== entry.ins_code) continue;
+            if (this.checkOperator && (e.operator_name ?? '1_555') !== (entry.operator_name ?? '1_555')) continue;
+
+            return true;
+        }
+
+        return false;
+    }
+
+    constructor(options?: { checkOperator?: boolean }) {
+        this.checkOperator = options?.checkOperator ?? false;
+    }
+}
+
+const _asym_id = StructureProperties.chain.label_asym_id;
+const _seq_id = StructureProperties.residue.label_seq_id;
+const _comp_id = StructureProperties.atom.label_comp_id;
+const _alt_id = StructureProperties.atom.label_alt_id;
+const _ins_code = StructureProperties.residue.pdbx_PDB_ins_code;
+const _op_name = StructureProperties.unit.operator_name;

+ 253 - 1
src/mol-model/structure/query/queries/modifiers.ts

@@ -11,10 +11,14 @@ import { StructureSelection } from '../selection';
 import { UniqueStructuresBuilder } from '../utils/builders';
 import { StructureUniqueSubsetBuilder } from '../../structure/util/unique-subset-builder';
 import { QueryContext, QueryFn } from '../context';
-import { structureIntersect, structureSubtract } from '../utils/structure-set';
+import { structureIntersect, structureSubtract, structureUnion } from '../utils/structure-set';
 import { UniqueArray } from '../../../../mol-data/generic';
 import { StructureSubsetBuilder } from '../../structure/util/subset-builder';
 import { StructureElement } from '../../structure/element';
+import { MmcifFormat } from '../../../../mol-model-formats/structure/mmcif';
+import { ResidueSet, ResidueSetEntry } from '../../model/properties/utils/residue-set';
+import { StructureProperties } from '../../structure/properties';
+import { arraySetAdd } from '../../../../mol-util/array';
 
 function getWholeResidues(ctx: QueryContext, source: Structure, structure: Structure) {
     const builder = source.subsetBuilder(true);
@@ -435,4 +439,252 @@ function expandConnected(ctx: QueryContext, structure: Structure) {
     return builder.getStructure();
 }
 
+export interface SurroundingLigandsParams {
+    query: StructureQuery,
+    radius: number,
+    includeWater: boolean
+}
+
+/**
+ * Includes expanded surrounding ligands based on radius from the source, struct_conn entries & pdbx_molecule entries.
+ */
+export function surroundingLigands({ query, radius, includeWater }: SurroundingLigandsParams): StructureQuery {
+    return function query_surroundingLigands(ctx) {
+
+        const inner = StructureSelection.unionStructure(query(ctx));
+        const surroundings = getWholeResidues(ctx, ctx.inputStructure, getIncludeSurroundings(ctx, ctx.inputStructure, inner, { radius }));
+
+        const prd = getPrdAsymIdx(ctx.inputStructure);
+        const graph = getStructConnInfo(ctx.inputStructure);
+
+        const l = StructureElement.Location.create(surroundings);
+
+        const includedPrdChains = new Map<string, string[]>();
+
+        const componentResidues = new ResidueSet({ checkOperator: true });
+
+        for (const unit of surroundings.units) {
+            if (unit.kind !== Unit.Kind.Atomic) continue;
+
+            l.unit = unit;
+
+            const { elements } = unit;
+            const chainsIt = Segmentation.transientSegments(unit.model.atomicHierarchy.chainAtomSegments, elements);
+            const residuesIt = Segmentation.transientSegments(unit.model.atomicHierarchy.residueAtomSegments, elements);
+
+            while (chainsIt.hasNext) {
+                const chainSegment = chainsIt.move();
+                l.element = elements[chainSegment.start];
+
+                const asym_id = StructureProperties.chain.label_asym_id(l);
+                const op_name = StructureProperties.unit.operator_name(l);
+
+                // check for PRD molecules
+                if (prd.has(asym_id)) {
+                    if (includedPrdChains.has(asym_id)) {
+                        arraySetAdd(includedPrdChains.get(asym_id)!, op_name);
+                    } else {
+                        includedPrdChains.set(asym_id, [op_name]);
+                    }
+                    continue;
+                }
+
+                const entityType = StructureProperties.entity.type(l);
+
+                // test entity and chain
+                if (entityType === 'water' || entityType === 'polymer') continue;
+
+                residuesIt.setSegment(chainSegment);
+                while (residuesIt.hasNext) {
+                    const residueSegment = residuesIt.move();
+                    l.element = elements[residueSegment.start];
+                    graph.addComponent(ResidueSet.getEntryFromLocation(l), componentResidues);
+                }
+            }
+
+            ctx.throwIfTimedOut();
+        }
+
+        // assemble the core structure
+
+        const builder = ctx.inputStructure.subsetBuilder(true);
+        for (const unit of ctx.inputStructure.units) {
+            if (unit.kind !== Unit.Kind.Atomic) continue;
+
+            l.unit = unit;
+            const { elements } = unit;
+            const chainsIt = Segmentation.transientSegments(unit.model.atomicHierarchy.chainAtomSegments, elements);
+            const residuesIt = Segmentation.transientSegments(unit.model.atomicHierarchy.residueAtomSegments, elements);
+
+            builder.beginUnit(unit.id);
+
+            while (chainsIt.hasNext) {
+                const chainSegment = chainsIt.move();
+                l.element = elements[chainSegment.start];
+
+                const asym_id = StructureProperties.chain.label_asym_id(l);
+                const op_name = StructureProperties.unit.operator_name(l);
+
+                if (includedPrdChains.has(asym_id) && includedPrdChains.get(asym_id)!.indexOf(op_name) >= 0) {
+                    builder.addElementRange(elements, chainSegment.start, chainSegment.end);
+                    continue;
+                }
+
+                if (!componentResidues.hasLabelAsymId(asym_id)) {
+                    continue;
+                }
+
+                residuesIt.setSegment(chainSegment);
+                while (residuesIt.hasNext) {
+                    const residueSegment = residuesIt.move();
+                    l.element = elements[residueSegment.start];
+
+                    if (!componentResidues.has(l)) continue;
+                    builder.addElementRange(elements, residueSegment.start, residueSegment.end);
+                }
+            }
+            builder.commitUnit();
+
+            ctx.throwIfTimedOut();
+        }
+
+        const components = structureUnion(ctx.inputStructure, [builder.getStructure(), inner]);
+
+        // add water
+        if (includeWater) {
+            const finalBuilder = new StructureUniqueSubsetBuilder(ctx.inputStructure);
+            const lookup = ctx.inputStructure.lookup3d;
+            for (const unit of components.units) {
+                const { x, y, z } = unit.conformation;
+                const elements = unit.elements;
+                for (let i = 0, _i = elements.length; i < _i; i++) {
+                    const e = elements[i];
+                    lookup.findIntoBuilderIf(x(e), y(e), z(e), radius, finalBuilder, testIsWater);
+                    finalBuilder.addToUnit(unit.id, e);
+                }
+
+                ctx.throwIfTimedOut();
+            }
+
+            return StructureSelection.Sequence(ctx.inputStructure, [finalBuilder.getStructure()]);
+        } else {
+            return StructureSelection.Sequence(ctx.inputStructure, [components]);
+        }
+    };
+}
+
+const _entity_type = StructureProperties.entity.type;
+function testIsWater(l: StructureElement.Location) {
+    return _entity_type(l) === 'water';
+}
+
+function getPrdAsymIdx(structure: Structure) {
+    const model = structure.models[0];
+    const ids = new Set<string>();
+    if (!MmcifFormat.is(model.sourceData)) return ids;
+    const { _rowCount, asym_id } = model.sourceData.data.db.pdbx_molecule;
+    for (let i = 0; i < _rowCount; i++) {
+        ids.add(asym_id.value(i));
+    }
+    return ids;
+}
+
+function getStructConnInfo(structure: Structure) {
+    const model = structure.models[0];
+    const graph = new StructConnGraph();
+
+    if (!MmcifFormat.is(model.sourceData)) return graph;
+
+    const struct_conn = model.sourceData.data.db.struct_conn;
+    const { conn_type_id } = struct_conn;
+    const { ptnr1_label_asym_id, ptnr1_label_comp_id, ptnr1_label_seq_id, ptnr1_symmetry, pdbx_ptnr1_label_alt_id, pdbx_ptnr1_PDB_ins_code } = struct_conn;
+    const { ptnr2_label_asym_id, ptnr2_label_comp_id, ptnr2_label_seq_id, ptnr2_symmetry, pdbx_ptnr2_label_alt_id, pdbx_ptnr2_PDB_ins_code } = struct_conn;
+
+    for (let i = 0; i < struct_conn._rowCount; i++) {
+        const bondType = conn_type_id.value(i);
+        if (bondType !== 'covale' && bondType !== 'metalc') continue;
+
+        const a: ResidueSetEntry = {
+            label_asym_id: ptnr1_label_asym_id.value(i),
+            label_comp_id: ptnr1_label_comp_id.value(i),
+            label_seq_id: ptnr1_label_seq_id.value(i),
+            label_alt_id: pdbx_ptnr1_label_alt_id.value(i),
+            ins_code: pdbx_ptnr1_PDB_ins_code.value(i),
+            operator_name: ptnr1_symmetry.value(i) ?? '1_555'
+        };
+
+        const b: ResidueSetEntry = {
+            label_asym_id: ptnr2_label_asym_id.value(i),
+            label_comp_id: ptnr2_label_comp_id.value(i),
+            label_seq_id: ptnr2_label_seq_id.value(i),
+            label_alt_id: pdbx_ptnr2_label_alt_id.value(i),
+            ins_code: pdbx_ptnr2_PDB_ins_code.value(i),
+            operator_name: ptnr2_symmetry.value(i) ?? '1_555'
+        };
+
+        graph.addEdge(a, b);
+    }
+
+    return graph;
+}
+
+class StructConnGraph {
+    vertices = new Map<string, ResidueSetEntry>();
+    edges = new Map<string, string[]>();
+
+    private addVertex(e: ResidueSetEntry, label: string) {
+        if (this.vertices.has(label)) return;
+        this.vertices.set(label, e);
+        this.edges.set(label, []);
+    }
+
+    addEdge(a: ResidueSetEntry, b: ResidueSetEntry) {
+        const al = ResidueSet.getLabel(a);
+        const bl = ResidueSet.getLabel(b);
+        this.addVertex(a, al);
+        this.addVertex(b, bl);
+        arraySetAdd(this.edges.get(al)!, bl);
+        arraySetAdd(this.edges.get(bl)!, al);
+    }
+
+    addComponent(start: ResidueSetEntry, set: ResidueSet) {
+        const startLabel = ResidueSet.getLabel(start);
+
+        if (!this.vertices.has(startLabel)) {
+            set.add(start);
+            return;
+        }
+
+        const visited = new Set<string>();
+        const added = new Set<string>();
+        const stack = [startLabel];
+
+        added.add(startLabel);
+        set.add(start);
+
+        while (stack.length > 0) {
+            const a = stack.pop()!;
+            visited.add(a);
+
+            const u = this.vertices.get(a)!;
+
+            for (const b of this.edges.get(a)!) {
+                if (visited.has(b)) continue;
+                stack.push(b);
+
+                if (added.has(b)) continue;
+                added.add(b);
+
+                const v = this.vertices.get(b)!;
+                if (u.operator_name === v.operator_name) {
+                    set.add({ ...v, operator_name: start.operator_name });
+                } else {
+                    set.add(v);
+                }
+
+            }
+        }
+    }
+}
+
 // TODO: unionBy (skip this one?), cluster

+ 30 - 0
src/mol-model/structure/structure/util/lookup3d.ts

@@ -94,6 +94,36 @@ export class StructureLookup3D {
         }
     }
 
+    findIntoBuilderIf(x: number, y: number, z: number, radius: number, builder: StructureUniqueSubsetBuilder, test: (l: StructureElement.Location) => boolean) {
+        const { units } = this.structure;
+        const closeUnits = this.unitLookup.find(x, y, z, radius);
+        if (closeUnits.count === 0) return;
+
+        const loc = StructureElement.Location.create(this.structure);
+
+        for (let t = 0, _t = closeUnits.count; t < _t; t++) {
+            const unit = units[closeUnits.indices[t]];
+            Vec3.set(this.pivot, x, y, z);
+            if (!unit.conformation.operator.isIdentity) {
+                Vec3.transformMat4(this.pivot, this.pivot, unit.conformation.operator.inverse);
+            }
+            const unitLookup = unit.lookup3d;
+            const groupResult = unitLookup.find(this.pivot[0], this.pivot[1], this.pivot[2], radius);
+            if (groupResult.count === 0) continue;
+
+            const elements = unit.elements;
+            loc.unit = unit;
+            builder.beginUnit(unit.id);
+            for (let j = 0, _j = groupResult.count; j < _j; j++) {
+                loc.element = elements[groupResult.indices[j]];
+                if (test(loc)) {
+                    builder.addElement(loc.element);
+                }
+            }
+            builder.commitUnit();
+        }
+    }
+
     findIntoBuilderWithRadius(x: number, y: number, z: number, pivotR: number, maxRadius: number, radius: number, eRadius: StructureElement.Property<number>, builder: StructureUniqueSubsetBuilder) {
         const { units } = this.structure;
         const closeUnits = this.unitLookup.find(x, y, z, radius);

+ 13 - 0
src/mol-plugin-state/helpers/structure-selection-query.ts

@@ -425,6 +425,18 @@ const surroundings = StructureSelectionQuery('Surrounding Residues (5 \u212B) of
     referencesCurrent: true
 });
 
+const surroundingLigands = StructureSelectionQuery('Surrounding Ligands (5 \u212B) of Selection', MS.struct.modifier.union([
+    MS.struct.modifier.surroundingLigands({
+        0: MS.internal.generator.current(),
+        radius: 5,
+        'include-water': true
+    })
+]), {
+    description: 'Select ligand components within 5 \u212B of the current selection.',
+    category: StructureSelectionCategory.Manipulate,
+    referencesCurrent: true
+});
+
 const complement = StructureSelectionQuery('Inverse / Complement of Selection', MS.struct.modifier.union([
     MS.struct.modifier.exceptBy({
         0: MS.struct.generator.all(),
@@ -645,6 +657,7 @@ export const StructureSelectionQueries = {
     ring,
     aromaticRing,
     surroundings,
+    surroundingLigands,
     complement,
     covalentlyBonded,
     covalentlyOrMetallicBonded,

+ 6 - 0
src/mol-script/language/symbol-table/structure-query.ts

@@ -155,6 +155,12 @@ const modifier = {
         'as-whole-residues': Argument(Type.Bool, { isOptional: true })
     }), Types.ElementSelectionQuery, 'For each atom set in the selection, include all surrouding atoms/residues that are within the specified radius.'),
 
+    surroundingLigands: symbol(Arguments.Dictionary({
+        0: Argument(Types.ElementSelectionQuery),
+        radius: Argument(Type.Num),
+        'include-water': Argument(Type.Bool, { isOptional: true, defaultValue: true })
+    }), Types.ElementSelectionQuery, 'Find all ligands components around the source query.'),
+
     includeConnected: symbol(Arguments.Dictionary({
         0: Argument(Types.ElementSelectionQuery),
         'bond-test': Argument(Type.Bool, { isOptional: true, defaultValue: 'true for covalent bonds' as any }),

+ 7 - 0
src/mol-script/runtime/query/table.ts

@@ -258,6 +258,13 @@ const symbols = [
             elementRadius: xs['atom-radius']
         })(ctx);
     }),
+    D(MolScript.structureQuery.modifier.surroundingLigands, function structureQuery_modifier_includeSurroundingLigands(ctx, xs) {
+        return Queries.modifiers.surroundingLigands({
+            query: xs[0] as any,
+            radius: xs['radius'](ctx),
+            includeWater: !!(xs['include-water'] && xs['include-water'](ctx)),
+        })(ctx);
+    }),
     D(MolScript.structureQuery.modifier.wholeResidues, function structureQuery_modifier_wholeResidues(ctx, xs) { return Queries.modifiers.wholeResidues(xs[0] as any)(ctx); }),
     D(MolScript.structureQuery.modifier.union, function structureQuery_modifier_union(ctx, xs) { return Queries.modifiers.union(xs[0] as any)(ctx); }),
     D(MolScript.structureQuery.modifier.expandProperty, function structureQuery_modifier_expandProperty(ctx, xs) { return Queries.modifiers.expandProperty(xs[0] as any, xs['property'])(ctx); }),

+ 1 - 0
src/mol-script/script/mol-script/symbols.ts

@@ -138,6 +138,7 @@ export const SymbolTable = [
             Alias(MolScript.structureQuery.modifier.union, 'sel.atom.union'),
             Alias(MolScript.structureQuery.modifier.cluster, 'sel.atom.cluster'),
             Alias(MolScript.structureQuery.modifier.includeSurroundings, 'sel.atom.include-surroundings'),
+            Alias(MolScript.structureQuery.modifier.surroundingLigands, 'sel.atom.surrounding-ligands'),
             Alias(MolScript.structureQuery.modifier.includeConnected, 'sel.atom.include-connected'),
             Alias(MolScript.structureQuery.modifier.expandProperty, 'sel.atom.expand-property'),
 

+ 3 - 0
src/servers/model/CHANGELOG.md

@@ -1,3 +1,6 @@
+# 0.9.7
+* add Surrounding Ligands query
+
 # 0.9.6
 * optional download parameter
 

+ 29 - 1
src/servers/model/server/api.ts

@@ -136,6 +136,13 @@ const AssemblyNameParam: QueryParamInfo = {
     description: 'Assembly name. If none is provided, crystal symmetry (where available) or deposited model is used.'
 };
 
+const OmitWaterParam: QueryParamInfo = {
+    name: 'omit_water',
+    type: QueryParamType.Boolean,
+    required: false,
+    defaultValue: false
+};
+
 function Q<Params = any>(definition: Partial<QueryDefinition<Params>>) {
     return definition;
 }
@@ -223,7 +230,28 @@ const QueryMap = {
         jsonParams: [ AtomSiteTestJsonParam, RadiusParam ],
         restParams: [ ...AtomSiteTestRestParams, RadiusParam ],
         filter: QuerySchemas.interaction
-    })
+    }),
+    'surroundingLigands': Q<{ atom_site: AtomSiteSchema, radius: number, assembly_name: string, omit_water: boolean }>({
+        niceName: 'Surrounding Ligands',
+        description: 'Identifies (complete) ligands within the given radius from the source atom set. Takes crystal symmetry into account.',
+        query(p) {
+            const tests = getAtomsTests(p.atom_site);
+            const center = Queries.combinators.merge(tests.map(test => Queries.generators.atoms({
+                ...test,
+                entityTest: test.entityTest
+                    ? ctx => test.entityTest!(ctx) && ctx.element.unit.conformation.operator.isIdentity
+                    : ctx => ctx.element.unit.conformation.operator.isIdentity
+            })));
+            return Queries.modifiers.surroundingLigands({ query: center, radius: p.radius !== void 0 ? p.radius : 5, includeWater: !p.omit_water });
+        },
+        structureTransform(p, s) {
+            if (p.assembly_name) return StructureSymmetry.buildAssembly(s, '' + p.assembly_name).run();
+            return StructureSymmetry.builderSymmetryMates(s, p.radius !== void 0 ? p.radius : 5).run();
+        },
+        jsonParams: [ AtomSiteTestJsonParam, RadiusParam, OmitWaterParam, AssemblyNameParam ],
+        restParams: [ ...AtomSiteTestRestParams, RadiusParam, OmitWaterParam, AssemblyNameParam ],
+        filter: QuerySchemas.interaction
+    }),
 };
 
 export type QueryName = keyof typeof QueryMap

+ 1 - 0
src/servers/model/server/query.ts

@@ -248,6 +248,7 @@ async function resolveJobEntry(entry: JobEntry, structure: StructureWrapper, enc
 
         if (!entry.copyAllCategories && entry.queryDefinition.filter) encoder.setFilter(entry.queryDefinition.filter);
         if (result.length > 0) encode_mmCIF_categories(encoder, result, { copyAllCategories: entry.copyAllCategories });
+        else ConsoleLogger.logId(entry.job.id, 'Warning', `Empty result for Query ${entry.key}/${entry.queryDefinition.name}`);
         if (entry.transform && !Mat4.isIdentity(entry.transform)) GlobalModelTransformInfo.writeMmCif(encoder, entry.transform);
         if (!entry.copyAllCategories && entry.queryDefinition.filter) encoder.setFilter();
         perf.end('encode');

+ 1 - 1
src/servers/model/version.ts

@@ -4,4 +4,4 @@
  * @author David Sehnal <david.sehnal@gmail.com>
  */
 
-export const VERSION = '0.9.6';
+export const VERSION = '0.9.7';