Browse Source

Merge pull request #7 from molstar/dev-sb-fu

Support file upload as part of motif queries
Sebastian Bittrich 2 năm trước cách đây
mục cha
commit
d1a88b8302

+ 1 - 1
package.json

@@ -1,6 +1,6 @@
 {
     "name": "@rcsb/rcsb-molstar",
-    "version": "2.5.11",
+    "version": "2.6.0-dev.6",
     "description": "RCSB PDB apps and props based on Mol*.",
     "homepage": "https://github.com/molstar/rcsb-molstar#readme",
     "repository": {

+ 45 - 150
src/viewer/ui/strucmotif.tsx

@@ -19,23 +19,22 @@ import { StructureSelectionHistoryEntry } from 'molstar/lib/mol-plugin-state/man
 import { StructureElement, StructureProperties } from 'molstar/lib/mol-model/structure/structure';
 import { ToggleSelectionModeButton } from 'molstar/lib/mol-plugin-ui/structure/selection';
 import { OrderedSet } from 'molstar/lib/mol-data/int';
-import { DefaultExchanges, ExchangesControl } from './exchanges';
-import { Vec3 } from 'molstar/lib/mol-math/linear-algebra/3d/vec3';
-import { Structure } from 'molstar/lib/mol-model/structure/structure/structure';
+import { DefaultExchanges, ExchangesControl } from './strucmotif/exchanges';
 import { Unit } from 'molstar/lib/mol-model/structure/structure/unit';
-import { UnitIndex } from 'molstar/lib/mol-model/structure/structure/element/element';
 import { ViewerState } from '../types';
+import { MAX_EXCHANGES, MAX_MOTIF_SIZE, MIN_MOTIF_SIZE, validate } from './strucmotif/validation';
+import {
+    createCtx,
+    detectDataSource,
+    ExchangeState,
+    extractResidues,
+    ResidueSelection,
+    uploadStructure
+} from './strucmotif/helpers';
 
 const ABSOLUTE_ADVANCED_SEARCH_URL = 'https://rcsb.org/search?query=';
 const RELATIVE_ADVANCED_SEARCH_URL = '/search?query=';
 const RETURN_TYPE = '&return_type=assembly';
-const CSM_REGEX = /^[A-Z0-9]+_[A-Z0-9]{6,}$/i;
-const CSM_TAG = '&include_csm=true';
-const MIN_MOTIF_SIZE = 2;
-const MAX_MOTIF_SIZE = 10;
-export const MAX_EXCHANGES = 4;
-const MAX_MOTIF_EXTENT = 15;
-const MAX_MOTIF_EXTENT_SQUARED = MAX_MOTIF_EXTENT * MAX_MOTIF_EXTENT;
 
 /**
  * The top-level component that exposes the strucmotif search.
@@ -64,12 +63,6 @@ const _SearchIcon = <svg width='24px' height='24px' viewBox='0 0 12 12'>
 </svg>;
 export function SearchIconSvg() { return _SearchIcon; }
 
-const location = StructureElement.Location.create(void 0);
-
-type ExchangeState = number;
-type ResidueSelection = { label_asym_id: string, struct_oper_id: string, label_seq_id: number }
-type Exchange = { residue_id: ResidueSelection, allowed: string[] }
-
 /**
  * The inner component of strucmotif search that can be collapsed.
  */
@@ -97,154 +90,54 @@ class SubmitControls extends PurePluginUIComponent<{}, { isBusy: boolean, residu
         return this.plugin.managers.structure.selection;
     }
 
-    submitSearch = () => {
-        const { label_atom_id, x, y, z } = StructureProperties.atom;
-        // keep track of seen pdbIds, space-groups, and NCS operators - motifs can only have a single value
-        const pdbId: Set<string> = new Set();
-        const sg: Set<number> = new Set();
-        const hkl: Set<string> = new Set();
-        const ncs: Set<number> = new Set();
-        const residueIds: ResidueSelection[] = [];
-        const exchanges: Exchange[] = [];
-        const coordinates: { coords: Vec3, residueId: ResidueSelection }[] = [];
-
-        /**
-         * This sets the 'location' to the backbone atom (CA or C4').
-         * @param structure context
-         * @param element wraps atom indices of this residue
-         */
-        const determineBackboneAtom = (structure: Structure, element: { unit: Unit; indices: OrderedSet<UnitIndex> }) => {
-            const { indices } = element;
-            for (let i = 0, il = OrderedSet.size(indices); i < il; i++) {
-                StructureElement.Location.set(location, structure, element.unit, element.unit.elements[OrderedSet.getAt(indices, i)]);
-                const atomLabelId = label_atom_id(location);
-                if ('CA' === atomLabelId || `C4'` === atomLabelId) {
-                    return true;
-                }
-            }
-            return false;
-        };
-
-        function join(opers: any[]) {
-            // this makes the assumptions that '1' is the identity operator
-            if (!opers || !opers.length) return '1';
-            if (opers.length > 1) {
-                // Mol* operators are right-to-left
-                return opers[1] + 'x' + opers[0];
-            }
-            return opers[0];
-        }
-
+    submitSearch = async () => {
         const loci = this.plugin.managers.structure.selection.additionsHistory;
-        for (let i = 0; i < Math.min(MAX_MOTIF_SIZE, loci.length); i++) {
-            const l = loci[i];
-            const { structure, elements } = l.loci;
-
-            // only first element and only first index will be considered (ignoring multiple residues)
-            if (!determineBackboneAtom(structure, elements[0])) {
-                alert(`No CA or C4' atom for ${StructureProperties.residue.label_seq_id(location)} | ${StructureProperties.chain.label_asym_id(location)} | ${join(StructureProperties.unit.pdbx_struct_oper_list_ids(location))}`);
-                return;
-            }
+        if (loci.length < MIN_MOTIF_SIZE) return;
 
-            pdbId.add(structure.model.entry);
-            sg.add(StructureProperties.unit.spgrOp(location));
-            hkl.add(StructureProperties.unit.hkl(location).join('-'));
-            ncs.add(StructureProperties.unit.struct_ncs_oper_id(location));
-
-            const struct_oper_list_ids = StructureProperties.unit.pdbx_struct_oper_list_ids(location);
-            const struct_oper_id = join(struct_oper_list_ids);
-
-            // handle pure residue-info
-            const residueId = {
-                label_asym_id: StructureProperties.chain.label_asym_id(location),
-                // can be empty array if model is selected
-                struct_oper_id,
-                label_seq_id: StructureProperties.residue.label_seq_id(location)
-            };
-            residueIds.push(residueId);
-
-            // retrieve CA/C4', used to compute residue distance
-            const coords = [x(location), y(location), z(location)] as Vec3;
-            coordinates.push({ coords, residueId });
-
-            // handle potential exchanges - can be empty if deselected by users
-            const residueMapEntry = this.state.residueMap.get(l)!;
-            if (residueMapEntry.exchanges?.size > 0) {
-                if (residueMapEntry.exchanges.size > MAX_EXCHANGES) {
-                    alert(`Maximum number of exchanges per position is ${MAX_EXCHANGES} - Please remove some exchanges from residue ${residueId.label_seq_id} | ${residueId.label_asym_id} | ${residueId.struct_oper_id}.`);
-                    return;
-                }
-                exchanges.push({ residue_id: residueId, allowed: Array.from(residueMapEntry.exchanges.values()) });
-            }
-        }
+        const ctx = createCtx(this.plugin, loci[0].loci.structure, this.state.residueMap);
+        extractResidues(ctx, loci);
+        if (!validate(ctx)) return;
 
-        if (pdbId.size > 1) {
-            alert('Motifs can only be extracted from a single model!');
-            return;
-        }
-        if (sg.size > 1) {
-            alert('Motifs can only appear in a single space-group!');
-            return;
-        }
-        if (hkl.size > 1) {
-            alert('All motif residues must have matching hkl operators!');
-            return;
-        }
-        if (ncs.size > 1) {
-            alert('All motif residues must have matching NCS operators!');
-            return;
-        }
-        if (residueIds.length > MAX_MOTIF_SIZE) {
-            alert(`Maximum motif size is ${MAX_MOTIF_SIZE} residues!`);
-            return;
-        }
-        if (residueIds.filter(v => v.label_seq_id === 0).length > 0) {
-            alert('Selections may only contain polymeric entities!');
-            return;
-        }
-        // warn if >15 A
-        const a = Vec3();
-        const b = Vec3();
-        // this is not efficient but is good enough for up to 10 residues
-        for (let i = 0, il = coordinates.length; i < il; i++) {
-            Vec3.set(a, coordinates[i].coords[0], coordinates[i].coords[1], coordinates[i].coords[2]);
-            let contact = false;
-            for (let j = 0, jl = coordinates.length; j < jl; j++) {
-                if (i === j) continue;
-                Vec3.set(b, coordinates[j].coords[0], coordinates[j].coords[1], coordinates[j].coords[2]);
-                const d = Vec3.squaredDistance(a, b);
-                if (d < MAX_MOTIF_EXTENT_SQUARED) {
-                    contact = true;
-                }
-            }
-
-            if (!contact) {
-                const { residueId } = coordinates[i];
-                alert(`Residue ${residueId.label_seq_id} | ${residueId.label_asym_id} | ${residueId.struct_oper_id} needs to be less than ${MAX_MOTIF_EXTENT} \u212B from another residue - Consider adding more residues to connect far-apart residues.`);
-                return;
-            }
-        }
-
-        const entry_id = pdbId.values().next().value as string;
         const query = {
             type: 'terminal',
             service: 'strucmotif',
             parameters: {
                 value: {
-                    entry_id,
-                    residue_ids: residueIds.sort((a, b) => this.sortResidueIds(a, b))
+                    residue_ids: ctx.residueIds.sort((a, b) => this.sortResidueIds(a, b))
                 },
                 rmsd_cutoff: 2,
                 atom_pairing_scheme: 'ALL'
             }
         };
-        if (exchanges.length) Object.assign(query.parameters, { exchanges });
+
+        detectDataSource(ctx);
+        const { dataSource, entryId, format, url } = ctx;
+        if (!dataSource || !format) return;
+        switch (dataSource) {
+            case 'identifier':
+                Object.assign(query.parameters.value, { entry_id: entryId });
+                break;
+            case 'url':
+                if (format === 'pdb') {
+                    const uploadUrl = await uploadStructure(ctx);
+                    Object.assign(query.parameters.value, { url: uploadUrl, format: 'bcif' });
+                } else {
+                    Object.assign(query.parameters.value, { url, format });
+                }
+                break;
+            case 'file':
+                const uploadUrl = await uploadStructure(ctx);
+                Object.assign(query.parameters.value, { url: uploadUrl, format: 'bcif' });
+                break;
+        }
+
+        if (ctx.exchanges.length) Object.assign(query.parameters, { exchanges: ctx.exchanges });
         // console.log(query);
         const sierraUrl = (this.plugin.customState as ViewerState).detachedFromSierra ? ABSOLUTE_ADVANCED_SEARCH_URL : RELATIVE_ADVANCED_SEARCH_URL;
-        const csmTag = CSM_REGEX.test(entry_id) ? CSM_TAG : '';
-        const url = sierraUrl + encodeURIComponent(JSON.stringify(query)) + RETURN_TYPE + csmTag;
-        // console.log(url);
-        window.open(url, '_blank');
+        const queryUrl = sierraUrl + encodeURIComponent(JSON.stringify(query)) + RETURN_TYPE;
+        // console.log(queryUrl);
+
+        window.open(queryUrl, '_blank');
     };
 
     sortResidueIds(a: ResidueSelection, b: ResidueSelection): number {
@@ -353,6 +246,7 @@ class SubmitControls extends PurePluginUIComponent<{}, { isBusy: boolean, residu
     }
 }
 
+const location = StructureElement.Location.create(void 0);
 export class Residue {
     readonly exchanges: Set<string>;
 
@@ -362,6 +256,7 @@ export class Residue {
         const structure = entry.loci.structure;
         const e = entry.loci.elements[0];
         StructureElement.Location.set(location, structure, e.unit, e.unit.elements[OrderedSet.getAt(e.indices, 0)]);
+        if (!Unit.isAtomic(location.unit)) return;
 
         const comp = StructureProperties.atom.label_comp_id(location);
         if (DefaultExchanges.has(comp)) {

+ 2 - 1
src/viewer/ui/exchanges.tsx → src/viewer/ui/strucmotif/exchanges.tsx

@@ -5,7 +5,8 @@
  */
 import * as React from 'react';
 import { Button } from 'molstar/lib/mol-plugin-ui/controls/common';
-import { MAX_EXCHANGES, Residue } from './strucmotif';
+import { Residue } from '../strucmotif';
+import { MAX_EXCHANGES } from './validation';
 
 export const DefaultExchanges: Map<string, string> = new Map([
     ['ALA', 'Alanine'],

+ 144 - 0
src/viewer/ui/strucmotif/helpers.ts

@@ -0,0 +1,144 @@
+/**
+ * Copyright (c) 2023 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Sebastian Bittrich <sebastian.bittrich@rcsb.org>
+ */
+
+import { Vec3 } from 'molstar/lib/mol-math/linear-algebra/3d/vec3';
+import { Structure } from 'molstar/lib/mol-model/structure/structure/structure';
+import { PluginStateObject } from 'molstar/lib/mol-plugin-state/objects';
+import { determineBackboneAtom, MAX_EXCHANGES, MAX_MOTIF_SIZE } from './validation';
+import { StructureElement, StructureProperties, to_mmCIF } from 'molstar/lib/mol-model/structure/structure';
+import { StructureSelectionHistoryEntry } from 'molstar/lib/mol-plugin-state/manager/structure/selection';
+import { Residue } from '../strucmotif';
+import { PluginContext } from 'molstar/lib/mol-plugin/context';
+
+export type ExchangeState = number;
+export type ResidueSelection = { label_asym_id: string, struct_oper_id: string, label_seq_id: number }
+export type Exchange = { residue_id: ResidueSelection, allowed: string[] }
+const STATIC_URL_REGEX = /^https?:\/\/(models|files).rcsb.org\//;
+const FILE_STORAGE_URL = 'https://user-upload.rcsb.org/v1/';
+const FILE_STORAGE_PUT_URL = FILE_STORAGE_URL + 'putMultipart';
+const FILE_STORAGE_GET_URL = FILE_STORAGE_URL + 'download/';
+const location = StructureElement.Location.create(void 0);
+
+export function createCtx(plugin: PluginContext, structure: Structure, residueMap: Map<StructureSelectionHistoryEntry, Residue>) {
+    return {
+        plugin,
+        structure,
+        entryId: structure.model.entryId,
+
+        pdbId: new Set<string>(),
+        sg: new Set<number>(),
+        hkl: new Set<string>(),
+        ncs: new Set<number>(),
+
+        residueIds: new Array<ResidueSelection>(),
+        residueMap,
+        exchanges: new Array<Exchange>(),
+        coordinates: new Array<{ coords: Vec3, residueId: ResidueSelection }>(),
+
+        dataSource: void 0,
+        format: void 0,
+        url: void 0
+    };
+}
+export type StrucmotifCtx = ReturnType<typeof createCtx>;
+
+export function detectDataSource(ctx: StrucmotifCtx) {
+    const { plugin, structure } = ctx;
+    const parent = plugin.helpers.substructureParent.get(structure)!;
+    const dataCell = plugin.state.data.selectQ(q => q.byValue(parent).rootOfType([PluginStateObject.Data.Binary, PluginStateObject.Data.Blob, PluginStateObject.Data.String]))[0];
+    const url = dataCell.params?.values.url?.url || dataCell.params?.values.url; // nested is the Import UI component, flat is via method call
+    const format = PluginStateObject.Data.Binary.is(dataCell.obj) ? 'bcif' :
+        !!plugin.state.data.selectQ(q => q.byValue(parent).rootOfType(PluginStateObject.Format.Cif))[0] ? 'cif' : 'pdb';
+
+    if (!url) {
+        Object.assign(ctx, { dataSource: 'file', url, format });
+    } else {
+        Object.assign(ctx, { dataSource: STATIC_URL_REGEX.test(url) ? 'identifier' : 'url', url, format });
+    }
+}
+
+export function extractResidues(ctx: StrucmotifCtx, loci: StructureSelectionHistoryEntry[]) {
+    const { x, y, z } = StructureProperties.atom;
+    for (let i = 0; i < Math.min(MAX_MOTIF_SIZE, loci.length); i++) {
+        const l = loci[i];
+        const { structure, elements } = l.loci;
+
+        // only first element and only first index will be considered (ignoring multiple residues)
+        if (!determineBackboneAtom(structure, location, elements[0])) {
+            alert(`No CA or C4' atom for selected residue`);
+            return;
+        }
+
+        ctx.pdbId.add(structure.model.entryId);
+        ctx.sg.add(StructureProperties.unit.spgrOp(location));
+        ctx.hkl.add(StructureProperties.unit.hkl(location).join('-'));
+        ctx.ncs.add(StructureProperties.unit.struct_ncs_oper_id(location));
+
+        const struct_oper_list_ids = StructureProperties.unit.pdbx_struct_oper_list_ids(location);
+        const struct_oper_id = join(struct_oper_list_ids);
+
+        // handle pure residue-info
+        const residueId = {
+            label_asym_id: StructureProperties.chain.label_asym_id(location),
+            // can be empty array if model is selected
+            struct_oper_id,
+            label_seq_id: StructureProperties.residue.label_seq_id(location)
+        };
+        ctx.residueIds.push(residueId);
+
+        // retrieve CA/C4', used to compute residue distance
+        const coords = [x(location), y(location), z(location)] as Vec3;
+        ctx.coordinates.push({ coords, residueId });
+
+        // handle potential exchanges - can be empty if deselected by users
+        const residueMapEntry = ctx.residueMap.get(l)!;
+        if (residueMapEntry.exchanges?.size > 0) {
+            if (residueMapEntry.exchanges.size > MAX_EXCHANGES) {
+                alert(`Maximum number of exchanges per position is ${MAX_EXCHANGES} - Please remove some exchanges from residue ${residueId.label_seq_id} | ${residueId.label_asym_id} | ${residueId.struct_oper_id}.`);
+                return;
+            }
+            ctx.exchanges.push({ residue_id: residueId, allowed: Array.from(residueMapEntry.exchanges.values()) });
+        }
+    }
+}
+
+function join(opers: any[]) {
+    // this makes the assumptions that '1' is the identity operator
+    if (!opers || !opers.length) return '1';
+    if (opers.length > 1) {
+        // Mol* operators are right-to-left
+        return opers[1] + 'x' + opers[0];
+    }
+    return opers[0];
+}
+
+export async function uploadStructure(ctx: StrucmotifCtx) {
+    const { entryId, plugin, structure } = ctx;
+    const name = entryId.replace(/\W/g, '') || 'unknown';
+    plugin.log.info(`Uploading BinaryCIF Representation of ${name} to RCSB Cloud`);
+
+    const formData = new FormData();
+    formData.append('format', 'bcif');
+    formData.append('name', name);
+    const file = new File([to_mmCIF(name, structure, true, { copyAllCategories: true })], name + '.bcif');
+    formData.append('file', file);
+
+    try {
+        const res = await fetch(FILE_STORAGE_PUT_URL, { method: 'POST', body: formData });
+        if (!res.ok || res.status !== 200) {
+            plugin.log.warn('File Upload Failed!');
+            return void 0;
+        }
+
+        const { key } = await res.json();
+        const url = FILE_STORAGE_GET_URL + key;
+        plugin.log.info(`Uploaded File is at: ${url}`);
+        return url;
+    } catch (e) {
+        plugin.log.warn('File Upload Failed!');
+        return void 0;
+    }
+}

+ 92 - 0
src/viewer/ui/strucmotif/validation.ts

@@ -0,0 +1,92 @@
+/**
+ * Copyright (c) 2023 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Sebastian Bittrich <sebastian.bittrich@rcsb.org>
+ */
+
+import { Structure } from 'molstar/lib/mol-model/structure/structure/structure';
+import { Unit } from 'molstar/lib/mol-model/structure/structure/unit';
+import { OrderedSet } from 'molstar/lib/mol-data/int';
+import { UnitIndex } from 'molstar/lib/mol-model/structure/structure/element/element';
+import { StructureElement, StructureProperties } from 'molstar/lib/mol-model/structure/structure';
+import { Vec3 } from 'molstar/lib/mol-math/linear-algebra/3d/vec3';
+import { StrucmotifCtx } from './helpers';
+
+export const MIN_MOTIF_SIZE = 2;
+export const MAX_MOTIF_SIZE = 10;
+export const MAX_EXCHANGES = 4;
+const MAX_MOTIF_EXTENT = 15;
+const MAX_MOTIF_EXTENT_SQUARED = MAX_MOTIF_EXTENT * MAX_MOTIF_EXTENT;
+
+export function determineBackboneAtom(structure: Structure, location: StructureElement.Location, element: { unit: Unit; indices: OrderedSet<UnitIndex> }) {
+    const { label_atom_id } = StructureProperties.atom;
+    const { indices } = element;
+    for (let i = 0, il = OrderedSet.size(indices); i < il; i++) {
+        StructureElement.Location.set(location, structure, element.unit, element.unit.elements[OrderedSet.getAt(indices, i)]);
+        if (!Unit.isAtomic(location.unit)) return false;
+
+        const atomLabelId = label_atom_id(location);
+        if ('CA' === atomLabelId || `C4'` === atomLabelId) {
+            return true;
+        }
+    }
+    return false;
+}
+
+export function validate(ctx: StrucmotifCtx) {
+    if (ctx.residueIds.length < MIN_MOTIF_SIZE) return false;
+
+    if (ctx.pdbId.size > 1) {
+        alert('Motifs can only be extracted from a single model!');
+        return false;
+    }
+    if (ctx.sg.size > 1) {
+        alert('Motifs can only appear in a single space-group!');
+        return false;
+    }
+    if (ctx.hkl.size > 1) {
+        alert('All motif residues must have matching hkl operators!');
+        return false;
+    }
+    if (ctx.ncs.size > 1) {
+        alert('All motif residues must have matching NCS operators!');
+        return false;
+    }
+    if (ctx.residueIds.length > MAX_MOTIF_SIZE) {
+        alert(`Maximum motif size is ${MAX_MOTIF_SIZE} residues!`);
+        return false;
+    }
+    if (ctx.residueIds.filter(v => v.label_seq_id === 0).length > 0) {
+        alert('Selections may only contain polymeric entities!');
+        return false;
+    }
+    return validateAtomDistances(ctx);
+}
+
+function validateAtomDistances(ctx: StrucmotifCtx) {
+    const { coordinates } = ctx;
+    // warn if >15 A
+    const a = Vec3();
+    const b = Vec3();
+
+    // this is not efficient but is good enough for up to 10 residues
+    for (let i = 0, il = coordinates.length; i < il; i++) {
+        Vec3.set(a, coordinates[i].coords[0], coordinates[i].coords[1], coordinates[i].coords[2]);
+        let contact = false;
+        for (let j = 0, jl = coordinates.length; j < jl; j++) {
+            if (i === j) continue;
+            Vec3.set(b, coordinates[j].coords[0], coordinates[j].coords[1], coordinates[j].coords[2]);
+            const d = Vec3.squaredDistance(a, b);
+            if (d < MAX_MOTIF_EXTENT_SQUARED) {
+                contact = true;
+            }
+        }
+
+        if (!contact) {
+            const { residueId } = coordinates[i];
+            alert(`Residue ${residueId.label_seq_id} | ${residueId.label_asym_id} | ${residueId.struct_oper_id} needs to be less than ${MAX_MOTIF_EXTENT} \u212B from another residue - Consider adding more residues to connect far-apart residues.`);
+            return false;
+        }
+    }
+    return true;
+}