Browse Source

Structure quality report in ModelServer

David Sehnal 6 years ago
parent
commit
aaefddc135

+ 1 - 0
src/mol-model/structure/model.ts

@@ -10,5 +10,6 @@ import Format from './model/format'
 import { ModelSymmetry } from './model/properties/symmetry'
 import StructureSequence from './model/properties/sequence'
 
+export * from './model/properties/custom'
 export * from './model/indexing'
 export { Model, Types, Format, ModelSymmetry, StructureSequence }

+ 3 - 1
src/mol-model/structure/model/properties/utils/atomic-keys.ts

@@ -10,7 +10,9 @@ import { Entities } from '../common'
 import { ChainIndex, ResidueIndex, EntityIndex } from '../../indexing';
 
 function getResidueId(comp_id: string, seq_id: number, ins_code: string) {
-    return `${comp_id} ${seq_id} ${ins_code}`;
+    // TODO: add new index that supports comp_id again?
+    return `${seq_id} ${ins_code}`;
+    //return `${comp_id} ${seq_id} ${ins_code}`;
 }
 
 function getElementKey(map: Map<string, number>, key: string, counter: { index: number }) {

+ 1 - 1
src/mol-model/structure/model/properties/utils/coarse-ranges.ts

@@ -13,7 +13,7 @@ import { ElementIndex } from '../../indexing';
 // TODO assumes all coarse elements are part of a polymer
 // TODO add gaps at the ends of the chains by comparing to the polymer sequence data
 
-export function getCoarseRanges(data: CoarseElementData, chemicalComponentMap: Map<string, ChemicalComponent>): CoarseRanges {
+export function getCoarseRanges(data: CoarseElementData, chemicalComponentMap: ReadonlyMap<string, ChemicalComponent>): CoarseRanges {
     const polymerRanges: number[] = []
     const gapRanges: number[] = []
     const chainIt = Segmentation.transientSegments(data.chainElementSegments, Interval.ofBounds(0, data.count))

+ 20 - 0
src/mol-model/structure/structure/structure.ts

@@ -18,6 +18,7 @@ import { InterUnitBonds, computeInterUnitBonds } from './unit/links';
 import { CrossLinkRestraints, extractCrossLinkRestraints } from './unit/pair-restraints';
 import StructureSymmetry from './symmetry';
 import StructureProperties from './properties';
+import { ResidueIndex } from '../model/indexing';
 
 class Structure {
     readonly unitMap: IntMap<Unit>;
@@ -278,6 +279,25 @@ namespace Structure {
         sortArray(keys.array);
         return keys.array;
     }
+
+    /**
+     * Gathers, without duplicates, the residue indices touched by the atomic
+     * units of `structure` that belong to `model`, then sorts them with `sortArray`.
+     *
+     * Only the first unit of every symmetry group is inspected.
+     */
+    export function getUniqueAtomicResidueIndices(structure: Structure, model: Model): ReadonlyArray<ResidueIndex> {
+        const seen = UniqueArray.create<ResidueIndex, ResidueIndex>();
+        for (const group of structure.unitSymmetryGroups) {
+            const first = group.units[0];
+            // Units of other models and non-atomic units contribute nothing.
+            if (first.model === model && Unit.isAtomic(first)) {
+                const { residueAtomSegments } = first.model.atomicHierarchy;
+                const it = Segmentation.transientSegments(residueAtomSegments, first.elements);
+                while (it.hasNext) {
+                    const { index } = it.move();
+                    UniqueArray.add(seen, index, index);
+                }
+            }
+        }
+        sortArray(seen.array);
+        return seen.array;
+    }
 }
 
 export default Structure

+ 12 - 0
src/servers/model/properties.ts

@@ -0,0 +1,12 @@
+/**
+ * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import { Model } from 'mol-model/structure';
+import { StructureQualityReport } from './properties/structure-quality-report';
+
+/**
+ * Attaches the server-side custom properties to `model`.
+ * At present this is only the structure quality report, obtained through
+ * `StructureQualityReport.attachFromPDBeApi`; the returned promise settles
+ * once that call has finished.
+ */
+export async function attachModelProperties(model: Model): Promise<void> {
+    await StructureQualityReport.attachFromPDBeApi(model);
+}

+ 124 - 0
src/servers/model/properties/structure-quality-report.ts

@@ -0,0 +1,124 @@
+/**
+ * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import { ResidueIndex, ModelPropertyDescriptor, Model, Structure, Unit, StructureElement, StructureProperties as P  } from 'mol-model/structure';
+import fetch from 'node-fetch';
+import { CifWriter } from 'mol-io/writer/cif';
+import CifField = CifWriter.Field;
+import { Segmentation } from 'mol-data/int';
+
+/** Maps a residue index of the model to the issue strings stored for that residue. */
+type IssueMap = Map<ResidueIndex, string[]>
+
+/**
+ * Descriptor of the quality report custom property: a static property that
+ * contributes one CIF category named `structure_quality_report` on export.
+ */
+const _Descriptor: ModelPropertyDescriptor = {
+    isStatic: true,
+    name: 'structure_quality_report',
+    cifExport: {
+        categories: [{
+            name: 'structure_quality_report',
+            instance(ctx) {
+                const { model, structure } = ctx;
+                const issues = StructureQualityReport.get(model);
+                if (issues) {
+                    // One row per residue of the exported structure that has an entry in the report.
+                    const residues = getResidueLoci(structure, issues);
+                    const residueIndex = model.atomicHierarchy.residueAtomSegments.index;
+                    const data = { model, residues, residueIndex, issues } as ExportCtx;
+                    return { fields: _structure_quality_report_fields, data, rowCount: residues.length };
+                }
+                // No report attached to this model: export nothing.
+                return CifWriter.Category.Empty;
+            }
+        }]
+    }
+}
+
+/** Data handed to the field getters of the `structure_quality_report` CIF category. */
+type ExportCtx = {
+    /** Model the report belongs to. */
+    model: Model,
+    /** Lookup from an element to the index of its residue. */
+    residueIndex: ArrayLike<ResidueIndex>,
+    /** One location per exported row. */
+    residues: StructureElement[],
+    /** Issues keyed by residue index. */
+    issues: IssueMap
+};
+
+/**
+ * Columns of the `structure_quality_report` category.
+ * In every getter `row` is the row number and indexes into `ctx.residues`.
+ */
+const _structure_quality_report_fields: CifField<ResidueIndex, ExportCtx>[] = [
+    // label_* identifiers of the residue, its chain and its entity
+    CifField.str<ResidueIndex, ExportCtx>('label_comp_id', (row, ctx) => P.residue.label_comp_id(ctx.residues[row])),
+    CifField.int<ResidueIndex, ExportCtx>('label_seq_id', (row, ctx) => P.residue.label_seq_id(ctx.residues[row])),
+    CifField.str<ResidueIndex, ExportCtx>('pdbx_PDB_ins_code', (row, ctx) => P.residue.pdbx_PDB_ins_code(ctx.residues[row])),
+    CifField.str<ResidueIndex, ExportCtx>('label_asym_id', (row, ctx) => P.chain.label_asym_id(ctx.residues[row])),
+    CifField.str<ResidueIndex, ExportCtx>('label_entity_id', (row, ctx) => P.entity.id(ctx.residues[row])),
+
+    // auth_* identifiers
+    CifField.str<ResidueIndex, ExportCtx>('auth_comp_id', (row, ctx) => P.residue.auth_comp_id(ctx.residues[row])),
+    CifField.int<ResidueIndex, ExportCtx>('auth_seq_id', (row, ctx) => P.residue.auth_seq_id(ctx.residues[row])),
+    CifField.str<ResidueIndex, ExportCtx>('auth_asym_id', (row, ctx) => P.chain.auth_asym_id(ctx.residues[row])),
+
+    // comma-separated issues, looked up through the residue index of the row's element
+    CifField.str<ResidueIndex, ExportCtx>('issues', (row, ctx) => {
+        const residue = ctx.residueIndex[ctx.residues[row].element];
+        return ctx.issues.get(residue)!.join(',');
+    })
+];
+
+/**
+ * Builds one StructureElement for every residue of `structure` that has an
+ * entry in `issues`. Each residue index is reported once, located at the
+ * first element of the residue's segment within the unit, and the result is
+ * ordered by element.
+ *
+ * Only the first unit of each symmetry group is visited, and only if it is atomic.
+ */
+function getResidueLoci(structure: Structure, issues: IssueMap) {
+    const visited = new Set<ResidueIndex>();
+    const result: StructureElement[] = [];
+
+    for (const group of structure.unitSymmetryGroups) {
+        const unit = group.units[0];
+        if (Unit.isAtomic(unit)) {
+            const it = Segmentation.transientSegments(unit.model.atomicHierarchy.residueAtomSegments, unit.elements);
+            while (it.hasNext) {
+                const { index, start } = it.move();
+                if (issues.has(index) && !visited.has(index)) {
+                    visited.add(index);
+                    result.push(StructureElement.create(unit, unit.elements[start]));
+                }
+            }
+        }
+    }
+
+    result.sort((a, b) => a.element - b.element);
+    return result;
+}
+
+/**
+ * Converts the per-entry JSON of the PDBe residue-wise outlier summary into a
+ * map from residue index to the outlier types listed for that residue.
+ *
+ * Only residues of the model whose number equals `modelData.modelNum` are used.
+ * The input comes from a remote service, so missing lists and malformed
+ * residue records are skipped instead of throwing.
+ *
+ * @param modelData model whose residues are being annotated
+ * @param data value stored under the entry id in the API response
+ * @returns the issue map, or `undefined` when `data` has no `molecules`
+ */
+function createIssueMap(modelData: Model, data: any): IssueMap | undefined {
+    if (!data || !data.molecules) return;
+
+    const ret = new Map<ResidueIndex, string[]>();
+    for (const entity of data.molecules) {
+        const entity_id = entity.entity_id.toString();
+        for (const chain of entity.chains || []) {
+            const asym_id = chain.struct_asym_id.toString();
+            for (const model of chain.models || []) {
+                // The summary lists every model of the entry; keep the one being annotated.
+                if (+model.model_id !== modelData.modelNum) continue;
+
+                for (const residue of model.residues || []) {
+                    const outlierTypes = residue.outlier_types;
+                    // The CIF `issues` field joins this list, so anything but an array would fail at export time.
+                    if (!Array.isArray(outlierTypes)) continue;
+
+                    const auth_seq_id = residue.author_residue_number;
+                    const ins_code = residue.author_insertion_code || '';
+                    const idx = modelData.atomicHierarchy.findResidueKey(entity_id, asym_id, '', auth_seq_id, ins_code);
+                    // NOTE(review): presumably findResidueKey yields a negative value when the residue
+                    // is not part of the model - confirm. A negative key is never a valid residue index,
+                    // and storing it would make the map look non-empty without any usable entry.
+                    if (idx < 0) continue;
+
+                    ret.set(idx, outlierTypes);
+                }
+            }
+        }
+    }
+
+    return ret;
+}
+
+/** Access to the structure quality report stored as a static custom property of a model. */
+export namespace StructureQualityReport {
+    /** Descriptor under which the report is registered in `model.customProperties`. */
+    export const Descriptor = _Descriptor;
+
+    /**
+     * Downloads the residue-wise outlier summary of the entry named by
+     * `model.label` (lower-cased) from the PDBe API and stores it on the model.
+     *
+     * Network failures and unparsable bodies still reject the returned promise.
+     *
+     * @returns `true` when the report is attached (or already was), `false`
+     *          when the service answered with an error status or returned no
+     *          usable data for the entry.
+     */
+    export async function attachFromPDBeApi(model: Model): Promise<boolean> {
+        if (model.customProperties.has(Descriptor)) return true;
+
+        const id = model.label.toLowerCase();
+        const rawData = await fetch(`https://www.ebi.ac.uk/pdbe/api/validation/residuewise_outlier_summary/entry/${id}`);
+        // An error response carries no report; do not try to interpret its body.
+        if (!rawData.ok) return false;
+
+        const json = await rawData.json();
+        // The report is keyed by the lower-case entry id.
+        const data = json && json[id];
+        if (!data) return false;
+        const issueMap = createIssueMap(model, data);
+        if (!issueMap || issueMap.size === 0) return false;
+
+        model.customProperties.add(Descriptor);
+        model._staticPropertyData.__StructureQualityReport__ = issueMap;
+        return true;
+    }
+
+    /** Returns the issue map previously attached to `model`, or `undefined` when none was attached. */
+    export function get(model: Model): IssueMap | undefined {
+        return model._staticPropertyData.__StructureQualityReport__;
+    }
+}

+ 1 - 0
src/servers/model/server/query.ts

@@ -116,6 +116,7 @@ const _model_server_error_fields: CifField<number, string>[] = [
 const _model_server_stats_fields: CifField<number, Stats>[] = [
     int32<Stats>('io_time_ms', ctx => ctx.structure.info.readTime | 0),
     int32<Stats>('parse_time_ms', ctx => ctx.structure.info.parseTime | 0),
+    int32<Stats>('attach_props_time_ms', ctx => ctx.structure.info.attachPropsTime | 0),
     int32<Stats>('create_model_time_ms', ctx => ctx.structure.info.createModelTime | 0),
     int32<Stats>('query_time_ms', ctx => ctx.queryTimeMs | 0),
     int32<Stats>('encode_time_ms', ctx => ctx.encodeTimeMs | 0)

+ 7 - 0
src/servers/model/server/structure-wrapper.ts

@@ -14,6 +14,7 @@ import * as fs from 'fs'
 import * as zlib from 'zlib'
 import { Job } from './jobs';
 import { ConsoleLogger } from 'mol-util/console-logger';
+import { attachModelProperties } from '../properties';
 
 require('util.promisify').shim();
 
@@ -27,6 +28,7 @@ export interface StructureInfo {
     readTime: number;
     parseTime: number;
     createModelTime: number;
+    attachPropsTime: number;
 
     sourceId: string,
     entryId: string
@@ -104,6 +106,10 @@ async function readStructure(key: string, sourceId: string, entryId: string) {
     const models = await Model.create(Format.mmCIF(frame)).run();
     perf.end('createModel');
 
+    perf.start('attachProps');
+    await attachModelProperties(models[0]);
+    perf.end('attachProps');
+
     const structure = Structure.ofModel(models[0]);
 
     const ret: StructureWrapper = {
@@ -112,6 +118,7 @@ async function readStructure(key: string, sourceId: string, entryId: string) {
             readTime: perf.time('read'),
             parseTime: perf.time('parse'),
             createModelTime: perf.time('createModel'),
+            attachPropsTime: perf.time('attachProps'),
             sourceId,
             entryId
         },