structure-quality-report.ts 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238
  1. /**
  2. * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * @author David Sehnal <david.sehnal@gmail.com>
  5. */
  6. import { Column, Table } from 'mol-data/db';
  7. import { toTable } from 'mol-io/reader/cif/schema';
  8. import { mmCIF_residueId_schema } from 'mol-io/reader/cif/schema/mmcif-extras';
  9. import { CifWriter } from 'mol-io/writer/cif';
  10. import { Model, ModelPropertyDescriptor, ResidueIndex, Unit, IndexedCustomProperty } from 'mol-model/structure';
  11. import { residueIdFields } from 'mol-model/structure/export/categories/atom_site';
  12. import { StructureElement, CifExportContext } from 'mol-model/structure/structure';
  13. import { CustomPropSymbol } from 'mol-script/language/symbol';
  14. import Type from 'mol-script/language/type';
  15. import { QuerySymbolRuntime } from 'mol-script/runtime/query/compiler';
  16. import { PropertyWrapper } from '../common/wrapper';
  /**
   * Custom model property holding the PDBe structure quality report:
   * for each residue, the list of validation issue names attached to it.
   */
  export namespace StructureQualityReport {
      /** Residue-indexed lookup of issue names. */
      export type IssueMap = IndexedCustomProperty.Residue<string[]>
      /** The (possibly undefined) issue map wrapped together with its info record. */
      export type Property = PropertyWrapper<IssueMap | undefined>

      /** Reads the report previously stored on the model, if any. */
      export function get(model: Model): Property | undefined {
          // Intentionally declared ahead of `Descriptor` so that it is not undefined by the time the descriptor needs it.
          const dynamicData = model._dynamicPropertyData;
          return dynamicData.__StructureQualityReport__;
      }

      const { int, str } = Column.Schema;

      /** CIF category schemas used to read and write the report. */
      export const Schema = {
          pdbe_structure_quality_report: {
              updated_datetime_utc: str
          },
          pdbe_structure_quality_report_issues: {
              id: int,
              ...mmCIF_residueId_schema,
              pdbx_PDB_model_num: int,
              issue_type_group_id: int
          },
          pdbe_structure_quality_report_issue_types: {
              group_id: int,
              issue_type: str
          }
      };
      export type Schema = typeof Schema

      /** Descriptor registering the CIF export categories and the query symbols of this property. */
      export const Descriptor = ModelPropertyDescriptor({
          isStatic: false,
          name: 'structure_quality_report',
          cifExport: {
              prefix: 'pdbe',
              context(ctx) {
                  return createExportContext(ctx);
              },
              categories: [
                  PropertyWrapper.defaultInfoCategory<ReportExportContext>('pdbe_structure_quality_report', ctx => ctx.info),
                  {
                      name: 'pdbe_structure_quality_report_issues',
                      instance(ctx: ReportExportContext) {
                          // One data source per exported model, one row per flagged element.
                          const source = ctx.models.map(m => ({ data: m, rowCount: m.elements.length }));
                          return { fields: _structure_quality_report_issues_fields, source };
                      }
                  },
                  {
                      name: 'pdbe_structure_quality_report_issue_types',
                      instance(ctx: ReportExportContext) {
                          return CifWriter.Category.ofTable(ctx.issueTypes);
                      }
                  }
              ]
          },
          symbols: {
              issueCount: QuerySymbolRuntime.Dynamic(
                  CustomPropSymbol('pdbe', 'structure-quality.issue-count', Type.Num),
                  ctx => StructureQualityReport.getIssues(ctx.element).length
              ),
              // TODO: add (hasIssue :: IssueType(extends string) -> boolean) symbol
          }
      });

      /**
       * Converts the two report categories of the model's mmCIF frame into typed tables.
       * Throws when the model's source data is not mmCIF.
       */
      function getCifData(model: Model) {
          const { sourceData } = model;
          if (sourceData.kind !== 'mmCIF') throw new Error('Data format must be mmCIF.');

          const { categories } = sourceData.frame;
          const residues = toTable(Schema.pdbe_structure_quality_report_issues, categories.pdbe_structure_quality_report_issues);
          const groups = toTable(Schema.pdbe_structure_quality_report_issue_types, categories.pdbe_structure_quality_report_issue_types);
          return { residues, groups };
      }

      /**
       * Attaches the report to the model, taking it from the model's CIF data when the
       * info category is present there and otherwise from the optional JSON provider.
       *
       * @returns true when the property is (or already was) attached, false when no source yielded data.
       */
      export async function attachFromCifOrApi(model: Model, params: {
          // optional JSON source
          PDBe_apiSourceJson?: (model: Model) => Promise<any>
      }) {
          // Already attached: nothing to do.
          if (get(model)) return true;

          const cifInfo = PropertyWrapper.tryGetInfoFromCif('pdbe_structure_quality_report', model);
          let prop: Property;
          if (cifInfo) {
              const { residues, groups } = getCifData(model);
              prop = { info: cifInfo, data: createIssueMapFromCif(model, residues, groups) };
          } else {
              if (!params.PDBe_apiSourceJson) return false;
              const json = await params.PDBe_apiSourceJson(model);
              if (!json) return false;
              const info = PropertyWrapper.createInfo();
              prop = { info, data: createIssueMapFromJson(model, json) };
          }

          model.customProperties.add(Descriptor);
          set(model, prop);
          return true;
      }

      /** Stores the report in the model's dynamic property data. */
      function set(model: Model, prop: Property) {
          (model._dynamicPropertyData.__StructureQualityReport__ as Property) = prop;
      }

      /** Returns the issue map stored on the model, or undefined when none is attached. */
      export function getIssueMap(model: Model): IssueMap | undefined {
          const stored = get(model);
          return stored ? stored.data : undefined;
      }

      // Shared result for every element that has no recorded issues.
      const _emptyArray: string[] = [];

      /** Returns the issue names recorded for the residue of the given element (empty when there are none). */
      export function getIssues(e: StructureElement) {
          const { unit, element } = e;
          if (!Unit.isAtomic(unit)) return _emptyArray;

          const stored = get(unit.model);
          if (!stored || !stored.data) return _emptyArray;

          const issueMap = stored.data;
          const residue = unit.residueIndex[element];
          if (!issueMap.has(residue)) return _emptyArray;
          return issueMap.get(residue)!;
      }
  }
  /**
   * Field definitions of the `pdbe_structure_quality_report_issues` export category:
   * a running `id`, the residue identification fields (including the model number)
   * of the element at the given row, and the issue type group that row refers to.
   */
  const _structure_quality_report_issues_fields = CifWriter.fields<number, ReportExportContext['models'][0]>()
      .index('id')
      .many(residueIdFields((row, model) => model.elements[row], { includeModelNum: true }))
      .int('issue_type_group_id', (row, model) => model.groupId[row])
      .getFields();
  /** Data prepared once per CIF export and shared by the report's export categories. */
  interface ReportExportContext {
      /** Per exported structure: the flagged elements and, index-aligned, their issue group ids. */
      models: Array<{
          elements: StructureElement[];
          groupId: number[];
      }>;
      /** Info record written to the `pdbe_structure_quality_report` category. */
      info: PropertyWrapper.Info;
      /** Rows (group id, issue type) written to the issue types category. */
      issueTypes: Table<StructureQualityReport.Schema['pdbe_structure_quality_report_issue_types']>;
  }
  /**
   * Builds the export context for all structures being written.
   *
   * Distinct issue lists are deduplicated into numbered groups (1-based, in order
   * of first appearance); every flagged element then only references its group id.
   * The returned info is the one of the last structure that carries a report, or a
   * freshly created one when no structure does.
   */
  function createExportContext(ctx: CifExportContext): ReportExportContext {
      const groupIdByKey = new Map<string, number>();
      const exportedModels: ReportExportContext['models'] = [];
      const group_id: number[] = [];
      const issue_type: string[] = [];
      let info: PropertyWrapper.Info = PropertyWrapper.createInfo();

      for (const structure of ctx.structures) {
          const report = StructureQualityReport.get(structure.model);
          if (!report) continue;
          info = report.info;
          if (!report.data) continue;

          const { elements, property } = report.data.getElements(structure);
          const elementCount = elements.length;
          if (elementCount === 0) continue;

          const groupId: number[] = [];
          for (let i = 0; i < elementCount; i++) {
              const issues = property(i);
              // The joined issue names identify a group.
              const key = issues.join(',');
              let id = groupIdByKey.get(key);
              if (id === undefined) {
                  id = groupIdByKey.size + 1;
                  groupIdByKey.set(key, id);
                  // First time this combination is seen: emit one issue-type row per issue.
                  for (const issue of issues) {
                      group_id.push(id);
                      issue_type.push(issue);
                  }
              }
              groupId.push(id);
          }
          exportedModels.push({ elements, groupId });
      }

      const issueTypes = Table.ofArrays(StructureQualityReport.Schema.pdbe_structure_quality_report_issue_types, { group_id, issue_type });
      return { info, models: exportedModels, issueTypes };
  }
  /**
   * Builds the residue issue map for `modelData` from a JSON document shaped as
   * `molecules[] -> chains[] -> models[] -> residues[]`, where each residue carries
   * `author_residue_number`, an optional `author_insertion_code` and `outlier_types`.
   *
   * Only residues of the model whose `model_id` equals `modelData.modelNum` are used.
   * The JSON comes from outside the program, so malformed parts are skipped instead
   * of throwing or polluting the map.
   *
   * @param modelData model whose residues are looked up
   * @param data parsed JSON document
   * @returns the issue map, or undefined when the document has no `molecules`
   */
  function createIssueMapFromJson(modelData: Model, data: any): StructureQualityReport.IssueMap | undefined {
      const ret = new Map<ResidueIndex, string[]>();
      if (!data || !data.molecules) return;

      for (const entity of data.molecules) {
          const entity_id = entity.entity_id.toString();
          // A missing `chains` / `models` / `residues` list is treated as empty.
          for (const chain of entity.chains || []) {
              const asym_id = chain.struct_asym_id.toString();
              for (const model of chain.models || []) {
                  // model_id may arrive as a number or a numeric string.
                  if (+model.model_id !== modelData.modelNum) continue;

                  for (const residue of model.residues || []) {
                      const issues = residue.outlier_types;
                      // The map is typed as string[]; never store a missing or malformed issue list.
                      if (!Array.isArray(issues)) continue;

                      const auth_seq_id = residue.author_residue_number, ins_code = residue.author_insertion_code || '';
                      const idx = modelData.atomicHierarchy.index.findResidue(entity_id, asym_id, auth_seq_id, ins_code);
                      // NOTE(review): assumes findResidue reports "not found" as a negative index - confirm.
                      if (idx < 0) continue;
                      ret.set(idx, issues);
                  }
              }
          }
      }

      return IndexedCustomProperty.fromResidueMap(ret);
  }
  /**
   * Builds the residue issue map for `modelData` from the two report tables read from CIF.
   *
   * Each row of `residueData` names a residue and the id of an issue type group;
   * `groupData` lists the issue types belonging to each group. Rows of other models
   * (by `pdbx_PDB_model_num`) are ignored, as are rows that reference a group id that
   * has no entry in `groupData`.
   *
   * @param modelData model whose residues are looked up
   * @param residueData rows of the `pdbe_structure_quality_report_issues` category
   * @param groupData rows of the `pdbe_structure_quality_report_issue_types` category
   * @returns the issue map built from the matching rows
   */
  function createIssueMapFromCif(modelData: Model,
      residueData: Table<typeof StructureQualityReport.Schema.pdbe_structure_quality_report_issues>,
      groupData: Table<typeof StructureQualityReport.Schema.pdbe_structure_quality_report_issue_types>): StructureQualityReport.IssueMap | undefined {

      const ret = new Map<ResidueIndex, string[]>();
      const { label_entity_id, label_asym_id, auth_seq_id, pdbx_PDB_ins_code, issue_type_group_id, pdbx_PDB_model_num, _rowCount } = residueData;

      const groups = parseIssueTypes(groupData);

      for (let i = 0; i < _rowCount; i++) {
          if (pdbx_PDB_model_num.value(i) !== modelData.modelNum) continue;

          // The two tables come from file data and may be inconsistent: without this check an
          // unknown group id would store `undefined` in a map that is typed as string[].
          const issues = groups.get(issue_type_group_id.value(i));
          if (!issues) continue;

          const idx = modelData.atomicHierarchy.index.findResidue(label_entity_id.value(i), label_asym_id.value(i), auth_seq_id.value(i), pdbx_PDB_ins_code.value(i));
          // NOTE(review): assumes findResidue reports "not found" as a negative index - confirm.
          if (idx < 0) continue;
          ret.set(idx, issues);
      }

      return IndexedCustomProperty.fromResidueMap(ret);
  }
  /**
   * Groups the rows of the issue types table by group id.
   *
   * @param groupData table with one (group_id, issue_type) pair per row
   * @returns for every group id, its issue types in row order
   */
  function parseIssueTypes(groupData: Table<typeof StructureQualityReport.Schema.pdbe_structure_quality_report_issue_types>): Map<number, string[]> {
      const byGroup = new Map<number, string[]>();

      for (let row = 0; row < groupData._rowCount; row++) {
          const groupId = groupData.group_id.value(row);

          // Get the list for this group, creating it on first sight.
          let members = byGroup.get(groupId);
          if (!members) {
              members = [];
              byGroup.set(groupId, members);
          }

          members.push(groupData.issue_type.value(row));
      }

      return byGroup;
  }