mmcif.ts 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207
  1. /**
  2. * Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * @author David Sehnal <david.sehnal@gmail.com>
  5. */
  6. import { Column, Table } from 'mol-data/db';
  7. import { Interval, Segmentation } from 'mol-data/int';
  8. import { Spacegroup, SpacegroupCell } from 'mol-math/geometry';
  9. import { Vec3 } from 'mol-math/linear-algebra';
  10. import UUID from 'mol-util/uuid';
  11. import Format from '../format';
  12. import Model from '../model';
  13. import { AtomicConformation, AtomicData, AtomicSegments, AtomsSchema, ChainsSchema, ResiduesSchema } from '../properties/atomic';
  14. import { Entities } from '../properties/common';
  15. import { ModelSymmetry } from '../properties/symmetry';
  16. import { getAtomicKeys } from '../properties/utils/atomic-keys';
  17. import { ElementSymbol } from '../types';
  18. import { createAssemblies } from './mmcif/assembly';
  19. import { getIHMCoarse } from './mmcif/ihm';
  20. import { getSequence } from './mmcif/sequence';
  21. import mmCIF_Format = Format.mmCIF
  22. import { Task } from 'mol-task';
  23. import { getSecondaryStructureMmCif } from './mmcif/secondary-structure';
  24. function findModelBounds({ data }: mmCIF_Format, startIndex: number) {
  25. const num = data.atom_site.pdbx_PDB_model_num;
  26. const atomCount = num.rowCount;
  27. if (!num.isDefined) return Interval.ofBounds(startIndex, atomCount);
  28. let endIndex = startIndex + 1;
  29. while (endIndex < atomCount && num.areValuesEqual(startIndex, endIndex)) endIndex++;
  30. return Interval.ofBounds(startIndex, endIndex);
  31. }
  32. function findHierarchyOffsets({ data }: mmCIF_Format, bounds: Interval) {
  33. if (Interval.size(bounds) === 0) return { residues: [], chains: [] };
  34. const start = Interval.start(bounds), end = Interval.end(bounds);
  35. const residues = [start], chains = [start];
  36. const { label_entity_id, label_asym_id, label_seq_id, auth_seq_id, pdbx_PDB_ins_code, label_comp_id } = data.atom_site;
  37. for (let i = start + 1; i < end; i++) {
  38. const newChain = !label_entity_id.areValuesEqual(i - 1, i) || !label_asym_id.areValuesEqual(i - 1, i);
  39. const newResidue = newChain
  40. || !label_seq_id.areValuesEqual(i - 1, i)
  41. || !auth_seq_id.areValuesEqual(i - 1, i)
  42. || !pdbx_PDB_ins_code.areValuesEqual(i - 1, i)
  43. || !label_comp_id.areValuesEqual(i - 1, i);
  44. if (newResidue) residues[residues.length] = i;
  45. if (newChain) chains[chains.length] = i;
  46. }
  47. return { residues, chains };
  48. }
  49. function createHierarchyData({ data }: mmCIF_Format, bounds: Interval, offsets: { residues: ArrayLike<number>, chains: ArrayLike<number> }): AtomicData {
  50. const { atom_site } = data;
  51. const start = Interval.start(bounds), end = Interval.end(bounds);
  52. const atoms = Table.ofColumns(AtomsSchema, {
  53. type_symbol: Column.ofArray({ array: Column.mapToArray(Column.window(atom_site.type_symbol, start, end), ElementSymbol), schema: Column.Schema.Aliased<ElementSymbol>(Column.Schema.str) }),
  54. label_atom_id: Column.window(atom_site.label_atom_id, start, end),
  55. auth_atom_id: Column.window(atom_site.auth_atom_id, start, end),
  56. label_alt_id: Column.window(atom_site.label_alt_id, start, end),
  57. pdbx_formal_charge: Column.window(atom_site.pdbx_formal_charge, start, end)
  58. });
  59. const residues = Table.view(atom_site, ResiduesSchema, offsets.residues);
  60. // Optimize the numeric columns
  61. Table.columnToArray(residues, 'label_seq_id', Int32Array);
  62. Table.columnToArray(residues, 'auth_seq_id', Int32Array);
  63. const chains = Table.view(atom_site, ChainsSchema, offsets.chains);
  64. return { atoms, residues, chains };
  65. }
  66. function getConformation({ data }: mmCIF_Format, bounds: Interval): AtomicConformation {
  67. const start = Interval.start(bounds), end = Interval.end(bounds);
  68. const { atom_site } = data;
  69. return {
  70. id: UUID.create(),
  71. atomId: Column.window(atom_site.id, start, end),
  72. occupancy: Column.window(atom_site.occupancy, start, end),
  73. B_iso_or_equiv: Column.window(atom_site.B_iso_or_equiv, start, end),
  74. x: atom_site.Cartn_x.toArray({ array: Float32Array, start, end }),
  75. y: atom_site.Cartn_y.toArray({ array: Float32Array, start, end }),
  76. z: atom_site.Cartn_z.toArray({ array: Float32Array, start, end }),
  77. }
  78. }
  79. function getSymmetry(format: mmCIF_Format): ModelSymmetry {
  80. const assemblies = createAssemblies(format);
  81. const spacegroup = getSpacegroup(format);
  82. const isNonStandardCrytalFrame = checkNonStandardCrystalFrame(format, spacegroup);
  83. return { assemblies, spacegroup, isNonStandardCrytalFrame };
  84. }
  85. function checkNonStandardCrystalFrame(format: mmCIF_Format, spacegroup: Spacegroup) {
  86. const { atom_sites } = format.data;
  87. if (atom_sites._rowCount === 0) return false;
  88. // TODO: parse atom_sites transform and check if it corresponds to the toFractional matrix
  89. return false;
  90. }
  91. function getSpacegroup(format: mmCIF_Format): Spacegroup {
  92. const { symmetry, cell } = format.data;
  93. if (symmetry._rowCount === 0 || cell._rowCount === 0) return Spacegroup.ZeroP1;
  94. const groupName = symmetry['space_group_name_H-M'].value(0);
  95. const spaceCell = SpacegroupCell.create(groupName,
  96. Vec3.create(cell.length_a.value(0), cell.length_b.value(0), cell.length_c.value(0)),
  97. Vec3.scale(Vec3.zero(), Vec3.create(cell.angle_alpha.value(0), cell.angle_beta.value(0), cell.angle_gamma.value(0)), Math.PI / 180));
  98. return Spacegroup.create(spaceCell);
  99. }
  100. function isHierarchyDataEqual(a: AtomicData, b: AtomicData) {
  101. // need to cast because of how TS handles type resolution for interfaces https://github.com/Microsoft/TypeScript/issues/15300
  102. return Table.areEqual(a.chains as Table<ChainsSchema>, b.chains as Table<ChainsSchema>)
  103. && Table.areEqual(a.residues as Table<ResiduesSchema>, b.residues as Table<ResiduesSchema>)
  104. && Table.areEqual(a.atoms as Table<AtomsSchema>, b.atoms as Table<AtomsSchema>)
  105. }
  106. function modResMap(format: mmCIF_Format) {
  107. const data = format.data.pdbx_struct_mod_residue;
  108. const map = new Map<string, string>();
  109. const comp_id = data.label_comp_id.isDefined ? data.label_comp_id : data.auth_comp_id;
  110. const parent_id = data.parent_comp_id;
  111. for (let i = 0; i < data._rowCount; i++) {
  112. map.set(comp_id.value(i), parent_id.value(i));
  113. }
  114. return map;
  115. }
  116. function createModel(format: mmCIF_Format, bounds: Interval, previous?: Model): Model {
  117. const hierarchyOffsets = findHierarchyOffsets(format, bounds);
  118. const hierarchyData = createHierarchyData(format, bounds, hierarchyOffsets);
  119. if (previous && isHierarchyDataEqual(previous.atomicHierarchy, hierarchyData)) {
  120. return {
  121. ...previous,
  122. atomicConformation: getConformation(format, bounds)
  123. };
  124. }
  125. const hierarchySegments: AtomicSegments = {
  126. residueSegments: Segmentation.ofOffsets(hierarchyOffsets.residues, bounds),
  127. chainSegments: Segmentation.ofOffsets(hierarchyOffsets.chains, bounds),
  128. }
  129. const entities: Entities = { data: format.data.entity, getEntityIndex: Column.createIndexer(format.data.entity.id) };
  130. const hierarchyKeys = getAtomicKeys(hierarchyData, entities, hierarchySegments);
  131. const atomicHierarchy = { ...hierarchyData, ...hierarchyKeys, ...hierarchySegments };
  132. const coarse = getIHMCoarse(format.data, entities);
  133. const label = format.data.entry.id.valueKind(0) === Column.ValueKind.Present
  134. ? format.data.entry.id.value(0)
  135. : format.data._name;
  136. const modifiedResidueNameMap = modResMap(format);
  137. return {
  138. id: UUID.create(),
  139. label,
  140. sourceData: format,
  141. modelNum: format.data.atom_site.pdbx_PDB_model_num.value(Interval.start(bounds)),
  142. entities,
  143. atomicHierarchy,
  144. sequence: getSequence(format.data, entities, atomicHierarchy, modifiedResidueNameMap),
  145. atomicConformation: getConformation(format, bounds),
  146. coarseHierarchy: coarse.hierarchy,
  147. coarseConformation: coarse.conformation,
  148. properties: {
  149. secondaryStructure: getSecondaryStructureMmCif(format.data, atomicHierarchy),
  150. modifiedResidueNameMap
  151. },
  152. symmetry: getSymmetry(format)
  153. };
  154. }
  155. function buildModels(format: mmCIF_Format): Task<ReadonlyArray<Model>> {
  156. return Task.create('Create mmCIF Model', async ctx => {
  157. const atomCount = format.data.atom_site._rowCount;
  158. const isIHM = format.data.ihm_model_list._rowCount > 0;
  159. if (atomCount === 0) {
  160. return isIHM
  161. ? [createModel(format, Interval.Empty, void 0)]
  162. : [];
  163. }
  164. const models: Model[] = [];
  165. let modelStart = 0;
  166. while (modelStart < atomCount) {
  167. const bounds = findModelBounds(format, modelStart);
  168. const model = createModel(format, bounds, models.length > 0 ? models[models.length - 1] : void 0);
  169. models.push(model);
  170. modelStart = Interval.end(bounds);
  171. }
  172. return models;
  173. });
  174. }
  175. export default buildModels;