mmcif.ts 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
  1. /**
  2. * Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * @author David Sehnal <david.sehnal@gmail.com>
  5. */
  6. import { Column, Table } from 'mol-data/db';
  7. import { Interval, Segmentation } from 'mol-data/int';
  8. import { Spacegroup, SpacegroupCell } from 'mol-math/geometry';
  9. import { Vec3 } from 'mol-math/linear-algebra';
  10. import UUID from 'mol-util/uuid';
  11. import Format from '../format';
  12. import Model from '../model';
  13. import { AtomicConformation, AtomicData, AtomicSegments, AtomsSchema, ChainsSchema, ResiduesSchema } from '../properties/atomic';
  14. import { Entities } from '../properties/common';
  15. import { ModelSymmetry } from '../properties/symmetry';
  16. import { getAtomicKeys } from '../properties/utils/atomic-keys';
  17. import { ElementSymbol } from '../types';
  18. import { createAssemblies } from './mmcif/assembly';
  19. import { getIHMCoarse } from './mmcif/ihm';
  20. import { getSequence } from './mmcif/sequence';
  21. import mmCIF_Format = Format.mmCIF
  22. import { Task } from 'mol-task';
  23. import { getSecondaryStructureMmCif } from './mmcif/secondary-structure';
  /**
   * Determines the atom_site row bounds of the model that begins at `startIndex`.
   *
   * Rows are taken to belong to the same model while their `pdbx_PDB_model_num`
   * value equals the value at `startIndex`. When the model number column is not
   * defined, every row from `startIndex` to the end of the table is returned.
   */
  function findModelBounds({ data }: mmCIF_Format, startIndex: number) {
      const modelNum = data.atom_site.pdbx_PDB_model_num;
      const rowCount = modelNum.rowCount;

      let stop = rowCount;
      if (modelNum.isDefined) {
          // Walk forward until a row with a different model number (or the table end) is hit.
          for (stop = startIndex + 1; stop < rowCount; stop++) {
              if (!modelNum.areValuesEqual(startIndex, stop)) break;
          }
      }
      return Interval.ofBounds(startIndex, stop);
  }
  /**
   * Computes the atom_site row indices at which a new residue or a new chain starts
   * within `bounds`.
   *
   * A chain boundary is a change in `label_entity_id` or `auth_asym_id` between
   * consecutive rows. A residue boundary is a chain boundary or a change in
   * `auth_seq_id`, `pdbx_PDB_ins_code` or `label_comp_id`. The first row of a
   * non-empty range always starts both a residue and a chain.
   */
  function findHierarchyOffsets({ data }: mmCIF_Format, bounds: Interval) {
      if (Interval.size(bounds) === 0) return { residues: [], chains: [] };

      const first = Interval.start(bounds), last = Interval.end(bounds);
      const { label_entity_id, auth_asym_id, auth_seq_id, pdbx_PDB_ins_code, label_comp_id } = data.atom_site;

      const residues = [first], chains = [first];
      for (let cur = first + 1; cur < last; cur++) {
          const prev = cur - 1;
          const sameChain = label_entity_id.areValuesEqual(prev, cur) && auth_asym_id.areValuesEqual(prev, cur);
          const sameResidue = sameChain
              && auth_seq_id.areValuesEqual(prev, cur)
              && pdbx_PDB_ins_code.areValuesEqual(prev, cur)
              && label_comp_id.areValuesEqual(prev, cur);

          if (!sameResidue) residues.push(cur);
          if (!sameChain) chains.push(cur);
      }
      return { residues, chains };
  }
  /**
   * Builds the atom, residue and chain tables for the atom_site rows in `bounds`.
   *
   * Atom columns are windows over the [start, end) row range; residue and chain
   * tables are views of atom_site at the supplied start offsets.
   */
  function createHierarchyData({ data }: mmCIF_Format, bounds: Interval, offsets: { residues: ArrayLike<number>, chains: ArrayLike<number> }): AtomicData {
      const site = data.atom_site;
      const from = Interval.start(bounds), to = Interval.end(bounds);

      // Element symbols are mapped through ElementSymbol into a fresh array-backed column.
      const type_symbol = Column.ofArray({
          array: Column.mapToArray(Column.window(site.type_symbol, from, to), ElementSymbol),
          schema: Column.Schema.Aliased<ElementSymbol>(Column.Schema.str)
      });

      const atoms = Table.ofColumns(AtomsSchema, {
          type_symbol,
          label_atom_id: Column.window(site.label_atom_id, from, to),
          auth_atom_id: Column.window(site.auth_atom_id, from, to),
          label_alt_id: Column.window(site.label_alt_id, from, to),
          pdbx_formal_charge: Column.window(site.pdbx_formal_charge, from, to)
      });

      const residues = Table.view(site, ResiduesSchema, offsets.residues);
      // Convert the numeric sequence id columns to Int32Array storage.
      Table.columnToArray(residues, 'label_seq_id', Int32Array);
      Table.columnToArray(residues, 'auth_seq_id', Int32Array);

      const chains = Table.view(site, ChainsSchema, offsets.chains);

      return { atoms, residues, chains };
  }
  /**
   * Extracts the per-atom conformation data for the atom_site rows in `bounds`:
   * windows over id, occupancy and B-factor columns, and Cartesian coordinates
   * copied into Float32Array storage. Each call produces a new conformation id.
   */
  function getConformation({ data }: mmCIF_Format, bounds: Interval): AtomicConformation {
      const site = data.atom_site;
      const start = Interval.start(bounds);
      const end = Interval.end(bounds);

      // Copies one coordinate column of the row range into a Float32Array.
      const coords = (column: Column<number>) => column.toArray({ array: Float32Array, start, end });

      return {
          id: UUID.create(),
          atomId: Column.window(site.id, start, end),
          occupancy: Column.window(site.occupancy, start, end),
          B_iso_or_equiv: Column.window(site.B_iso_or_equiv, start, end),
          x: coords(site.Cartn_x),
          y: coords(site.Cartn_y),
          z: coords(site.Cartn_z),
      };
  }
  /**
   * Collects the symmetry information of the entry: its assemblies, its spacegroup,
   * and the non-standard crystal frame flag derived from that spacegroup.
   */
  function getSymmetry(format: mmCIF_Format): ModelSymmetry {
      const assemblies = createAssemblies(format);
      const spacegroup = getSpacegroup(format);
      return {
          assemblies,
          spacegroup,
          // The property name (including its spelling) is dictated by ModelSymmetry.
          isNonStandardCrytalFrame: checkNonStandardCrystalFrame(format, spacegroup)
      };
  }
  /**
   * Reports whether the entry uses a non-standard crystal frame.
   *
   * Not implemented yet: the result is currently always `false`, and `spacegroup`
   * is not consulted.
   */
  function checkNonStandardCrystalFrame(format: mmCIF_Format, spacegroup: Spacegroup) {
      const hasAtomSites = format.data.atom_sites._rowCount !== 0;
      if (hasAtomSites) {
          // TODO: parse atom_sites transform and check if it corresponds to the toFractional matrix
          return false;
      }
      return false;
  }
  /**
   * Creates the spacegroup from the first row of the `symmetry` and `cell` categories.
   * Falls back to `Spacegroup.ZeroP1` when either category has no rows.
   */
  function getSpacegroup(format: mmCIF_Format): Spacegroup {
      const { symmetry, cell } = format.data;
      const hasCrystalInfo = symmetry._rowCount !== 0 && cell._rowCount !== 0;
      if (!hasCrystalInfo) return Spacegroup.ZeroP1;

      const name = symmetry['space_group_name_H-M'].value(0);
      const lengths = Vec3.create(cell.length_a.value(0), cell.length_b.value(0), cell.length_c.value(0));
      const angles = Vec3.create(cell.angle_alpha.value(0), cell.angle_beta.value(0), cell.angle_gamma.value(0));
      // Cell angles are multiplied by PI / 180 (degrees to radians) before use.
      const scaledAngles = Vec3.scale(Vec3.zero(), angles, Math.PI / 180);

      return Spacegroup.create(SpacegroupCell.create(name, lengths, scaledAngles));
  }
  /**
   * Compares two hierarchies table by table (chains, then residues, then atoms),
   * stopping at the first table that differs.
   */
  function isHierarchyDataEqual(a: AtomicData, b: AtomicData) {
      // The casts are required because of how TS resolves types for interfaces,
      // see https://github.com/Microsoft/TypeScript/issues/15300
      if (!Table.areEqual(a.chains as Table<ChainsSchema>, b.chains as Table<ChainsSchema>)) return false;
      if (!Table.areEqual(a.residues as Table<ResiduesSchema>, b.residues as Table<ResiduesSchema>)) return false;
      return Table.areEqual(a.atoms as Table<AtomsSchema>, b.atoms as Table<AtomsSchema>);
  }
  /**
   * Creates the model for the atom_site rows in `bounds`.
   *
   * When `previous` is given and its atomic hierarchy equals the hierarchy of these
   * rows, everything derived from the hierarchy (keys, segments, sequence, coarse
   * data, properties, symmetry) is reused from `previous`. Only the conformation,
   * the model id and the model number are specific to the new model. Otherwise
   * the model is built from scratch.
   *
   * @param format   parsed mmCIF data
   * @param bounds   atom_site row range of the model
   * @param previous model preceding this one in the file, if any
   */
  function createModel(format: mmCIF_Format, bounds: Interval, previous?: Model): Model {
      const hierarchyOffsets = findHierarchyOffsets(format, bounds);
      const hierarchyData = createHierarchyData(format, bounds, hierarchyOffsets);
      const modelNum = format.data.atom_site.pdbx_PDB_model_num.value(Interval.start(bounds));

      if (previous && isHierarchyDataEqual(previous.atomicHierarchy, hierarchyData)) {
          return {
              ...previous,
              // A model sharing its hierarchy is still a distinct model: without these
              // overrides it would inherit the identity and model number of `previous`.
              id: UUID.create(),
              modelNum,
              atomicConformation: getConformation(format, bounds)
          };
      }

      const hierarchySegments: AtomicSegments = {
          residueSegments: Segmentation.ofOffsets(hierarchyOffsets.residues, bounds),
          chainSegments: Segmentation.ofOffsets(hierarchyOffsets.chains, bounds),
      };

      const entities: Entities = { data: format.data.entity, getEntityIndex: Column.createIndexer(format.data.entity.id) };

      const hierarchyKeys = getAtomicKeys(hierarchyData, entities, hierarchySegments);
      const atomicHierarchy = { ...hierarchyData, ...hierarchyKeys, ...hierarchySegments };

      const coarse = getIHMCoarse(format.data, entities);

      return {
          id: UUID.create(),
          label: format.data.entry.id.value(0),
          sourceData: format,
          modelNum,
          entities,
          atomicHierarchy,
          sequence: getSequence(format.data, entities, atomicHierarchy),
          atomicConformation: getConformation(format, bounds),
          coarseHierarchy: coarse.hierarchy,
          coarseConformation: coarse.conformation,
          properties: {
              secondaryStructure: getSecondaryStructureMmCif(format.data, atomicHierarchy)
          },
          symmetry: getSymmetry(format)
      };
  }
  /**
   * Creates a task that builds one model per run of atom_site rows sharing a model
   * number. Each model is offered the preceding one so that an unchanged hierarchy
   * can be reused.
   *
   * When atom_site is empty, the result is a single model over an empty range if
   * `ihm_model_list` has rows, and an empty array otherwise.
   */
  function buildModels(format: mmCIF_Format): Task<ReadonlyArray<Model>> {
      return Task.create('Create mmCIF Model', async ctx => {
          const { atom_site, ihm_model_list } = format.data;
          const totalAtoms = atom_site._rowCount;
          const hasIHMModels = ihm_model_list._rowCount > 0;

          const models: Model[] = [];
          if (totalAtoms === 0) {
              if (hasIHMModels) models.push(createModel(format, Interval.Empty, undefined));
              return models;
          }

          let previous: Model | undefined = undefined;
          for (let offset = 0; offset < totalAtoms;) {
              const bounds = findModelBounds(format, offset);
              previous = createModel(format, bounds, previous);
              models.push(previous);
              // The next model starts where this one ends.
              offset = Interval.end(bounds);
          }
          return models;
      });
  }
  159. export default buildModels;