// hierarchy.ts
  1. /**
  2. * Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * @author David Sehnal <david.sehnal@gmail.com>
  5. * @author Alexander Rose <alexander.rose@weirdbyte.de>
  6. */
  7. import { Column, Table } from 'mol-data/db'
  8. import { Segmentation } from 'mol-data/int'
  9. import { mmCIF_Schema as mmCIF } from 'mol-io/reader/cif/schema/mmcif'
  10. import { ElementSymbol, MoleculeType } from '../../types'
  11. import { ChainIndex, EntityIndex, ResidueIndex, ElementIndex } from '../../indexing';
  12. import SortedRanges from 'mol-data/int/sorted-ranges';
  13. export const AtomsSchema = {
  14. /**
  15. * The chemical element of this atom site.
  16. * For mmCIF files, this points to atom_type.symbol in the ATOM_TYPE category.
  17. */
  18. type_symbol: Column.Schema.Aliased<ElementSymbol>(mmCIF.atom_site.type_symbol),
  19. /**
  20. * A component of the identifier for this atom site.
  21. * This is a standardized name for the atom within its residue.
  22. * For mmCIF files, this points to chem_comp_atom.atom_id in the CHEM_COMP_ATOM category.
  23. */
  24. label_atom_id: mmCIF.atom_site.label_atom_id,
  25. /**
  26. * An alternative identifier for label_atom_id that may be provided by an author
  27. * in order to match the identification used in the publication that describes the structure.
  28. */
  29. auth_atom_id: mmCIF.atom_site.auth_atom_id,
  30. /**
  31. * A component of the identifier for this atom site.
  32. * Identifies an alternative conformation for this atom site.
  33. */
  34. label_alt_id: mmCIF.atom_site.label_alt_id,
  35. /**
  36. * The net integer charge assigned to this atom.
  37. * This is the formal charge assignment normally found in chemical diagrams.
  38. */
  39. pdbx_formal_charge: mmCIF.atom_site.pdbx_formal_charge
  40. // id, occupancy and B_iso_or_equiv are part of conformation
  41. };
  42. export type AtomsSchema = typeof AtomsSchema
  43. export interface Atoms extends Table<AtomsSchema> { }
  44. export const ResiduesSchema = {
  45. /**
  46. * The group of atoms to which the atom site belongs. This data item is provided for
  47. * compatibility with the original Protein Data Bank format, and only for that purpose.
  48. */
  49. group_PDB: mmCIF.atom_site.group_PDB,
  50. /**
  51. * A component of the identifier for this atom site.
  52. * For mmCIF files, this points to chem_comp.id in the CHEM_COMP category.
  53. */
  54. label_comp_id: mmCIF.atom_site.label_comp_id,
  55. /**
  56. * An alternative identifier for atom_site.label_comp_id that may be provided by an author
  57. * in order to match the identification used in the publication that describes the structure.
  58. */
  59. auth_comp_id: mmCIF.atom_site.auth_comp_id,
  60. /**
  61. * For mmCIF files, this points to entity_poly_seq.num in the ENTITY_POLY_SEQ category.
  62. */
  63. label_seq_id: mmCIF.atom_site.label_seq_id,
  64. /**
  65. * An alternative identifier for atom_site.label_seq_id that may be provided by an author
  66. * in order to match the identification used in the publication that describes the structure.
  67. */
  68. auth_seq_id: mmCIF.atom_site.auth_seq_id,
  69. /**
  70. * PDB insertion code.
  71. */
  72. pdbx_PDB_ins_code: mmCIF.atom_site.pdbx_PDB_ins_code,
  73. };
  74. export type ResiduesSchema = typeof ResiduesSchema
  75. export interface Residues extends Table<ResiduesSchema> { }
  76. export const ChainsSchema = {
  77. /**
  78. * A component of the identifier for this atom site.
  79. * For mmCIF files, this points to struct_asym.id in the STRUCT_ASYM category.
  80. */
  81. label_asym_id: mmCIF.atom_site.label_asym_id,
  82. /**
  83. * An alternative identifier for atomsite.label_asym_id that may be provided by an author
  84. * in order to match the identification used in the publication that describes the structure.
  85. */
  86. auth_asym_id: mmCIF.atom_site.auth_asym_id,
  87. /**
  88. * For mmCIF files, this points to _entity.id in the ENTITY category.
  89. */
  90. label_entity_id: mmCIF.atom_site.label_entity_id
  91. }
  92. export type ChainsSchema = typeof ChainsSchema
  93. export interface Chains extends Table<ChainsSchema> { }
  94. export interface AtomicData {
  95. atoms: Atoms,
  96. residues: Residues,
  97. chains: Chains
  98. }
  99. export interface AtomicDerivedData {
  100. readonly residue: {
  101. readonly traceElementIndex: ArrayLike<ElementIndex>
  102. readonly directionElementIndex: ArrayLike<ElementIndex>
  103. readonly moleculeType: ArrayLike<MoleculeType>
  104. }
  105. }
  106. export interface AtomicSegments {
  107. /** Maps residueIndex to a range of atoms [segments[rI], segments[rI + 1]) */
  108. residueAtomSegments: Segmentation<ElementIndex, ResidueIndex>,
  109. /**
  110. * Maps chainIndex to a range of atoms [segments[cI], segments[cI + 1]),
  111. *
  112. * residues of i-th chain are accessed like this:
  113. * const rI = residueAtomSegments.index, offsets = chainAtomSegments.offsets;
  114. * const start = rI[offsets[i]], const end = rI[offsets[i + 1] - 1] + 1;
  115. * for (let j = start; j < end; i++) { }
  116. */
  117. chainAtomSegments: Segmentation<ElementIndex, ChainIndex>,
  118. // TODO: include entity segments?
  119. }
  120. export interface AtomicIndex {
  121. /** @returns index or -1 if not present. */
  122. getEntityFromChain(cI: ChainIndex): EntityIndex,
  123. /**
  124. * Find chain using label_ mmCIF properties
  125. * @returns index or -1 if not present.
  126. */
  127. findChainLabel(key: AtomicIndex.ChainLabelKey): ChainIndex,
  128. /**
  129. * Find chain using auth_ mmCIF properties
  130. * @returns index or -1 if not present.
  131. */
  132. findChainAuth(key: AtomicIndex.ChainAuthKey): ChainIndex,
  133. /**
  134. * Index of the 1st occurence of this residue.
  135. * auth_seq_id is used because label_seq_id is undefined for "ligands" in mmCIF.
  136. * @param pdbx_PDB_ins_code Empty string for undefined
  137. * @returns index or -1 if not present.
  138. */
  139. findResidue(key: AtomicIndex.ResidueKey): ResidueIndex,
  140. findResidue(label_entity_id: string, label_asym_id: string, auth_seq_id: number, pdbx_PDB_ins_code?: string): ResidueIndex,
  141. /**
  142. * Index of the 1st occurence of this residue.
  143. * @param pdbx_PDB_ins_code Empty string for undefined
  144. * @returns index or -1 if not present.
  145. */
  146. findResidueAuth(key: AtomicIndex.ResidueAuthKey): ResidueIndex,
  147. /**
  148. * Find the residue index where the spefied residue should be inserted to maintain the ordering (entity_id, asym_id, seq_id, ins_code).
  149. * Useful for determining ranges for sequence-level annotations.
  150. * @param pdbx_PDB_ins_code Empty string for undefined
  151. */
  152. findResidueInsertion(key: AtomicIndex.ResidueLabelKey): ResidueIndex,
  153. /**
  154. * Find element index of an atom.
  155. * @param key
  156. * @returns index or -1 if the atom is not present.
  157. */
  158. findAtom(key: AtomicIndex.AtomKey): ElementIndex,
  159. /**
  160. * Find element index of an atom.
  161. * @param key
  162. * @returns index or -1 if the atom is not present.
  163. */
  164. findAtomAuth(key: AtomicIndex.AtomAuthKey): ElementIndex,
  165. /**
  166. * Find element index of an atom on a given residue.
  167. * @param key
  168. * @returns index or -1 if the atom is not present.
  169. */
  170. findAtomOnResidue(residueIndex: ResidueIndex, label_atom_id: string, label_alt_id?: string): ElementIndex
  171. // TODO: add indices that support comp_id?
  172. }
  173. export namespace AtomicIndex {
  174. export interface ChainLabelKey { label_entity_id: string, label_asym_id: string }
  175. export interface ChainAuthKey { auth_asym_id: string }
  176. export interface ResidueKey { label_entity_id: string, label_asym_id: string, auth_seq_id: number, pdbx_PDB_ins_code?: string }
  177. export function EmptyResidueKey(): ResidueKey { return { label_entity_id: '', label_asym_id: '', auth_seq_id: 0, pdbx_PDB_ins_code: void 0 }; }
  178. export interface ResidueAuthKey { auth_asym_id: string, auth_comp_id: string, auth_seq_id: number, pdbx_PDB_ins_code?: string }
  179. export interface ResidueLabelKey { label_entity_id: string, label_asym_id: string, label_seq_id: number, pdbx_PDB_ins_code?: string }
  180. export interface AtomKey extends ResidueKey { label_atom_id: string, label_alt_id?: string }
  181. export interface AtomAuthKey extends ResidueAuthKey { auth_atom_id: string, label_alt_id?: string }
  182. }
  183. export interface AtomicRanges {
  184. polymerRanges: SortedRanges<ElementIndex>
  185. gapRanges: SortedRanges<ElementIndex>
  186. cyclicPolymerMap: Map<ResidueIndex, ResidueIndex>
  187. }
  188. type _Hierarchy = AtomicData & AtomicSegments & AtomicRanges
  189. export interface AtomicHierarchy extends _Hierarchy {
  190. index: AtomicIndex
  191. derived: AtomicDerivedData
  192. }
  193. export namespace AtomicHierarchy {
  194. /** Start residue inclusive */
  195. export function chainStartResidueIndex(segs: AtomicSegments, cI: ChainIndex) {
  196. return segs.residueAtomSegments.index[segs.chainAtomSegments.offsets[cI]];
  197. }
  198. /** End residue exclusive */
  199. export function chainEndResidueIndexExcl(segs: AtomicSegments, cI: ChainIndex) {
  200. return segs.residueAtomSegments.index[segs.chainAtomSegments.offsets[cI + 1] - 1] + 1 as ResidueIndex;
  201. }
  202. }