component.ts 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190
  1. /**
  2. * Copyright (c) 2019-2022 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * @author Alexander Rose <alexander.rose@weirdbyte.de>
  5. */
  6. import { Table, Column } from '../../../mol-data/db';
  7. import { WaterNames, PolymerNames } from '../../../mol-model/structure/model/types';
  8. import { SetUtils } from '../../../mol-util/set';
  9. import { BasicSchema } from '../basic/schema';
  10. import { mmCIF_chemComp_schema } from '../../../mol-io/reader/cif/schema/mmcif-extras';
  /** The `chem_comp` fields (`id`, `name`, `type`) tracked for every component. */
  type ComponentFields = Pick<mmCIF_chemComp_schema, 'id' | 'name' | 'type'>;
  /** A single row holding the `id`, `name` and `type` of a chemical component. */
  type Component = Table.Row<ComponentFields>;
  /**
   * Atom name groups used for guessing peptides: a residue needs at least one
   * atom name from each group to match.
   */
  const ProteinAtomIdsList = [
      ['CA'],
      ['C'],
      ['N'],
  ].map(atomNames => new Set(atomNames));
  /**
   * Atom name groups used for guessing RNA: a residue needs at least one atom
   * name from each group to match. Both the `'` and the `*` spelling of the
   * sugar atom names are listed.
   */
  const RnaAtomIdsList = [
      ['P', 'O3\'', 'O3*'],
      ['C4\'', 'C4*'],
      ['O2\'', 'O2*', 'F2\'', 'F2*'],
  ].map(atomNames => new Set(atomNames));
  /**
   * Atom name groups used for guessing DNA: a residue needs at least one atom
   * name from each group to match. Both the `'` and the `*` spelling of the
   * sugar atom names are listed.
   *
   * NOTE(review): the third group (2' oxygen/fluorine names) is identical to the
   * one used for RNA - confirm that requiring it for DNA is intended.
   */
  const DnaAtomIdsList = [
      ['P', 'O3\'', 'O3*'],
      ['C3\'', 'C3*'],
      ['O2\'', 'O2*', 'F2\'', 'F2*'],
  ].map(atomNames => new Set(atomNames));
  /**
   * Component ids that are always classified as non-polymer. Checking these
   * first reduces false positives of the atom name-based type guessing.
   */
  const NonPolymerNames = new Set([
      // Mononucleotides
      'FMN', 'NCN', 'FNS', 'FMA',
      'ATP', 'ADP', 'AMP',
      'GTP', 'GDP', 'GMP',
  ]);
  /** Built-in peptide, RNA and DNA linking components, keyed by component id. */
  const StandardComponents: Map<string, Component> = (() => {
      const entries: Component[] = [
          // peptide linking
          { id: 'HIS', name: 'HISTIDINE', type: 'L-peptide linking' },
          { id: 'ARG', name: 'ARGININE', type: 'L-peptide linking' },
          { id: 'LYS', name: 'LYSINE', type: 'L-peptide linking' },
          { id: 'ILE', name: 'ISOLEUCINE', type: 'L-peptide linking' },
          { id: 'PHE', name: 'PHENYLALANINE', type: 'L-peptide linking' },
          { id: 'LEU', name: 'LEUCINE', type: 'L-peptide linking' },
          { id: 'TRP', name: 'TRYPTOPHAN', type: 'L-peptide linking' },
          { id: 'ALA', name: 'ALANINE', type: 'L-peptide linking' },
          { id: 'MET', name: 'METHIONINE', type: 'L-peptide linking' },
          { id: 'CYS', name: 'CYSTEINE', type: 'L-peptide linking' },
          { id: 'ASN', name: 'ASPARAGINE', type: 'L-peptide linking' },
          { id: 'VAL', name: 'VALINE', type: 'L-peptide linking' },
          { id: 'GLY', name: 'GLYCINE', type: 'peptide linking' },
          { id: 'SER', name: 'SERINE', type: 'L-peptide linking' },
          { id: 'GLN', name: 'GLUTAMINE', type: 'L-peptide linking' },
          { id: 'TYR', name: 'TYROSINE', type: 'L-peptide linking' },
          { id: 'ASP', name: 'ASPARTIC ACID', type: 'L-peptide linking' },
          { id: 'GLU', name: 'GLUTAMIC ACID', type: 'L-peptide linking' },
          { id: 'THR', name: 'THREONINE', type: 'L-peptide linking' },
          { id: 'PRO', name: 'PROLINE', type: 'L-peptide linking' },
          { id: 'SEC', name: 'SELENOCYSTEINE', type: 'L-peptide linking' },
          { id: 'PYL', name: 'PYRROLYSINE', type: 'L-peptide linking' },
          { id: 'MSE', name: 'SELENOMETHIONINE', type: 'L-peptide linking' },
          { id: 'SEP', name: 'PHOSPHOSERINE', type: 'L-peptide linking' },
          { id: 'TPO', name: 'PHOSPHOTHREONINE', type: 'L-peptide linking' },
          { id: 'PTR', name: 'O-PHOSPHOTYROSINE', type: 'L-peptide linking' },
          { id: 'PCA', name: 'PYROGLUTAMIC ACID', type: 'L-peptide linking' },

          // RNA linking
          { id: 'A', name: 'ADENOSINE-5\'-MONOPHOSPHATE', type: 'RNA linking' },
          { id: 'C', name: 'CYTIDINE-5\'-MONOPHOSPHATE', type: 'RNA linking' },
          { id: 'T', name: 'THYMIDINE-5\'-MONOPHOSPHATE', type: 'RNA linking' },
          { id: 'G', name: 'GUANOSINE-5\'-MONOPHOSPHATE', type: 'RNA linking' },
          { id: 'I', name: 'INOSINIC ACID', type: 'RNA linking' },
          { id: 'U', name: 'URIDINE-5\'-MONOPHOSPHATE', type: 'RNA linking' },

          // DNA linking
          { id: 'DA', name: '2\'-DEOXYADENOSINE-5\'-MONOPHOSPHATE', type: 'DNA linking' },
          { id: 'DC', name: '2\'-DEOXYCYTIDINE-5\'-MONOPHOSPHATE', type: 'DNA linking' },
          { id: 'DT', name: 'THYMIDINE-5\'-MONOPHOSPHATE', type: 'DNA linking' },
          { id: 'DG', name: '2\'-DEOXYGUANOSINE-5\'-MONOPHOSPHATE', type: 'DNA linking' },
          { id: 'DI', name: '2\'-DEOXYINOSINE-5\'-MONOPHOSPHATE', type: 'DNA linking' },
          { id: 'DU', name: '2\'-DEOXYURIDINE-5\'-MONOPHOSPHATE', type: 'DNA linking' },
      ];
      // Index the entries by id, keeping the listing order as insertion order.
      return new Map(entries.map((entry): [string, Component] => [entry.id, entry]));
  })();
  /** Ion components that use CHARMM-style residue names, keyed by component id. */
  const CharmmIonComponents: Map<string, Component> = (() => {
      const entries: Component[] = [
          { id: 'ZN2', name: 'ZINC ION', type: 'Ion' },
          { id: 'SOD', name: 'SODIUM ION', type: 'Ion' },
          { id: 'CES', name: 'CESIUM ION', type: 'Ion' },
          { id: 'CLA', name: 'CHLORIDE ION', type: 'Ion' },
          { id: 'CAL', name: 'CALCIUM ION', type: 'Ion' },
          { id: 'POT', name: 'POTASSIUM ION', type: 'Ion' },
      ];
      // Index the entries by id, keeping the listing order as insertion order.
      return new Map(entries.map((entry): [string, Component] => [entry.id, entry]));
  })();
  /**
   * Incrementally collects the chemical components encountered in a structure
   * and exposes them as a `chem_comp` table.
   *
   * Components are resolved in this order: built-in standard components, water
   * names, known non-polymer names, CHARMM-style single-atom ions and finally a
   * guess based on the atom names of the residue.
   */
  export class ComponentBuilder {
      /** Optional display names by component id, filled via `setNames`. */
      private namesMap = new Map<string, string>();
      /** All components registered so far, by component id. */
      private comps = new Map<string, Component>();

      // Parallel arrays backing the columns of the `chem_comp` table.
      private ids: string[] = [];
      private names: string[] = [];
      private types: mmCIF_chemComp_schema['type']['T'][] = [];
      private mon_nstd_flags: mmCIF_chemComp_schema['mon_nstd_flag']['T'][] = [];

      /** Registers `c` and appends its values to the table column arrays. */
      private set(c: Component) {
          this.comps.set(c.id, c);
          this.ids.push(c.id);
          this.names.push(c.name);
          this.types.push(c.type);
          this.mon_nstd_flags.push(PolymerNames.has(c.id) ? 'y' : 'n');
      }

      /**
       * Collects the atom ids of the consecutive rows, starting at `index`, that
       * share the seqId found at `index`.
       */
      private getAtomIds(index: number) {
          const atomIds = new Set<string>();
          const firstSeqId = this.seqId.value(index);
          for (let i = index, il = this.seqId.rowCount; i < il; ++i) {
              // Stop at the first row that has a different seqId.
              if (this.seqId.value(i) !== firstSeqId) break;
              atomIds.add(this.atomId.value(i));
          }
          return atomIds;
      }

      /** Whether `atomIds` shares at least one entry with every set in `atomIdsList`. */
      private hasAtomIds(atomIds: Set<string>, atomIdsList: Set<string>[]) {
          return atomIdsList.every(required => SetUtils.areIntersecting(atomIds, required));
      }

      /**
       * Guesses the component type from the atom names of a residue. Peptide is
       * checked first, then RNA, then DNA; anything else is `'other'`.
       */
      private getType(atomIds: Set<string>): Component['type'] {
          if (this.hasAtomIds(atomIds, ProteinAtomIdsList)) return 'peptide linking';
          if (this.hasAtomIds(atomIds, RnaAtomIdsList)) return 'RNA linking';
          if (this.hasAtomIds(atomIds, DnaAtomIdsList)) return 'DNA linking';
          return 'other';
      }

      /** Whether a component with id `compId` has been registered. */
      has(compId: string) { return this.comps.has(compId); }

      /** The registered component with id `compId`, or `undefined` if there is none. */
      get(compId: string) { return this.comps.get(compId); }

      /**
       * Registers the component `compId` unless it is already known and returns
       * the registered component.
       *
       * @param compId component id
       * @param index row in the seqId/atomId columns from which atom ids are
       * collected when the type has to be guessed (presumably the first atom of
       * a residue with this component id - confirm with callers)
       */
      add(compId: string, index: number) {
          if (!this.has(compId)) {
              if (StandardComponents.has(compId)) {
                  this.set(StandardComponents.get(compId)!);
              } else if (WaterNames.has(compId)) {
                  this.set({ id: compId, name: 'WATER', type: 'non-polymer' });
              } else if (NonPolymerNames.has(compId.toUpperCase())) {
                  this.set({ id: compId, name: this.namesMap.get(compId) || compId, type: 'non-polymer' });
              } else {
                  const atomIds = this.getAtomIds(index);
                  // CHARMM-style ion names only count when the residue is a single atom.
                  if (atomIds.size === 1 && CharmmIonComponents.has(compId)) {
                      this.set(CharmmIonComponents.get(compId)!);
                  } else {
                      const type = this.getType(atomIds);
                      this.set({ id: compId, name: this.namesMap.get(compId) || compId, type });
                  }
              }
          }
          return this.get(compId)!;
      }

      /** Builds a `chem_comp` table from all components registered so far. */
      getChemCompTable() {
          return Table.ofPartialColumns(BasicSchema.chem_comp, {
              id: Column.ofStringArray(this.ids),
              name: Column.ofStringArray(this.names),
              type: Column.ofStringAliasArray(this.types),
              mon_nstd_flag: Column.ofStringAliasArray(this.mon_nstd_flags),
          }, this.ids.length);
      }

      /**
       * Stores display names used for components registered afterwards.
       *
       * @param names `[compId, name]` pairs
       */
      setNames(names: [string, string][]) {
          names.forEach(n => this.namesMap.set(n[0], n[1]));
      }

      /**
       * @param seqId column read to find the rows that belong to one residue
       * @param atomId column of atom ids, read at the same row indices as `seqId`
       */
      constructor(private seqId: Column<number>, private atomId: Column<string>) {
      }
  }