component.ts 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193
  1. /**
  2. * Copyright (c) 2019-2022 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * @author Alexander Rose <alexander.rose@weirdbyte.de>
  5. */
  6. import { Table, Column } from '../../../mol-data/db';
  7. import { WaterNames, PolymerNames } from '../../../mol-model/structure/model/types';
  8. import { SetUtils } from '../../../mol-util/set';
  9. import { BasicSchema } from '../basic/schema';
  10. import { mmCIF_chemComp_schema } from '../../../mol-io/reader/cif/schema/mmcif-extras';
  11. import { SaccharideCompIdMap } from '../../../mol-model/structure/structure/carbohydrates/constants';
  12. type Component = Table.Row<Pick<mmCIF_chemComp_schema, 'id' | 'name' | 'type'>>
  13. const ProteinAtomIdsList = [
  14. new Set(['CA']),
  15. new Set(['C']),
  16. new Set(['N'])
  17. ];
  18. const RnaAtomIdsList = [
  19. new Set(['P', 'O3\'', 'O3*']),
  20. new Set(['C4\'', 'C4*']),
  21. new Set(['O2\'', 'O2*', 'F2\'', 'F2*'])
  22. ];
  23. const DnaAtomIdsList = [
  24. new Set(['P', 'O3\'', 'O3*']),
  25. new Set(['C3\'', 'C3*']),
  26. new Set(['O2\'', 'O2*', 'F2\'', 'F2*'])
  27. ];
  28. /** Used to reduce false positives for atom name-based type guessing */
  29. const NonPolymerNames = new Set([
  30. 'FMN', 'NCN', 'FNS', 'FMA', 'ATP', 'ADP', 'AMP', 'GTP', 'GDP', 'GMP', // Mononucleotides
  31. ]);
  32. const StandardComponents = (function () {
  33. const map = new Map<string, Component>();
  34. const components: Component[] = [
  35. { id: 'HIS', name: 'HISTIDINE', type: 'L-peptide linking' },
  36. { id: 'ARG', name: 'ARGININE', type: 'L-peptide linking' },
  37. { id: 'LYS', name: 'LYSINE', type: 'L-peptide linking' },
  38. { id: 'ILE', name: 'ISOLEUCINE', type: 'L-peptide linking' },
  39. { id: 'PHE', name: 'PHENYLALANINE', type: 'L-peptide linking' },
  40. { id: 'LEU', name: 'LEUCINE', type: 'L-peptide linking' },
  41. { id: 'TRP', name: 'TRYPTOPHAN', type: 'L-peptide linking' },
  42. { id: 'ALA', name: 'ALANINE', type: 'L-peptide linking' },
  43. { id: 'MET', name: 'METHIONINE', type: 'L-peptide linking' },
  44. { id: 'CYS', name: 'CYSTEINE', type: 'L-peptide linking' },
  45. { id: 'ASN', name: 'ASPARAGINE', type: 'L-peptide linking' },
  46. { id: 'VAL', name: 'VALINE', type: 'L-peptide linking' },
  47. { id: 'GLY', name: 'GLYCINE', type: 'peptide linking' },
  48. { id: 'SER', name: 'SERINE', type: 'L-peptide linking' },
  49. { id: 'GLN', name: 'GLUTAMINE', type: 'L-peptide linking' },
  50. { id: 'TYR', name: 'TYROSINE', type: 'L-peptide linking' },
  51. { id: 'ASP', name: 'ASPARTIC ACID', type: 'L-peptide linking' },
  52. { id: 'GLU', name: 'GLUTAMIC ACID', type: 'L-peptide linking' },
  53. { id: 'THR', name: 'THREONINE', type: 'L-peptide linking' },
  54. { id: 'PRO', name: 'PROLINE', type: 'L-peptide linking' },
  55. { id: 'SEC', name: 'SELENOCYSTEINE', type: 'L-peptide linking' },
  56. { id: 'PYL', name: 'PYRROLYSINE', type: 'L-peptide linking' },
  57. { id: 'MSE', name: 'SELENOMETHIONINE', type: 'L-peptide linking' },
  58. { id: 'SEP', name: 'PHOSPHOSERINE', type: 'L-peptide linking' },
  59. { id: 'TPO', name: 'PHOSPHOTHREONINE', type: 'L-peptide linking' },
  60. { id: 'PTR', name: 'O-PHOSPHOTYROSINE', type: 'L-peptide linking' },
  61. { id: 'PCA', name: 'PYROGLUTAMIC ACID', type: 'L-peptide linking' },
  62. { id: 'A', name: 'ADENOSINE-5\'-MONOPHOSPHATE', type: 'RNA linking' },
  63. { id: 'C', name: 'CYTIDINE-5\'-MONOPHOSPHATE', type: 'RNA linking' },
  64. { id: 'T', name: 'THYMIDINE-5\'-MONOPHOSPHATE', type: 'RNA linking' },
  65. { id: 'G', name: 'GUANOSINE-5\'-MONOPHOSPHATE', type: 'RNA linking' },
  66. { id: 'I', name: 'INOSINIC ACID', type: 'RNA linking' },
  67. { id: 'U', name: 'URIDINE-5\'-MONOPHOSPHATE', type: 'RNA linking' },
  68. { id: 'DA', name: '2\'-DEOXYADENOSINE-5\'-MONOPHOSPHATE', type: 'DNA linking' },
  69. { id: 'DC', name: '2\'-DEOXYCYTIDINE-5\'-MONOPHOSPHATE', type: 'DNA linking' },
  70. { id: 'DT', name: 'THYMIDINE-5\'-MONOPHOSPHATE', type: 'DNA linking' },
  71. { id: 'DG', name: '2\'-DEOXYGUANOSINE-5\'-MONOPHOSPHATE', type: 'DNA linking' },
  72. { id: 'DI', name: '2\'-DEOXYINOSINE-5\'-MONOPHOSPHATE', type: 'DNA linking' },
  73. { id: 'DU', name: '2\'-DEOXYURIDINE-5\'-MONOPHOSPHATE', type: 'DNA linking' },
  74. ];
  75. components.forEach(c => map.set(c.id, c));
  76. return map;
  77. })();
  78. const CharmmIonComponents = (function () {
  79. const map = new Map<string, Component>();
  80. const components: Component[] = [
  81. { id: 'ZN2', name: 'ZINC ION', type: 'Ion' },
  82. { id: 'SOD', name: 'SODIUM ION', type: 'Ion' },
  83. { id: 'CES', name: 'CESIUM ION', type: 'Ion' },
  84. { id: 'CLA', name: 'CHLORIDE ION', type: 'Ion' },
  85. { id: 'CAL', name: 'CALCIUM ION', type: 'Ion' },
  86. { id: 'POT', name: 'POTASSIUM ION', type: 'Ion' },
  87. ];
  88. components.forEach(c => map.set(c.id, c));
  89. return map;
  90. })();
  91. export class ComponentBuilder {
  92. private namesMap = new Map<string, string>();
  93. private comps = new Map<string, Component>();
  94. private ids: string[] = [];
  95. private names: string[] = [];
  96. private types: mmCIF_chemComp_schema['type']['T'][] = [];
  97. private mon_nstd_flags: mmCIF_chemComp_schema['mon_nstd_flag']['T'][] = [];
  98. private set(c: Component) {
  99. this.comps.set(c.id, c);
  100. this.ids.push(c.id);
  101. this.names.push(c.name);
  102. this.types.push(c.type);
  103. this.mon_nstd_flags.push(PolymerNames.has(c.id) ? 'y' : 'n');
  104. }
  105. private getAtomIds(index: number) {
  106. const atomIds = new Set<string>();
  107. const prevSeqId = this.seqId.value(index);
  108. while (index < this.seqId.rowCount) {
  109. const seqId = this.seqId.value(index);
  110. if (seqId !== prevSeqId) break;
  111. atomIds.add(this.atomId.value(index));
  112. prevSeqId - seqId;
  113. index += 1;
  114. }
  115. return atomIds;
  116. }
  117. private hasAtomIds(atomIds: Set<string>, atomIdsList: Set<string>[]) {
  118. for (let i = 0, il = atomIdsList.length; i < il; ++i) {
  119. if (!SetUtils.areIntersecting(atomIds, atomIdsList[i])) {
  120. return false;
  121. }
  122. }
  123. return true;
  124. }
  125. private getType(atomIds: Set<string>): Component['type'] {
  126. if (this.hasAtomIds(atomIds, ProteinAtomIdsList)) {
  127. return 'peptide linking';
  128. } else if (this.hasAtomIds(atomIds, RnaAtomIdsList)) {
  129. return 'RNA linking';
  130. } else if (this.hasAtomIds(atomIds, DnaAtomIdsList)) {
  131. return 'DNA linking';
  132. } else {
  133. return 'other';
  134. }
  135. }
  136. has(compId: string) { return this.comps.has(compId); }
  137. get(compId: string) { return this.comps.get(compId); }
  138. add(compId: string, index: number) {
  139. if (!this.has(compId)) {
  140. if (StandardComponents.has(compId)) {
  141. this.set(StandardComponents.get(compId)!);
  142. } else if (WaterNames.has(compId)) {
  143. this.set({ id: compId, name: 'WATER', type: 'non-polymer' });
  144. } else if (NonPolymerNames.has(compId.toUpperCase())) {
  145. this.set({ id: compId, name: this.namesMap.get(compId) || compId, type: 'non-polymer' });
  146. } else if (SaccharideCompIdMap.has(compId.toUpperCase())) {
  147. this.set({ id: compId, name: this.namesMap.get(compId) || compId, type: 'saccharide' });
  148. } else {
  149. const atomIds = this.getAtomIds(index);
  150. if (atomIds.size === 1 && CharmmIonComponents.has(compId)) {
  151. this.set(CharmmIonComponents.get(compId)!);
  152. } else {
  153. const type = this.getType(atomIds);
  154. this.set({ id: compId, name: this.namesMap.get(compId) || compId, type });
  155. }
  156. }
  157. }
  158. return this.get(compId)!;
  159. }
  160. getChemCompTable() {
  161. return Table.ofPartialColumns(BasicSchema.chem_comp, {
  162. id: Column.ofStringArray(this.ids),
  163. name: Column.ofStringArray(this.names),
  164. type: Column.ofStringAliasArray(this.types),
  165. mon_nstd_flag: Column.ofStringAliasArray(this.mon_nstd_flags),
  166. }, this.ids.length);
  167. }
  168. setNames(names: [string, string][]) {
  169. names.forEach(n => this.namesMap.set(n[0], n[1]));
  170. }
  171. constructor(private seqId: Column<number>, private atomId: Column<string>) {
  172. }
  173. }