atom-site.ts 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271
  1. /**
  2. * Copyright (c) 2019-2023 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * @author David Sehnal <david.sehnal@gmail.com>
  5. * @author Alexander Rose <alexander.rose@weirdbyte.de>
  6. */
  7. import { CifField } from '../../../mol-io/reader/cif';
  8. import { mmCIF_Schema } from '../../../mol-io/reader/cif/schema/mmcif';
  9. import { TokenBuilder, Tokenizer } from '../../../mol-io/reader/common/text/tokenizer';
  10. import { guessElementSymbolTokens } from '../util';
  11. import { Column } from '../../../mol-data/db';
  12. import { areTokensEmpty } from '../../../mol-io/reader/common/text/column/token';
  /** The object type returned by `getAtomSiteTemplate`; `getAtomSite` and `addAtom` take it as their `sites` argument. */
  type AtomSiteTemplate = ReturnType<typeof getAtomSiteTemplate>
  /**
   * Creates a fresh, empty set of per-atom accumulators.
   *
   * Each text-backed field gets its own token builder over `data`; `id`,
   * `pdbx_PDB_model_num` and `label_entity_id` are plain string arrays, and
   * `index` starts at 0.
   *
   * @param data the source text the token builders refer to
   * @param count the expected number of atoms, used to size the token builders
   * @returns the empty accumulator object
   */
  export function getAtomSiteTemplate(data: string, count: number) {
      // two slots per expected atom (presumably one start and one end offset per token)
      const tokenCapacity = 2 * count;
      const newTokens = () => TokenBuilder.create(data, tokenCapacity);
      const newStrings = (): string[] => [];

      return {
          index: 0,
          group_PDB: newTokens(),
          id: newStrings(),
          auth_atom_id: newTokens(),
          label_alt_id: newTokens(),
          auth_comp_id: newTokens(),
          auth_asym_id: newTokens(),
          auth_seq_id: newTokens(),
          pdbx_PDB_ins_code: newTokens(),
          Cartn_x: newTokens(),
          Cartn_y: newTokens(),
          Cartn_z: newTokens(),
          occupancy: newTokens(),
          B_iso_or_equiv: newTokens(),
          type_symbol: newTokens(),
          pdbx_PDB_model_num: newStrings(),
          label_entity_id: newStrings(),
          partial_charge: newTokens(),
      };
  }
  /**
   * Turns the accumulated per-atom data into `atom_site` fields.
   *
   * Besides wrapping the collected values, this derives the `label_*` ids:
   * - `label_asym_id`: the author chain id, suffixed with `_<n>` when a chain id
   *   seen before in the same model shows up again at an index listed in
   *   `terIndices` (and `options.hasAssemblies` is false).
   * - `label_atom_id`: the author atom name, suffixed with `_<n>` when it repeats
   *   within the current model / chain / residue number / insertion code run.
   * - `label_seq_id`: only filled when at least one insertion code is present;
   *   otherwise it is left undefined.
   * - `label_comp_id`: the very same field object as `auth_comp_id`.
   *
   * @param sites accumulators filled by `addAtom`
   * @param terIndices atom indices at which a chain id seen before may be renamed
   * @param options `hasAssemblies` disables the chain renaming
   * @returns the `atom_site` fields plus `partial_charge`
   */
  export function getAtomSite(sites: AtomSiteTemplate, terIndices: Set<number>, options: { hasAssemblies: boolean }): { [K in keyof mmCIF_Schema['atom_site'] | 'partial_charge']?: CifField } {
      const pdbx_PDB_model_num = CifField.ofStrings(sites.pdbx_PDB_model_num);
      const auth_asym_id = CifField.ofTokens(sites.auth_asym_id);
      const auth_seq_id = CifField.ofTokens(sites.auth_seq_id);
      const pdbx_PDB_ins_code = CifField.ofTokens(sites.pdbx_PDB_ins_code);
      const auth_atom_id = CifField.ofTokens(sites.auth_atom_id);
      const auth_comp_id = CifField.ofTokens(sites.auth_comp_id);
      const id = CifField.ofStrings(sites.id);
      const rowCount = id.rowCount;

      // values of the previous row, seeded from the first row
      let prevModelNum = pdbx_PDB_model_num.str(0);
      let prevAsymId = auth_asym_id.str(0);
      let prevSeqId = auth_seq_id.int(0);
      let prevInsCode = pdbx_PDB_ins_code.str(0);

      // label ids assigned to the run of rows currently being processed
      let runLabelAsymId = prevAsymId;
      let runLabelSeqId = prevSeqId;

      const chainUseCounts = new Map<string, number>();
      const atomNameCounts = new Map<string, number>();

      const labelAsymIds: string[] = [];
      const labelAtomIds: string[] = [];
      const labelSeqIds: number[] = [];

      // a serial label_seq_id is only generated if any row has an insertion code
      let hasInsCode = false;
      for (let row = 0; row < rowCount && !hasInsCode; ++row) {
          hasInsCode = pdbx_PDB_ins_code.str(row) !== '';
      }

      // make asym ids unique per model and atom ids unique per residue
      for (let row = 0; row < rowCount; ++row) {
          const modelNum = pdbx_PDB_model_num.str(row);
          const asymId = auth_asym_id.str(row);
          const seqId = auth_seq_id.int(row);
          const insCode = pdbx_PDB_ins_code.str(row);
          const atomId = auth_atom_id.str(row);

          const isNewModel = modelNum !== prevModelNum;
          if (isNewModel || asymId !== prevAsymId) {
              // new model or new chain: restart all tracking from this row
              if (isNewModel) {
                  chainUseCounts.clear();
                  prevModelNum = modelNum;
              }
              atomNameCounts.clear();
              prevAsymId = asymId;
              prevSeqId = seqId;
              prevInsCode = insCode;
              runLabelAsymId = asymId;
              runLabelSeqId = seqId;
          } else if (seqId !== prevSeqId) {
              atomNameCounts.clear();
              // follow the author numbering while both are in sync, otherwise count up
              runLabelSeqId = prevSeqId === runLabelSeqId ? seqId : runLabelSeqId + 1;
              prevSeqId = seqId;
              prevInsCode = insCode;
          } else if (insCode !== prevInsCode) {
              atomNameCounts.clear();
              prevInsCode = insCode;
              runLabelSeqId += 1;
          }

          const chainUses = chainUseCounts.get(asymId);
          if (chainUses === undefined) {
              chainUseCounts.set(asymId, 0);
          } else if (terIndices.has(row) && !options.hasAssemblies) {
              // only change the chain name if there are TER records,
              // otherwise assume repeated chain name use is from interleaved chains;
              // also don't change the chain name if there are assemblies
              // as those require the original chain name
              const nextUse = chainUses + 1;
              chainUseCounts.set(asymId, nextUse);
              runLabelAsymId = `${asymId}_${nextUse}`;
          }
          labelAsymIds.push(runLabelAsymId);

          // the counter is keyed by the original atom name, not the suffixed one
          const atomNameUses = atomNameCounts.get(atomId);
          if (atomNameUses === undefined) {
              atomNameCounts.set(atomId, 0);
              labelAtomIds.push(atomId);
          } else {
              const nextUse = atomNameUses + 1;
              atomNameCounts.set(atomId, nextUse);
              labelAtomIds.push(`${atomId}_${nextUse}`);
          }

          if (hasInsCode) {
              labelSeqIds.push(runLabelSeqId);
          }
      }

      const labelAsymIdColumn = Column.ofStringArray(labelAsymIds);
      const labelAtomIdColumn = Column.ofStringArray(labelAtomIds);

      let label_seq_id;
      if (hasInsCode) {
          label_seq_id = CifField.ofColumn(Column.ofIntArray(labelSeqIds));
      } else {
          label_seq_id = CifField.ofUndefined(sites.index, Column.Schema.int);
      }

      return {
          auth_asym_id,
          auth_atom_id,
          auth_comp_id,
          auth_seq_id,
          B_iso_or_equiv: CifField.ofTokens(sites.B_iso_or_equiv),
          Cartn_x: CifField.ofTokens(sites.Cartn_x),
          Cartn_y: CifField.ofTokens(sites.Cartn_y),
          Cartn_z: CifField.ofTokens(sites.Cartn_z),
          group_PDB: CifField.ofTokens(sites.group_PDB),
          id,
          label_alt_id: CifField.ofTokens(sites.label_alt_id),
          label_asym_id: CifField.ofColumn(labelAsymIdColumn),
          label_atom_id: CifField.ofColumn(labelAtomIdColumn),
          label_comp_id: auth_comp_id,
          label_seq_id,
          label_entity_id: CifField.ofStrings(sites.label_entity_id),
          // if no atom has an occupancy value the field is left undefined
          occupancy: areTokensEmpty(sites.occupancy)
              ? CifField.ofUndefined(sites.index, Column.Schema.float)
              : CifField.ofTokens(sites.occupancy),
          type_symbol: CifField.ofTokens(sites.type_symbol),
          pdbx_PDB_ins_code: CifField.ofTokens(sites.pdbx_PDB_ins_code),
          pdbx_PDB_model_num,
          partial_charge: CifField.ofTokens(sites.partial_charge)
      };
  }
  /**
   * Parses one fixed-column ATOM/HETATM record and appends its fields to `sites`.
   *
   * The record occupies `[s, e)` of the tokenizer's text. Every call appends
   * exactly one entry to each per-atom field (except `partial_charge`, which is
   * only filled for PDBQT input) and then advances `sites.index`.
   *
   * Optional trailing fields are only read as far as the record reaches, so a
   * short record never picks up characters from the following line:
   * - occupancy is clamped to the record end, or empty if the record stops at or
   *   before column 54
   * - the temperature factor is empty if the record is shorter than 66 columns
   * - the element symbol is guessed from the atom name if columns 77-78 are
   *   missing or blank, or if the input is PDBQT
   *
   * @param sites accumulators created by `getAtomSiteTemplate`
   * @param model model number stored for this atom
   * @param data tokenizer over the source text; its token range is overwritten
   * @param s offset of the first character of the record
   * @param e offset just past the last character of the record
   * @param isPdbqt whether the record comes from a PDBQT file
   */
  export function addAtom(sites: AtomSiteTemplate, model: string, data: Tokenizer, s: number, e: number, isPdbqt: boolean) {
      const { data: str } = data;
      const length = e - s;

      // TODO: filter invalid atoms

      // COLUMNS        DATA TYPE       CONTENTS
      // --------------------------------------------------------------------------------
      //  1 -  6        Record name     "ATOM  "
      TokenBuilder.addToken(sites.group_PDB, Tokenizer.trim(data, s, s + 6));

      //  7 - 11        Integer         Atom serial number.
      // TODO: support HEX
      Tokenizer.trim(data, s + 6, s + 11);
      sites.id[sites.index] = str.substring(data.tokenStart, data.tokenEnd);

      // 13 - 16        Atom            Atom name.
      TokenBuilder.addToken(sites.auth_atom_id, Tokenizer.trim(data, s + 12, s + 16));

      // 17             Character       Alternate location indicator.
      if (str.charCodeAt(s + 16) === 32) { // ' '
          TokenBuilder.add(sites.label_alt_id, 0, 0);
      } else {
          TokenBuilder.add(sites.label_alt_id, s + 16, s + 17);
      }

      // 18 - 20        Residue name    Residue name.
      TokenBuilder.addToken(sites.auth_comp_id, Tokenizer.trim(data, s + 17, s + 20));

      // 22             Character       Chain identifier.
      TokenBuilder.add(sites.auth_asym_id, s + 21, s + 22);

      // 23 - 26        Integer         Residue sequence number.
      // TODO: support HEX
      TokenBuilder.addToken(sites.auth_seq_id, Tokenizer.trim(data, s + 22, s + 26));

      // 27             AChar           Code for insertion of residues.
      if (str.charCodeAt(s + 26) === 32) { // ' '
          TokenBuilder.add(sites.pdbx_PDB_ins_code, 0, 0);
      } else {
          TokenBuilder.add(sites.pdbx_PDB_ins_code, s + 26, s + 27);
      }

      // 31 - 38        Real(8.3)       Orthogonal coordinates for X in Angstroms.
      TokenBuilder.addToken(sites.Cartn_x, Tokenizer.trim(data, s + 30, s + 38));

      // 39 - 46        Real(8.3)       Orthogonal coordinates for Y in Angstroms.
      TokenBuilder.addToken(sites.Cartn_y, Tokenizer.trim(data, s + 38, s + 46));

      // 47 - 54        Real(8.3)       Orthogonal coordinates for Z in Angstroms.
      TokenBuilder.addToken(sites.Cartn_z, Tokenizer.trim(data, s + 46, s + 54));

      // 55 - 60        Real(6.2)       Occupancy.
      if (length > 54) {
          // never read past the end of the record: a record that is cut off
          // inside this field would otherwise pull in the line break and the
          // start of the next record
          TokenBuilder.addToken(sites.occupancy, Tokenizer.trim(data, s + 54, Math.min(s + 60, e)));
      } else {
          // record ends before the occupancy field: store an empty token
          TokenBuilder.add(sites.occupancy, 0, 0);
      }

      // 61 - 66        Real(6.2)       Temperature factor (Default = 0.0).
      if (length >= 66) {
          TokenBuilder.addToken(sites.B_iso_or_equiv, Tokenizer.trim(data, s + 60, s + 66));
      } else {
          TokenBuilder.add(sites.B_iso_or_equiv, 0, 0);
      }

      // 73 - 76        LString(4)      Segment identifier, left-justified.
      // Ignored for regular PDB input; for PDBQT input columns 71 - 76 are read
      // as the partial charge instead.
      if (isPdbqt) {
          TokenBuilder.addToken(sites.partial_charge, Tokenizer.trim(data, s + 70, s + 76));
      }

      // 77 - 78        LString(2)      Element symbol, right-justified.
      let hasElementSymbol = false;
      if (length >= 78 && !isPdbqt) {
          Tokenizer.trim(data, s + 76, s + 78);
          hasElementSymbol = data.tokenStart < data.tokenEnd;
      }
      if (hasElementSymbol) {
          TokenBuilder.addToken(sites.type_symbol, data);
      } else {
          // no usable element column: derive the element from the atom name (columns 13 - 16)
          guessElementSymbolTokens(sites.type_symbol, str, s + 12, s + 16);
      }

      // 79 - 80        LString(2)      charge  Charge on the atom.
      // TODO

      sites.pdbx_PDB_model_num[sites.index] = model;
      sites.index++;
  }