pdb.ts 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269
  1. /**
  2. * Copyright (c) 2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * @author David Sehnal <david.sehnal@gmail.com>
  5. */
  6. import Format from '../format';
  7. import { Model } from '../model';
  8. import { Task } from 'mol-task';
  9. import { PdbFile } from 'mol-io/reader/pdb/schema';
  10. import from_mmCIF from './mmcif';
  11. import { mmCIF_Schema } from 'mol-io/reader/cif/schema/mmcif';
  12. import { substringStartsWith } from 'mol-util/string';
  13. import { TokenBuilder, Tokenizer } from 'mol-io/reader/common/text/tokenizer';
  14. import { CifField, CifCategory } from 'mol-io/reader/cif';
  15. import CifTextField, { CifTextValueField } from 'mol-io/reader/cif/text/field';
  /**
   * Wraps a dictionary of fields into a category object.
   *
   * @param name category name, stored verbatim on the result
   * @param fields fields keyed by field name; the keys become `fieldNames`
   * @param rowCount row count, stored verbatim on the result
   * @returns an object exposing `name`, `fieldNames`, `rowCount` and a
   *          `getField` lookup that reads directly from `fields`
   */
  function toCategory(name: string, fields: { [name: string]: CifField | undefined }, rowCount: number): CifCategory {
      const fieldNames = Object.keys(fields);
      // Unknown field names simply yield `undefined`.
      const getField = (f: string) => fields[f];
      return { name, fieldNames, rowCount, getField };
  }
  /**
   * Builds the fields of a fixed, three-row `entity` category:
   * ids '1', '2', '3' paired with the types 'polymer', 'non-polymer', 'water'.
   */
  function _entity(): { [K in keyof mmCIF_Schema['entity']]?: CifField } {
      const ids = ['1', '2', '3'];
      const types = ['polymer', 'non-polymer', 'water'];
      return {
          id: CifTextValueField(ids),
          type: CifTextValueField(types)
      };
  }
  /**
   * Creates the empty column builders that `addAtom` fills while the PDB
   * ATOM/HETATM records are being scanned.
   *
   * @param data the full PDB text that token columns will point into
   * @param count expected number of atoms; each token builder is created
   *              with a size of `2 * count`
   * @returns an object holding `count`, one token builder per token-backed
   *          column and one (initially empty) string array per value column
   */
  function atom_site_template(data: string, count: number) {
      // The string columns are filled by appending one value per atom, so they
      // must start empty. Pre-sizing them with `new Array(count)` would leave
      // `count` undefined slots in front of the appended values.
      const str = () => [] as string[];
      const ts = () => TokenBuilder.create(data, 2 * count);
      return {
          count,
          group_PDB: ts(),
          id: str(),
          auth_atom_id: ts(),
          label_alt_id: ts(),
          auth_comp_id: ts(),
          auth_asym_id: ts(),
          auth_seq_id: ts(),
          pdbx_PDB_ins_code: ts(),
          Cartn_x: ts(),
          Cartn_y: ts(),
          Cartn_z: ts(),
          occupancy: ts(),
          B_iso_or_equiv: ts(),
          type_symbol: ts(),
          pdbx_PDB_model_num: str(),
          label_entity_id: str()
      };
  }
  /**
   * Converts the filled column builders into the fields of an `atom_site`
   * category.
   *
   * The `label_*` asym/atom/comp/seq fields reuse the very same field objects
   * as their `auth_*` counterparts.
   *
   * @param sites column builders populated by `addAtom`
   * @returns the `atom_site` fields keyed by their mmCIF field name
   */
  function _atom_site(sites: AtomSiteTemplate): { [K in keyof mmCIF_Schema['atom_site']]?: CifField } {
      const rowCount = sites.count;
      // Every token-backed column is wrapped the same way.
      const tokenField = (tokens: AtomSiteTemplate['group_PDB']) => CifTextField(tokens, rowCount);

      const asymId = tokenField(sites.auth_asym_id);
      const atomId = tokenField(sites.auth_atom_id);
      const compId = tokenField(sites.auth_comp_id);
      const seqId = tokenField(sites.auth_seq_id);

      return {
          auth_asym_id: asymId,
          auth_atom_id: atomId,
          auth_comp_id: compId,
          auth_seq_id: seqId,
          B_iso_or_equiv: tokenField(sites.B_iso_or_equiv),
          Cartn_x: tokenField(sites.Cartn_x),
          Cartn_y: tokenField(sites.Cartn_y),
          Cartn_z: tokenField(sites.Cartn_z),
          group_PDB: tokenField(sites.group_PDB),
          id: CifTextValueField(sites.id),
          label_alt_id: tokenField(sites.label_alt_id),
          label_asym_id: asymId,
          label_atom_id: atomId,
          label_comp_id: compId,
          label_seq_id: seqId,
          label_entity_id: CifTextValueField(sites.label_entity_id),
          occupancy: tokenField(sites.occupancy),
          type_symbol: tokenField(sites.type_symbol),
          pdbx_PDB_ins_code: tokenField(sites.pdbx_PDB_ins_code),
          pdbx_PDB_model_num: CifTextValueField(sites.pdbx_PDB_model_num)
      };
  }
  /**
   * Parses a single fixed-column ATOM/HETATM record and appends exactly one
   * entry to every column of `sites`.
   *
   * @param sites column builders to append to
   * @param model model number (as a string) recorded for this atom
   * @param data tokenizer over the whole PDB text; its `tokenStart`/`tokenEnd`
   *             are overwritten while parsing
   * @param s offset of the first character of the record in `data.data`
   * @param e offset where the record ends; `e - s` is used as the record
   *          length (presumably exclusive - confirm against the line indexer)
   */
  function addAtom(sites: AtomSiteTemplate, model: string, data: Tokenizer, s: number, e: number) {
      const { data: str } = data;
      const length = e - s;

      // Trims the columns [from, to) of this record and appends the resulting token.
      const addTrimmed = (tokens: AtomSiteTemplate['group_PDB'], from: number, to: number) => {
          Tokenizer.trim(data, s + from, s + to);
          TokenBuilder.add(tokens, data.tokenStart, data.tokenEnd);
      };

      // TODO: filter invalid atoms

      // COLUMNS        DATA TYPE       CONTENTS
      // --------------------------------------------------------------------------------
      //  1 -  6        Record name     "ATOM  "
      addTrimmed(sites.group_PDB, 0, 6);

      //  7 - 11        Integer         Atom serial number.
      // TODO: support HEX
      Tokenizer.trim(data, s + 6, s + 11);
      sites.id.push(str.substring(data.tokenStart, data.tokenEnd));

      // 13 - 16        Atom            Atom name.
      addTrimmed(sites.auth_atom_id, 12, 16);

      // 17             Character       Alternate location indicator.
      if (str.charCodeAt(s + 16) === 32) { // ' '
          TokenBuilder.add(sites.label_alt_id, 0, 0);
      } else {
          TokenBuilder.add(sites.label_alt_id, s + 16, s + 17);
      }

      // 18 - 20        Residue name    Residue name.
      addTrimmed(sites.auth_comp_id, 17, 20);

      // 22             Character       Chain identifier.
      TokenBuilder.add(sites.auth_asym_id, s + 21, s + 22);

      // 23 - 26        Integer         Residue sequence number.
      // TODO: support HEX
      addTrimmed(sites.auth_seq_id, 22, 26);

      // 27             AChar           Code for insertion of residues.
      // Goes into its own column; writing it to label_alt_id would both lose
      // the insertion code and misalign the alt-id column.
      if (str.charCodeAt(s + 26) === 32) { // ' '
          TokenBuilder.add(sites.pdbx_PDB_ins_code, 0, 0);
      } else {
          TokenBuilder.add(sites.pdbx_PDB_ins_code, s + 26, s + 27);
      }

      // 31 - 38        Real(8.3)       Orthogonal coordinates for X in Angstroms.
      addTrimmed(sites.Cartn_x, 30, 38);

      // 39 - 46        Real(8.3)       Orthogonal coordinates for Y in Angstroms.
      addTrimmed(sites.Cartn_y, 38, 46);

      // 47 - 54        Real(8.3)       Orthogonal coordinates for Z in Angstroms.
      addTrimmed(sites.Cartn_z, 46, 54);

      // 55 - 60        Real(6.2)       Occupancy.
      addTrimmed(sites.occupancy, 54, 60);

      // 61 - 66        Real(6.2)       Temperature factor (Default = 0.0).
      if (length >= 66) {
          addTrimmed(sites.B_iso_or_equiv, 60, 66);
      } else {
          // Record too short: add an empty token so the column keeps one entry per atom.
          TokenBuilder.add(sites.B_iso_or_equiv, 0, 0);
      }

      // 73 - 76        LString(4)      Segment identifier, left-justified.
      // ignored

      // 77 - 78        LString(2)      Element symbol, right-justified.
      let hasSymbol = false;
      if (length >= 78) {
          Tokenizer.trim(data, s + 76, s + 78);
          hasSymbol = data.tokenStart < data.tokenEnd;
      }
      if (hasSymbol) {
          TokenBuilder.add(sites.type_symbol, data.tokenStart, data.tokenEnd);
      } else {
          // "guess" the symbol from the first column of the atom name
          TokenBuilder.add(sites.type_symbol, s + 12, s + 13);
      }

      // TODO
      sites.label_entity_id.push('1');
      sites.pdbx_PDB_model_num.push(model);
  }
  /** Shape of the column builders returned by `atom_site_template`. */
  type AtomSiteTemplate = ReturnType<typeof atom_site_template>
  /**
   * Converts a tokenized PDB file into an mmCIF frame containing a fixed
   * `entity` category and an `atom_site` category built from the ATOM and
   * HETATM records.
   *
   * The lines are scanned twice: once to count the atom records (the count
   * sizes the column builders and becomes the `atom_site` row count), and once
   * to parse them. Both passes use the same record test so the count always
   * matches the number of parsed atoms.
   *
   * Model numbering: each `MODEL ` record increments the current model number
   * (the number written in the record itself is not parsed); atoms that appear
   * before any MODEL record are assigned to model 1.
   *
   * @param pdb PDB file providing `lines` (text plus start/end offsets) and an optional `id`
   * @returns the mmCIF frame; its header is `pdb.id`, or 'PDB' when the id is empty or missing
   */
  async function pdbToMmCIF(pdb: PdbFile): Promise<Format.mmCIF> {
      const { lines } = pdb;
      const { data, indices } = lines;
      const tokenizer = Tokenizer(data);

      // True for the records that contribute an atom (ATOM / HETATM).
      // The first character is checked before the prefix comparison so that
      // most non-atom lines are rejected with a single lookup.
      const isAtomRecord = (s: number, e: number): boolean => {
          switch (data[s]) {
              case 'A': return substringStartsWith(data, s, e, 'ATOM ');
              case 'H': return substringStartsWith(data, s, e, 'HETATM');
              default: return false;
          }
      };

      // Count the atoms
      let atomCount = 0;
      for (let i = 0, _i = lines.count; i < _i; i++) {
          if (isAtomRecord(indices[2 * i], indices[2 * i + 1])) atomCount++;
      }

      const atom_site = atom_site_template(data, atomCount);

      let modelNum = 0, modelStr = '';
      for (let i = 0, _i = lines.count; i < _i; i++) {
          const s = indices[2 * i], e = indices[2 * i + 1];
          if (isAtomRecord(s, e)) {
              // Atoms without a preceding MODEL record belong to model 1.
              if (!modelNum) { modelNum++; modelStr = '' + modelNum; }
              addAtom(atom_site, modelStr, tokenizer, s, e);
          } else if (data[s] === 'M' && substringStartsWith(data, s, e, 'MODEL ')) {
              modelNum++;
              modelStr = '' + modelNum;
          }
      }

      const categories = {
          entity: toCategory('entity', _entity(), 3),
          atom_site: toCategory('atom_site', _atom_site(atom_site), atomCount)
      };

      return Format.mmCIF({
          header: pdb.id || 'PDB',
          categoryNames: Object.keys(categories),
          categories
      });
  }
  /**
   * Creates the task that builds models from a PDB format object.
   *
   * When run, the task reports 'Converting to mmCIF...', converts
   * `format.data` with `pdbToMmCIF`, and then runs the mmCIF model builder
   * in the same task context.
   *
   * @param format PDB format object whose `data` is the PDB file to convert
   * @returns a task named 'Create PDB Model' that resolves to the built models
   */
  function buildModels(format: Format.PDB): Task<ReadonlyArray<Model>> {
      return Task.create('Create PDB Model', async runtime => {
          await runtime.update('Converting to mmCIF...');
          // `format.data` is read only once the task actually runs.
          const mmcifFrame = await pdbToMmCIF(format.data);
          const modelTask = from_mmCIF(mmcifFrame);
          return modelTask.runInContext(runtime);
      });
  }
  228. export default buildModels;