to-cif.ts 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234
  1. /**
  2. * Copyright (c) 2019-2021 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * @author David Sehnal <david.sehnal@gmail.com>
  5. * @author Alexander Rose <alexander.rose@weirdbyte.de>
  6. * @author Yana Rose <yana.v.rose@gmail.com>
  7. */
  8. import { substringStartsWith } from '../../../mol-util/string';
  9. import { CifCategory, CifField, CifFrame } from '../../../mol-io/reader/cif';
  10. import { Tokenizer } from '../../../mol-io/reader/common/text/tokenizer';
  11. import { PdbFile } from '../../../mol-io/reader/pdb/schema';
  12. import { parseCryst1, parseRemark350, parseMtrix } from './assembly';
  13. import { parseHelix, parseSheet } from './secondary-structure';
  14. import { parseCmpnd, parseHetnam } from './entity';
  15. import { ComponentBuilder } from '../common/component';
  16. import { EntityBuilder } from '../common/entity';
  17. import { Column } from '../../../mol-data/db';
  18. import { getMoleculeType } from '../../../mol-model/structure/model/types';
  19. import { getAtomSiteTemplate, addAtom, getAtomSite } from './atom-site';
  20. import { addAnisotropic, getAnisotropicTemplate, getAnisotropic } from './anisotropic';
  21. import { parseConect } from './conect';
  22. import { isDebugMode } from '../../../mol-util/debug';
  23. import { PdbHeaderData, addHeader } from './header';
  24. import { mmCIF_Schema } from '../../../mol-io/reader/cif/schema/mmcif';
  25. export async function pdbToMmCif(pdb: PdbFile): Promise<CifFrame> {
  26. const { lines } = pdb;
  27. const { data, indices } = lines;
  28. const tokenizer = Tokenizer(data);
  29. const isPdbqt = !!pdb.isPdbqt;
  30. // Count the atoms
  31. let atomCount = 0;
  32. let anisotropicCount = 0;
  33. for (let i = 0, _i = lines.count; i < _i; i++) {
  34. const s = indices[2 * i], e = indices[2 * i + 1];
  35. switch (data[s]) {
  36. case 'A':
  37. if (substringStartsWith(data, s, e, 'ATOM ')) atomCount++;
  38. else if (substringStartsWith(data, s, e, 'ANISOU')) anisotropicCount++;
  39. break;
  40. case 'H':
  41. if (substringStartsWith(data, s, e, 'HETATM')) atomCount++;
  42. break;
  43. }
  44. }
  45. const header: PdbHeaderData = {};
  46. const atomSite = getAtomSiteTemplate(data, atomCount);
  47. const anisotropic = getAnisotropicTemplate(data, anisotropicCount);
  48. const entityBuilder = new EntityBuilder();
  49. const helperCategories: CifCategory[] = [];
  50. const heteroNames: [string, string][] = [];
  51. let modelNum = 0, modelStr = '';
  52. let conectRange: [number, number] | undefined = undefined;
  53. const terIndices = new Set<number>();
  54. for (let i = 0, _i = lines.count; i < _i; i++) {
  55. let s = indices[2 * i], e = indices[2 * i + 1];
  56. switch (data[s]) {
  57. case 'A':
  58. if (substringStartsWith(data, s, e, 'ATOM ')) {
  59. if (!modelNum) { modelNum++; modelStr = '' + modelNum; }
  60. addAtom(atomSite, modelStr, tokenizer, s, e, isPdbqt);
  61. } else if (substringStartsWith(data, s, e, 'ANISOU')) {
  62. addAnisotropic(anisotropic, modelStr, tokenizer, s, e);
  63. }
  64. break;
  65. case 'C':
  66. if (substringStartsWith(data, s, e, 'CRYST1')) {
  67. helperCategories.push(...parseCryst1(pdb.id || '?', data.substring(s, e)));
  68. } else if (substringStartsWith(data, s, e, 'CONECT')) {
  69. let j = i + 1;
  70. while (true) {
  71. s = indices[2 * j]; e = indices[2 * j + 1];
  72. if (!substringStartsWith(data, s, e, 'CONECT')) break;
  73. j++;
  74. }
  75. if (conectRange) {
  76. if (isDebugMode) {
  77. console.log('only single CONECT block allowed, ignoring others');
  78. }
  79. } else {
  80. conectRange = [i, j];
  81. }
  82. i = j - 1;
  83. } else if (substringStartsWith(data, s, e, 'COMPND')) {
  84. let j = i + 1;
  85. while (true) {
  86. s = indices[2 * j]; e = indices[2 * j + 1];
  87. if (!substringStartsWith(data, s, e, 'COMPND')) break;
  88. j++;
  89. }
  90. entityBuilder.setCompounds(parseCmpnd(lines, i, j));
  91. i = j - 1;
  92. }
  93. break;
  94. case 'H':
  95. if (substringStartsWith(data, s, e, 'HEADER')) {
  96. addHeader(data, s, e, header);
  97. } else if (substringStartsWith(data, s, e, 'HETATM')) {
  98. if (!modelNum) { modelNum++; modelStr = '' + modelNum; }
  99. addAtom(atomSite, modelStr, tokenizer, s, e, isPdbqt);
  100. } else if (substringStartsWith(data, s, e, 'HELIX')) {
  101. let j = i + 1;
  102. while (true) {
  103. s = indices[2 * j]; e = indices[2 * j + 1];
  104. if (!substringStartsWith(data, s, e, 'HELIX')) break;
  105. j++;
  106. }
  107. helperCategories.push(parseHelix(lines, i, j));
  108. i = j - 1;
  109. } else if (substringStartsWith(data, s, e, 'HETNAM')) {
  110. let j = i + 1;
  111. while (true) {
  112. s = indices[2 * j]; e = indices[2 * j + 1];
  113. if (!substringStartsWith(data, s, e, 'HETNAM')) break;
  114. j++;
  115. }
  116. heteroNames.push(...Array.from(parseHetnam(lines, i, j).entries()));
  117. i = j - 1;
  118. }
  119. break;
  120. case 'M':
  121. if (substringStartsWith(data, s, e, 'MODEL ')) {
  122. modelNum++;
  123. modelStr = '' + modelNum;
  124. }
  125. if (substringStartsWith(data, s, e, 'MTRIX')) {
  126. let j = i + 1;
  127. while (true) {
  128. s = indices[2 * j]; e = indices[2 * j + 1];
  129. if (!substringStartsWith(data, s, e, 'MTRIX')) break;
  130. j++;
  131. }
  132. helperCategories.push(...parseMtrix(lines, i, j));
  133. i = j - 1;
  134. }
  135. // TODO: MODRES records => pdbx_struct_mod_residue
  136. break;
  137. case 'O':
  138. // TODO: ORIGX record => cif.database_PDB_matrix.origx, cif.database_PDB_matrix.origx_vector
  139. break;
  140. case 'R':
  141. if (substringStartsWith(data, s, e, 'REMARK 350')) {
  142. let j = i + 1;
  143. while (true) {
  144. s = indices[2 * j]; e = indices[2 * j + 1];
  145. if (!substringStartsWith(data, s, e, 'REMARK 350')) break;
  146. j++;
  147. }
  148. helperCategories.push(...parseRemark350(lines, i, j));
  149. i = j - 1;
  150. }
  151. break;
  152. case 'S':
  153. if (substringStartsWith(data, s, e, 'SHEET')) {
  154. let j = i + 1;
  155. while (true) {
  156. s = indices[2 * j]; e = indices[2 * j + 1];
  157. if (!substringStartsWith(data, s, e, 'SHEET')) break;
  158. j++;
  159. }
  160. helperCategories.push(parseSheet(lines, i, j));
  161. i = j - 1;
  162. }
  163. // TODO: SCALE record => cif.atom_sites.fract_transf_matrix, cif.atom_sites.fract_transf_vector
  164. break;
  165. case 'T':
  166. if (substringStartsWith(data, s, e, 'TER')) {
  167. terIndices.add(atomSite.index);
  168. }
  169. }
  170. }
  171. // build entry, struct_keywords and pdbx_database_status
  172. if (header.id_code) {
  173. const entry: CifCategory.SomeFields<mmCIF_Schema['entry']> = {
  174. id: CifField.ofString(header.id_code)
  175. };
  176. helperCategories.push(CifCategory.ofFields('entry', entry));
  177. }
  178. if (header.classification) {
  179. const struct_keywords: CifCategory.SomeFields<mmCIF_Schema['struct_keywords']> = {
  180. pdbx_keywords: CifField.ofString(header.classification)
  181. };
  182. helperCategories.push(CifCategory.ofFields('struct_keywords', struct_keywords));
  183. }
  184. if (header.dep_date) {
  185. const pdbx_database_status: CifCategory.SomeFields<mmCIF_Schema['pdbx_database_status']> = {
  186. recvd_initial_deposition_date: CifField.ofString(header.dep_date)
  187. };
  188. helperCategories.push(CifCategory.ofFields('pdbx_database_status', pdbx_database_status));
  189. }
  190. // build entity and chem_comp categories
  191. const seqIds = Column.ofIntTokens(atomSite.auth_seq_id);
  192. const atomIds = Column.ofStringTokens(atomSite.auth_atom_id);
  193. const compIds = Column.ofStringTokens(atomSite.auth_comp_id);
  194. const asymIds = Column.ofStringTokens(atomSite.auth_asym_id);
  195. const componentBuilder = new ComponentBuilder(seqIds, atomIds);
  196. componentBuilder.setNames(heteroNames);
  197. entityBuilder.setNames(heteroNames);
  198. for (let i = 0, il = compIds.rowCount; i < il; ++i) {
  199. const compId = compIds.value(i);
  200. const moleculeType = getMoleculeType(componentBuilder.add(compId, i).type, compId);
  201. atomSite.label_entity_id[i] = entityBuilder.getEntityId(compId, moleculeType, asymIds.value(i));
  202. }
  203. const atom_site = getAtomSite(atomSite, terIndices);
  204. if (!isPdbqt) delete atom_site.partial_charge;
  205. if (conectRange) {
  206. helperCategories.push(parseConect(lines, conectRange[0], conectRange[1], atom_site));
  207. }
  208. const categories = {
  209. entity: CifCategory.ofTable('entity', entityBuilder.getEntityTable()),
  210. chem_comp: CifCategory.ofTable('chem_comp', componentBuilder.getChemCompTable()),
  211. atom_site: CifCategory.ofFields('atom_site', atom_site),
  212. atom_site_anisotrop: CifCategory.ofFields('atom_site_anisotrop', getAnisotropic(anisotropic))
  213. } as any;
  214. for (const c of helperCategories) {
  215. categories[c.name] = c;
  216. }
  217. return {
  218. header: pdb.id || 'PDB',
  219. categoryNames: Object.keys(categories),
  220. categories
  221. };
  222. }