secondary-structure.ts 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261
  1. /**
  2. * Copyright (c) 2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * @author Alexander Rose <alexander.rose@weirdbyte.de>
  5. */
  6. import { CifCategory, CifField } from '../../../mol-io/reader/cif';
  7. import { mmCIF_Schema } from '../../../mol-io/reader/cif/schema/mmcif';
  8. import { Tokens } from '../../../mol-io/reader/common/text/tokenizer';
  9. import { Column } from '../../../mol-data/db';
  /**
   * Lookup from the helix class number of a PDB HELIX record (columns 39 - 40)
   * to the matching mmCIF `struct_conf.conf_type_id` value.
   *
   *   CLASS NUMBER   TYPE OF HELIX
   *   ------------------------------------------
   *    1             Right-handed alpha (default)
   *    2             Right-handed omega
   *    3             Right-handed pi
   *    4             Right-handed gamma
   *    5             Right-handed 3 - 10
   *    6             Left-handed alpha
   *    7             Left-handed omega
   *    8             Left-handed gamma
   *    9             2 - 7 ribbon/helix
   *   10             Polyproline
   */
  const HelixTypes: Record<string, mmCIF_Schema['struct_conf']['conf_type_id']['T']> = {
      '1': 'helx_rh_al_p',
      '2': 'helx_rh_om_p',
      '3': 'helx_rh_pi_p',
      '4': 'helx_rh_ga_p',
      '5': 'helx_rh_3t_p',
      '6': 'helx_lh_al_p',
      '7': 'helx_lh_om_p',
      '8': 'helx_lh_ga_p',
      '9': 'helx_rh_27_p', // TODO or left-handed???
      '10': 'helx_rh_pp_p', // TODO or left-handed???
  };
  /**
   * Translate a PDB helix class (the text read from a HELIX record) into an
   * mmCIF `conf_type_id`.
   *
   * @param type helix class as a string, used as the key into `HelixTypes`
   * @returns the mapped type id, or the generic `'helx_p'` when the lookup
   *          yields nothing (e.g. an empty or unknown class)
   */
  function getStructConfTypeId(type: string): mmCIF_Schema['struct_conf']['conf_type_id']['T'] {
      const confTypeId = HelixTypes[type];
      if (confTypeId) {
          return confTypeId;
      }
      return 'helx_p';
  }
  /**
   * Raw fields of one PDB HELIX record. Every value is the trimmed text of its
   * fixed-width column range; nothing is converted to a number here.
   */
  interface PdbHelix {
      /** Serial number of the helix (columns 8 - 10). */
      serNum: string;
      /** Helix identifier (columns 12 - 14). */
      helixID: string;
      /** Name of the initial residue (columns 16 - 18). */
      initResName: string;
      /** Chain identifier of the initial residue (column 20). */
      initChainID: string;
      /** Sequence number of the initial residue (columns 22 - 25). */
      initSeqNum: string;
      /** Insertion code of the initial residue (column 26). */
      initICode: string;
      /** Name of the terminal residue (columns 28 - 30). */
      endResName: string;
      /** Chain identifier of the terminal residue (column 32). */
      endChainID: string;
      /** Sequence number of the terminal residue (columns 34 - 37). */
      endSeqNum: string;
      /** Insertion code of the terminal residue (column 38). */
      endICode: string;
      /** Helix class (columns 39 - 40). */
      helixClass: string;
      /** Comment about this helix (columns 41 - 70). */
      comment: string;
      /** Length of this helix (columns 72 - 76). */
      length: string;
  }
  /**
   * Parse PDB HELIX records into an mmCIF-style `struct_conf` category.
   *
   * @param lines tokenized input; line `n` is the text of `lines.data` between
   *              `lines.indices[2 * n]` and `lines.indices[2 * n + 1]`
   * @param lineStart index of the first line to read (inclusive)
   * @param lineEnd index one past the last line to read (exclusive)
   * @returns a `struct_conf` category with one row per line in the range
   */
  export function parseHelix(lines: Tokens, lineStart: number, lineEnd: number): CifCategory {
      const helices: PdbHelix[] = [];
      const getLine = (n: number) => lines.data.substring(lines.indices[2 * n], lines.indices[2 * n + 1]);
      // Reads a fixed-width field given the 1-based, inclusive column numbers used
      // in the format table below. Columns beyond the end of a short line give ''.
      const field = (line: string, first: number, last: number) => line.substring(first - 1, last).trim();

      for (let i = lineStart; i < lineEnd; i++) {
          const line = getLine(i);
          // COLUMNS   DATA TYPE      FIELD        DEFINITION
          // -----------------------------------------------------------------------------------
          //  1 -  6   Record name    "HELIX "
          //  8 - 10   Integer        serNum       Serial number of the helix. This starts
          //                                       at 1 and increases incrementally.
          // 12 - 14   LString(3)     helixID      Helix identifier. In addition to a serial
          //                                       number, each helix is given an
          //                                       alphanumeric character helix identifier.
          // 16 - 18   Residue name   initResName  Name of the initial residue.
          // 20        Character      initChainID  Chain identifier for the chain containing
          //                                       this helix.
          // 22 - 25   Integer        initSeqNum   Sequence number of the initial residue.
          // 26        AChar          initICode    Insertion code of the initial residue.
          // 28 - 30   Residue name   endResName   Name of the terminal residue of the helix.
          // 32        Character      endChainID   Chain identifier for the chain containing
          //                                       this helix.
          // 34 - 37   Integer        endSeqNum    Sequence number of the terminal residue.
          // 38        AChar          endICode     Insertion code of the terminal residue.
          // 39 - 40   Integer        helixClass   Helix class (see below).
          // 41 - 70   String         comment      Comment about this helix.
          // 72 - 76   Integer        length       Length of this helix.
          helices.push({
              serNum: field(line, 8, 10),
              helixID: field(line, 12, 14),
              initResName: field(line, 16, 18),
              initChainID: field(line, 20, 20),
              initSeqNum: field(line, 22, 25),
              initICode: field(line, 26, 26),
              endResName: field(line, 28, 30),
              // Exactly one column: reading further would pull in the leading
              // digit of a four-digit endSeqNum (columns 34 - 37).
              endChainID: field(line, 32, 32),
              endSeqNum: field(line, 34, 37),
              endICode: field(line, 38, 38),
              helixClass: field(line, 39, 40),
              comment: field(line, 41, 70),
              length: field(line, 72, 76)
          });
      }

      const beg_auth_asym_id = CifField.ofStrings(helices.map(h => h.initChainID));
      const beg_auth_comp_id = CifField.ofStrings(helices.map(h => h.initResName));
      const end_auth_asym_id = CifField.ofStrings(helices.map(h => h.endChainID));
      const end_auth_comp_id = CifField.ofStrings(helices.map(h => h.endResName));

      // The label_* asym/comp ids reuse the very same fields as their auth_*
      // counterparts; the label seq ids are left undefined.
      const struct_conf: CifCategory.Fields<mmCIF_Schema['struct_conf']> = {
          beg_label_asym_id: beg_auth_asym_id,
          beg_label_comp_id: beg_auth_comp_id,
          beg_label_seq_id: CifField.ofUndefined(helices.length, Column.Schema.int),
          beg_auth_asym_id,
          beg_auth_comp_id,
          beg_auth_seq_id: CifField.ofStrings(helices.map(h => h.initSeqNum)),

          conf_type_id: CifField.ofStrings(helices.map(h => getStructConfTypeId(h.helixClass))),
          details: CifField.ofStrings(helices.map(h => h.comment)),

          end_label_asym_id: end_auth_asym_id,
          end_label_comp_id: end_auth_comp_id,
          end_label_seq_id: CifField.ofUndefined(helices.length, Column.Schema.int),
          end_auth_asym_id,
          end_auth_comp_id,
          end_auth_seq_id: CifField.ofStrings(helices.map(h => h.endSeqNum)),

          id: CifField.ofStrings(helices.map(h => h.serNum)),
          pdbx_beg_PDB_ins_code: CifField.ofStrings(helices.map(h => h.initICode)),
          pdbx_end_PDB_ins_code: CifField.ofStrings(helices.map(h => h.endICode)),
          pdbx_PDB_helix_class: CifField.ofStrings(helices.map(h => h.helixClass)),
          pdbx_PDB_helix_length: CifField.ofStrings(helices.map(h => h.length)),
          pdbx_PDB_helix_id: CifField.ofStrings(helices.map(h => h.helixID)),
      };
      return CifCategory.ofFields('struct_conf', struct_conf);
  }
  123. //
  /**
   * Raw fields of one PDB SHEET record. Every value is the trimmed text of its
   * fixed-width column range; nothing is converted to a number here.
   */
  interface PdbSheet {
      /** Strand number within the sheet (columns 8 - 10). */
      strand: string;
      /** Sheet identifier (columns 12 - 14). */
      sheetID: string;
      /** Number of strands in the sheet (columns 15 - 16). */
      numStrands: string;
      /** Residue name of the initial residue (columns 18 - 20). */
      initResName: string;
      /** Chain identifier of the initial residue (column 22). */
      initChainID: string;
      /** Sequence number of the initial residue (columns 23 - 26). */
      initSeqNum: string;
      /** Insertion code of the initial residue (column 27). */
      initICode: string;
      /** Residue name of the terminal residue (columns 29 - 31). */
      endResName: string;
      /** Chain identifier of the terminal residue (column 33). */
      endChainID: string;
      /** Sequence number of the terminal residue (columns 34 - 37). */
      endSeqNum: string;
      /** Insertion code of the terminal residue (column 38). */
      endICode: string;
      /** Sense of the strand relative to the previous one (columns 39 - 40). */
      sense: string;
      /** Registration: atom name in the current strand (columns 42 - 45). */
      curAtom: string;
      /** Registration: residue name in the current strand (columns 46 - 48). */
      curResName: string;
      /** Registration: chain identifier in the current strand (column 50). */
      curChainId: string;
      /** Registration: residue sequence number in the current strand (columns 51 - 54). */
      curResSeq: string;
      /** Registration: insertion code in the current strand (column 55). */
      curICode: string;
      /** Registration: atom name in the previous strand (columns 57 - 60). */
      prevAtom: string;
      /** Registration: residue name in the previous strand (columns 61 - 63). */
      prevResName: string;
      /** Registration: chain identifier in the previous strand (column 65). */
      prevChainId: string;
      /** Registration: residue sequence number in the previous strand (columns 66 - 69). */
      prevResSeq: string;
      /** Registration: insertion code in the previous strand (column 70). */
      prevICode: string;
  }
  /**
   * Parse PDB SHEET records into an mmCIF-style `struct_sheet_range` category.
   *
   * @param lines tokenized input; line `n` is the text of `lines.data` between
   *              `lines.indices[2 * n]` and `lines.indices[2 * n + 1]`
   * @param lineStart index of the first line to read (inclusive)
   * @param lineEnd index one past the last line to read (exclusive)
   * @returns a `struct_sheet_range` category with one row per line in the range
   */
  export function parseSheet(lines: Tokens, lineStart: number, lineEnd: number): CifCategory {
      const sheets: PdbSheet[] = [];
      const getLine = (n: number) => lines.data.substring(lines.indices[2 * n], lines.indices[2 * n + 1]);
      // Reads a fixed-width field given the 1-based, inclusive column numbers used
      // in the format table below. Columns beyond the end of a short line give ''.
      const field = (line: string, first: number, last: number) => line.substring(first - 1, last).trim();

      for (let i = lineStart; i < lineEnd; i++) {
          const line = getLine(i);
          // COLUMNS   DATA TYPE      FIELD        DEFINITION
          // -------------------------------------------------------------------------------------
          //  1 -  6   Record name    "SHEET "
          //  8 - 10   Integer        strand       Strand number which starts at 1 for each
          //                                       strand within a sheet and increases by one.
          // 12 - 14   LString(3)     sheetID      Sheet identifier.
          // 15 - 16   Integer        numStrands   Number of strands in sheet.
          // 18 - 20   Residue name   initResName  Residue name of initial residue.
          // 22        Character      initChainID  Chain identifier of initial residue
          //                                       in strand.
          // 23 - 26   Integer        initSeqNum   Sequence number of initial residue
          //                                       in strand.
          // 27        AChar          initICode    Insertion code of initial residue
          //                                       in strand.
          // 29 - 31   Residue name   endResName   Residue name of terminal residue.
          // 33        Character      endChainID   Chain identifier of terminal residue.
          // 34 - 37   Integer        endSeqNum    Sequence number of terminal residue.
          // 38        AChar          endICode     Insertion code of terminal residue.
          // 39 - 40   Integer        sense        Sense of strand with respect to previous
          //                                       strand in the sheet. 0 if first strand,
          //                                       1 if parallel, and -1 if anti-parallel.
          // 42 - 45   Atom           curAtom      Registration. Atom name in current strand.
          // 46 - 48   Residue name   curResName   Registration. Residue name in current strand.
          // 50        Character      curChainId   Registration. Chain identifier in
          //                                       current strand.
          // 51 - 54   Integer        curResSeq    Registration. Residue sequence number
          //                                       in current strand.
          // 55        AChar          curICode     Registration. Insertion code in
          //                                       current strand.
          // 57 - 60   Atom           prevAtom     Registration. Atom name in previous strand.
          // 61 - 63   Residue name   prevResName  Registration. Residue name in
          //                                       previous strand.
          // 65        Character      prevChainId  Registration. Chain identifier in
          //                                       previous strand.
          // 66 - 69   Integer        prevResSeq   Registration. Residue sequence number
          //                                       in previous strand.
          // 70        AChar          prevICode    Registration. Insertion code in
          //                                       previous strand.
          sheets.push({
              strand: field(line, 8, 10),
              sheetID: field(line, 12, 14),
              numStrands: field(line, 15, 16),
              initResName: field(line, 18, 20),
              initChainID: field(line, 22, 22),
              initSeqNum: field(line, 23, 26),
              initICode: field(line, 27, 27),
              endResName: field(line, 29, 31),
              endChainID: field(line, 33, 33),
              endSeqNum: field(line, 34, 37),
              endICode: field(line, 38, 38),
              sense: field(line, 39, 40),
              curAtom: field(line, 42, 45),
              curResName: field(line, 46, 48),
              curChainId: field(line, 50, 50),
              curResSeq: field(line, 51, 54),
              curICode: field(line, 55, 55),
              prevAtom: field(line, 57, 60),
              prevResName: field(line, 61, 63),
              prevChainId: field(line, 65, 65),
              prevResSeq: field(line, 66, 69),
              prevICode: field(line, 70, 70),
          });
      }

      const beg_auth_asym_id = CifField.ofStrings(sheets.map(s => s.initChainID));
      const beg_auth_comp_id = CifField.ofStrings(sheets.map(s => s.initResName));
      const beg_auth_seq_id = CifField.ofStrings(sheets.map(s => s.initSeqNum));

      const end_auth_asym_id = CifField.ofStrings(sheets.map(s => s.endChainID));
      const end_auth_comp_id = CifField.ofStrings(sheets.map(s => s.endResName));
      const end_auth_seq_id = CifField.ofStrings(sheets.map(s => s.endSeqNum));

      // Each label_* column reuses the very same field as its auth_* counterpart.
      const struct_sheet_range: CifCategory.Fields<mmCIF_Schema['struct_sheet_range']> = {
          beg_label_asym_id: beg_auth_asym_id,
          beg_label_comp_id: beg_auth_comp_id,
          beg_label_seq_id: beg_auth_seq_id,
          beg_auth_asym_id,
          beg_auth_comp_id,
          beg_auth_seq_id,

          end_label_asym_id: end_auth_asym_id,
          // Residue name of the terminal residue, not its chain identifier.
          end_label_comp_id: end_auth_comp_id,
          end_label_seq_id: end_auth_seq_id,
          end_auth_asym_id,
          end_auth_comp_id,
          end_auth_seq_id,

          id: CifField.ofStrings(sheets.map(s => s.strand)),
          sheet_id: CifField.ofStrings(sheets.map(s => s.sheetID)), // TODO wrong, needs to point to _struct_sheet.id
          pdbx_beg_PDB_ins_code: CifField.ofStrings(sheets.map(s => s.initICode)),
          pdbx_end_PDB_ins_code: CifField.ofStrings(sheets.map(s => s.endICode)),
      };
      return CifCategory.ofFields('struct_sheet_range', struct_sheet_range);
  }