model.ts 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265
  1. /**
  2. * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * @author Alexander Rose <alexander.rose@weirdbyte.de>
  5. * @author David Sehnal <david.sehnal@gmail.com>
  6. */
  7. import * as argparse from 'argparse'
  8. require('util.promisify').shim();
  9. import { CifFrame } from 'mol-io/reader/cif'
  10. import { Model, Structure, StructureElement, Unit, StructureProperties, UnitRing } from 'mol-model/structure'
  11. // import { Run, Progress } from 'mol-task'
  12. import { OrderedSet } from 'mol-data/int';
  13. import { openCif, downloadCif } from './helpers';
  14. import { Vec3 } from 'mol-math/linear-algebra';
  15. import { trajectoryFromMmCIF } from 'mol-model-formats/structure/mmcif';
  /**
   * Downloads the `_updated.cif` file for entry `pdb` from the PDBe static
   * entry URL via `downloadCif` and returns the first element of the result's
   * `blocks`.
   *
   * @param pdb Entry id that is interpolated into the download URL.
   */
  async function downloadFromPdb(pdb: string) {
      // Alternative source: `https://files.rcsb.org/download/${pdb}.cif`
      const url = `http://www.ebi.ac.uk/pdbe/static/entry/${pdb}_updated.cif`;
      const cif = await downloadCif(url, false);
      const firstBlock = cif.blocks[0];
      return firstBlock;
  }
  /**
   * Opens the CIF file at `path` via `openCif` and returns the first element
   * of the result's `blocks`.
   *
   * @param path Location of the file handed to `openCif`.
   */
  export async function readCifFile(path: string) {
      const cif = await openCif(path);
      const firstBlock = cif.blocks[0];
      return firstBlock;
  }
  /**
   * Builds a space-separated label for one atom:
   * `label_asym_id label_comp_id label_seq_id label_atom_id`.
   *
   * @param model Model whose `atomicHierarchy` is queried.
   * @param aI Atom index; used directly for the atom column and to look up the
   *   residue and chain indices through the segment `index` arrays.
   * @returns The four values joined by single spaces.
   */
  export function atomLabel(model: Model, aI: number) {
      const hierarchy = model.atomicHierarchy;

      // Map the atom to the residue and chain rows it belongs to.
      const residueIndex = hierarchy.residueAtomSegments.index[aI];
      const chainIndex = hierarchy.chainAtomSegments.index[aI];

      const asymId = hierarchy.chains.label_asym_id.value(chainIndex);
      const compId = hierarchy.residues.label_comp_id.value(residueIndex);
      const seqId = hierarchy.residues.label_seq_id.value(residueIndex);
      const atomId = hierarchy.atoms.label_atom_id.value(aI);

      return `${asymId} ${compId} ${seqId} ${atomId}`;
  }
  /**
   * Builds a space-separated label for one residue:
   * `label_asym_id label_comp_id label_seq_id`.
   *
   * @param model Model whose `atomicHierarchy` is queried.
   * @param rI Residue index.
   * @returns The three values joined by single spaces.
   */
  export function residueLabel(model: Model, rI: number) {
      const hierarchy = model.atomicHierarchy;

      // The chain is found through the atom stored at the residue's offset
      // (presumably the residue's first atom - confirm against the segmentation type).
      const atomIndex = hierarchy.residueAtomSegments.offsets[rI];
      const chainIndex = hierarchy.chainAtomSegments.index[atomIndex];

      const asymId = hierarchy.chains.label_asym_id.value(chainIndex);
      const compId = hierarchy.residues.label_comp_id.value(rI);
      const seqId = hierarchy.residues.label_seq_id.value(rI);

      return `${asymId} ${compId} ${seqId}`;
  }
  /**
   * Prints the secondary structure of `model` as one line per run of
   * consecutive residues that share the same secondary-structure key.
   * Runs whose element kind is `'none'` are skipped.
   *
   * @param model Model providing `atomicHierarchy.residues` and
   *   `properties.secondaryStructure`.
   */
  export function printSecStructure(model: Model) {
      console.log('\nSecondary Structure\n=============');

      const { residues } = model.atomicHierarchy;
      const { key, elements } = model.properties.secondaryStructure;
      const residueCount = residues._rowCount;

      let first = 0;
      while (first < residueCount) {
          // Advance `next` past every following residue with the same key as `first`.
          let next = first + 1;
          while (next < residueCount && key[first] === key[next]) next++;

          const element = elements[key[first]];
          if (element.kind !== 'none') {
              const last = next - 1;
              console.log(`${element.kind}: ${residueLabel(model, first)} - ${residueLabel(model, last)}`);
          }

          first = next;
      }
  }
  /**
   * Prints the links (bonds) of `structure` as pairs of atom labels.
   *
   * @param structure Structure whose units and links are listed.
   * @param showIntra When true, print the links inside each atomic unit.
   * @param showInter When true, print the links between pairs of units.
   */
  export function printLinks(structure: Structure, showIntra: boolean, showInter: boolean) {
      if (showIntra) {
          console.log('\nIntra Unit Links\n=============');
          for (const unit of structure.units) {
              if (!Unit.isAtomic(unit)) continue;

              const atoms = unit.elements;
              const { a, b, edgeCount } = unit.links;
              const { model } = unit;
              if (!edgeCount) continue;

              // The a/b arrays are walked over 2 * edgeCount entries; only entries
              // with a < b are printed (presumably each bond is stored in both
              // directions - confirm against the links graph type).
              const entryCount = edgeCount * 2;
              for (let entry = 0; entry < entryCount; entry++) {
                  const from = a[entry], to = b[entry];
                  if (from >= to) continue;
                  console.log(`${atomLabel(model, atoms[from])} -- ${atomLabel(model, atoms[to])}`);
              }
          }
      }

      if (showInter) {
          console.log('\nInter Unit Links\n=============');
          const interUnitLinks = structure.links;
          for (const unit of structure.units) {
              if (!Unit.isAtomic(unit)) continue;

              for (const pair of interUnitLinks.getLinkedUnits(unit)) {
                  // Skip pairs flagged as not ordered and pairs without any bond.
                  const hasBonds = pair.bondCount !== 0;
                  if (!pair.areUnitsOrdered || !hasBonds) continue;

                  const { unitA, unitB } = pair;
                  console.log(`${unitA.id} - ${unitB.id}: ${pair.bondCount} bond(s)`);

                  for (const indexA of pair.linkedElementIndices) {
                      for (const bond of pair.getBonds(indexA)) {
                          console.log(`${atomLabel(unitA.model, unitA.elements[indexA])} -- ${atomLabel(unitB.model, unitB.elements[bond.indexB])}`);
                      }
                  }
              }
          }
      }
  }
  /**
   * Prints, for every entry of `model.sequence.byEntityKey`, a summary line
   * (entity id, sequence kind, first/last sequence number, offset, first/last
   * component id) followed by the sequence itself.
   *
   * @param model Model providing `sequence.byEntityKey`.
   */
  export function printSequence(model: Model) {
      console.log('\nSequence\n=============');

      const sequences = model.sequence.byEntityKey;
      for (const entityKey of Object.keys(sequences)) {
          // Object.keys yields strings; the table is indexed by number.
          const entry = sequences[+entityKey];

          const kind = entry.sequence.kind;
          const firstNum = entry.num.value(0);
          const offset = entry.sequence.offset;
          const lastNum = entry.num.value(entry.num.rowCount - 1);
          const firstComp = entry.compId.value(0);
          const lastComp = entry.compId.value(entry.compId.rowCount - 1);

          console.log(`${entry.entityId} (${kind} ${firstNum} (offset ${offset}), ${lastNum}) (${firstComp}, ${lastComp})`);
          console.log(`${entry.sequence.sequence}`);
      }

      console.log();
  }
  /**
   * Prints every residue whose `label_comp_id` has an entry in the model's
   * modified-residue `parentId` map, as `[residueIndex] parent -> compId`.
   *
   * @param model Model providing `properties.modifiedResidues.parentId` and
   *   `atomicHierarchy.residues`.
   */
  export function printModRes(model: Model) {
      console.log('\nModified Residues\n=============');

      const parentOf = model.properties.modifiedResidues.parentId;
      const { label_comp_id, _rowCount } = model.atomicHierarchy.residues;

      for (let residueIndex = 0; residueIndex < _rowCount; residueIndex++) {
          const compId = label_comp_id.value(residueIndex);
          if (parentOf.has(compId)) {
              console.log(`[${residueIndex}] ${parentOf.get(compId)} -> ${compId}`);
          }
      }

      console.log();
  }
  /**
   * Prints, for every atomic unit of `structure`, the number of rings, the
   * number of distinct ring fingerprints and the fingerprints of at most the
   * first five rings (followed by `...` when there are more).
   *
   * @param structure Structure whose units are inspected.
   */
  export function printRings(structure: Structure) {
      console.log('\nRings\n=============');

      for (const unit of structure.units) {
          if (!Unit.isAtomic(unit)) continue;

          const { all, byFingerprint } = unit.rings;

          // Only a short preview of the fingerprints is shown.
          const previewCount = Math.min(5, all.length);
          const preview: string[] = [];
          for (let ringIndex = 0; ringIndex < previewCount; ringIndex++) {
              preview.push(UnitRing.fingerprint(unit, all[ringIndex]));
          }
          if (all.length > 5) preview.push('...');

          console.log(`Unit ${unit.id}, ${all.length} ring(s), ${byFingerprint.size} different fingerprint(s).\n ${preview.join(', ')}`);
      }

      console.log();
  }
  /**
   * Prints one summary line per unit of `structure`. For coarse units it also
   * prints up to three elements with their sequence range, component ids and
   * coordinates, followed by `...` when the unit has more than three elements.
   *
   * @param structure Structure whose units are listed.
   */
  export function printUnits(structure: Structure) {
      console.log('\nUnits\n=============');

      // A single location object is reused for all property lookups.
      const loc = StructureElement.create();

      for (const unit of structure.units) {
          loc.unit = unit;
          const unitElements = unit.elements;
          const elementCount = OrderedSet.size(unitElements);

          if (Unit.isAtomic(loc.unit)) {
              console.log(`Atomic unit ${unit.id} ${unit.conformation.operator.name}: ${elementCount} elements`);
          } else if (Unit.isCoarse(loc.unit)) {
              console.log(`Coarse unit ${unit.id} ${unit.conformation.operator.name} (${Unit.isSpheres(loc.unit) ? 'spheres' : 'gaussians'}): ${elementCount} elements.`);

              const coarse = StructureProperties.coarse;
              const sequence = loc.unit.model.sequence;
              const previewCount = Math.min(elementCount, 3);

              for (let k = 0; k < previewCount; k++) {
                  loc.element = OrderedSet.getAt(unitElements, k);

                  const seqBegin = coarse.seq_id_begin(loc), seqEnd = coarse.seq_id_end(loc);
                  const compIdAt = sequence.byEntityKey[coarse.entityKey(loc)].compId.value;

                  // Collect the component id of every position in [seqBegin, seqEnd].
                  const compIds: string[] = [];
                  for (let seqId = seqBegin; seqId <= seqEnd; seqId++) compIds.push(compIdAt(seqId));

                  console.log(`${coarse.asym_id(loc)}:${seqBegin}-${seqEnd} (${compIds.join('-')}) ${coarse.asym_id(loc)} [${coarse.x(loc).toFixed(2)}, ${coarse.y(loc).toFixed(2)}, ${coarse.z(loc).toFixed(2)}]`);
              }

              if (elementCount > 3) console.log(`...`);
          }
      }
  }
  /**
   * Prints the spacegroup name with cell size and angles, the assembly ids and
   * the names of the NCS operators of `model`.
   *
   * @param model Model providing `symmetry`.
   */
  export function printSymmetryInfo(model: Model) {
      console.log('\nSymmetry Info\n=============');

      const { spacegroup, assemblies, ncsOperators } = model.symmetry;
      const cell = spacegroup.cell;

      console.log(`Spacegroup: ${spacegroup.name} size: ${Vec3.toString(cell.size)} angles: ${Vec3.toString(cell.anglesInRadians)}`);

      const assemblyIds = assemblies.map(assembly => assembly.id).join(', ');
      console.log(`Assembly names: ${assemblyIds}`);

      // NCS example: 1auy
      // A falsy `ncsOperators` is printed as-is instead of as a list of names.
      const ncsNames = ncsOperators && ncsOperators.map(op => op.name).join(', ');
      console.log(`NCS operators: ${ncsNames}`);
  }
  /**
   * Prints one line per model with its label, model number and atom count.
   * When the model's coarse hierarchy is defined, the sphere and gaussian
   * counts are appended to the line.
   *
   * @param models Models to summarize.
   */
  export function printModelStats(models: ReadonlyArray<Model>) {
      console.log('\nModels\n=============');

      for (const model of models) {
          let summary = `${model.label} ${model.modelNum}: ${model.atomicHierarchy.atoms._rowCount} atom(s)`;

          const coarse = model.coarseHierarchy;
          if (coarse.isDefined) {
              summary += `, ${coarse.spheres.count} sphere(s), ${coarse.gaussians.count} gaussian(s)`;
          }

          console.log(summary);
      }

      console.log();
  }
  /**
   * Runs `trajectoryFromMmCIF` on `frame` and builds a structure from the
   * first resulting model.
   *
   * @param frame CIF frame to convert.
   * @returns The models together with the structure of `models[0]`.
   */
  export async function getModelsAndStructure(frame: CifFrame) {
      const trajectoryTask = trajectoryFromMmCIF(frame);
      const models = await trajectoryTask.run();
      return { models, structure: Structure.ofModel(models[0]) };
  }
  /**
   * Builds models and structure from `frame` and prints every report whose
   * flag is set in `args`. Model-level reports use the first model.
   *
   * @param frame CIF frame to analyze.
   * @param args Parsed command-line flags selecting the reports.
   */
  async function run(frame: CifFrame, args: Args) {
      const { models, structure } = await getModelsAndStructure(frame);

      // Reports are listed in the order in which they are printed.
      const reports: [boolean | undefined, () => void][] = [
          [args.models, () => printModelStats(models)],
          [args.seq, () => printSequence(models[0])],
          [args.units, () => printUnits(structure)],
          [args.sym, () => printSymmetryInfo(models[0])],
          [args.rings, () => printRings(structure)],
          [args.intraLinks, () => printLinks(structure, true, false)],
          [args.interLinks, () => printLinks(structure, false, true)],
          [args.mod, () => printModRes(models[0])],
          [args.sec, () => printSecStructure(models[0])],
      ];

      for (const [enabled, print] of reports) {
          if (enabled) print();
      }
  }
  /**
   * Downloads the entry `pdb` and prints the reports selected in `args`.
   *
   * @param pdb Entry id passed to `downloadFromPdb`.
   * @param args Parsed command-line flags forwarded to `run`.
   * @returns A promise that settles once `run` has finished.
   */
  async function runDL(pdb: string, args: Args) {
      const mmcif = await downloadFromPdb(pdb);
      // Await instead of fire-and-forget: otherwise this function's promise
      // resolves before printing is done and a rejection from `run` is unhandled.
      await run(mmcif, args);
  }
  /**
   * Reads the CIF file `filename` and prints the reports selected in `args`.
   *
   * @param filename Path passed to `readCifFile`.
   * @param args Parsed command-line flags forwarded to `run`.
   * @returns A promise that settles once `run` has finished.
   */
  async function runFile(filename: string, args: Args) {
      const mmcif = await readCifFile(filename);
      // Await instead of fire-and-forget: otherwise this function's promise
      // resolves before printing is done and a rejection from `run` is unhandled.
      await run(mmcif, args);
  }
  // Command-line interface: a structure source (--download or --file) plus
  // boolean switches that select which reports are printed.
  const parser = new argparse.ArgumentParser({
      addHelp: true,
      description: 'Print info about a structure, mainly to test and showcase the mol-model module'
  });

  // Structure source.
  parser.addArgument(['--download', '-d'], { help: 'Pdb entry id' });
  parser.addArgument(['--file', '-f'], { help: 'filename' });

  // Report switches; each is stored as `true` when the flag is present.
  parser.addArgument(['--models'], { help: 'print models info', action: 'storeTrue' });
  parser.addArgument(['--seq'], { help: 'print sequence', action: 'storeTrue' });
  parser.addArgument(['--units'], { help: 'print units', action: 'storeTrue' });
  parser.addArgument(['--sym'], { help: 'print symmetry', action: 'storeTrue' });
  parser.addArgument(['--rings'], { help: 'print rings', action: 'storeTrue' });
  parser.addArgument(['--intraLinks'], { help: 'print intra unit links', action: 'storeTrue' });
  parser.addArgument(['--interLinks'], { help: 'print inter unit links', action: 'storeTrue' });
  parser.addArgument(['--mod'], { help: 'print modified residues', action: 'storeTrue' });
  parser.addArgument(['--sec'], { help: 'print secondary structure', action: 'storeTrue' });
  /** Shape of the parsed command-line arguments. */
  interface Args {
      /** Entry id to download (`--download`, `-d`). */
      download?: string;
      /** Path of a local file to read (`--file`, `-f`). */
      file?: string;
      /** Print models info (`--models`). */
      models?: boolean;
      /** Print sequence (`--seq`). */
      seq?: boolean;
      /** NOTE(review): no `--ihm` argument is registered in the code visible here - confirm whether this field is still needed. */
      ihm?: boolean;
      /** Print units (`--units`). */
      units?: boolean;
      /** Print symmetry (`--sym`). */
      sym?: boolean;
      /** Print rings (`--rings`). */
      rings?: boolean;
      /** Print intra unit links (`--intraLinks`). */
      intraLinks?: boolean;
      /** Print inter unit links (`--interLinks`). */
      interLinks?: boolean;
      /** Print modified residues (`--mod`). */
      mod?: boolean;
      /** Print secondary structure (`--sec`). */
      sec?: boolean;
  }
  const args: Args = parser.parseArgs();

  // Logs a rejection of the started job and marks the process as failed,
  // instead of leaving the promise rejection unhandled.
  const reportFailure = (error: unknown) => {
      console.error(error);
      process.exitCode = 1;
  };

  // --download takes precedence over --file; with neither there is nothing
  // to analyze, so show the usage instead of exiting silently.
  if (args.download) runDL(args.download, args).catch(reportFailure);
  else if (args.file) runFile(args.file, args).catch(reportFailure);
  else parser.printHelp();