structure.ts 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490
  1. /**
  2. * Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * @author David Sehnal <david.sehnal@gmail.com>
  5. */
  6. import * as B from 'benchmark';
  7. import * as util from 'util';
  8. import * as fs from 'fs';
  9. import fetch from 'node-fetch';
  10. import { CIF } from '../mol-io/reader/cif';
  11. import { Structure, Model, Queries as Q, StructureElement, StructureSelection, StructureSymmetry, StructureQuery, StructureProperties as SP } from '../mol-model/structure';
  12. // import { Segmentation, OrderedSet } from '../mol-data/int'
  13. import { to_mmCIF } from '../mol-model/structure/export/mmcif';
  14. import { Vec3 } from '../mol-math/linear-algebra';
  15. import { trajectoryFromMmCIF, MmcifFormat } from '../mol-model-formats/structure/mmcif';
  16. // import { printUnits } from '../apps/structure-info/model';
  17. // import { EquivalenceClasses } from '../mol-data/util';
  18. require('util.promisify').shim();
  19. const readFileAsync = util.promisify(fs.readFile);
  20. const writeFileAsync = util.promisify(fs.writeFile);
  21. async function readData(path: string) {
  22. if (path.match(/\.bcif$/)) {
  23. const input = await readFileAsync(path);
  24. const data = new Uint8Array(input.byteLength);
  25. for (let i = 0; i < input.byteLength; i++) data[i] = input[i];
  26. return data;
  27. } else {
  28. return readFileAsync(path, 'utf8');
  29. }
  30. }
  31. // (Symbol as any).asyncIterator = (Symbol as any).asyncIterator || Symbol.for('Symbol.asyncIterator');
  32. // interface ProgressGenerator<T> extends AsyncIterableIterator<number | T> {
  33. // next(cont?: boolean): Promise<IteratorResult<number | T>>
  34. // }
  35. // async function *test(): ProgressGenerator<boolean> {
  36. // const r = yield await new Promise<number>(res => res(10));
  37. // return r;
  38. // }
  39. // async function runIt(itP: () => ProgressGenerator<boolean>) {
  40. // const it = itP();
  41. // while (true) {
  42. // const { value, done } = await it.next(true);
  43. // if (done) return value;
  44. // }
  45. // }
  46. // runIt(test).then(r => console.log('rerdasdasda', r))
  47. export async function readCIF(path: string) {
  48. console.time('readData');
  49. const input = await readData(path);
  50. console.timeEnd('readData');
  51. console.time('parse');
  52. const comp = typeof input === 'string' ? CIF.parseText(input) : CIF.parseBinary(input);
  53. const parsed = await comp.run();
  54. console.timeEnd('parse');
  55. if (parsed.isError) {
  56. throw parsed;
  57. }
  58. const data = parsed.result.blocks[0];
  59. console.time('buildModels');
  60. const models = await trajectoryFromMmCIF(data).run();
  61. console.timeEnd('buildModels');
  62. const structures = [Structure.ofModel(models.representative)];
  63. return { mmcif: models.representative.sourceData.data, models, structures };
  64. }
  65. const DATA_DIR = './build/data';
  66. if (!fs.existsSync(DATA_DIR)) fs.mkdirSync(DATA_DIR);
  67. function getBcifUrl(pdbId: string) {
  68. return `http://www.ebi.ac.uk/pdbe/coordinates/${pdbId.toLowerCase()}/full?encoding=bcif`;
  69. }
  70. function getBcifPath(pdbId: string) {
  71. return `${DATA_DIR}/${pdbId.toLowerCase()}_full.bcif`;
  72. }
  73. async function ensureBcifAvailable(pdbId: string) {
  74. const bcifPath = getBcifPath(pdbId);
  75. if (!fs.existsSync(bcifPath)) {
  76. console.log(`downloading ${pdbId} bcif...`);
  77. const data = await fetch(getBcifUrl(pdbId));
  78. await writeFileAsync(bcifPath, await data.buffer());
  79. console.log(`done downloading ${pdbId} bcif`);
  80. }
  81. }
  82. export async function getBcif(pdbId: string) {
  83. await ensureBcifAvailable(pdbId);
  84. return await readCIF(getBcifPath(pdbId));
  85. }
  86. export namespace PropertyAccess {
  87. function baseline(model: Model) {
  88. if (!MmcifFormat.is(model.sourceData)) throw new Error('Model must be mmCIF');
  89. const atom_site = model.sourceData.data.db.atom_site;
  90. const id = atom_site.id.value;
  91. let s = 0;
  92. for (let i = 0, _i = atom_site._rowCount; i < _i; i++) {
  93. s += id(i);
  94. }
  95. return s;
  96. }
  97. function sumProperty(structure: Structure, p: StructureElement.Property<number>) {
  98. const l = StructureElement.Location.create(structure);
  99. let s = 0;
  100. for (const unit of structure.units) {
  101. l.unit = unit;
  102. const elements = unit.elements;
  103. for (let j = 0, _j = elements.length; j < _j; j++) {
  104. l.element = elements[j];
  105. s += p(l);
  106. }
  107. }
  108. return s;
  109. }
  110. // function sumPropertySegmented(structure: Structure, p: Element.Property<number>) {
  111. // const { elements, units } = structure;
  112. // const unitIds = ElementSet.unitIndices(elements);
  113. // const l = Element.Location();
  114. // let s = 0;
  115. // let vA = 0, cC = 0, rC = 0;
  116. // for (let i = 0, _i = unitIds.length; i < _i; i++) {
  117. // const unit = units[unitIds[i]] as Unit.Atomic;
  118. // l.unit = unit;
  119. // const set = ElementSet.groupAt(elements, i);
  120. // const chainsIt = Segmentation.transientSegments(unit.hierarchy.chainSegments, set.elements);
  121. // const residues = unit.hierarchy.residueSegments;
  122. // while (chainsIt.hasNext) {
  123. // cC++;
  124. // const chainSegment = chainsIt.move();
  125. // const residuesIt = Segmentation.transientSegments(residues, set.elements, chainSegment);
  126. // while (residuesIt.hasNext) {
  127. // rC++;
  128. // const residueSegment = residuesIt.move();
  129. // // l.element= OrdSet.getAt(set, residueSegment.start);
  130. // // console.log(unit.hierarchy.residues.auth_comp_id.value(unit.residueIndex[l.atom]), l.atom, OrdSet.getAt(set, residueSegment.end))
  131. // for (let j = residueSegment.start, _j = residueSegment.end; j < _j; j++) {
  132. // l.element= ElementGroup.getAt(set, j);
  133. // vA++;
  134. // s += p(l);
  135. // }
  136. // }
  137. // }
  138. // }
  139. // console.log('seg atom count', vA, cC, rC);
  140. // return s;
  141. // }
  142. // function sumPropertyResidue(structure: Structure, p: Element.Property<number>) {
  143. // const { atoms, units } = structure;
  144. // const unitIds = ElementSet.unitIds(atoms);
  145. // const l = Element.Location();
  146. // let s = 0;
  147. // for (let i = 0, _i = unitIds.length; i < _i; i++) {
  148. // const unit = units[unitIds[i]];
  149. // l.unit = unit;
  150. // const set = ElementSet.unitGetByIndex(atoms, i);
  151. // const residuesIt = Segmentation.transientSegments(unit.hierarchy.residueSegments, set.atoms);
  152. // while (residuesIt.hasNext) {
  153. // l.element= ElementGroup.getAt(set, residuesIt.move().start);
  154. // s += p(l);
  155. // }
  156. // }
  157. // return s;
  158. // }
  159. // function sumPropertyAtomSetIt(structure: Structure, p: Element.Property<number>) {
  160. // const { elements, units } = structure;
  161. // let s = 0;
  162. // const atomsIt = ElementSet.elements(elements);
  163. // const l = Element.Location();
  164. // while (atomsIt.hasNext) {
  165. // const a = atomsIt.move();
  166. // l.unit = units[Element.unit(a)];
  167. // l.element= Element.index(a);
  168. // s += p(l);
  169. // }
  170. // return s;
  171. // }
  172. // function sumPropertySegmentedMutable(structure: Structure, p: Property<number>) {
  173. // const { atoms, units } = structure;
  174. // const unitIds = ElementSet.unitIds(atoms);
  175. // const l = Property.createLocation();
  176. // let s = 0;
  177. // for (let i = 0, _i = unitIds.length; i < _i; i++) {
  178. // const unit = units[unitIds[i]];
  179. // l.unit = unit;
  180. // const set = ElementSet.unitGetByIndex(atoms, i);
  181. // const chainsIt = Segmentation.transientSegments(unit.hierarchy.chainSegments, set);
  182. // const residuesIt = Segmentation.transientSegments(unit.hierarchy.residueSegments, set);
  183. // while (chainsIt.hasNext) {
  184. // const chainSegment = chainsIt.move();
  185. // residuesIt.updateRange(chainSegment);
  186. // while (residuesIt.hasNext) {
  187. // const residueSegment = residuesIt.move();
  188. // for (let j = residueSegment.start, _j = residueSegment.end; j < _j; j++) {
  189. // l.element= OrdSet.getAt(set, j);
  190. // s += p(l);
  191. // }
  192. // }
  193. // }
  194. // }
  195. // return s;
  196. // }
  197. // function sumDirect(structure: Structure) {
  198. // const { atoms, units } = structure;
  199. // const unitIds = ElementSet.unitIds(atoms);
  200. // let s = 0;
  201. // for (let i = 0, _i = unitIds.length; i < _i; i++) {
  202. // const unitId = unitIds[i];
  203. // const unit = units[unitId];
  204. // const set = ElementSet.unitGetByIndex(atoms, i);
  205. // //const { residueIndex, chainIndex } = unit;
  206. // const p = unit.conformation.atomId.value;
  207. // for (let j = 0, _j = ElementGroup.size(set); j < _j; j++) {
  208. // const aI = ElementGroup.getAt(set, j);
  209. // s += p(aI);
  210. // }
  211. // }
  212. // return s;
  213. // }
  214. // function concatProperty(structure: Structure, p: Property<string>) {
  215. // const { atoms, units } = structure;
  216. // const unitIds = ElementSet.unitIds(atoms);
  217. // const l = Property.createLocation(structure);
  218. // let s = [];
  219. // for (let i = 0, _i = unitIds.length; i < _i; i++) {
  220. // const unitId = unitIds[i];
  221. // l.unit = units[unitId];
  222. // const set = ElementSet.unitGetByIndex(atoms, i);
  223. // const { residueIndex, chainIndex } = l.unit;
  224. // for (let j = 0, _j = OrdSet.size(set); j < _j; j++) {
  225. // const aI = OrdSet.getAt(set, j);
  226. // l.element= aI;
  227. // l.residueIndex = residueIndex[aI];
  228. // l.chainIndex = chainIndex[aI];
  229. // s[s.length] = p(l);
  230. // }
  231. // }
  232. // return s;
  233. // }
  234. export function write(s: Structure) {
  235. console.log(to_mmCIF('test', s));
  236. }
  237. export async function testAssembly(id: string, s: Structure) {
  238. console.time('assembly');
  239. const a = await StructureSymmetry.buildAssembly(s, '1').run();
  240. // const auth_comp_id = SP.residue.auth_comp_id;
  241. // const q1 = Query(Q.generators.atoms({ residueTest: l => auth_comp_id(l) === 'ALA' }));
  242. // const alas = await query(q1, a);
  243. console.timeEnd('assembly');
  244. fs.writeFileSync(`${DATA_DIR}/${id}_assembly.bcif`, to_mmCIF(id, a, true));
  245. // fs.writeFileSync(`${DATA_DIR}/${id}_assembly.bcif`, to_mmCIF(id, Selection.unionStructure(alas), true));
  246. console.log('exported');
  247. }
  248. export async function testSymmetry(id: string, s: Structure) {
  249. console.time('symmetry');
  250. const a = await StructureSymmetry.buildSymmetryRange(s, Vec3.create(-1, -1, -1), Vec3.create(1, 1, 1)).run();
  251. // const auth_comp_id = SP.residue.auth_comp_id;
  252. // const q1 = Query(Q.generators.atoms({ residueTest: l => auth_comp_id(l) === 'ALA' }));
  253. // const alas = await query(q1, a);
  254. console.timeEnd('symmetry');
  255. fs.writeFileSync(`${DATA_DIR}/${id}_symm.bcif`, to_mmCIF(id, a, true));
  256. // fs.writeFileSync(`${DATA_DIR}/${id}_assembly.bcif`, to_mmCIF(id, Selection.unionStructure(alas), true));
  257. console.log('exported');
  258. }
  259. export async function testIncludeSurroundings(id: string, s: Structure) {
  260. // const a = s;
  261. console.time('symmetry');
  262. const a = await StructureSymmetry.buildSymmetryRange(s, Vec3.create(-2, -2, -2), Vec3.create(2, 2, 2)).run();
  263. // console.log(printUnits(a));
  264. const auth_comp_id = SP.atom.auth_comp_id, op = SP.unit.operator_name;
  265. // const q1 = Q.generators.atoms({ residueTest: l => auth_comp_id(l) === 'REA' });
  266. const q1 = Q.modifiers.includeSurroundings(Q.generators.atoms({
  267. chainTest: l => op(l.element) === '1_555',
  268. atomTest: l => auth_comp_id(l.element) === 'REA'
  269. }), {
  270. radius: 5,
  271. wholeResidues: true
  272. });
  273. const surr = StructureSelection.unionStructure(StructureQuery.run(q1, a));
  274. console.timeEnd('symmetry');
  275. // for (const u of surr.units) {
  276. // const { atomId } = u.model.atomicConformation;
  277. // console.log(`${u.id}, ${u.conformation.operator.name}`);
  278. // for (let i = 0; i < u.elements.length; i++) {
  279. // console.log(` ${atomId.value(u.elements[i])}`);
  280. // }
  281. // }
  282. // const it = surr.elementLocations();
  283. // while (it.hasNext) {
  284. // const e = it.move();
  285. // console.log(`${SP.unit.operator_name(e)} ${SP.atom.id(e)}`);
  286. // }
  287. // fs.writeFileSync(`${DATA_DIR}/${id}_surr.bcif`, to_mmCIF(id, a, true));
  288. fs.writeFileSync(`${DATA_DIR}/${id}_surr.cif`, to_mmCIF(id, surr, false));
  289. console.log('exported');
  290. }
  291. // export async function testGrouping(structure: Structure) {
  292. // const { elements, units } = await Run(Symmetry.buildAssembly(structure, '1'));
  293. // console.log('grouping', units.length);
  294. // console.log('built asm');
  295. // const uniqueGroups = EquivalenceClasses<number, { unit: Unit, group: ElementGroup }>(
  296. // ({ unit, group }) => ElementGroup.hashCode(group),
  297. // (a, b) => a.unit.model.id === b.unit.model.id && (a.group.key === b.group.key && OrderedSet.areEqual(a.group.elements, b.group.elements))
  298. // );
  299. // for (let i = 0, _i = ElementSet.groupCount(elements); i < _i; i++) {
  300. // const group = ElementSet.groupAt(elements, i);
  301. // const unitId = ElementSet.groupUnitIndex(elements, i);
  302. // uniqueGroups.add(unitId, { unit: units[unitId], group });
  303. // }
  304. // console.log('group count', uniqueGroups.groups.length);
  305. // }
  306. function query(q: StructureQuery, s: Structure) {
  307. return StructureQuery.run(q, s);
  308. }
  309. export async function runBonds() {
  310. const { structures } = await readCIF('e:/test/quick/3j3q_full.bcif');
  311. console.time('bonds');
  312. structures[0].interUnitBonds;
  313. console.timeEnd('bonds');
  314. }
  315. export async function run() {
  316. // const { structures, models/* , mmcif */ } = await getBcif('1cbs');
  317. // const { structures, models } = await getBcif('3j3q');
  318. const { structures, models /* , mmcif */ } = await readCIF('e:/test/quick/1cbs_updated.cif');
  319. // const { structures: s1, /* , mmcif */ } = await readCIF('e:/test/quick/1tqn_updated.cif');
  320. // testGrouping(structures[0]);
  321. // console.log('------');
  322. // testGrouping(s1[0]);
  323. // const { structures, models/*, mmcif*/ } = await readCIF('e:/test/quick/5j7v_updated.cif');
  324. // console.log(mmcif.pdbx_struct_oper_list.matrix.toArray());
  325. // console.log(mmcif.pdbx_struct_oper_list.vector.toArray());
  326. // await testAssembly('1hrv', structures[0]);
  327. // await testSymmetry('1cbs', structures[0]);
  328. await testIncludeSurroundings('1cbs', structures[0]);
  329. // throw '';
  330. // console.log(models[0].symmetry.assemblies);
  331. // const { structures, models } = await readCIF('e:/test/molstar/3j3q.bcif');
  332. // fs.writeFileSync('e:/test/molstar/3j3q.bcif', to_mmCIF('test', structures[0], true));
  333. // return;
  334. // console.log(toMmCIFString('test', structures[0]));
  335. // return;
  336. console.log('bs', baseline(models.representative));
  337. console.log('sp', sumProperty(structures[0], l => l.unit.model.atomicConformation.atomId.value(l.element)));
  338. // console.log(sumPropertySegmented(structures[0], l => l.unit.model.atomSiteConformation.atomId.value(l.element)));
  339. // console.log(sumPropertySegmentedMutable(structures[0], l => l.unit.model.conformation.atomId.value(l.element));
  340. // console.log(sumPropertyAtomSetIt(structures[0], l => l.unit.model.atomSiteConformation.atomId.value(l.element)));
  341. // console.log(sumProperty(structures[0], Property.cachedAtomColumn(m => m.conformation.atomId)));
  342. // console.log(sumDirect(structures[0]));
  343. // console.log('r', sumPropertyResidue(structures[0], l => l.unit.hierarchy.residues.auth_seq_id.value(l.unit.residueIndex[l.atom])));
  344. console.time('atom.x');
  345. console.log('atom.x', sumProperty(structures[0], SP.atom.x));
  346. console.timeEnd('atom.x');
  347. console.time('__x');
  348. // console.log('__x', sumProperty(structures[0], l => l.unit.conformation.x[l.atom]));
  349. console.timeEnd('__x');
  350. // const authSeqId = Element.property(l => l.unit.hierarchy.residues.auth_seq_id.value(l.unit.residueIndex[l.atom]));
  351. // const auth_seq_id = SP.residue.auth_seq_id;
  352. const auth_comp_id = SP.atom.auth_comp_id;
  353. // const auth_asym_id = SP.chain.auth_asym_id;
  354. // const set = new Set(['A', 'B', 'C', 'D']);
  355. // const q = Q.generators.atomGroups({ atomTest: l => auth_seq_id(l) < 3 });
  356. const q = Q.generators.atoms({ atomTest: Q.pred.eq(l => SP.atom.auth_comp_id(l.element), 'ALA') });
  357. const P = SP;
  358. // const q0 = Q.generators.atoms({ atomTest: l => auth_comp_id(l) === 'ALA' });
  359. const q1 = (Q.generators.atoms({ atomTest: l => auth_comp_id(l.element) === 'ALA' }));
  360. const q2 = (Q.generators.atoms({ atomTest: l => auth_comp_id(l.element) === 'ALA', groupBy: l => SP.residue.key(l.element) }));
  361. const q3 = (Q.generators.atoms({
  362. chainTest: Q.pred.inSet(l => P.chain.auth_asym_id(l.element), ['A', 'B', 'C', 'D']),
  363. atomTest: Q.pred.eq(l => P.atom.auth_comp_id(l.element), 'ALA')
  364. }));
  365. await query(q, structures[0]);
  366. // console.log(to_mmCIF('test', Selection.union(q0r)));
  367. console.time('q1');
  368. query(q1, structures[0]);
  369. console.timeEnd('q1');
  370. console.time('q1');
  371. query(q1, structures[0]);
  372. console.timeEnd('q1');
  373. console.time('q2');
  374. const q2r = query(q2, structures[0]);
  375. console.timeEnd('q2');
  376. console.log(StructureSelection.structureCount(q2r));
  377. // console.log(q1(structures[0]));
  378. const col = models.representative.atomicConformation.atomId.value;
  379. const suite = new B.Suite();
  380. suite
  381. // .add('test q', () => q1(structures[0]))
  382. // .add('test q', () => q(structures[0]))
  383. .add('test int', () => sumProperty(structures[0], l => col(l.element)))
  384. .add('test q1', async () => query(q1, structures[0]))
  385. .add('test q3', async () => query(q3, structures[0]))
  386. // .add('sum residue', () => sumPropertyResidue(structures[0], l => l.unit.hierarchy.residues.auth_seq_id.value(l.unit.residueIndex[l.atom])))
  387. // .add('baseline', () => baseline(models[0]))
  388. // .add('direct', () => sumDirect(structures[0]))
  389. // .add('normal int', () => sumProperty(structures[0], l => l.unit.model.conformation.atomId.value(l.element))
  390. // .add('atom set it int', () => sumPropertyAtomSetIt(structures[0], l => l.unit.conformation.atomId.value(l.element))
  391. // .add('segmented faster int', () => sumPropertySegmented(structures[0], l => l.unit.conformation.atomId.value(l.element))
  392. // .add('faster int', () => sumProperty(structures[0], l => l.unit.conformation.atomId.value(l.element))
  393. // .add('segmented faster _x', () => sumPropertySegmented(structures[0], l => l.unit.conformation.__x[l.atom]))
  394. // .add('faster _x', () => sumProperty(structures[0], l => l.unit.conformation.__x[l.atom] + l.unit.conformation.__y[l.atom] + l.unit.conformation.__z[l.atom]))
  395. // .add('segmented mut faster int', () => sumPropertySegmentedMutable(structures[0], l => l.unit.conformation.atomId.value(l.element))
  396. // .add('normal shortcut int', () => sumProperty(structures[0], l => l.conformation.atomId.value(l.element))
  397. // .add('cached int', () => sumProperty(structures[0], Property.cachedAtomColumn(m => m.conformation.atomId)))
  398. // .add('concat str', () => concatProperty(structures[0], l => l.unit.model.hierarchy.atoms.auth_atom_id.value(l.element))
  399. // .add('cached concat str', () => concatProperty(structures[0], Property.cachedAtomColumn(m => m.hierarchy.atoms.auth_atom_id)))
  400. .on('cycle', (e: any) => console.log(String(e.target)))
  401. .run();
  402. }
  403. }
  404. PropertyAccess.runBonds();