structure.ts 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434
  1. /**
  2. * Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * @author David Sehnal <david.sehnal@gmail.com>
  5. */
  6. import * as B from 'benchmark'
  7. import * as util from 'util'
  8. import * as fs from 'fs'
  9. import fetch from 'node-fetch'
  10. import CIF from 'mol-io/reader/cif'
  11. import { Structure, Model, Queries as Q, Element, Selection, StructureSymmetry, Query } from 'mol-model/structure'
  12. //import { Segmentation, OrderedSet } from 'mol-data/int'
  13. import to_mmCIF from 'mol-model/structure/export/mmcif'
  14. import { Run } from 'mol-task';
  15. //import { EquivalenceClasses } from 'mol-data/util';
  16. require('util.promisify').shim();
  17. const readFileAsync = util.promisify(fs.readFile);
  18. const writeFileAsync = util.promisify(fs.writeFile);
  19. async function readData(path: string) {
  20. if (path.match(/\.bcif$/)) {
  21. const input = await readFileAsync(path)
  22. const data = new Uint8Array(input.byteLength);
  23. for (let i = 0; i < input.byteLength; i++) data[i] = input[i];
  24. return data;
  25. } else {
  26. return readFileAsync(path, 'utf8');
  27. }
  28. }
  29. (Symbol as any).asyncIterator = (Symbol as any).asyncIterator || Symbol.for('Symbol.asyncIterator');
  30. interface ProgressGenerator<T> extends AsyncIterableIterator<number | T> {
  31. next(cont?: boolean): Promise<IteratorResult<number | T>>
  32. }
  33. async function *test(): ProgressGenerator<boolean> {
  34. const r = yield await new Promise<number>(res => res(10));
  35. return r;
  36. }
  37. async function runIt(itP: () => ProgressGenerator<boolean>) {
  38. const it = itP();
  39. while (true) {
  40. const { value, done } = await it.next(true);
  41. if (done) return value;
  42. }
  43. }
  44. runIt(test).then(r => console.log('rerdasdasda', r))
  45. export async function readCIF(path: string) {
  46. console.time('readData');
  47. const input = await readData(path)
  48. console.timeEnd('readData');
  49. console.time('parse');
  50. const comp = typeof input === 'string' ? CIF.parseText(input) : CIF.parseBinary(input);
  51. const parsed = await Run(comp);
  52. console.timeEnd('parse');
  53. if (parsed.isError) {
  54. throw parsed;
  55. }
  56. const data = parsed.result.blocks[0];
  57. console.time('schema')
  58. const mmcif = CIF.schema.mmCIF(data);
  59. console.timeEnd('schema')
  60. console.time('buildModels')
  61. const models = Model.create({ kind: 'mmCIF', data: mmcif });
  62. console.timeEnd('buildModels')
  63. const structures = models.map(Structure.ofModel);
  64. return { mmcif, models, structures };
  65. }
  66. const DATA_DIR = './build/data';
  67. if (!fs.existsSync(DATA_DIR)) fs.mkdirSync(DATA_DIR);
  68. function getBcifUrl(pdbId: string) {
  69. return `http://www.ebi.ac.uk/pdbe/coordinates/${pdbId.toLowerCase()}/full?encoding=bcif`
  70. }
  71. function getBcifPath(pdbId: string) {
  72. return `${DATA_DIR}/${pdbId.toLowerCase()}_full.bcif`
  73. }
  74. async function ensureBcifAvailable(pdbId: string) {
  75. const bcifPath = getBcifPath(pdbId);
  76. if (!fs.existsSync(bcifPath)) {
  77. console.log(`downloading ${pdbId} bcif...`)
  78. const data = await fetch(getBcifUrl(pdbId))
  79. await writeFileAsync(bcifPath, await data.buffer())
  80. console.log(`done downloading ${pdbId} bcif`)
  81. }
  82. }
  83. export async function getBcif(pdbId: string) {
  84. await ensureBcifAvailable(pdbId);
  85. return await readCIF(getBcifPath(pdbId));
  86. }
  87. export namespace PropertyAccess {
  88. function baseline(model: Model) {
  89. if (model.sourceData.kind !== 'mmCIF') throw new Error('Model must be mmCIF');
  90. const atom_site = model.sourceData.data.atom_site;
  91. const id = atom_site.id.value;
  92. let s = 0;
  93. for (let i = 0, _i = atom_site._rowCount; i < _i; i++) {
  94. s += id(i);
  95. }
  96. return s;
  97. }
  98. function sumProperty(structure: Structure, p: Element.Property<number>) {
  99. const l = Element.Location();
  100. let s = 0;
  101. for (const unit of structure.units) {
  102. l.unit = unit;
  103. const elements = unit.elements;
  104. for (let j = 0, _j = elements.length; j < _j; j++) {
  105. l.element = elements[j];
  106. s += p(l);
  107. }
  108. }
  109. return s;
  110. }
  111. // function sumPropertySegmented(structure: Structure, p: Element.Property<number>) {
  112. // const { elements, units } = structure;
  113. // const unitIds = ElementSet.unitIndices(elements);
  114. // const l = Element.Location();
  115. // let s = 0;
  116. // let vA = 0, cC = 0, rC = 0;
  117. // for (let i = 0, _i = unitIds.length; i < _i; i++) {
  118. // const unit = units[unitIds[i]] as Unit.Atomic;
  119. // l.unit = unit;
  120. // const set = ElementSet.groupAt(elements, i);
  121. // const chainsIt = Segmentation.transientSegments(unit.hierarchy.chainSegments, set.elements);
  122. // const residues = unit.hierarchy.residueSegments;
  123. // while (chainsIt.hasNext) {
  124. // cC++;
  125. // const chainSegment = chainsIt.move();
  126. // const residuesIt = Segmentation.transientSegments(residues, set.elements, chainSegment);
  127. // while (residuesIt.hasNext) {
  128. // rC++;
  129. // const residueSegment = residuesIt.move();
  130. // // l.element= OrdSet.getAt(set, residueSegment.start);
  131. // // console.log(unit.hierarchy.residues.auth_comp_id.value(unit.residueIndex[l.atom]), l.atom, OrdSet.getAt(set, residueSegment.end))
  132. // for (let j = residueSegment.start, _j = residueSegment.end; j < _j; j++) {
  133. // l.element= ElementGroup.getAt(set, j);
  134. // vA++;
  135. // s += p(l);
  136. // }
  137. // }
  138. // }
  139. // }
  140. // console.log('seg atom count', vA, cC, rC);
  141. // return s;
  142. // }
  143. // function sumPropertyResidue(structure: Structure, p: Element.Property<number>) {
  144. // const { atoms, units } = structure;
  145. // const unitIds = ElementSet.unitIds(atoms);
  146. // const l = Element.Location();
  147. // let s = 0;
  148. // for (let i = 0, _i = unitIds.length; i < _i; i++) {
  149. // const unit = units[unitIds[i]];
  150. // l.unit = unit;
  151. // const set = ElementSet.unitGetByIndex(atoms, i);
  152. // const residuesIt = Segmentation.transientSegments(unit.hierarchy.residueSegments, set.atoms);
  153. // while (residuesIt.hasNext) {
  154. // l.element= ElementGroup.getAt(set, residuesIt.move().start);
  155. // s += p(l);
  156. // }
  157. // }
  158. // return s;
  159. // }
  160. // function sumPropertyAtomSetIt(structure: Structure, p: Element.Property<number>) {
  161. // const { elements, units } = structure;
  162. // let s = 0;
  163. // const atomsIt = ElementSet.elements(elements);
  164. // const l = Element.Location();
  165. // while (atomsIt.hasNext) {
  166. // const a = atomsIt.move();
  167. // l.unit = units[Element.unit(a)];
  168. // l.element= Element.index(a);
  169. // s += p(l);
  170. // }
  171. // return s;
  172. // }
  173. // function sumPropertySegmentedMutable(structure: Structure, p: Property<number>) {
  174. // const { atoms, units } = structure;
  175. // const unitIds = ElementSet.unitIds(atoms);
  176. // const l = Property.createLocation();
  177. // let s = 0;
  178. // for (let i = 0, _i = unitIds.length; i < _i; i++) {
  179. // const unit = units[unitIds[i]];
  180. // l.unit = unit;
  181. // const set = ElementSet.unitGetByIndex(atoms, i);
  182. // const chainsIt = Segmentation.transientSegments(unit.hierarchy.chainSegments, set);
  183. // const residuesIt = Segmentation.transientSegments(unit.hierarchy.residueSegments, set);
  184. // while (chainsIt.hasNext) {
  185. // const chainSegment = chainsIt.move();
  186. // residuesIt.updateRange(chainSegment);
  187. // while (residuesIt.hasNext) {
  188. // const residueSegment = residuesIt.move();
  189. // for (let j = residueSegment.start, _j = residueSegment.end; j < _j; j++) {
  190. // l.element= OrdSet.getAt(set, j);
  191. // s += p(l);
  192. // }
  193. // }
  194. // }
  195. // }
  196. // return s;
  197. // }
  198. // function sumDirect(structure: Structure) {
  199. // const { atoms, units } = structure;
  200. // const unitIds = ElementSet.unitIds(atoms);
  201. // let s = 0;
  202. // for (let i = 0, _i = unitIds.length; i < _i; i++) {
  203. // const unitId = unitIds[i];
  204. // const unit = units[unitId];
  205. // const set = ElementSet.unitGetByIndex(atoms, i);
  206. // //const { residueIndex, chainIndex } = unit;
  207. // const p = unit.conformation.atomId.value;
  208. // for (let j = 0, _j = ElementGroup.size(set); j < _j; j++) {
  209. // const aI = ElementGroup.getAt(set, j);
  210. // s += p(aI);
  211. // }
  212. // }
  213. // return s;
  214. // }
  215. // function concatProperty(structure: Structure, p: Property<string>) {
  216. // const { atoms, units } = structure;
  217. // const unitIds = ElementSet.unitIds(atoms);
  218. // const l = Property.createLocation(structure);
  219. // let s = [];
  220. // for (let i = 0, _i = unitIds.length; i < _i; i++) {
  221. // const unitId = unitIds[i];
  222. // l.unit = units[unitId];
  223. // const set = ElementSet.unitGetByIndex(atoms, i);
  224. // const { residueIndex, chainIndex } = l.unit;
  225. // for (let j = 0, _j = OrdSet.size(set); j < _j; j++) {
  226. // const aI = OrdSet.getAt(set, j);
  227. // l.element= aI;
  228. // l.residueIndex = residueIndex[aI];
  229. // l.chainIndex = chainIndex[aI];
  230. // s[s.length] = p(l);
  231. // }
  232. // }
  233. // return s;
  234. // }
  235. export function write(s: Structure) {
  236. console.log(to_mmCIF('test', s));
  237. }
  238. export async function testAssembly(id: string, s: Structure) {
  239. console.time('assembly')
  240. const a = await Run(StructureSymmetry.buildAssembly(s, '1'));
  241. //const auth_comp_id = Q.props.residue.auth_comp_id;
  242. //const q1 = Query(Q.generators.atoms({ residueTest: l => auth_comp_id(l) === 'ALA' }));
  243. //const alas = await query(q1, a);
  244. console.timeEnd('assembly')
  245. fs.writeFileSync(`${DATA_DIR}/${id}_assembly.bcif`, to_mmCIF(id, a, true));
  246. //fs.writeFileSync(`${DATA_DIR}/${id}_assembly.bcif`, to_mmCIF(id, Selection.unionStructure(alas), true));
  247. console.log('exported');
  248. }
  249. // export async function testGrouping(structure: Structure) {
  250. // const { elements, units } = await Run(Symmetry.buildAssembly(structure, '1'));
  251. // console.log('grouping', units.length);
  252. // console.log('built asm');
  253. // const uniqueGroups = EquivalenceClasses<number, { unit: Unit, group: ElementGroup }>(
  254. // ({ unit, group }) => ElementGroup.hashCode(group),
  255. // (a, b) => a.unit.model.id === b.unit.model.id && (a.group.key === b.group.key && OrderedSet.areEqual(a.group.elements, b.group.elements))
  256. // );
  257. // for (let i = 0, _i = ElementSet.groupCount(elements); i < _i; i++) {
  258. // const group = ElementSet.groupAt(elements, i);
  259. // const unitId = ElementSet.groupUnitIndex(elements, i);
  260. // uniqueGroups.add(unitId, { unit: units[unitId], group });
  261. // }
  262. // console.log('group count', uniqueGroups.groups.length);
  263. // }
  264. function query(q: Query, s: Structure) {
  265. return Run((q(s)));
  266. }
  267. export async function run() {
  268. //const { structures, models/*, mmcif*/ } = await getBcif('1cbs');
  269. // const { structures, models } = await getBcif('3j3q');
  270. const { structures, models /*, mmcif*/ } = await readCIF('e:/test/quick/1hrv_updated.cif');
  271. //const { structures: s1, /*, mmcif*/ } = await readCIF('e:/test/quick/1tqn_updated.cif');
  272. // testGrouping(structures[0]);
  273. // console.log('------');
  274. // testGrouping(s1[0]);
  275. //const { structures, models/*, mmcif*/ } = await readCIF('e:/test/quick/5j7v_updated.cif');
  276. //console.log(mmcif.pdbx_struct_oper_list.matrix.toArray());
  277. // console.log(mmcif.pdbx_struct_oper_list.vector.toArray());
  278. await testAssembly('1hrv', structures[0]);
  279. // throw '';
  280. // console.log(models[0].symmetry.assemblies);
  281. //const { structures, models } = await readCIF('e:/test/molstar/3j3q.bcif');
  282. // fs.writeFileSync('e:/test/molstar/3j3q.bcif', to_mmCIF('test', structures[0], true));
  283. // return;
  284. // console.log(toMmCIFString('test', structures[0]));
  285. // return;
  286. console.log('bs', baseline(models[0]));
  287. console.log('sp', sumProperty(structures[0], l => l.unit.model.atomSiteConformation.atomId.value(l.element)));
  288. //console.log(sumPropertySegmented(structures[0], l => l.unit.model.atomSiteConformation.atomId.value(l.element)));
  289. //console.log(sumPropertySegmentedMutable(structures[0], l => l.unit.model.conformation.atomId.value(l.element));
  290. //console.log(sumPropertyAtomSetIt(structures[0], l => l.unit.model.atomSiteConformation.atomId.value(l.element)));
  291. //console.log(sumProperty(structures[0], Property.cachedAtomColumn(m => m.conformation.atomId)));
  292. //console.log(sumDirect(structures[0]));
  293. //console.log('r', sumPropertyResidue(structures[0], l => l.unit.hierarchy.residues.auth_seq_id.value(l.unit.residueIndex[l.atom])));
  294. console.time('atom.x');
  295. console.log('atom.x', sumProperty(structures[0], Q.props.atom.x));
  296. console.timeEnd('atom.x');
  297. console.time('__x')
  298. //console.log('__x', sumProperty(structures[0], l => l.unit.conformation.x[l.atom]));
  299. console.timeEnd('__x')
  300. //const authSeqId = Element.property(l => l.unit.hierarchy.residues.auth_seq_id.value(l.unit.residueIndex[l.atom]));
  301. //const auth_seq_id = Q.props.residue.auth_seq_id;
  302. const auth_comp_id = Q.props.residue.auth_comp_id;
  303. //const auth_asym_id = Q.props.chain.auth_asym_id;
  304. //const set = new Set(['A', 'B', 'C', 'D']);
  305. //const q = Q.generators.atomGroups({ atomTest: l => auth_seq_id(l) < 3 });
  306. const q = Query(Q.generators.atoms({ atomTest: Q.pred.eq(Q.props.residue.auth_comp_id, 'ALA') }));
  307. const P = Q.props
  308. //const q0 = Q.generators.atoms({ atomTest: l => auth_comp_id(l) === 'ALA' });
  309. const q1 = Query(Q.generators.atoms({ residueTest: l => auth_comp_id(l) === 'ALA' }));
  310. const q2 = Query(Q.generators.atoms({ residueTest: l => auth_comp_id(l) === 'ALA', groupBy: Q.props.residue.key }));
  311. const q3 = Query(Q.generators.atoms({
  312. chainTest: Q.pred.inSet(P.chain.auth_asym_id, ['A', 'B', 'C', 'D']),
  313. residueTest: Q.pred.eq(P.residue.auth_comp_id, 'ALA')
  314. }));
  315. await query(q, structures[0]);
  316. //console.log(to_mmCIF('test', Selection.union(q0r)));
  317. console.time('q1')
  318. await query(q1, structures[0]);
  319. console.timeEnd('q1')
  320. console.time('q1')
  321. await query(q1, structures[0]);
  322. console.timeEnd('q1')
  323. console.time('q2')
  324. const q2r = await query(q2, structures[0]);
  325. console.timeEnd('q2')
  326. console.log(Selection.structureCount(q2r));
  327. //console.log(q1(structures[0]));
  328. const col = models[0].atomSiteConformation.atomId.value;
  329. const suite = new B.Suite();
  330. suite
  331. //.add('test q', () => q1(structures[0]))
  332. //.add('test q', () => q(structures[0]))
  333. .add('test int', () => sumProperty(structures[0], l => col(l.element)))
  334. .add('test q1', async () => await query(q1, structures[0]))
  335. .add('test q3', async () => await query(q3, structures[0]))
  336. // .add('sum residue', () => sumPropertyResidue(structures[0], l => l.unit.hierarchy.residues.auth_seq_id.value(l.unit.residueIndex[l.atom])))
  337. // .add('baseline', () => baseline(models[0]))
  338. // .add('direct', () => sumDirect(structures[0]))
  339. //.add('normal int', () => sumProperty(structures[0], l => l.unit.model.conformation.atomId.value(l.element))
  340. //.add('atom set it int', () => sumPropertyAtomSetIt(structures[0], l => l.unit.conformation.atomId.value(l.element))
  341. // .add('segmented faster int', () => sumPropertySegmented(structures[0], l => l.unit.conformation.atomId.value(l.element))
  342. // .add('faster int', () => sumProperty(structures[0], l => l.unit.conformation.atomId.value(l.element))
  343. //.add('segmented faster _x', () => sumPropertySegmented(structures[0], l => l.unit.conformation.__x[l.atom]))
  344. //.add('faster _x', () => sumProperty(structures[0], l => l.unit.conformation.__x[l.atom] + l.unit.conformation.__y[l.atom] + l.unit.conformation.__z[l.atom]))
  345. //.add('segmented mut faster int', () => sumPropertySegmentedMutable(structures[0], l => l.unit.conformation.atomId.value(l.element))
  346. //.add('normal shortcut int', () => sumProperty(structures[0], l => l.conformation.atomId.value(l.element))
  347. //.add('cached int', () => sumProperty(structures[0], Property.cachedAtomColumn(m => m.conformation.atomId)))
  348. //.add('concat str', () => concatProperty(structures[0], l => l.unit.model.hierarchy.atoms.auth_atom_id.value(l.element))
  349. //.add('cached concat str', () => concatProperty(structures[0], Property.cachedAtomColumn(m => m.hierarchy.atoms.auth_atom_id)))
  350. .on('cycle', (e: any) => console.log(String(e.target)))
  351. .run();
  352. }
  353. }
  354. PropertyAccess.run();