data-source.ts 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374
  1. /**
  2. * Copyright (c) 2018-2023 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * @author David Sehnal <david.sehnal@gmail.com>
  5. * @author Alexander Rose <alexander.rose@weirdbyte.de>
  6. * @author Adam Midlik <midlik@gmail.com>
  7. *
  8. * Adapted from LiteMol
  9. */
  10. import { utf8Read } from '../mol-io/common/utf8';
  11. import { RuntimeContext, Task } from '../mol-task';
  12. import { Asset, AssetManager } from './assets';
  13. import { LazyImports } from './lazy-imports';
  14. import { File_ as File, RUNNING_IN_NODEJS, XMLHttpRequest_ as XMLHttpRequest } from './nodejs-shims';
  15. import { ungzip, unzip } from './zip/zip';
  /**
   * Handle returned by `LazyImports.create('fs')`, typed so that `lazyImports.fs`
   * has the shape of the Node.js `fs` module. It is read by the `file://` code
   * path of this file (`lazyImports.fs.readFileSync`).
   */
  const lazyImports = LazyImports.create('fs') as { fs: typeof import('fs') };
  /** Compression schemes this module can undo before handing data to the caller. */
  export enum DataCompressionMethod {
      /** Data is passed through untouched. */
      None = 0,
      /** Data is inflated with `ungzip`. */
      Gzip = 1,
      /** Data is an archive opened with `unzip`. */
      Zip = 2,
  }
  /** Names of the result formats a read or download can be asked to produce. */
  export type DataType = 'json' | 'xml' | 'string' | 'binary' | 'zip'

  /**
   * Loose "any supported payload" type.
   * The first member used to be the string literal type `'string'`, which was a
   * typo for the primitive `string`. Because the union contains `any`, the
   * resolved type is `any` both before and after the correction.
   */
  export type DataValue = string | any | XMLDocument | Uint8Array

  /** Maps a `DataType` name to the type of value produced for it. */
  export type DataResponse<T extends DataType> =
      T extends 'json' ? any :
          T extends 'xml' ? XMLDocument :
              T extends 'string' ? string :
                  T extends 'binary' ? Uint8Array :
                      T extends 'zip' ? { [k: string]: Uint8Array } : never
  /** Options for the object form of `ajaxGet`. */
  export interface AjaxGetParams<T extends DataType = 'string'> {
      /** Address to request. */
      url: string;
      /** Requested result format; `ajaxGet` falls back to `'string'` when omitted. */
      type?: T;
      /** Name given to the created task; `'Download'` is used when empty or omitted. */
      title?: string;
      /** Extra request headers as `[name, value]` pairs. */
      headers?: Array<[string, string]>;
      /** Request body; when non-empty the request is sent as POST instead of GET. */
      body?: string;
  }
  /**
   * Create a task that reads `file` and yields its content as a string.
   * Thin wrapper around `readFromFileInternal` with the type fixed to `'string'`.
   */
  export function readStringFromFile(file: File) {
      const task = readFromFileInternal(file, 'string');
      return task;
  }
  /**
   * Create a task that reads `file` and yields its content as a `Uint8Array`.
   * Thin wrapper around `readFromFileInternal` with the type fixed to `'binary'`.
   */
  export function readUint8ArrayFromFile(file: File) {
      const task = readFromFileInternal(file, 'binary');
      return task;
  }
  /**
   * Create a task that reads `file` and yields its content in the requested format.
   *
   * @param file File to read.
   * @param type Result format, one of the `DataType` names.
   */
  export function readFromFile<T extends DataType>(file: File, type: T) {
      const task = readFromFileInternal(file, type);
      return task;
  }
  /** Download `url` as text; the URL is also used as the task title. */
  export function ajaxGet(url: string): Task<DataValue>
  /** Download `params.url` in the format named by `params.type` (default `'string'`). */
  export function ajaxGet<T extends DataType>(params: AjaxGetParams<T>): Task<DataResponse<T>>
  export function ajaxGet<T extends DataType>(params: AjaxGetParams<T> | string) {
      if (typeof params !== 'string') {
          const { title, url, type, body, headers } = params;
          return ajaxGetInternal(title, url, type || 'string', body, headers);
      }
      // bare URL: plain text download, titled with the URL itself
      return ajaxGetInternal(params, params, 'string');
  }

  /** Function type of `ajaxGet`, covering both overloads. */
  export type AjaxTask = typeof ajaxGet
  /**
   * Tell whether a request or file reader has reached its `DONE` ready state.
   *
   * @throws Error when `data` is neither a `FileReader` (outside Node.js) nor an `XMLHttpRequest`.
   */
  function isDone(data: XMLHttpRequest | FileReader) {
      // FileReader is not available in Node.js, so it is only tested for elsewhere
      if (!RUNNING_IN_NODEJS && data instanceof FileReader) {
          return data.readyState === FileReader.DONE;
      }
      if (data instanceof XMLHttpRequest) {
          return data.readyState === XMLHttpRequest.DONE;
      }
      throw new Error('unknown data type');
  }
  /**
   * Fallback error text used when the failing object carries no error of its own.
   *
   * @param isDownload `true` for a failed download, `false` for a failed file read.
   */
  function genericError(isDownload: boolean) {
      return isDownload
          ? 'Failed to download data. Possible reasons: Resource is not available, or CORS is not allowed on the server.'
          : 'Failed to open file.';
  }
  /**
   * Wait until an `XMLHttpRequest` or `FileReader` has finished, reporting
   * progress to the task context along the way.
   *
   * @param ctx Task context that receives progress updates.
   * @param action Message shown with each progress update.
   * @param data Request or reader that has already been started.
   * @returns Promise resolved with the same `data` object once loading is complete.
   *   It is rejected with the object's `error`, with a generic message string when
   *   no error is attached, or with whatever `ctx.update` throws.
   */
  function readData<T extends XMLHttpRequest | FileReader>(ctx: RuntimeContext, action: string, data: T): Promise<T> {
      return new Promise<T>((resolve, reject) => {
          // first check if data reading is already done
          if (isDone(data)) {
              // only FileReader has an `error` property; for XMLHttpRequest this reads `undefined`
              const { error } = data as FileReader;
              if (error !== null && error !== undefined) {
                  reject(error);
              } else {
                  resolve(data);
              }
              return;
          }

          let hasError = false;

          data.onerror = (e: ProgressEvent) => {
              if (hasError) return;
              // remember the failure so later progress events no longer touch the context
              hasError = true;
              const { error } = e.target as FileReader;
              reject(error ?? genericError(data instanceof XMLHttpRequest));
          };

          data.onprogress = (e: ProgressEvent) => {
              if (!ctx.shouldUpdate || hasError) return;

              try {
                  if (e.lengthComputable) {
                      ctx.update({ message: action, isIndeterminate: false, current: e.loaded, max: e.total });
                  } else {
                      // total size unknown: show the amount received so far in MB
                      ctx.update({ message: `${action} ${(e.loaded / 1024 / 1024).toFixed(2)} MB`, isIndeterminate: true });
                  }
              } catch (e) {
                  hasError = true;
                  reject(e);
              }
          };

          data.onload = () => {
              resolve(data);
          };
      });
  }
  /**
   * Guess the compression of a file from its name.
   * A name ending in `.gz` is Gzip and one ending in `.zip` is Zip
   * (both case-insensitive); anything else is treated as uncompressed.
   */
  function getCompression(name: string) {
      if (/\.gz$/i.test(name)) return DataCompressionMethod.Gzip;
      if (/\.zip$/i.test(name)) return DataCompressionMethod.Zip;
      return DataCompressionMethod.None;
  }
  /**
   * Matches archive entry names that start with `__MACOSX` or `.DS_Store`; such
   * entries are ignored when picking the single payload entry of a zip file.
   * The dot is escaped so that only a literal `.DS_Store` prefix matches.
   */
  const reFilterPath = /^(__MACOSX|\.DS_Store)/;
  /**
   * Undo the given compression.
   *
   * @param ctx Task context passed on to the decompression routines.
   * @param data Compressed bytes.
   * @param compression Scheme to undo; `None` returns `data` unchanged.
   * @returns The decompressed bytes. For Zip this is the content of the only entry
   *   left after dropping names matched by `reFilterPath`.
   * @throws Error when a zip archive does not contain exactly one such entry.
   */
  async function decompress(ctx: RuntimeContext, data: Uint8Array, compression: DataCompressionMethod): Promise<Uint8Array> {
      switch (compression) {
          case DataCompressionMethod.None: return data;
          case DataCompressionMethod.Gzip: return ungzip(ctx, data);
          case DataCompressionMethod.Zip: {
              // `data` may be a view onto part of a larger buffer; only pass on the bytes it covers
              const coversBuffer = data.byteOffset === 0 && data.byteLength === data.buffer.byteLength;
              const buffer = coversBuffer
                  ? data.buffer
                  : data.buffer.slice(data.byteOffset, data.byteOffset + data.byteLength);
              const parsed = await unzip(ctx, buffer);
              const names = Object.keys(parsed).filter(n => !reFilterPath.test(n));
              if (names.length !== 1) throw new Error('can only decompress zip files with a single entry');
              return parsed[names[0]] as Uint8Array;
          }
      }
  }
  /**
   * Turn raw file content into the requested result format.
   *
   * @param ctx Task context used for progress messages and passed to the (de)compression routines.
   * @param fileContent Text or bytes read from the file; `null` is rejected.
   * @param type Result format to produce.
   * @param compression Compression to undo first; requires binary `fileContent`.
   * @returns The content as bytes (`'binary'`), unzipped entries (`'zip'`), text
   *   (`'string'`), a parsed document (`'xml'`) or a parsed value (`'json'`).
   * @throws Error when no data is given, when compressed data is not binary, or
   *   when the data cannot be converted to the requested format.
   */
  async function processFile<T extends DataType>(ctx: RuntimeContext, fileContent: string | ArrayBuffer | null, type: T, compression: DataCompressionMethod): Promise<DataResponse<T>> {
      let data = fileContent instanceof ArrayBuffer ? new Uint8Array(fileContent) : fileContent;

      if (data === null) throw new Error('no data given');

      if (compression !== DataCompressionMethod.None) {
          if (!(data instanceof Uint8Array)) throw new Error('need Uint8Array for decompression');

          const decompressed = await decompress(ctx, data, compression);
          // 'xml' and 'json' are parsed from text below, so they need decoding just like 'string';
          // without it compressed XML/JSON files always ended in the error at the bottom
          if (type === 'string' || type === 'xml' || type === 'json') {
              await ctx.update({ message: 'Decoding text...' });
              data = utf8Read(decompressed, 0, decompressed.length);
          } else {
              data = decompressed;
          }
      }

      if (type === 'binary' && data instanceof Uint8Array) {
          return data as DataResponse<T>;
      } else if (type === 'zip' && data instanceof Uint8Array) {
          // `data` may be a view onto part of a larger buffer; only pass on the bytes it covers
          const coversBuffer = data.byteOffset === 0 && data.byteLength === data.buffer.byteLength;
          const buffer = coversBuffer
              ? data.buffer
              : data.buffer.slice(data.byteOffset, data.byteOffset + data.byteLength);
          return await unzip(ctx, buffer) as DataResponse<T>;
      } else if (type === 'string' && typeof data === 'string') {
          return data as DataResponse<T>;
      } else if (type === 'xml' && typeof data === 'string') {
          const parser = new DOMParser();
          return parser.parseFromString(data, 'application/xml') as DataResponse<T>;
      } else if (type === 'json' && typeof data === 'string') {
          return JSON.parse(data) as DataResponse<T>;
      }
      throw new Error(`could not get requested response data '${type}'`);
  }
  /**
   * Build the 'Read File' task for `file`.
   * In Node.js the work is delegated to `readFromFileInternal_NodeJS`; otherwise
   * the file is read with a `FileReader`, which is aborted if the task is aborted.
   *
   * @param file File to read; its name decides whether decompression is applied.
   * @param type Result format to produce.
   */
  function readFromFileInternal<T extends DataType>(file: File, type: T): Task<DataResponse<T>> {
      if (RUNNING_IN_NODEJS) return readFromFileInternal_NodeJS(file, type);

      // shared with the abort callback below; only set while a read is in flight
      let activeReader: FileReader | undefined = undefined;

      return Task.create('Read File', async ctx => {
          try {
              activeReader = new FileReader();

              const isZip = type === 'zip';
              // unzipping for type 'zip' handled explicitly in `processFile`
              const compression = isZip ? DataCompressionMethod.None : getCompression(file.name);
              const readAsBytes = type === 'binary' || isZip || compression !== DataCompressionMethod.None;

              if (readAsBytes) {
                  activeReader.readAsArrayBuffer(file);
              } else {
                  activeReader.readAsText(file);
              }

              await ctx.update({ message: 'Opening file...', canAbort: true });
              const finished = await readData(ctx, 'Reading...', activeReader);

              await ctx.update({ message: 'Processing file...', canAbort: false });
              return await processFile(ctx, finished.result, type, compression);
          } finally {
              activeReader = undefined;
          }
      }, () => {
          if (activeReader) activeReader.abort();
      });
  }
  /**
   * Node.js variant of `readFromFileInternal`: reads the file through its own
   * `arrayBuffer()` / `text()` methods instead of a `FileReader`.
   *
   * @param file File to read; its name decides whether decompression is applied.
   * @param type Result format to produce.
   */
  function readFromFileInternal_NodeJS<T extends DataType>(file: File, type: T): Task<DataResponse<T>> {
      return Task.create('Read File', async ctx => {
          const isZip = type === 'zip';
          // unzipping for type 'zip' handled explicitly in `processFile`
          const compression = isZip ? DataCompressionMethod.None : getCompression(file.name);
          const readAsBytes = type === 'binary' || isZip || compression !== DataCompressionMethod.None;

          await ctx.update({ message: 'Opening file...', canAbort: false });
          const content: ArrayBuffer | string = readAsBytes
              ? await file.arrayBuffer()
              : await file.text();

          await ctx.update({ message: 'Processing file...', canAbort: false });
          return await processFile(ctx, content, type, compression);
      });
  }
  /** Small stash of `XMLHttpRequest` objects that are handed out again instead of creating new ones. */
  class RequestPool {
      private static pool: XMLHttpRequest[] = [];
      /** Maximum number of requests kept for reuse. */
      private static poolSize = 15;

      /** Take the most recently deposited request, or create a new one when none is stored. */
      static get() {
          return this.pool.length ? this.pool.pop()! : new XMLHttpRequest();
      }

      /** Handler installed on deposited requests in place of their previous callbacks. */
      static emptyFunc() { }

      /**
       * Store a request for later reuse. Its abort/error/load/progress handlers are
       * replaced with `emptyFunc`. When the pool is already full the request is
       * left untouched and not stored.
       */
      static deposit(req: XMLHttpRequest) {
          if (this.pool.length >= this.poolSize) return;

          const noop = RequestPool.emptyFunc;
          req.onabort = noop;
          req.onerror = noop;
          req.onload = noop;
          req.onprogress = noop;
          this.pool.push(req);
      }
  }
  /**
   * Extract the result of a finished request and return the request to the pool.
   *
   * NOTE(review): for `'zip'` the raw bytes are returned as a `Uint8Array`; they
   * are not unzipped here.
   *
   * @param req Finished request.
   * @param type Result format that was requested.
   * @throws Error when the status code is outside 200-399, or when the response
   *   does not have the shape expected for `type`.
   */
  function processAjax<T extends DataType>(req: XMLHttpRequest, type: T): DataResponse<T> {
      const succeeded = req.status >= 200 && req.status < 400;
      if (!succeeded) {
          RequestPool.deposit(req);
          throw new Error(`Download failed with status code ${req.status}`);
      }

      // grab the response before the request goes back to the pool
      const payload = req.response;
      RequestPool.deposit(req);

      switch (type) {
          case 'binary':
          case 'zip':
              if (payload instanceof ArrayBuffer) return new Uint8Array(payload) as DataResponse<T>;
              break;
          case 'string':
              if (typeof payload === 'string') return payload as DataResponse<T>;
              break;
          case 'xml':
              if (payload instanceof XMLDocument) return payload as DataResponse<T>;
              break;
          case 'json':
              if (typeof payload === 'object') return payload as DataResponse<T>;
              break;
      }

      throw new Error(`could not get requested response data '${type}'`);
  }
  /** Translate a `DataType` name into the matching `XMLHttpRequest.responseType` value. */
  function getRequestResponseType(type: DataType): XMLHttpRequestResponseType {
      switch (type) {
          // both binary flavours are transferred as raw bytes
          case 'binary':
          case 'zip':
              return 'arraybuffer';
          case 'string':
              return 'text';
          case 'xml':
              return 'document';
          case 'json':
              return 'json';
      }
  }
  /**
   * Build the download task behind `ajaxGet`.
   * In Node.js, `file://` URLs are delegated to `ajaxGetInternal_file_NodeJS`;
   * everything else goes through a pooled `XMLHttpRequest`, which is aborted if
   * the task is aborted.
   *
   * @param title Task name; `'Download'` is used when empty or undefined.
   * @param url Address to request.
   * @param type Result format to produce.
   * @param body Request body; when non-empty the request is sent as POST, otherwise as GET.
   * @param headers Extra request headers as `[name, value]` pairs.
   */
  function ajaxGetInternal<T extends DataType>(title: string | undefined, url: string, type: T, body?: string, headers?: [string, string][]): Task<DataResponse<T>> {
      if (RUNNING_IN_NODEJS && url.startsWith('file://')) {
          return ajaxGetInternal_file_NodeJS(title, url, type, body, headers);
      }

      // shared with the abort callback below; only set while the request is in flight
      let pending: XMLHttpRequest | undefined = undefined;
      const taskName = title ? title : 'Download';

      return Task.create(taskName, async ctx => {
          const request = RequestPool.get();
          pending = request;

          request.open(body ? 'post' : 'get', url, true);
          for (const [name, value] of headers ?? []) {
              request.setRequestHeader(name, value);
          }
          request.responseType = getRequestResponseType(type);
          request.send(body);

          await ctx.update({ message: 'Waiting for server...', canAbort: true });
          const finished = await readData(ctx, 'Downloading...', request);
          pending = undefined; // guard against reuse, help garbage collector

          await ctx.update({ message: 'Parsing response...', canAbort: false });
          return processAjax(finished, type);
      }, () => {
          if (!pending) return;
          pending.abort();
          pending = undefined; // guard against reuse, help garbage collector
      });
  }
  /**
   * Alternative implementation of ajaxGetInternal (because xhr2 does not support file:// protocol).
   *
   * The file is read synchronously at call time, before the returned task runs.
   * Its bytes are wrapped in a `File` named `'raw-data'` and passed to `readFromFile`.
   * `title`, `body` and `headers` are accepted for signature parity but not used.
   *
   * @throws Error when not running in Node.js or when `url` does not start with `file://`.
   */
  function ajaxGetInternal_file_NodeJS<T extends DataType>(title: string | undefined, url: string, type: T, body?: string, headers?: [string, string][]): Task<DataResponse<T>> {
      const protocol = 'file://';
      if (!RUNNING_IN_NODEJS) throw new Error('This function should only be used when running in Node.js');
      if (!url.startsWith(protocol)) throw new Error('This function is only for URLs with protocol file://');

      const path = url.slice(protocol.length);
      const bytes = lazyImports.fs.readFileSync(path);
      return readFromFile(new File([bytes], 'raw-data'), type);
  }
  /**
   * Outcome of one download started by `ajaxGetMany`, tagged by `kind`:
   * `'ok'` carries the resolved asset, `'error'` carries whatever was thrown.
   */
  export type AjaxGetManyEntry =
      | { kind: 'ok', id: string, result: Asset.Wrapper<'string' | 'binary'> }
      | { kind: 'error', id: string, error: any }
  /**
   * Download several sources with a bounded number of downloads in flight.
   *
   * @param ctx Task context; receives progress updates and is the parent of every download.
   * @param assetManager Manager used to resolve each source URL.
   * @param sources Items to download. `isBinary` selects `'binary'` over `'string'`;
   *   `canFail` lets a failed item be recorded instead of aborting the whole call.
   * @param maxConcurrency Upper bound on simultaneous downloads. Values below 1
   *   (and NaN) are treated as 1, because otherwise nothing would be downloaded
   *   and an array of empty slots would be returned.
   * @returns One entry per source, at the same index as the source.
   * @throws Error as soon as a source without `canFail` fails.
   */
  export async function ajaxGetMany(ctx: RuntimeContext, assetManager: AssetManager, sources: { id: string, url: Asset.Url | string, isBinary?: boolean, canFail?: boolean }[], maxConcurrency: number) {
      const len = sources.length;
      const slots: AjaxGetManyEntry[] = new Array(len);

      await ctx.update({ message: 'Downloading...', current: 0, max: len });
      let promises: Promise<AjaxGetManyEntry & { index: number }>[] = [], promiseKeys: number[] = [];
      let currentSrc = 0;

      // Start downloading the next source that has not been started yet.
      const startNext = () => {
          const current = sources[currentSrc];
          const asset = assetManager.resolve(Asset.getUrlAsset(assetManager, current.url), current.isBinary ? 'binary' : 'string').runAsChild(ctx);
          promises.push(wrapPromise(currentSrc, current.id, asset));
          promiseKeys.push(currentSrc);
          currentSrc++;
      };

      // the comparison is false for NaN as well, so NaN also falls back to 1
      const concurrency = maxConcurrency >= 1 ? maxConcurrency : 1;
      const initialCount = Math.min(len, concurrency);
      while (currentSrc < initialCount) startNext();

      let done = 0;
      while (promises.length > 0) {
          const r = await Promise.race(promises);
          const src = sources[r.index];
          // position of the finished download within `promises` / `promiseKeys`
          const idx = promiseKeys.indexOf(r.index);
          done++;
          if (r.kind === 'error' && !src.canFail) {
              // TODO: cancel other downloads
              throw new Error(`${src.url}: ${r.error}`);
          }
          if (ctx.shouldUpdate) {
              await ctx.update({ message: 'Downloading...', current: done, max: len });
          }
          slots[r.index] = r;
          promises = promises.filter(_filterRemoveIndex, idx);
          promiseKeys = promiseKeys.filter(_filterRemoveIndex, idx);
          // a slot became free: keep the pipeline full
          if (currentSrc < len) startNext();
      }
      return slots;
  }
  /**
   * `Array.prototype.filter` callback that drops the element at one position.
   * The position to drop is supplied as the `thisArg` of `filter`, e.g.
   * `items.filter(_filterRemoveIndex, 2)`. The comparison is strict, so this
   * relies on `this` arriving as a primitive number (strict-mode code).
   */
  function _filterRemoveIndex(this: number, _: any, i: number) {
      const keep = i !== this;
      return keep;
  }
  /**
   * Convert a download promise into one that never rejects: success becomes an
   * `'ok'` entry and failure an `'error'` entry, each tagged with the given
   * `index` and `id`.
   */
  async function wrapPromise(index: number, id: string, p: Promise<Asset.Wrapper<'string' | 'binary'>>): Promise<AjaxGetManyEntry & { index: number }> {
      let outcome: AjaxGetManyEntry & { index: number };
      try {
          outcome = { kind: 'ok', result: await p, index, id };
      } catch (error) {
          outcome = { kind: 'error', error, index, id };
      }
      return outcome;
  }