deflate.ts 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382
  1. /**
  2. * Copyright (c) 2020-2021 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * @author Alexander Rose <alexander.rose@weirdbyte.de>
  5. *
  6. * ported from https://github.com/photopea/UZIP.js/blob/master/UZIP.js
  7. * MIT License, Copyright (c) 2018 Photopea
  8. */
  9. import { RuntimeContext } from '../../mol-task';
  10. import { NumberArray } from '../type-helpers';
  11. import { _hufTree } from './huffman';
  12. import { U, revCodes, makeCodes } from './util';
  13. function DeflateContext(data: Uint8Array, out: Uint8Array, opos: number, lvl: number) {
  14. const { lits, strt, prev } = U;
  15. return {
  16. data,
  17. out,
  18. opt: Opts[lvl],
  19. i: 0,
  20. pos: opos << 3,
  21. cvrd: 0,
  22. dlen: data.length,
  23. li: 0,
  24. lc: 0,
  25. bs: 0,
  26. ebits: 0,
  27. c: 0,
  28. nc: 0,
  29. lits,
  30. strt,
  31. prev
  32. };
  33. }
  34. type DeflateContext = ReturnType<typeof DeflateContext>
  35. function deflateChunk(ctx: DeflateContext, count: number) {
  36. const { data, dlen, out, opt } = ctx;
  37. let { i, pos, cvrd, li, lc, bs, ebits, c, nc } = ctx;
  38. const { lits, strt, prev } = U;
  39. const end = Math.min(i + count, dlen);
  40. for (; i < end; i++) {
  41. c = nc;
  42. if (i + 1 < dlen - 2) {
  43. nc = _hash(data, i + 1);
  44. const ii = ((i + 1) & 0x7fff);
  45. prev[ii] = strt[nc];
  46. strt[nc] = ii;
  47. }
  48. if (cvrd <= i) {
  49. if ((li > 14000 || lc > 26697) && (dlen - i) > 100) {
  50. if (cvrd < i) {
  51. lits[li] = i - cvrd;
  52. li += 2;
  53. cvrd = i;
  54. }
  55. pos = _writeBlock(((i === dlen - 1) || (cvrd === dlen)) ? 1 : 0, lits, li, ebits, data, bs, i - bs, out, pos);
  56. li = lc = ebits = 0;
  57. bs = i;
  58. }
  59. let mch = 0;
  60. if (i < dlen - 2) {
  61. mch = _bestMatch(data, i, prev, c, Math.min(opt[2], dlen - i), opt[3]);
  62. }
  63. if (mch !== 0) {
  64. const len = mch >>> 16, dst = mch & 0xffff;
  65. const lgi = _goodIndex(len, U.of0); U.lhst[257 + lgi]++;
  66. const dgi = _goodIndex(dst, U.df0); U.dhst[dgi]++; ebits += U.exb[lgi] + U.dxb[dgi];
  67. lits[li] = (len << 23) | (i - cvrd); lits[li + 1] = (dst << 16) | (lgi << 8) | dgi; li += 2;
  68. cvrd = i + len;
  69. } else {
  70. U.lhst[data[i]]++;
  71. }
  72. lc++;
  73. }
  74. }
  75. ctx.i = i;
  76. ctx.pos = pos;
  77. ctx.cvrd = cvrd;
  78. ctx.li = li;
  79. ctx.lc = lc;
  80. ctx.bs = bs;
  81. ctx.ebits = ebits;
  82. ctx.c = c;
  83. ctx.nc = nc;
  84. }
  85. /**
  86. * - good_length: reduce lazy search above this match length;
  87. * - max_lazy: do not perform lazy search above this match length;
  88. * - nice_length: quit search above this match length;
  89. */
  90. const Opts = [
  91. /* good lazy nice chain */
  92. /* 0 */ [0, 0, 0, 0, 0], /* store only */
  93. /* 1 */ [4, 4, 8, 4, 0], /* max speed, no lazy matches */
  94. /* 2 */ [4, 5, 16, 8, 0],
  95. /* 3 */ [4, 6, 16, 16, 0],
  96. /* 4 */ [4, 10, 16, 32, 0], /* lazy matches */
  97. /* 5 */ [8, 16, 32, 32, 0],
  98. /* 6 */ [8, 16, 128, 128, 0],
  99. /* 7 */ [8, 32, 128, 256, 0],
  100. /* 8 */ [32, 128, 258, 1024, 1],
  101. /* 9 */ [32, 258, 258, 4096, 1] /* max compression */
  102. ] as const;
  103. export async function _deflateRaw(runtime: RuntimeContext, data: Uint8Array, out: Uint8Array, opos: number, lvl: number) {
  104. const ctx = DeflateContext(data, out, opos, lvl);
  105. const { dlen } = ctx;
  106. if (lvl === 0) {
  107. let { i, pos } = ctx;
  108. while (i < dlen) {
  109. const len = Math.min(0xffff, dlen - i);
  110. _putsE(out, pos, (i + len === dlen ? 1 : 0));
  111. pos = _copyExact(data, i, len, out, pos + 8);
  112. i += len;
  113. }
  114. return pos >>> 3;
  115. }
  116. if (dlen > 2) {
  117. ctx.nc = _hash(data, 0);
  118. ctx.strt[ctx.nc] = 0;
  119. }
  120. while (ctx.i < dlen) {
  121. if (runtime.shouldUpdate) {
  122. await runtime.update({ message: 'Deflating...', current: ctx.i, max: dlen });
  123. }
  124. deflateChunk(ctx, 1024 * 1024);
  125. }
  126. let { li, cvrd, pos } = ctx;
  127. const { i, lits, bs, ebits } = ctx;
  128. if (bs !== i || data.length === 0) {
  129. if (cvrd < i) {
  130. lits[li] = i - cvrd;
  131. li += 2;
  132. cvrd = i;
  133. }
  134. pos = _writeBlock(1, lits, li, ebits, data, bs, i - bs, out, pos);
  135. }
  136. while ((pos & 7) !== 0) pos++;
  137. return pos >>> 3;
  138. }
  139. function _bestMatch(data: Uint8Array, i: number, prev: Uint16Array, c: number, nice: number, chain: number) {
  140. let ci = (i & 0x7fff), pi = prev[ci];
  141. // console.log("----", i);
  142. let dif = ((ci - pi + (1 << 15)) & 0x7fff);
  143. if (pi === ci || c !== _hash(data, i - dif)) return 0;
  144. let tl = 0, td = 0; // top length, top distance
  145. const dlim = Math.min(0x7fff, i);
  146. while (dif <= dlim && --chain !== 0 && pi !== ci /* && c==UZIP.F._hash(data,i-dif)*/) {
  147. if (tl === 0 || (data[i + tl] === data[i + tl - dif])) {
  148. let cl = _howLong(data, i, dif);
  149. if (cl > tl) {
  150. tl = cl; td = dif; if (tl >= nice) break; //*
  151. if (dif + 2 < cl) cl = dif + 2;
  152. let maxd = 0; // pi does not point to the start of the word
  153. for (let j = 0; j < cl - 2; j++) {
  154. const ei = (i - dif + j + (1 << 15)) & 0x7fff;
  155. const li = prev[ei];
  156. const curd = (ei - li + (1 << 15)) & 0x7fff;
  157. if (curd > maxd) { maxd = curd; pi = ei; }
  158. }
  159. }
  160. }
  161. ci = pi; pi = prev[ci];
  162. dif += ((ci - pi + (1 << 15)) & 0x7fff);
  163. }
  164. return (tl << 16) | td;
  165. }
  166. function _howLong(data: Uint8Array, i: number, dif: number) {
  167. if (data[i] !== data[i - dif] || data[i + 1] !== data[i + 1 - dif] || data[i + 2] !== data[i + 2 - dif]) return 0;
  168. const oi = i, l = Math.min(data.length, i + 258);
  169. i += 3;
  170. // while(i+4<l && data[i]==data[i-dif] && data[i+1]==data[i+1-dif] && data[i+2]==data[i+2-dif] && data[i+3]==data[i+3-dif]) i+=4;
  171. while (i < l && data[i] === data[i - dif]) i++;
  172. return i - oi;
  173. }
  174. function _hash(data: Uint8Array, i: number) {
  175. return (((data[i] << 8) | data[i + 1]) + (data[i + 2] << 4)) & 0xffff;
  176. // var hash_shift = 0, hash_mask = 255;
  177. // var h = data[i+1] % 251;
  178. // h = (((h << 8) + data[i+2]) % 251);
  179. // h = (((h << 8) + data[i+2]) % 251);
  180. // h = ((h<<hash_shift) ^ (c) ) & hash_mask;
  181. // return h | (data[i]<<8);
  182. // return (data[i] | (data[i+1]<<8));
  183. }
  184. function _writeBlock(BFINAL: number, lits: Uint32Array, li: number, ebits: number, data: Uint8Array, o0: number, l0: number, out: Uint8Array, pos: number) {
  185. U.lhst[256]++;
  186. const [ML, MD, MH, numl, numd, numh, lset, dset] = getTrees();
  187. const cstSize = (((pos + 3) & 7) === 0 ? 0 : 8 - ((pos + 3) & 7)) + 32 + (l0 << 3);
  188. const fxdSize = ebits + contSize(U.fltree, U.lhst) + contSize(U.fdtree, U.dhst);
  189. let dynSize = ebits + contSize(U.ltree, U.lhst) + contSize(U.dtree, U.dhst);
  190. dynSize += 14 + 3 * numh + contSize(U.itree, U.ihst) + (U.ihst[16] * 2 + U.ihst[17] * 3 + U.ihst[18] * 7);
  191. for (let j = 0; j < 286; j++) U.lhst[j] = 0;
  192. for (let j = 0; j < 30; j++) U.dhst[j] = 0;
  193. for (let j = 0; j < 19; j++) U.ihst[j] = 0;
  194. const BTYPE = (cstSize < fxdSize && cstSize < dynSize) ? 0 : (fxdSize < dynSize ? 1 : 2);
  195. _putsF(out, pos, BFINAL);
  196. _putsF(out, pos + 1, BTYPE);
  197. pos += 3;
  198. // let opos = pos;
  199. if (BTYPE === 0) {
  200. while ((pos & 7) !== 0) pos++;
  201. pos = _copyExact(data, o0, l0, out, pos);
  202. } else {
  203. let ltree: number[], dtree: number[];
  204. if (BTYPE === 1) {
  205. ltree = U.fltree; dtree = U.fdtree;
  206. } else if (BTYPE === 2) {
  207. makeCodes(U.ltree, ML); revCodes(U.ltree, ML);
  208. makeCodes(U.dtree, MD); revCodes(U.dtree, MD);
  209. makeCodes(U.itree, MH); revCodes(U.itree, MH);
  210. ltree = U.ltree; dtree = U.dtree;
  211. _putsE(out, pos, numl - 257); pos += 5; // 286
  212. _putsE(out, pos, numd - 1); pos += 5; // 30
  213. _putsE(out, pos, numh - 4); pos += 4; // 19
  214. for (let i = 0; i < numh; i++) _putsE(out, pos + i * 3, U.itree[(U.ordr[i] << 1) + 1]);
  215. pos += 3 * numh;
  216. pos = _codeTiny(lset, U.itree, out, pos);
  217. pos = _codeTiny(dset, U.itree, out, pos);
  218. } else {
  219. throw new Error(`unknown BTYPE ${BTYPE}`);
  220. }
  221. let off = o0;
  222. for (let si = 0; si < li; si += 2) {
  223. const qb = lits[si], len = (qb >>> 23), end = off + (qb & ((1 << 23) - 1));
  224. while (off < end) pos = _writeLit(data[off++], ltree, out, pos);
  225. if (len !== 0) {
  226. const qc = lits[si + 1], dst = (qc >> 16), lgi = (qc >> 8) & 255, dgi = (qc & 255);
  227. pos = _writeLit(257 + lgi, ltree, out, pos);
  228. _putsE(out, pos, len - U.of0[lgi]); pos += U.exb[lgi];
  229. pos = _writeLit(dgi, dtree, out, pos);
  230. _putsF(out, pos, dst - U.df0[dgi]); pos += U.dxb[dgi]; off += len;
  231. }
  232. }
  233. pos = _writeLit(256, ltree, out, pos);
  234. }
  235. // console.log(pos-opos, fxdSize, dynSize, cstSize);
  236. return pos;
  237. }
  238. function _copyExact(data: Uint8Array, off: number, len: number, out: Uint8Array, pos: number) {
  239. let p8 = (pos >>> 3);
  240. out[p8] = (len);
  241. out[p8 + 1] = (len >>> 8);
  242. out[p8 + 2] = 255 - out[p8];
  243. out[p8 + 3] = 255 - out[p8 + 1];
  244. p8 += 4;
  245. out.set(new Uint8Array(data.buffer, off, len), p8);
  246. // for(var i=0; i<len; i++) out[p8+i]=data[off+i];
  247. return pos + ((len + 4) << 3);
  248. }
  249. /*
  250. Interesting facts:
  251. - decompressed block can have bytes, which do not occur in a Huffman tree (copied from the previous block by reference)
  252. */
  253. function getTrees() {
  254. const ML = _hufTree(U.lhst, U.ltree, 15);
  255. const MD = _hufTree(U.dhst, U.dtree, 15);
  256. const lset: number[] = [];
  257. const numl = _lenCodes(U.ltree, lset);
  258. const dset: number[] = [];
  259. const numd = _lenCodes(U.dtree, dset);
  260. for (let i = 0; i < lset.length; i += 2) U.ihst[lset[i]]++;
  261. for (let i = 0; i < dset.length; i += 2) U.ihst[dset[i]]++;
  262. const MH = _hufTree(U.ihst, U.itree, 7);
  263. let numh = 19;
  264. while (numh > 4 && U.itree[(U.ordr[numh - 1] << 1) + 1] === 0) numh--;
  265. return [ML, MD, MH, numl, numd, numh, lset, dset] as const;
  266. }
  267. function contSize(tree: number[], hst: NumberArray) {
  268. let s = 0;
  269. for (let i = 0; i < hst.length; i++) s += hst[i] * tree[(i << 1) + 1];
  270. return s;
  271. }
  272. function _codeTiny(set: number[], tree: number[], out: Uint8Array, pos: number) {
  273. for (let i = 0; i < set.length; i += 2) {
  274. const l = set[i], rst = set[i + 1]; // console.log(l, pos, tree[(l<<1)+1]);
  275. pos = _writeLit(l, tree, out, pos);
  276. const rsl = l === 16 ? 2 : (l === 17 ? 3 : 7);
  277. if (l > 15) {
  278. _putsE(out, pos, rst);
  279. pos += rsl;
  280. }
  281. }
  282. return pos;
  283. }
  284. function _lenCodes(tree: number[], set: number[]) {
  285. let len = tree.length;
  286. while (len !== 2 && tree[len - 1] === 0) len -= 2; // when no distances, keep one code with length 0
  287. for (let i = 0; i < len; i += 2) {
  288. const l = tree[i + 1], nxt = (i + 3 < len ? tree[i + 3] : -1), nnxt = (i + 5 < len ? tree[i + 5] : -1), prv = (i === 0 ? -1 : tree[i - 1]);
  289. if (l === 0 && nxt === l && nnxt === l) {
  290. let lz = i + 5;
  291. while (lz + 2 < len && tree[lz + 2] === l) lz += 2;
  292. const zc = Math.min((lz + 1 - i) >>> 1, 138);
  293. if (zc < 11) set.push(17, zc - 3);
  294. else set.push(18, zc - 11);
  295. i += zc * 2 - 2;
  296. } else if (l === prv && nxt === l && nnxt === l) {
  297. let lz = i + 5;
  298. while (lz + 2 < len && tree[lz + 2] === l) lz += 2;
  299. const zc = Math.min((lz + 1 - i) >>> 1, 6);
  300. set.push(16, zc - 3);
  301. i += zc * 2 - 2;
  302. } else {
  303. set.push(l, 0);
  304. }
  305. }
  306. return len >>> 1;
  307. }
  308. function _goodIndex(v: number, arr: number[]) {
  309. let i = 0;
  310. if (arr[i | 16] <= v) i |= 16;
  311. if (arr[i | 8] <= v) i |= 8;
  312. if (arr[i | 4] <= v) i |= 4;
  313. if (arr[i | 2] <= v) i |= 2;
  314. if (arr[i | 1] <= v) i |= 1;
  315. return i;
  316. }
  317. function _writeLit(ch: number, ltree: number[], out: Uint8Array, pos: number) {
  318. _putsF(out, pos, ltree[ch << 1]);
  319. return pos + ltree[(ch << 1) + 1];
  320. }
  321. function _putsE(dt: NumberArray, pos: number, val: number) {
  322. val = val << (pos & 7);
  323. const o = (pos >>> 3);
  324. dt[o] |= val;
  325. dt[o + 1] |= (val >>> 8);
  326. }
  327. function _putsF(dt: NumberArray, pos: number, val: number) {
  328. val = val << (pos & 7);
  329. const o = (pos >>> 3);
  330. dt[o] |= val;
  331. dt[o + 1] |= (val >>> 8);
  332. dt[o + 2] |= (val >>> 16);
  333. }