deflate.ts 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341
  1. /**
  2. * Copyright (c) 2020 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * @author Alexander Rose <alexander.rose@weirdbyte.de>
  5. *
  6. * ported from https://github.com/photopea/UZIP.js/blob/master/UZIP.js
  7. * MIT License, Copyright (c) 2018 Photopea
  8. */
  9. import { NumberArray } from '../type-helpers';
  10. import { _hufTree } from './huffman';
  11. import { U, revCodes, makeCodes } from './util';
  12. export function _deflateRaw(data: Uint8Array, out: Uint8Array, opos: number, lvl: number) {
  13. const opts = [
  14. /*
  15. ush good_length; /* reduce lazy search above this match length
  16. ush max_lazy; /* do not perform lazy search above this match length
  17. ush nice_length; /* quit search above this match length
  18. */
  19. /* good lazy nice chain */
  20. /* 0 */ [ 0, 0, 0, 0, 0], /* store only */
  21. /* 1 */ [ 4, 4, 8, 4, 0], /* max speed, no lazy matches */
  22. /* 2 */ [ 4, 5, 16, 8, 0],
  23. /* 3 */ [ 4, 6, 16, 16, 0],
  24. /* 4 */ [ 4, 10, 16, 32, 0], /* lazy matches */
  25. /* 5 */ [ 8, 16, 32, 32, 0],
  26. /* 6 */ [ 8, 16, 128, 128, 0],
  27. /* 7 */ [ 8, 32, 128, 256, 0],
  28. /* 8 */ [32, 128, 258, 1024, 1],
  29. /* 9 */ [32, 258, 258, 4096, 1] /* max compression */
  30. ];
  31. const opt = opts[lvl];
  32. let i = 0, pos = opos << 3, cvrd = 0;
  33. const dlen = data.length;
  34. if(lvl === 0) {
  35. while(i < dlen) {
  36. const len = Math.min(0xffff, dlen - i);
  37. _putsE(out, pos, (i + len === dlen ? 1 : 0));
  38. pos = _copyExact(data, i, len, out, pos + 8);
  39. i += len;
  40. }
  41. return pos >>> 3;
  42. }
  43. const { lits, strt, prev } = U;
  44. let li = 0, lc = 0, bs = 0, ebits = 0, c = 0, nc = 0; // last_item, literal_count, block_start
  45. if(dlen > 2) {
  46. nc = _hash(data, 0);
  47. strt[nc] = 0;
  48. }
  49. // let nmch = 0
  50. // let nmci = 0
  51. for(i = 0; i < dlen; i++) {
  52. c = nc;
  53. //*
  54. if(i + 1 < dlen - 2) {
  55. nc = _hash(data, i + 1);
  56. const ii = ((i + 1) & 0x7fff);
  57. prev[ii] = strt[nc];
  58. strt[nc] = ii;
  59. } // */
  60. if(cvrd <= i) {
  61. if((li > 14000 || lc > 26697) && (dlen - i) > 100) {
  62. if(cvrd < i) {
  63. lits[li] = i - cvrd;
  64. li += 2;
  65. cvrd = i;
  66. }
  67. pos = _writeBlock(((i === dlen - 1) || (cvrd === dlen)) ? 1 : 0, lits, li, ebits, data, bs, i - bs, out, pos);
  68. li = lc = ebits = 0;
  69. bs = i;
  70. }
  71. let mch = 0;
  72. // if(nmci==i) mch= nmch; else
  73. if(i < dlen - 2) {
  74. mch = _bestMatch(data, i, prev, c, Math.min(opt[2], dlen - i), opt[3]);
  75. }
  76. /*
  77. if(mch!=0 && opt[4]==1 && (mch>>>16)<opt[1] && i+1<dlen-2) {
  78. nmch = UZIP.F._bestMatch(data, i+1, prev, nc, opt[2], opt[3]); nmci=i+1;
  79. //var mch2 = UZIP.F._bestMatch(data, i+2, prev, nnc); //nmci=i+1;
  80. if((nmch>>>16)>(mch>>>16)) mch=0;
  81. }//*/
  82. // const len = mch>>>16, dst = mch & 0xffff; // if(i-dst<0) throw "e";
  83. if(mch !== 0) {
  84. const len = mch >>> 16, dst = mch & 0xffff; // if(i-dst<0) throw "e";
  85. const lgi = _goodIndex(len, U.of0); U.lhst[257 + lgi]++;
  86. const dgi = _goodIndex(dst, U.df0); U.dhst[ dgi]++; ebits += U.exb[lgi] + U.dxb[dgi];
  87. lits[li] = (len << 23) | (i - cvrd); lits[li + 1] = (dst << 16) | (lgi << 8) | dgi; li += 2;
  88. cvrd = i + len;
  89. } else {
  90. U.lhst[data[i]]++;
  91. }
  92. lc++;
  93. }
  94. }
  95. if(bs !== i || data.length === 0) {
  96. if(cvrd < i) {
  97. lits[li] = i - cvrd;
  98. li += 2;
  99. cvrd = i;
  100. }
  101. pos = _writeBlock(1, lits, li, ebits, data, bs, i - bs, out, pos);
  102. li = 0;
  103. lc = 0;
  104. li = lc = ebits = 0;
  105. bs = i;
  106. }
  107. while((pos & 7) !== 0) pos++;
  108. return pos >>> 3;
  109. }
  110. function _bestMatch(data: Uint8Array, i: number, prev: Uint16Array, c: number, nice: number, chain: number) {
  111. let ci = (i & 0x7fff), pi = prev[ci];
  112. // console.log("----", i);
  113. let dif = ((ci - pi + (1 << 15)) & 0x7fff);
  114. if(pi === ci || c !== _hash(data, i - dif)) return 0;
  115. let tl = 0, td = 0; // top length, top distance
  116. const dlim = Math.min(0x7fff, i);
  117. while(dif <= dlim && --chain !== 0 && pi !== ci /* && c==UZIP.F._hash(data,i-dif)*/) {
  118. if(tl === 0 || (data[i + tl] === data[i + tl - dif])) {
  119. let cl = _howLong(data, i, dif);
  120. if(cl > tl) {
  121. tl = cl; td = dif; if(tl >= nice) break; //*
  122. if(dif + 2 < cl) cl = dif + 2;
  123. let maxd = 0; // pi does not point to the start of the word
  124. for(let j = 0; j < cl - 2; j++) {
  125. const ei = (i - dif + j + (1 << 15)) & 0x7fff;
  126. const li = prev[ei];
  127. const curd = (ei - li + (1 << 15)) & 0x7fff;
  128. if(curd > maxd) { maxd = curd; pi = ei; }
  129. }
  130. }
  131. }
  132. ci = pi; pi = prev[ci];
  133. dif += ((ci - pi + (1 << 15)) & 0x7fff);
  134. }
  135. return (tl << 16) | td;
  136. }
  137. function _howLong(data: Uint8Array, i: number, dif: number) {
  138. if(data[i] !== data[i - dif] || data[i + 1] !== data[i + 1 - dif] || data[i + 2] !== data[i + 2 - dif]) return 0;
  139. const oi = i, l = Math.min(data.length, i + 258);
  140. i += 3;
  141. // while(i+4<l && data[i]==data[i-dif] && data[i+1]==data[i+1-dif] && data[i+2]==data[i+2-dif] && data[i+3]==data[i+3-dif]) i+=4;
  142. while(i < l && data[i] === data[i - dif]) i++;
  143. return i - oi;
  144. }
  145. function _hash(data: Uint8Array, i: number) {
  146. return (((data[i] << 8) | data[i + 1]) + (data[i + 2] << 4)) & 0xffff;
  147. // var hash_shift = 0, hash_mask = 255;
  148. // var h = data[i+1] % 251;
  149. // h = (((h << 8) + data[i+2]) % 251);
  150. // h = (((h << 8) + data[i+2]) % 251);
  151. // h = ((h<<hash_shift) ^ (c) ) & hash_mask;
  152. // return h | (data[i]<<8);
  153. // return (data[i] | (data[i+1]<<8));
  154. }
  155. function _writeBlock(BFINAL: number, lits: Uint32Array, li: number, ebits: number, data: Uint8Array, o0: number, l0: number, out: Uint8Array, pos: number) {
  156. U.lhst[256]++;
  157. const [ ML, MD, MH, numl, numd, numh, lset, dset ] = getTrees();
  158. const cstSize = (((pos + 3) & 7) === 0 ? 0 : 8 - ((pos + 3) & 7)) + 32 + (l0 << 3);
  159. const fxdSize = ebits + contSize(U.fltree, U.lhst) + contSize(U.fdtree, U.dhst);
  160. let dynSize = ebits + contSize(U.ltree, U.lhst) + contSize(U.dtree, U.dhst);
  161. dynSize += 14 + 3 * numh + contSize(U.itree, U.ihst) + (U.ihst[16] * 2 + U.ihst[17] * 3 + U.ihst[18] * 7);
  162. for(let j = 0; j < 286; j++) U.lhst[j] = 0;
  163. for(let j = 0; j < 30; j++) U.dhst[j] = 0;
  164. for(let j = 0; j < 19; j++) U.ihst[j] = 0;
  165. const BTYPE = (cstSize < fxdSize && cstSize < dynSize) ? 0 : ( fxdSize < dynSize ? 1 : 2 );
  166. _putsF(out, pos, BFINAL);
  167. _putsF(out, pos + 1, BTYPE);
  168. pos += 3;
  169. // let opos = pos;
  170. if(BTYPE === 0) {
  171. while((pos & 7) !== 0) pos++;
  172. pos = _copyExact(data, o0, l0, out, pos);
  173. } else {
  174. let ltree: number[], dtree: number[];
  175. if(BTYPE === 1) {
  176. ltree = U.fltree; dtree = U.fdtree;
  177. } else if(BTYPE === 2) {
  178. makeCodes(U.ltree, ML); revCodes(U.ltree, ML);
  179. makeCodes(U.dtree, MD); revCodes(U.dtree, MD);
  180. makeCodes(U.itree, MH); revCodes(U.itree, MH);
  181. ltree = U.ltree; dtree = U.dtree;
  182. _putsE(out, pos, numl - 257); pos += 5; // 286
  183. _putsE(out, pos, numd - 1); pos += 5; // 30
  184. _putsE(out, pos, numh - 4); pos += 4; // 19
  185. for(let i = 0; i < numh; i++) _putsE(out, pos + i * 3, U.itree[(U.ordr[i] << 1) + 1]);
  186. pos += 3 * numh;
  187. pos = _codeTiny(lset, U.itree, out, pos);
  188. pos = _codeTiny(dset, U.itree, out, pos);
  189. } else {
  190. throw new Error(`unknown BTYPE ${BTYPE}`);
  191. }
  192. let off = o0;
  193. for(let si = 0; si < li; si += 2) {
  194. const qb = lits[si], len = (qb >>> 23), end = off + (qb & ((1 << 23) - 1));
  195. while(off < end) pos = _writeLit(data[off++], ltree, out, pos);
  196. if(len !== 0) {
  197. const qc = lits[si + 1], dst = (qc >> 16), lgi = (qc >> 8) & 255, dgi = (qc & 255);
  198. pos = _writeLit(257 + lgi, ltree, out, pos);
  199. _putsE(out, pos, len - U.of0[lgi]); pos += U.exb[lgi];
  200. pos = _writeLit(dgi, dtree, out, pos);
  201. _putsF(out, pos, dst - U.df0[dgi]); pos += U.dxb[dgi]; off += len;
  202. }
  203. }
  204. pos = _writeLit(256, ltree, out, pos);
  205. }
  206. // console.log(pos-opos, fxdSize, dynSize, cstSize);
  207. return pos;
  208. }
  209. function _copyExact(data: Uint8Array, off: number, len: number, out: Uint8Array, pos: number) {
  210. let p8 = (pos >>> 3);
  211. out[p8] = (len);
  212. out[p8 + 1] = (len >>> 8);
  213. out[p8 + 2] = 255 - out[p8];
  214. out[p8 + 3] = 255 - out[p8 + 1];
  215. p8 += 4;
  216. out.set(new Uint8Array(data.buffer, off, len), p8);
  217. // for(var i=0; i<len; i++) out[p8+i]=data[off+i];
  218. return pos + ((len + 4) << 3);
  219. }
/*
 Interesting facts:
 - a decompressed block can contain bytes that do not occur in its Huffman tree (they are copied from a previous block by back-reference)
*/
  224. function getTrees() {
  225. const ML = _hufTree(U.lhst, U.ltree, 15);
  226. const MD = _hufTree(U.dhst, U.dtree, 15);
  227. const lset: number[] = [];
  228. const numl = _lenCodes(U.ltree, lset);
  229. const dset: number[] = [];
  230. const numd = _lenCodes(U.dtree, dset);
  231. for(let i = 0; i < lset.length; i += 2) U.ihst[lset[i]]++;
  232. for(let i = 0; i < dset.length; i += 2) U.ihst[dset[i]]++;
  233. const MH = _hufTree(U.ihst, U.itree, 7);
  234. let numh = 19;
  235. while(numh > 4 && U.itree[(U.ordr[numh - 1] << 1) + 1] === 0) numh--;
  236. return [ML, MD, MH, numl, numd, numh, lset, dset] as const;
  237. }
  238. function contSize(tree: number[], hst: NumberArray) {
  239. let s = 0;
  240. for(let i = 0; i < hst.length; i++) s += hst[i] * tree[(i << 1) + 1];
  241. return s;
  242. }
  243. function _codeTiny(set: number[], tree: number[], out: Uint8Array, pos: number) {
  244. for(let i = 0; i < set.length; i += 2) {
  245. const l = set[i], rst = set[i + 1]; // console.log(l, pos, tree[(l<<1)+1]);
  246. pos = _writeLit(l, tree, out, pos);
  247. const rsl = l === 16 ? 2 : (l === 17 ? 3 : 7);
  248. if(l > 15) {
  249. _putsE(out, pos, rst);
  250. pos += rsl;
  251. }
  252. }
  253. return pos;
  254. }
  255. function _lenCodes(tree: number[], set: number[]) {
  256. let len = tree.length;
  257. while(len !== 2 && tree[len - 1] === 0) len -= 2; // when no distances, keep one code with length 0
  258. for(let i = 0; i < len; i += 2) {
  259. const l = tree[i + 1], nxt = (i + 3 < len ? tree[i + 3] : -1), nnxt = (i + 5 < len ? tree[i + 5] : -1), prv = (i === 0 ? -1 : tree[i - 1]);
  260. if(l === 0 && nxt === l && nnxt === l) {
  261. let lz = i + 5;
  262. while(lz + 2 < len && tree[lz + 2] === l) lz += 2;
  263. const zc = Math.min((lz + 1 - i) >>> 1, 138);
  264. if(zc < 11) set.push(17, zc - 3);
  265. else set.push(18, zc - 11);
  266. i += zc * 2 - 2;
  267. } else if(l === prv && nxt === l && nnxt === l) {
  268. let lz = i + 5;
  269. while(lz + 2 < len && tree[lz + 2] === l) lz += 2;
  270. const zc = Math.min((lz + 1 - i) >>> 1, 6);
  271. set.push(16, zc - 3);
  272. i += zc * 2 - 2;
  273. } else {
  274. set.push(l, 0);
  275. }
  276. }
  277. return len >>> 1;
  278. }
  279. function _goodIndex(v: number, arr: number[]) {
  280. let i = 0;
  281. if(arr[i | 16] <= v) i |= 16;
  282. if(arr[i | 8] <= v) i |= 8;
  283. if(arr[i | 4] <= v) i |= 4;
  284. if(arr[i | 2] <= v) i |= 2;
  285. if(arr[i | 1] <= v) i |= 1;
  286. return i;
  287. }
  288. function _writeLit(ch: number, ltree: number[], out: Uint8Array, pos: number) {
  289. _putsF(out, pos, ltree[ch << 1]);
  290. return pos + ltree[(ch << 1) + 1];
  291. }
  292. function _putsE(dt: NumberArray, pos: number, val: number) {
  293. val = val << (pos & 7);
  294. const o = (pos >>> 3);
  295. dt[o] |= val;
  296. dt[o + 1] |= (val >>> 8);
  297. }
  298. function _putsF(dt: NumberArray, pos: number, val: number) {
  299. val = val << (pos & 7);
  300. const o = (pos >>> 3);
  301. dt[o] |= val;
  302. dt[o + 1] |= (val >>> 8);
  303. dt[o + 2] |= (val >>> 16);
  304. }