浏览代码

async zip/gzip decoding

Alexander Rose 5 年之前
父节点
当前提交
61a5d18be6
共有 4 个文件被更改,包括 127 次插入和 96 次删除
  1. 10 10
      src/mol-util/_spec/zip.spec.ts
  2. 9 8
      src/mol-util/data-source.ts
  3. 94 58
      src/mol-util/zip/inflate.ts
  4. 14 20
      src/mol-util/zip/zip.ts

+ 10 - 10
src/mol-util/_spec/zip.spec.ts

@@ -5,22 +5,22 @@
  */
 
 import { deflate, inflate, unzip, zip } from '../zip/zip'
+import { SyncRuntimeContext } from '../../mol-task/execution/synchronous'
 
 describe('zip', () => {
-    it('roundtrip deflate/inflate', () => {
+    it('roundtrip deflate/inflate', async () => {
         const data = new Uint8Array([1, 2, 3, 4, 5, 6, 7])
         const deflated = deflate(data)
-        console.log(deflated)
-        const inflated = inflate(deflated)
-        console.log(inflated)
+        const inflated = await inflate(SyncRuntimeContext, deflated)
+        expect(inflated).toEqual(data)
     })
 
-    it('roundtrip zip', () => {
-        const zipped = zip({
+    it('roundtrip zip', async () => {
+        const data = {
             'test.foo': new Uint8Array([1, 2, 3, 4, 5, 6, 7])
-        })
-        console.log(zipped)
-        const unzipped = unzip(zipped)
-        console.log(unzipped)
+        }
+        const zipped = zip(data)
+        const unzipped = await unzip(SyncRuntimeContext, zipped)
+        expect(unzipped).toEqual(data)
     })
 })

+ 9 - 8
src/mol-util/data-source.ts

@@ -119,19 +119,19 @@ function getCompression(name: string) {
             DataCompressionMethod.None
 }
 
-function decompress(data: Uint8Array, compression: DataCompressionMethod): Uint8Array {
+async function decompress(ctx: RuntimeContext, data: Uint8Array, compression: DataCompressionMethod): Promise<Uint8Array> {
     switch (compression) {
         case DataCompressionMethod.None: return data
-        case DataCompressionMethod.Gzip: return ungzip(data)
+        case DataCompressionMethod.Gzip: return ungzip(ctx, data)
         case DataCompressionMethod.Zip:
-            const parsed = unzip(data.buffer)
+            const parsed = await unzip(ctx, data.buffer)
             const names = Object.keys(parsed)
             if (names.length !== 1) throw new Error('can only decompress zip files with a single entry')
             return parsed[names[0]] as Uint8Array
     }
 }
 
-function processFile<T extends DataType>(reader: FileReader, type: T, compression: DataCompressionMethod): DataResponse<T> {
+async function processFile<T extends DataType>(ctx: RuntimeContext, reader: FileReader, type: T, compression: DataCompressionMethod): Promise<DataResponse<T>> {
     const { result } = reader
 
     let data = result instanceof ArrayBuffer ? new Uint8Array(result) : result
@@ -139,9 +139,10 @@ function processFile<T extends DataType>(reader: FileReader, type: T, compressio
 
     if (compression !== DataCompressionMethod.None) {
         if (!(data instanceof Uint8Array)) throw new Error('need Uint8Array for decompression')
-        const decompressed = decompress(data, compression);
+        const decompressed = await decompress(ctx, data, compression);
         if (type === 'string') {
-            data = utf8Read(decompressed, 0, decompressed.length);
+            await ctx.update({ message: 'Decoding text...' });
+            data = utf8Read(decompressed, 0, decompressed.length)
         } else {
             data = decompressed
         }
@@ -176,8 +177,8 @@ function readFromFileInternal<T extends DataType>(file: File, type: T): Task<Dat
             await ctx.update({ message: 'Opening file...', canAbort: true });
             const fileReader = await readData(ctx, 'Reading...', reader);
 
-            await ctx.update({ message: 'Parsing file...', canAbort: false });
-            return processFile(fileReader, type, compression);
+            await ctx.update({ message: 'Processing file...', canAbort: false });
+            return await processFile(ctx, fileReader, type, compression);
         } finally {
             reader = void 0;
         }

+ 94 - 58
src/mol-util/zip/inflate.ts

@@ -9,44 +9,65 @@
 
 import { NumberArray } from '../type-helpers';
 import { U, makeCodes, codes2map } from './util';
+import { RuntimeContext } from '../../mol-task';
 
-export function _inflate(data: Uint8Array, buf?: Uint8Array) {
-    if(data[0] === 3 && data[1] === 0) return (buf ? buf : new Uint8Array(0));
-    // var F=UZIP.F, bitsF = F._bitsF, bitsE = F._bitsE, decodeTiny = F._decodeTiny, makeCodes = F.makeCodes, codes2map=F.codes2map, get17 = F._get17;
-    // var U = F.U;
-
+function InflateContext(data: Uint8Array, buf?: Uint8Array) {
     const noBuf = buf === undefined;
     if(buf === undefined) buf = new Uint8Array((data.length>>>2)<<3);
+    return {
+        data,
+        buf,
+        noBuf,
+        BFINAL: 0,
+        off: 0,
+        pos: 0
+    }
+}
+type InflateContext = ReturnType<typeof InflateContext>
 
-    let BFINAL=0, BTYPE=0, HLIT=0, HDIST=0, HCLEN=0, ML=0, MD=0;
-    let off = 0, pos = 0;
-    let lmap, dmap;
+function inflateBlocks(ctx: InflateContext, count: number) {
+    const { data, noBuf } = ctx
+    let { buf, BFINAL, off, pos } = ctx
 
-    while(BFINAL === 0) {
-        BFINAL = _bitsF(data, pos  , 1);
-        BTYPE  = _bitsF(data, pos+1, 2);
+    let iBlock = 0
+
+    while(BFINAL === 0 && iBlock < count) {
+        let lmap, dmap;
+        let ML = 0, MD = 0;
+
+        BFINAL = _bitsF(data, pos, 1);
+        iBlock += 1
+        const BTYPE = _bitsF(data, pos + 1, 2);
         pos+=3;
 
         if(BTYPE === 0) {
-            if((pos&7) !== 0) pos+=8-(pos&7);
-            const p8 = (pos>>>3)+4, len = data[p8-4]|(data[p8-3]<<8);  // console.log(len);//bitsF(data, pos, 16),
-            if(noBuf) buf=_check(buf, off+len);
-            buf.set(new Uint8Array(data.buffer, data.byteOffset+p8, len), off);
-            // for(var i=0; i<len; i++) buf[off+i] = data[p8+i];
-            // for(var i=0; i<len; i++) if(buf[off+i] != data[p8+i]) throw "e";
-            pos = ((p8+len)<<3);  off+=len;  continue;
+            // uncompressed block
+            if((pos&7) !== 0) pos += 8 - (pos&7);
+            const p8 = (pos>>>3) + 4
+            const len = data[p8-4] | (data[p8-3]<<8);
+            if(noBuf) buf=_check(buf, off + len);
+            buf.set(new Uint8Array(data.buffer, data.byteOffset + p8, len), off);
+            pos = ((p8+len)<<3);
+            off += len;
+            continue;
         }
-        if(noBuf) buf=_check(buf, off+(1<<17));  // really not enough in many cases (but PNG and ZIP provide buffer in advance)
+
+        // grow output buffer if not provided
+        if(noBuf) buf = _check(buf, off + (1<<17));
+
         if(BTYPE === 1) {
+            // block compressed with fixed Huffman codes
             lmap = U.flmap;
             dmap = U.fdmap;
-            ML = (1<<9)-1;
-            MD = (1<<5)-1;
+            ML = (1<<9) - 1;
+            MD = (1<<5) - 1;
         } else if(BTYPE === 2) {
-            HLIT  = _bitsE(data, pos   , 5)+257;
-            HDIST = _bitsE(data, pos+ 5, 5)+  1;
-            HCLEN = _bitsE(data, pos+10, 4)+  4;  pos+=14;
-            // const ppos = pos;
+            // block compressed with dynamic Huffman codes
+            const HLIT = _bitsE(data, pos, 5) + 257;
+            const HDIST = _bitsE(data, pos + 5, 5) + 1;
+            const HCLEN = _bitsE(data, pos + 10, 4) + 4;
+            pos += 14;
+
             for(let i=0; i<38; i+=2) {
                 U.itree[i]=0;
                 U.itree[i+1]=0;
@@ -57,32 +78,31 @@ export function _inflate(data: Uint8Array, buf?: Uint8Array) {
                 U.itree[(U.ordr[i]<<1)+1] = l;
                 if(l>tl) tl = l;
             }
-            pos+=3*HCLEN;  // console.log(itree);
+            pos += 3 * HCLEN;
             makeCodes(U.itree, tl);
             codes2map(U.itree, tl, U.imap);
 
             lmap = U.lmap;  dmap = U.dmap;
 
             pos = _decodeTiny(U.imap, (1<<tl)-1, HLIT+HDIST, data, pos, U.ttree);
-            const mx0 = _copyOut(U.ttree,    0, HLIT , U.ltree);  ML = (1<<mx0)-1;
-            const mx1 = _copyOut(U.ttree, HLIT, HDIST, U.dtree);  MD = (1<<mx1)-1;
+            const mx0 = _copyOut(U.ttree,    0, HLIT , U.ltree);
+            ML = (1<<mx0)-1;
+            const mx1 = _copyOut(U.ttree, HLIT, HDIST, U.dtree);
+            MD = (1<<mx1)-1;
 
-            // var ml = decodeTiny(U.imap, (1<<tl)-1, HLIT , data, pos, U.ltree); ML = (1<<(ml>>>24))-1;  pos+=(ml&0xffffff);
             makeCodes(U.ltree, mx0);
             codes2map(U.ltree, mx0, lmap);
 
-            // var md = decodeTiny(U.imap, (1<<tl)-1, HDIST, data, pos, U.dtree); MD = (1<<(md>>>24))-1;  pos+=(md&0xffffff);
             makeCodes(U.dtree, mx1);
             codes2map(U.dtree, mx1, dmap);
         } else {
             throw new Error(`unknown BTYPE ${BTYPE}`)
         }
 
-        // var ooff=off, opos=pos;
         while(true) {
             const code = lmap[_get17(data, pos) & ML];
             pos += code&15;
-            const lit = code >>> 4;  // U.lhst[lit]++;
+            const lit = code >>> 4;
             if((lit >>> 8) === 0) {
                 buf[off++] = lit;
             } else if(lit === 256) {
@@ -94,32 +114,44 @@ export function _inflate(data: Uint8Array, buf?: Uint8Array) {
                     end = off + (ebs>>>3) + _bitsE(data, pos, ebs&7);
                     pos += ebs&7;
                 }
-                // UZIP.F.dst[end-off]++;
 
-                const dcode = dmap[_get17(data, pos) & MD];  pos += dcode&15;
+                const dcode = dmap[_get17(data, pos) & MD];
+                pos += dcode&15;
                 const dlit = dcode>>>4;
-                const dbs = U.ddef[dlit], dst = (dbs>>>4) + _bitsF(data, pos, dbs&15);  pos += dbs&15;
+                const dbs = U.ddef[dlit]
+                const dst = (dbs>>>4) + _bitsF(data, pos, dbs&15);
+                pos += dbs&15;
 
-                // var o0 = off-dst, stp = Math.min(end-off, dst);
-                // if(stp>20) while(off<end) {  buf.copyWithin(off, o0, o0+stp);  off+=stp;  }  else
-                // if(end-dst<=off) buf.copyWithin(off, off-dst, end-dst);  else
-                // if(dst==1) buf.fill(buf[off-1], off, end);  else
                 if(noBuf) buf = _check(buf, off+(1<<17));
                 while(off<end) {
-                    buf[off]=buf[off++-dst];
-                    buf[off]=buf[off++-dst];
-                    buf[off]=buf[off++-dst];
-                    buf[off]=buf[off++-dst];
+                    buf[off] = buf[off++-dst];
+                    buf[off] = buf[off++-dst];
+                    buf[off] = buf[off++-dst];
+                    buf[off] = buf[off++-dst];
                 }
-                off=end;
-                // while(off!=end) {  buf[off]=buf[off++-dst];  }
+                off = end;
             }
         }
-        // console.log(off-ooff, (pos-opos)>>>3);
     }
-    // console.log(UZIP.F.dst);
-    // console.log(tlen, dlen, off-tlen+tcnt);
-    return buf.length === off ? buf : buf.slice(0, off);
+
+    ctx.buf = buf
+    ctx.BFINAL = BFINAL
+    ctx.off = off
+    ctx.pos = pos
+}
+
+// https://tools.ietf.org/html/rfc1951
+export async function _inflate(runtime: RuntimeContext, data: Uint8Array, buf?: Uint8Array) {
+    if(data[0] === 3 && data[1] === 0) return (buf ? buf : new Uint8Array(0));
+
+    const ctx = InflateContext(data, buf)
+    while(ctx.BFINAL === 0) {
+        if (runtime.shouldUpdate) {
+            await runtime.update({ message: 'Inflating blocks...', current: ctx.pos, max: data.length })
+        }
+        inflateBlocks(ctx, 100)
+    }
+    return ctx.buf.length === ctx.off ? ctx.buf : ctx.buf.slice(0, ctx.off);
 }
 
 function _check(buf: Uint8Array, len: number) {
@@ -127,27 +159,30 @@ function _check(buf: Uint8Array, len: number) {
     if(len <= bl) return buf;
     const nbuf = new Uint8Array(Math.max(bl << 1, len));
     nbuf.set(buf, 0);
-    // for(var i=0; i<bl; i+=4) {  nbuf[i]=buf[i];  nbuf[i+1]=buf[i+1];  nbuf[i+2]=buf[i+2];  nbuf[i+3]=buf[i+3];  }
     return nbuf;
 }
 
 function _decodeTiny(lmap: NumberArray, LL: number, len: number, data: Uint8Array, pos: number, tree: number[]) {
     let i = 0;
-    while(i<len) {
+    while(i < len) {
         const code = lmap[_get17(data, pos)&LL];
         pos += code&15;
         const lit = code>>>4;
-        if(lit<=15) {
+        if(lit <= 15) {
             tree[i]=lit;
             i++;
         } else {
             let ll = 0, n = 0;
             if(lit === 16) {
-                n = (3  + _bitsE(data, pos, 2));  pos += 2;  ll = tree[i-1];
+                n = (3  + _bitsE(data, pos, 2));
+                pos += 2;
+                ll = tree[i-1];
             } else if(lit === 17) {
-                n = (3  + _bitsE(data, pos, 3));  pos += 3;
+                n = (3  + _bitsE(data, pos, 3));
+                pos += 3;
             } else if(lit === 18) {
-                n = (11 + _bitsE(data, pos, 7));  pos += 7;
+                n = (11 + _bitsE(data, pos, 7));
+                pos += 7;
             }
             const ni = i+n;
             while(i<ni) {
@@ -160,15 +195,16 @@ function _decodeTiny(lmap: NumberArray, LL: number, len: number, data: Uint8Arra
 }
 
 function _copyOut(src: number[], off: number, len: number, tree: number[]) {
-    let mx=0, i=0, tl=tree.length>>>1;
-    while(i<len) {
+    let mx=0, i=0
+    const tl=tree.length>>>1;
+    while(i < len) {
         let v=src[i+off];
         tree[(i<<1)]=0;
         tree[(i<<1)+1]=v;
         if(v>mx)mx=v;
         i++;
     }
-    while(i<tl ) {
+    while(i < tl) {
         tree[(i<<1)]=0;
         tree[(i<<1)+1]=0;
         i++;

+ 14 - 20
src/mol-util/zip/zip.ts

@@ -13,8 +13,9 @@ import { writeUint, writeUshort, sizeUTF8, writeUTF8, readUshort, readUint, read
 import { crc, adler } from './checksum';
 import { _inflate } from './inflate';
 import { _deflateRaw } from './deflate';
+import { RuntimeContext } from '../../mol-task';
 
-export function unzip(buf: ArrayBuffer, onlyNames = false) {
+export async function unzip(runtime: RuntimeContext, buf: ArrayBuffer, onlyNames = false) {
     const out: { [k: string]: Uint8Array | { size: number, csize: number } } = Object.create(null);
     const data = new Uint8Array(buf);
     let eocd = data.length-4;
@@ -57,13 +58,13 @@ export function unzip(buf: ArrayBuffer, onlyNames = false) {
         const roff = readUint(data, o);  o+=4;
         o += nl + el + cl;
 
-        _readLocal(data, roff, out, csize, usize, onlyNames);
+        await _readLocal(runtime, data, roff, out, csize, usize, onlyNames);
     }
     // console.log(out);
     return out;
 }
 
-function _readLocal(data: Uint8Array, o: number, out: { [k: string]: Uint8Array | { size: number, csize: number } }, csize: number, usize: number, onlyNames: boolean) {
+async function _readLocal(runtime: RuntimeContext, data: Uint8Array, o: number, out: { [k: string]: Uint8Array | { size: number, csize: number } }, csize: number, usize: number, onlyNames: boolean) {
     // const sign  = readUint(data, o);
     o+=4;
     // const ver   = readUshort(data, o);
@@ -83,16 +84,15 @@ function _readLocal(data: Uint8Array, o: number, out: { [k: string]: Uint8Array
     // var usize = rUi(data, o);  o+=4;
     o+=8;
 
-    const nlen  = readUshort(data, o);
+    const nlen = readUshort(data, o);
     o+=2;
-    const elen  = readUshort(data, o);
+    const elen = readUshort(data, o);
     o+=2;
 
-    const name =  readUTF8(data, o, nlen);
+    const name = readUTF8(data, o, nlen);
     o += nlen;  // console.log(name);
     o += elen;
 
-    // console.log(sign.toString(16), ver, gpflg, cmpr, crc32.toString(16), "csize, usize", csize, usize, nlen, elen, name, o);
     if(onlyNames) {
         out[name] = { size: usize, csize };
         return;
@@ -103,33 +103,27 @@ function _readLocal(data: Uint8Array, o: number, out: { [k: string]: Uint8Array
         out[name] = new Uint8Array(file.buffer.slice(o, o+csize));
     } else if(cmpr === 8) {
         const buf = new Uint8Array(usize);
-        inflateRaw(file, buf);
-        // var nbuf = pako["inflateRaw"](file);
-        // if(usize>8514000) {
-        //     //console.log(PUtils.readASCII(buf , 8514500, 500));
-        //     //console.log(PUtils.readASCII(nbuf, 8514500, 500));
-        // }
-        // for(var i=0; i<buf.length; i++) if(buf[i]!=nbuf[i]) {  console.log(buf.length, nbuf.length, usize, i);  throw "e";  }
+        await inflateRaw(runtime, file, buf);
         out[name] = buf;
     }
     else throw `unknown compression method: ${cmpr}`;
 }
 
-export function inflateRaw(file: Uint8Array, buf?: Uint8Array) {
-    return _inflate(file, buf);
+export async function inflateRaw(runtime: RuntimeContext, file: Uint8Array, buf?: Uint8Array) {
+    return _inflate(runtime, file, buf);
 }
 
-export function inflate(file: Uint8Array, buf?: Uint8Array) {
+export function inflate(runtime: RuntimeContext, file: Uint8Array, buf?: Uint8Array) {
     // const CMF = file[0]
     // const FLG = file[1]
     // const CM = (CMF&15)
     // const CINFO = (CMF>>>4);
     // console.log(CM, CINFO,CMF,FLG);
-    return inflateRaw(new Uint8Array(file.buffer, file.byteOffset+2, file.length-6), buf);
+    return inflateRaw(runtime, new Uint8Array(file.buffer, file.byteOffset+2, file.length-6), buf);
 }
 
 // https://tools.ietf.org/html/rfc1952
-export function ungzip(file: Uint8Array, buf?: Uint8Array) {
+export async function ungzip(runtime: RuntimeContext, file: Uint8Array, buf?: Uint8Array) {
     // const id1 = file[0]
     // const id2 = file[1]
     // const cm = file[2]
@@ -170,7 +164,7 @@ export function ungzip(file: Uint8Array, buf?: Uint8Array) {
     if (buf === undefined) buf = new Uint8Array(isize)
 
     const blocks = new Uint8Array(file.buffer, file.byteOffset + o, file.length - o - 8)
-    const inflated = inflateRaw(blocks, buf);
+    const inflated = await inflateRaw(runtime, blocks, buf);
     const crcValue = crc(inflated, 0, inflated.length)
     if (crc32 !== crcValue) {
         console.error("ungzip: checksums don't match")