array-encoder.ts 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396
  1. /**
  2. * Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * Adapted from CIFTools.js (https://github.com/dsehnal/CIFTools.js; MIT) and MMTF (https://github.com/rcsb/mmtf-javascript/; MIT)
  5. *
  6. * @author David Sehnal <david.sehnal@gmail.com>
  7. * @author Alexander Rose <alexander.rose@weirdbyte.de>
  8. */
  9. import { ChunkedArray } from 'mol-data/util'
  10. import { Encoding, EncodedData } from './encoding'
  /**
   * A chain of encoding steps that turns an array into bytes plus the list of
   * encodings that were applied.
   */
  export interface ArrayEncoder {
      /** Produces an encoder that also applies the provider `f`. */
      and(f: ArrayEncoding.Provider): ArrayEncoder;

      /** Runs the chain on `data` and returns the encoded result. */
      encode(data: ArrayLike<any>): EncodedData;
  }
  /**
   * ArrayEncoder backed by an ordered list of providers. Each provider receives
   * the data produced by the previous one.
   */
  export class ArrayEncoderImpl implements ArrayEncoder {
      constructor(private providers: ArrayEncoding.Provider[]) {
      }

      /**
       * Returns a new encoder whose chain is this chain followed by `f`.
       * The provider list of this instance is not modified.
       */
      and(f: ArrayEncoding.Provider) {
          return new ArrayEncoderImpl([...this.providers, f]);
      }

      /**
       * Applies every provider in order, collecting the encodings each one reports.
       *
       * @throws Error if a provider reports no encodings, or if the data left after
       *         the last provider is not a Uint8Array.
       */
      encode(data: ArrayLike<any>): EncodedData {
          const encoding: Encoding[] = [];
          let current: any = data;

          for (const provider of this.providers) {
              const step = provider(current);
              if (!step.encodings.length) {
                  throw new Error('Encodings must be non-empty.');
              }
              current = step.data;
              for (const applied of step.encodings) {
                  encoding.push(applied);
              }
          }

          if (current instanceof Uint8Array) {
              return { encoding, data: current };
          }
          throw new Error('The encoding must result in a Uint8Array. Fix your encoding chain.');
      }
  }
  export namespace ArrayEncoder {
      /** Starts an encoder chain that consists of the single provider `f`. */
      export function by(f: ArrayEncoding.Provider): ArrayEncoder {
          const chain = [f];
          return new ArrayEncoderImpl(chain);
      }
  }
  47. export namespace ArrayEncoding {
  /**
   * Constructor that takes an element count and yields a numeric array-like
   * exposing its backing buffer and byte layout.
   */
  export type TypedArrayCtor = {
      new(size: number): ArrayLike<number> & {
          buffer: ArrayBuffer,
          byteLength: number,
          byteOffset: number,
          BYTES_PER_ELEMENT: number
      }
  }
  /** What a single encoding step hands back. */
  export interface Result {
      /** Descriptors of the encoding(s) the step applied, in application order. */
      encodings: Encoding[];
      /** The transformed data; it becomes the input of the next step in a chain. */
      data: any;
  }
  /** An encoding step: takes the current data and returns the transformed data with its encodings. */
  export type Provider = {
      (data: any): Result
  }
  /** Starts an encoder chain that consists of the single provider `f`. */
  export function by(f: Provider): ArrayEncoder {
      const chain: Provider[] = [f];
      return new ArrayEncoderImpl(chain);
  }
  /**
   * Byte-array step for Uint8 input: the array is already in byte form, so it is
   * returned as-is (same object, no copy) and tagged as a Uint8 ByteArray.
   */
  function uint8(data: Uint8Array): Result {
      const encodings = [{ kind: 'ByteArray', type: Encoding.IntDataType.Uint8 }];
      return { encodings, data };
  }
  /**
   * Byte-array step for Int8 input: reinterprets the same memory as unsigned bytes
   * (no copy) and tags the result as an Int8 ByteArray.
   *
   * @param data the signed bytes to expose; may be a view into a larger buffer.
   * @returns a Uint8Array covering exactly the bytes of `data`.
   */
  function int8(data: Int8Array): Result {
      return {
          encodings: [{ kind: 'ByteArray', type: Encoding.IntDataType.Int8 }],
          // The length must be passed explicitly: without it the view would run to the end
          // of the backing buffer and emit trailing bytes that do not belong to `data`
          // whenever `data` is itself a view (e.g. created by subarray()).
          data: new Uint8Array(data.buffer, data.byteOffset, data.byteLength)
      };
  }
  /**
   * Element writers keyed by data type. Each one stores `value` as element number
   * `index` of the given DataView (byte position = index * element size), always
   * in little-endian byte order.
   */
  const writers = {
      [Encoding.IntDataType.Int16]: (view: DataView, index: number, value: number) => { view.setInt16(2 * index, value, true); },
      [Encoding.IntDataType.Uint16]: (view: DataView, index: number, value: number) => { view.setUint16(2 * index, value, true); },
      [Encoding.IntDataType.Int32]: (view: DataView, index: number, value: number) => { view.setInt32(4 * index, value, true); },
      [Encoding.IntDataType.Uint32]: (view: DataView, index: number, value: number) => { view.setUint32(4 * index, value, true); },
      [Encoding.FloatDataType.Float32]: (view: DataView, index: number, value: number) => { view.setFloat32(4 * index, value, true); },
      [Encoding.FloatDataType.Float64]: (view: DataView, index: number, value: number) => { view.setFloat64(8 * index, value, true); }
  }
  /** Size in bytes of one element of each multi-byte data type. */
  const byteSizes = {
      [Encoding.IntDataType.Int16]: Int16Array.BYTES_PER_ELEMENT,
      [Encoding.IntDataType.Uint16]: Uint16Array.BYTES_PER_ELEMENT,
      [Encoding.IntDataType.Int32]: Int32Array.BYTES_PER_ELEMENT,
      [Encoding.IntDataType.Uint32]: Uint32Array.BYTES_PER_ELEMENT,
      [Encoding.FloatDataType.Float32]: Float32Array.BYTES_PER_ELEMENT,
      [Encoding.FloatDataType.Float64]: Float64Array.BYTES_PER_ELEMENT
  }
  /**
   * Serializes a typed numeric array to bytes and tags the result with a
   * ByteArray encoding carrying the element type.
   *
   * Int8 and Uint8 inputs are delegated to the dedicated single-byte helpers.
   * Every other type is written element by element through a DataView using the
   * little-endian writer registered for that type.
   */
  export function byteArray(data: Encoding.FloatArray | Encoding.IntArray) {
      const type = Encoding.getDataType(data);

      if (type === Encoding.IntDataType.Int8) {
          return int8(data as Int8Array);
      }
      if (type === Encoding.IntDataType.Uint8) {
          return uint8(data as Uint8Array);
      }

      const count = data.length;
      const bytes = new Uint8Array(count * byteSizes[type]);
      const write = writers[type];
      const view = new DataView(bytes.buffer);
      for (let index = 0; index < count; index++) {
          write(view, index, data[index]);
      }

      return {
          encodings: [{ kind: 'ByteArray', type } as Encoding.ByteArray],
          data: bytes
      };
  }
  /**
   * Fixed-point step: multiplies every value by `factor`, rounds to the nearest
   * integer and stores it in an Int32Array. The factor and the source type are
   * recorded in the encoding.
   */
  function _fixedPoint(data: Encoding.FloatArray, factor: number): Result {
      const srcType = Encoding.getDataType(data) as Encoding.FloatDataType;
      const scaled = Int32Array.from(data, (value: number) => Math.round(value * factor));
      return {
          encodings: [{ kind: 'FixedPoint', factor, srcType }],
          data: scaled
      };
  }
  /** Creates a provider that applies fixed-point encoding with the given `factor`. */
  export function fixedPoint(factor: number): Provider {
      return function (data) {
          return _fixedPoint(data as Encoding.FloatArray, factor);
      };
  }
  /**
   * Interval quantization step: maps every value onto one of `numSteps` evenly
   * spaced levels spanning [min, max] and stores the level index.
   *
   * Index 0 stands for `min` and index `numSteps - 1` for `max`; values outside
   * the interval are clamped to those two indices. If `max < min` the bounds are
   * swapped before quantizing, and the swapped values are what the encoding records.
   *
   * @param data      values to quantize.
   * @param min       one bound of the interval.
   * @param max       the other bound of the interval.
   * @param numSteps  number of quantization levels.
   * @param arrayType constructor of the integer array that receives the indices.
   * @returns the indices together with an IntervalQuantization encoding. Empty input
   *          short-circuits before the bounds are swapped and yields an empty Int32Array.
   */
  function _intervalQuantizaiton(data: Encoding.FloatArray, min: number, max: number, numSteps: number, arrayType: new (size: number) => Encoding.IntArray): Result {
      let srcType = Encoding.getDataType(data) as Encoding.FloatDataType;
      if (!data.length) {
          return {
              encodings: [{ kind: 'IntervalQuantization', min, max, numSteps, srcType }],
              data: new Int32Array(0)
          };
      }
      if (max < min) {
          let t = min;
          min = max;
          max = t;
      }
      let delta = (max - min) / (numSteps - 1);
      // The step width is derived from (numSteps - 1) intervals, so the highest valid index
      // is numSteps - 1. Writing numSteps would be one step beyond `max` and can overflow
      // the output type (e.g. 256 wraps to 0 in a Uint8Array).
      let lastStep = numSteps - 1;
      let output = new arrayType(data.length);
      for (let i = 0, n = data.length; i < n; i++) {
          let v = data[i];
          if (v <= min) output[i] = 0;
          else if (v >= max) output[i] = lastStep;
          else output[i] = (Math.round((v - min) / delta)) | 0;
      }
      return {
          encodings: [{ kind: 'IntervalQuantization', min, max, numSteps, srcType }],
          data: output
      };
  }
  /**
   * Creates a provider that applies interval quantization with the given bounds and
   * number of steps. Indices are stored in `arrayType`, which defaults to Int32Array.
   */
  export function intervalQuantizaiton(min: number, max: number, numSteps: number, arrayType: new (size: number) => Encoding.IntArray = Int32Array): Provider {
      return function (data) {
          return _intervalQuantizaiton(data as Encoding.FloatArray, min, max, numSteps, arrayType);
      };
  }
  /**
   * Run-length step: collapses each run of equal consecutive values into a
   * (value, run length) pair. The pairs are stored flat in an Int32Array, and the
   * source type and source length are recorded in the encoding.
   *
   * Input whose data type cannot be determined is first copied into an Int32Array
   * and treated as Int32.
   */
  export function runLength(data: Encoding.IntArray): Result {
      let srcType = Encoding.getDataType(data) as Encoding.IntDataType;
      if (srcType === void 0) {
          data = new Int32Array(data);
          srcType = Encoding.IntDataType.Int32;
      }

      const count = data.length;
      if (count === 0) {
          return {
              encodings: [{ kind: 'RunLength', srcType, srcSize: 0 }],
              data: new Int32Array(0)
          };
      }

      // First pass: count the runs so the output can be allocated exactly.
      let runs = 1;
      for (let i = 1; i < count; i++) {
          if (data[i] !== data[i - 1]) runs++;
      }

      // Second pass: emit one (value, length) pair whenever a run ends.
      const pairs = new Int32Array(2 * runs);
      let cursor = 0;
      let runStart = 0;
      for (let i = 1; i <= count; i++) {
          if (i === count || data[i] !== data[i - 1]) {
              pairs[cursor++] = data[i - 1];
              pairs[cursor++] = i - runStart;
              runStart = i;
          }
      }

      return {
          encodings: [{ kind: 'RunLength', srcType, srcSize: count }],
          data: pairs
      };
  }
  /**
   * Delta step: replaces every element after the first by its difference from the
   * preceding element. The first output element is 0 and the original first value
   * is recorded as `origin` in the encoding. The output has the same array type as
   * the (possibly converted) input.
   *
   * @throws Error if the input is not a signed integer type.
   */
  export function delta(data: Int8Array | Int16Array | Int32Array): Result {
      if (!Encoding.isSignedIntegerDataType(data)) {
          throw new Error('Only signed integer types can be encoded using delta encoding.');
      }

      let srcType = Encoding.getDataType(data) as Encoding.IntDataType;
      if (srcType === void 0) {
          data = new Int32Array(data);
          srcType = Encoding.IntDataType.Int32;
      }

      // Allocate the output with the same constructor as the input.
      const OutputCtor = (data as any).constructor;
      const count = data.length;
      if (count === 0) {
          return {
              encodings: [{ kind: 'Delta', origin: 0, srcType }],
              data: new OutputCtor(0)
          };
      }

      const diffs = new OutputCtor(count);
      const origin = data[0];
      let previous = origin;
      for (let i = 1; i < count; i++) {
          const current = data[i];
          diffs[i] = current - previous;
          previous = current;
      }
      // The first value lives in the encoding's origin, not in the data.
      diffs[0] = 0;

      return {
          encodings: [{ kind: 'Delta', origin, srcType }],
          data: diffs
      };
  }
  /** Reports whether the array contains at least one negative value. */
  function isSigned(data: Int32Array) {
      return data.some(value => value < 0);
  }
  /**
   * Counts how many packed words the values need when a single word can hold
   * values from `-upperLimit - 1` up to `upperLimit`.
   *
   * A value that is an exact non-zero multiple of the applicable limit needs one
   * extra word, because a word equal to the limit is always followed by another
   * word (a terminating 0 in that case).
   */
  function packingSize(data: Int32Array, upperLimit: number) {
      const lowerLimit = -upperLimit - 1;
      let words = 0;
      for (const value of data) {
          if (value === 0) {
              words += 1;
              continue;
          }
          const limit = value > 0 ? upperLimit : lowerLimit;
          words += Math.ceil(value / limit);
          if (value % limit === 0) words += 1;
      }
      return words;
  }
  /**
   * Chooses the word width (1, 2 or 4 bytes) for integer packing by comparing
   * output sizes in bytes.
   *
   * 4 bytes is chosen when the unpacked Int32 form is strictly smaller than the
   * 2-byte packing. Otherwise 2 bytes is chosen when it is strictly smaller than
   * the 1-byte packing, and 1 byte in all remaining cases (including ties).
   * Signed limits are used when the data contains a negative value.
   *
   * @returns whether signed words are needed, the number of words the packed
   *          output will contain, and the word width in bytes.
   */
  function determinePacking(data: Int32Array): { isSigned: boolean, size: number, bytesPerElement: number } {
      const signed = isSigned(data);
      const size8 = packingSize(data, signed ? 0x7F : 0xFF);
      const size16 = packingSize(data, signed ? 0x7FFF : 0xFFFF);

      const bytes32 = data.length * 4;
      const bytes16 = size16 * 2;
      const bytes8 = size8;

      if (bytes32 < bytes16) {
          // no packing pays off, keep 4-byte words
          return { isSigned: signed, size: data.length, bytesPerElement: 4 };
      }
      if (bytes16 < bytes8) {
          // 2-byte words give the smallest output
          return { isSigned: signed, size: size16, bytesPerElement: 2 };
      }
      // 1-byte words give the smallest output
      return { isSigned: signed, size: size8, bytesPerElement: 1 };
  }
  /**
   * Packs Int32 values into 1- or 2-byte words according to `packing`.
   *
   * A value that does not fit one word is split: the limit (upper limit for
   * non-negative values, lower limit for negative ones) is emitted repeatedly and
   * subtracted until the remainder is strictly inside the limit; the remainder is
   * then emitted as the final word of that value.
   *
   * The packed words are serialized with byteArray, and the returned encodings
   * are the IntegerPacking descriptor followed by byteArray's encoding.
   */
  function _integerPacking(data: Int32Array, packing: { isSigned: boolean, size: number, bytesPerElement: number }): Result {
      const signed = packing.isSigned;
      const singleByte = packing.bytesPerElement === 1;

      let upperLimit: number;
      let packed: Int8Array | Int16Array | Uint8Array | Uint16Array;
      if (signed) {
          upperLimit = singleByte ? 0x7F : 0x7FFF;
          packed = singleByte ? new Int8Array(packing.size) : new Int16Array(packing.size);
      } else {
          upperLimit = singleByte ? 0xFF : 0xFFFF;
          packed = singleByte ? new Uint8Array(packing.size) : new Uint16Array(packing.size);
      }
      const lowerLimit = -upperLimit - 1;

      const srcSize = data.length;
      let cursor = 0;
      for (let i = 0; i < srcSize; i++) {
          let rest = data[i];
          if (rest >= 0) {
              while (rest >= upperLimit) {
                  packed[cursor++] = upperLimit;
                  rest -= upperLimit;
              }
          } else {
              while (rest <= lowerLimit) {
                  packed[cursor++] = lowerLimit;
                  rest -= lowerLimit;
              }
          }
          packed[cursor++] = rest;
      }

      const bytes = byteArray(packed);
      return {
          encodings: [
              {
                  kind: 'IntegerPacking',
                  byteCount: packing.bytesPerElement,
                  isUnsigned: !signed,
                  srcSize
              },
              bytes.encodings[0]
          ],
          data: bytes.data
      };
  }
  /**
   * Packs an Int32 array into 1- or 2-byte words when determinePacking selects
   * one of those widths. When 4 bytes is selected no packing takes place and the
   * data is returned as a plain Int32 byte array.
   *
   * @throws Error if `data` is not an Int32Array.
   */
  export function integerPacking(data: Int32Array): Result {
      if (data instanceof Int32Array) {
          const packing = determinePacking(data);
          return packing.bytesPerElement === 4
              ? byteArray(data) // no packing done, Int32 encoding will be used
              : _integerPacking(data, packing);
      }
      throw new Error('Integer packing can only be applied to Int32 data.');
  }
  /**
   * String-array step: stores each distinct string once and represents the input
   * as indices into that list.
   *
   * - `stringData` is the concatenation of the distinct strings in order of first
   *   appearance.
   * - `offsets` starts at 0 and gains the accumulated length after every new
   *   string; it is encoded with delta + integer packing.
   * - The returned data holds one index per input element (-1 for null or
   *   undefined), encoded with delta + run-length + integer packing.
   */
  export function stringArray(data: string[]): Result {
      // Prototype-less object so that no string collides with an inherited property.
      const indexByString: any = Object.create(null);
      const uniqueStrings: string[] = [];
      const offsets = ChunkedArray.create<number>(s => new Int32Array(s), 1, 1024, true);
      const indices = new Int32Array(data.length);

      ChunkedArray.add(offsets, 0);

      let totalLength = 0;
      let cursor = 0;
      for (const s of data) {
          // null / undefined strings are marked with -1
          if (s === null || s === void 0) {
              indices[cursor++] = -1;
              continue;
          }

          let index = indexByString[s];
          if (index === void 0) {
              // first occurrence: register the string and record where it ends
              index = uniqueStrings.length;
              uniqueStrings[index] = s;
              indexByString[s] = index;
              totalLength += s.length;
              ChunkedArray.add(offsets, totalLength);
          }
          indices[cursor++] = index;
      }

      const encodedOffsets = ArrayEncoder.by(delta).and(integerPacking).encode(ChunkedArray.compact(offsets));
      const encodedIndices = ArrayEncoder.by(delta).and(runLength).and(integerPacking).encode(indices);

      return {
          encodings: [{
              kind: 'StringArray',
              dataEncoding: encodedIndices.encoding,
              stringData: uniqueStrings.join(''),
              offsetEncoding: encodedOffsets.encoding,
              offsets: encodedOffsets.data
          }],
          data: encodedIndices.data
      };
  }
  346. }