binary.ts 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199
/**
 * Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
 *
 * Adapted from CIFTools.js (https://github.com/dsehnal/CIFTools.js)
 *
 * @author David Sehnal <david.sehnal@gmail.com>
 */
  8. import { Column } from '../../../../mol-data/db'
  9. import encodeMsgPack from '../../../common/msgpack/encode'
  10. import {
  11. EncodedColumn, EncodedData, EncodedFile, EncodedDataBlock, EncodedCategory, ArrayEncoder, ArrayEncoding as E, VERSION
  12. } from '../../../common/binary-cif'
  13. import { Field, Category, Encoder } from '../encoder'
  14. import Writer from '../../writer'
  15. import { getIncludedFields, getCategoryInstanceData, CategoryInstanceData } from './util';
  16. import { classifyIntArray, classifyFloatArray } from '../../../common/binary-cif/classifier';
  17. import { ArrayCtor } from '../../../../mol-util/type-helpers';
  18. export interface BinaryEncodingProvider {
  19. get(category: string, field: string): ArrayEncoder | undefined;
  20. }
  21. export default class BinaryEncoder implements Encoder<Uint8Array> {
  22. private data: EncodedFile;
  23. private dataBlocks: EncodedDataBlock[] = [];
  24. private encodedData: Uint8Array;
  25. private filter: Category.Filter = Category.DefaultFilter;
  26. private formatter: Category.Formatter = Category.DefaultFormatter;
  27. binaryEncodingProvider: BinaryEncodingProvider | undefined = void 0;
  28. setFilter(filter?: Category.Filter) {
  29. this.filter = filter || Category.DefaultFilter;
  30. }
  31. isCategoryIncluded(name: string) {
  32. return this.filter.includeCategory(name);
  33. }
  34. setFormatter(formatter?: Category.Formatter) {
  35. this.formatter = formatter || Category.DefaultFormatter;
  36. }
  37. startDataBlock(header: string) {
  38. this.dataBlocks.push({
  39. header: (header || '').replace(/[ \n\t]/g, '').toUpperCase(),
  40. categories: []
  41. });
  42. }
  43. writeCategory<Ctx>(category: Category<Ctx>, context?: Ctx, options?: Encoder.WriteCategoryOptions) {
  44. if (!this.data) {
  45. throw new Error('The writer contents have already been encoded, no more writing.');
  46. }
  47. if (!this.dataBlocks.length) {
  48. throw new Error('No data block created.');
  49. }
  50. if (!options?.ignoreFilter && !this.filter.includeCategory(category.name)) return;
  51. const { instance, rowCount, source } = getCategoryInstanceData(category, context);
  52. if (!rowCount) return;
  53. const cat: EncodedCategory = { name: '_' + category.name, columns: [], rowCount };
  54. const fields = getIncludedFields(instance);
  55. for (const f of fields) {
  56. if (!this.filter.includeField(category.name, f.name)) continue;
  57. const format = this.formatter.getFormat(category.name, f.name);
  58. cat.columns.push(encodeField(category.name, f, source, rowCount, format, this.binaryEncodingProvider, this.autoClassify));
  59. }
  60. // no columns included.
  61. if (!cat.columns.length) return;
  62. this.dataBlocks[this.dataBlocks.length - 1].categories.push(cat);
  63. }
  64. encode() {
  65. if (this.encodedData) return;
  66. this.encodedData = encodeMsgPack(this.data);
  67. this.data = <any>null;
  68. this.dataBlocks = <any>null;
  69. }
  70. writeTo(writer: Writer) {
  71. writer.writeBinary(this.encodedData);
  72. }
  73. getData() {
  74. this.encode();
  75. return this.encodedData;
  76. }
  77. getSize() {
  78. return this.encodedData.length;
  79. }
  80. constructor(encoder: string, encodingProvider: BinaryEncodingProvider | undefined, private autoClassify: boolean) {
  81. this.binaryEncodingProvider = encodingProvider;
  82. this.data = {
  83. encoder,
  84. version: VERSION,
  85. dataBlocks: this.dataBlocks
  86. };
  87. }
  88. }
  89. function getArrayCtor(field: Field, format: Field.Format | undefined): ArrayCtor<string | number> {
  90. if (format && format.typedArray) return format.typedArray;
  91. if (field.defaultFormat && field.defaultFormat.typedArray) return field.defaultFormat.typedArray;
  92. if (field.type === Field.Type.Str) return Array;
  93. if (field.type === Field.Type.Int) return Int32Array;
  94. return Float64Array;
  95. }
  96. function getDefaultEncoder(type: Field.Type): ArrayEncoder {
  97. if (type === Field.Type.Str) return ArrayEncoder.by(E.stringArray);
  98. return ArrayEncoder.by(E.byteArray);
  99. }
  100. function tryGetEncoder(categoryName: string, field: Field, format: Field.Format | undefined, provider: BinaryEncodingProvider | undefined) {
  101. if (format && format.encoder) {
  102. return format.encoder;
  103. } else if (field.defaultFormat && field.defaultFormat.encoder) {
  104. return field.defaultFormat.encoder;
  105. } else if (provider) {
  106. return provider.get(categoryName, field.name);
  107. } else {
  108. return void 0;
  109. }
  110. }
  111. function classify(type: Field.Type, data: ArrayLike<any>) {
  112. if (type === Field.Type.Str) return ArrayEncoder.by(E.stringArray);
  113. if (type === Field.Type.Int) return classifyIntArray(data);
  114. return classifyFloatArray(data);
  115. }
  116. function encodeField(categoryName: string, field: Field, data: CategoryInstanceData['source'], totalCount: number,
  117. format: Field.Format | undefined, encoderProvider: BinaryEncodingProvider | undefined, autoClassify: boolean): EncodedColumn {
  118. const { array, allPresent, mask } = getFieldData(field, getArrayCtor(field, format), totalCount, data);
  119. let encoder: ArrayEncoder | undefined = tryGetEncoder(categoryName, field, format, encoderProvider);
  120. if (!encoder) {
  121. if (autoClassify) encoder = classify(field.type, array);
  122. else encoder = getDefaultEncoder(field.type);
  123. }
  124. const encoded = encoder.encode(array);
  125. let maskData: EncodedData | undefined = void 0;
  126. if (!allPresent) {
  127. const maskRLE = ArrayEncoder.by(E.runLength).and(E.byteArray).encode(mask);
  128. if (maskRLE.data.length < mask.length) {
  129. maskData = maskRLE;
  130. } else {
  131. maskData = ArrayEncoder.by(E.byteArray).encode(mask);
  132. }
  133. }
  134. return {
  135. name: field.name,
  136. data: encoded,
  137. mask: maskData
  138. };
  139. }
  140. function getFieldData(field: Field<any, any>, arrayCtor: ArrayCtor<string | number>, totalCount: number, data: CategoryInstanceData['source']) {
  141. const isStr = field.type === Field.Type.Str;
  142. const array = new arrayCtor(totalCount);
  143. const mask = new Uint8Array(totalCount);
  144. const valueKind = field.valueKind;
  145. const getter = field.value;
  146. let allPresent = true;
  147. let offset = 0;
  148. for (let _d = 0; _d < data.length; _d++) {
  149. const d = data[_d].data;
  150. const keys = data[_d].keys();
  151. while (keys.hasNext) {
  152. const key = keys.move();
  153. const p = valueKind ? valueKind(key, d) : Column.ValueKind.Present;
  154. if (p !== Column.ValueKind.Present) {
  155. mask[offset] = p;
  156. if (isStr)
  157. array[offset] = '';
  158. allPresent = false;
  159. } else {
  160. mask[offset] = Column.ValueKind.Present;
  161. array[offset] = getter(key, d, offset);
  162. }
  163. offset++;
  164. }
  165. }
  166. return { array, allPresent, mask };
  167. }