// encoder.ts
  1. /**
  2. * Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * @author David Sehnal <david.sehnal@gmail.com>
  5. * @author Alexander Rose <alexander.rose@weirdbyte.de>
  6. */
  7. import Iterator from 'mol-data/iterator'
  8. import { Column, Table, Database, DatabaseCollection } from 'mol-data/db'
  9. import { Tensor } from 'mol-math/linear-algebra'
  10. import EncoderBase from '../encoder'
  11. import { ArrayEncoder, ArrayEncoding } from '../../common/binary-cif';
  // TODO: support for "coordinate fields"; make "coordinate precision" a parameter of the encoder.
  // TODO: automatically detect the "precision" of floating point arrays.
  // TODO: automatically detect the "best encoding" for integer arrays. This could be used for "fixed-point" as well.
  // TODO: add "repeat encoding"? [[1, 2], [1, 2], [1, 2]] --- Repeat ---> [[1, 2], 3]
  // TODO: add "higher level fields"? (i.e. a generalization of repeat)
  // TODO: align "data blocks" to 8-byte offsets for fast typed array windows? (probably needs some testing whether this is actually the case, too)
  /**
   * Describes a single field of a category: its name, its declared type and
   * the accessors used to obtain a value for a given key.
   *
   * @typeParam Key  Type of the key passed to the accessors.
   * @typeParam Data Type of the data object passed to the accessors.
   */
  export interface Field<Key = any, Data = any> {
      /** Name of the field. */
      name: string;
      /** Declared value type of the field (see `Field.Type`). */
      type: Field.Type;
      /**
       * Returns the field value for the given key and data.
       * NOTE(review): `index` is presumably the position of the key within the
       * category being written - confirm against the encoder implementations.
       */
      value(key: Key, data: Data, index: number): string | number;
      /** Optional accessor returning the `Column.ValueKind` for the given key and data. */
      valueKind?: (key: Key, data: Data) => Column.ValueKind;
      /** Optional format used when nothing more specific is supplied (presumably by a formatter - confirm). */
      defaultFormat?: Field.Format;
      /** Optional predicate over the data; presumably decides whether the field is written at all - confirm against the encoders. */
      shouldInclude?: (data: Data) => boolean;
  }
  /** Types and factory helpers that accompany the `Field` interface. */
  export namespace Field {
      /** Value type a field can declare. */
      export const enum Type { Str, Int, Float }

      /** Optional formatting / encoding hints for a field. */
      export interface Format {
          digitCount?: number;
          encoder?: ArrayEncoder;
          typedArray?: ArrayEncoding.TypedArrayCtor;
      }

      /** Optional parameters shared by all field factory functions. */
      export type ParamsBase<K, D> = {
          valueKind?: (k: K, d: D) => Column.ValueKind;
          encoder?: ArrayEncoder;
          shouldInclude?: (data: D) => boolean;
      }

      /**
       * Creates a string field.
       *
       * @param name   Field name.
       * @param value  Accessor producing the string value.
       * @param params Optional value kind, encoder and inclusion predicate.
       * @returns A field of type `Type.Str`; `defaultFormat` is only set when an encoder was given.
       */
      export function str<K, D = any>(name: string, value: (k: K, d: D, index: number) => string, params?: ParamsBase<K, D>): Field<K, D> {
          const encoder = params && params.encoder;
          return {
              name,
              type: Type.Str,
              value,
              valueKind: params && params.valueKind,
              defaultFormat: encoder ? { encoder } : undefined,
              shouldInclude: params && params.shouldInclude
          };
      }

      /**
       * Creates an integer field.
       *
       * @param name   Field name.
       * @param value  Accessor producing the numeric value.
       * @param params Optional value kind, encoder, typed array constructor and inclusion predicate.
       * @returns A field of type `Type.Int`; `defaultFormat` is set whenever `params` is given.
       */
      export function int<K, D = any>(name: string, value: (k: K, d: D, index: number) => number, params?: ParamsBase<K, D> & { typedArray?: ArrayEncoding.TypedArrayCtor }): Field<K, D> {
          let defaultFormat: Format | undefined;
          if (params) {
              defaultFormat = { encoder: params.encoder, typedArray: params.typedArray };
          }
          return {
              name,
              type: Type.Int,
              value,
              valueKind: params && params.valueKind,
              defaultFormat,
              shouldInclude: params && params.shouldInclude
          };
      }

      /**
       * Creates a floating point field.
       *
       * @param name   Field name.
       * @param value  Accessor producing the numeric value.
       * @param params Optional value kind, encoder, typed array constructor, digit count and inclusion predicate.
       * @returns A field of type `Type.Float`; `defaultFormat` is set whenever `params` is given.
       */
      export function float<K, D = any>(name: string, value: (k: K, d: D, index: number) => number, params?: ParamsBase<K, D> & { typedArray?: ArrayEncoding.TypedArrayCtor, digitCount?: number }): Field<K, D> {
          let defaultFormat: Format | undefined;
          if (params) {
              // A missing digitCount simply stays undefined.
              defaultFormat = { encoder: params.encoder, typedArray: params.typedArray, digitCount: params.digitCount };
          }
          return {
              name,
              type: Type.Float,
              value,
              valueKind: params && params.valueKind,
              defaultFormat,
              shouldInclude: params && params.shouldInclude
          };
      }

      /**
       * Creates an integer field whose value is the `index` argument plus one.
       * The field uses `Int32Array` and an encoder chained from delta,
       * run-length and integer-packing encodings.
       *
       * @param name Field name.
       */
      export function index(name: string) {
          const encoder = ArrayEncoding.by(ArrayEncoding.delta).and(ArrayEncoding.runLength).and(ArrayEncoding.integerPacking);
          return int(name, (_key, _data, i) => i + 1, { typedArray: Int32Array, encoder });
      }
  }
  /**
   * A named group of fields together with the data they read from.
   *
   * @typeParam Key  Type of the keys handed to the field accessors.
   * @typeParam Data Type of the data object handed to the field accessors.
   */
  export interface Category<Key = any, Data = any> {
      /** Name of the category. */
      name: string;
      /** Fields that belong to the category. */
      fields: Field<Key, Data>[];
      /** Optional data object passed on to the field accessors. */
      data?: Data;
      /** Number of rows in the category. */
      rowCount: number;
      /** Optional factory returning an iterator over the keys. */
      keys?: () => Iterator<Key>;
  }
  /** Constants, helper types and factories that accompany the `Category` interface. */
  export namespace Category {
      /** A category named 'empty' with no rows and no fields. */
      export const Empty: Category = { name: 'empty', fields: [], rowCount: 0 };

      /** Function that produces a category from a context value. */
      export interface Provider<Ctx = any> {
          (ctx: Ctx): Category
      }

      /** Decides which categories and fields are included. */
      export interface Filter {
          includeCategory(categoryName: string): boolean;
          includeField(categoryName: string, fieldName: string): boolean;
      }

      /** Filter that includes every category and every field. */
      export const DefaultFilter: Filter = {
          includeCategory: () => true,
          includeField: () => true
      }

      /** Supplies an optional format for a given category / field pair. */
      export interface Formatter {
          getFormat(categoryName: string, fieldName: string): Field.Format | undefined
      }

      /** Formatter that never supplies a format. */
      export const DefaultFormatter: Formatter = {
          getFormat: () => undefined
      }

      /**
       * Builds a category from a table.
       *
       * @param name    Name of the resulting category.
       * @param table   Table providing the schema (used to derive the fields) and the data.
       * @param indices Optional row indices; when given, the row count is `indices.length`
       *                and `keys` iterates over these indices. Otherwise the row count is
       *                `table._rowCount` and no `keys` factory is set.
       */
      export function ofTable(name: string, table: Table<Table.Schema>, indices?: ArrayLike<number>): Category<number, Table<Table.Schema>> {
          const fields = cifFieldsFromTableSchema(table._schema);
          if (!indices) {
              return { name, fields, data: table, rowCount: table._rowCount };
          }
          const selected = indices;
          return { name, fields, data: table, rowCount: selected.length, keys: () => Iterator.Array(selected) };
      }
  }
  /**
   * Encoder interface for writing categories into data blocks.
   *
   * @typeParam T Type returned by `getData` (a string or a `Uint8Array` by default).
   */
  export interface Encoder<T = string | Uint8Array> extends EncoderBase {
      /** Sets (or, when omitted, presumably resets - confirm) the category / field filter. */
      setFilter(filter?: Category.Filter): void;
      /** Sets (or, when omitted, presumably resets - confirm) the formatter. */
      setFormatter(formatter?: Category.Formatter): void;
      /** Starts a new data block with the given header. */
      startDataBlock(header: string): void;
      /** Writes the category produced by `category`; `contexts` are optional values for the provider. */
      writeCategory<Ctx>(category: Category.Provider<Ctx>, contexts?: Ctx[]): void;
      /** Returns the encoded data. */
      getData(): T;
  }
  /** Helpers for writing whole databases through an `Encoder`. */
  export namespace Encoder {
      /**
       * Starts a data block called `name` and writes one category per table
       * listed in `database._tableNames`.
       *
       * @param encoder  Encoder receiving the data block and categories.
       * @param name     Header of the data block.
       * @param database Database whose tables are written.
       */
      export function writeDatabase(encoder: Encoder, name: string, database: Database<Database.Schema>) {
          encoder.startDataBlock(name);
          for (const tableName of database._tableNames) {
              // The provider is evaluated by the encoder, not here.
              const provider = () => Category.ofTable(tableName, database[tableName]);
              encoder.writeCategory(provider);
          }
      }

      /**
       * Writes every database of the collection, using each key of the
       * collection as the data block name.
       *
       * @param encoder    Encoder receiving the data blocks.
       * @param collection Databases keyed by name.
       */
      export function writeDatabaseCollection(encoder: Encoder, collection: DatabaseCollection<Database.Schema>) {
          Object.keys(collection).forEach(name => {
              writeDatabase(encoder, name, collection[name]);
          });
      }
  }
  /**
   * Creates an accessor that reads row `i` of column `k` from the data
   * object via the column's `value` method.
   */
  function columnValue(k: string) {
      return function readValue(i: number, d: any) {
          const column = d[k];
          return column.value(i);
      };
  }
  /**
   * Creates an accessor that reads row `i` of list column `k` and joins the
   * list with the separator found in the column's schema.
   */
  function columnListValue(k: string) {
      return function readJoinedList(i: number, d: any) {
          const column = d[k];
          const items = column.value(i);
          return items.join(column.schema.separator);
      };
  }
  /**
   * Creates an accessor that reads row `i` of tensor column `k` and returns
   * the component at `coords`, looked up through the `get` method of the
   * tensor space stored in the column's schema.
   */
  function columnTensorValue(k: string, ...coords: number[]) {
      return function readTensorComponent(i: number, d: any) {
          const column = d[k];
          const space = column.schema.space;
          return space.get(column.value(i), ...coords);
      };
  }
  /**
   * Creates an accessor that returns the value kind of row `i` of column `k`
   * via the column's `valueKind` method.
   */
  function columnValueKind(k: string) {
      return function readValueKind(i: number, d: any) {
          const column = d[k];
          return column.valueKind(i);
      };
  }
  /**
   * Expands a tensor column into one float field per tensor component.
   *
   * Field names are the column name followed by one 1-based bracketed index
   * per dimension, e.g. `name[1]`, `name[2][3]` or `name[1][2][3]`. Fields are
   * emitted with the first dimension varying slowest.
   *
   * @param field Name of the tensor column.
   * @param space Tensor space providing `rank` and `dimensions`.
   * @returns The generated field definitions.
   * @throws Error when the rank is not 1, 2 or 3.
   */
  function getTensorDefinitions(field: string, space: Tensor.Space) {
      const definitions: Field[] = [];
      const valueKind = columnValueKind(field);

      // Appends the field for one component, identified by its 0-based coordinates.
      const addComponent = (...coords: number[]) => {
          const suffix = coords.map(c => `[${c + 1}]`).join('');
          definitions.push({
              name: `${field}${suffix}`,
              type: Field.Type.Float,
              value: columnTensorValue(field, ...coords),
              valueKind
          });
      };

      switch (space.rank) {
          case 1: {
              const n0 = space.dimensions[0];
              for (let a = 0; a < n0; a++) addComponent(a);
              break;
          }
          case 2: {
              const n0 = space.dimensions[0], n1 = space.dimensions[1];
              for (let a = 0; a < n0; a++) {
                  for (let b = 0; b < n1; b++) addComponent(a, b);
              }
              break;
          }
          case 3: {
              const n0 = space.dimensions[0], n1 = space.dimensions[1], n2 = space.dimensions[2];
              for (let a = 0; a < n0; a++) {
                  for (let b = 0; b < n1; b++) {
                      for (let c = 0; c < n2; c++) addComponent(a, b, c);
                  }
              }
              break;
          }
          default:
              throw new Error('Tensors with rank > 3 or rank 0 are currently not supported.');
      }

      return definitions;
  }
  /**
   * Derives the list of fields for a table schema.
   *
   * 'int', 'float' and 'str' columns map to a single field of the matching
   * type, 'list' columns map to a string field holding the joined list, and
   * 'tensor' columns are expanded into one float field per component.
   *
   * @param schema Table schema whose keys are the column names.
   * @returns The fields, in the order of `Object.keys(schema)`.
   * @throws Error when a column has an unknown `valueType`.
   */
  function cifFieldsFromTableSchema(schema: Table.Schema) {
      const fields: Field[] = [];

      // Builds a single-column field; list columns pass their own value accessor.
      const single = (name: string, type: Field.Type, value = columnValue(name)): Field => {
          return { name, type, value, valueKind: columnValueKind(name) };
      };

      Object.keys(schema).forEach(k => {
          const t = schema[k];
          switch (t.valueType) {
              case 'int':
                  fields.push(single(k, Field.Type.Int));
                  break;
              case 'float':
                  fields.push(single(k, Field.Type.Float));
                  break;
              case 'str':
                  fields.push(single(k, Field.Type.Str));
                  break;
              case 'list':
                  fields.push(single(k, Field.Type.Str, columnListValue(k)));
                  break;
              case 'tensor':
                  fields.push(...getTensorDefinitions(k, t.space));
                  break;
              default:
                  throw new Error(`Unknown valueType ${t.valueType}`);
          }
      });

      return fields;
  }