encoder.ts 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321
  1. /**
  2. * Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * @author David Sehnal <david.sehnal@gmail.com>
  5. * @author Alexander Rose <alexander.rose@weirdbyte.de>
  6. */
  7. import Iterator from '../../../mol-data/iterator'
  8. import { Column, Table, Database, DatabaseCollection } from '../../../mol-data/db'
  9. import { Tensor } from '../../../mol-math/linear-algebra'
  10. import EncoderBase from '../encoder'
  11. import { ArrayEncoder, ArrayEncoding } from '../../common/binary-cif';
  12. // TODO: support for "coordinate fields", make "coordinate precision" a parameter of the encoder
  13. // TODO: automatically detect "precision" of floating point arrays.
  14. // TODO: automatically detect "best encoding" for integer arrays. This could be used for "fixed-point" as well.
  15. // TODO: add "repeat encoding"? [[1, 2], [1, 2], [1, 2]] --- Repeat ---> [[1, 2], 3]
  16. // TODO: Add "higher level fields"? (i.e. generalization of repeat)
  17. // TODO: align "data blocks" to 8 byte offsets for fast typed array windows? (probably needs some testing if this is actually the case too)
  /**
   * Describes one output column of a category: its name, the kind of value it
   * holds and the accessor that produces the value for a row.
   */
  export interface Field<Key = any, Data = any> {
      /** Name of the column. */
      name: string,
      /** Kind of value produced by `value` (string, integer or float). */
      type: Field.Type,
      /** Produces the cell value; called with a key, the data object and an index. */
      value(key: Key, data: Data, index: number): string | number,
      /** Optional accessor for the `Column.ValueKind` of a cell. */
      valueKind?: (key: Key, data: Data) => Column.ValueKind,
      /** Optional formatting/encoding hints attached to the field. */
      defaultFormat?: Field.Format,
      /** Optional predicate on the data object; presumably decides whether the field is written at all (confirm in the encoders). */
      shouldInclude?: (data: Data) => boolean
  }

  export namespace Field {
      /** Value kinds a field can declare. */
      export const enum Type { Str, Int, Float }

      /** Formatting hints a field can carry; every entry is optional. */
      export interface Format {
          digitCount?: number,
          encoder?: ArrayEncoder,
          typedArray?: ArrayEncoding.TypedArrayCtor
      }

      /** Options accepted by every field factory below. */
      export type ParamsBase<K, D> = {
          valueKind?: (k: K, d: D) => Column.ValueKind,
          encoder?: ArrayEncoder,
          shouldInclude?: (data: D) => boolean
      }

      /**
       * Assembles a field object. `valueKind` and `shouldInclude` are copied from
       * `params` when it is given; the format is decided by the calling factory.
       */
      function make<K, D>(
          name: string,
          type: Type,
          value: (k: K, d: D, index: number) => string | number,
          params: ParamsBase<K, D> | undefined,
          defaultFormat: Format | undefined
      ): Field<K, D> {
          return {
              name,
              type,
              value,
              valueKind: params && params.valueKind,
              defaultFormat,
              shouldInclude: params && params.shouldInclude
          };
      }

      /** Creates a string field. A default format is only attached when an encoder is supplied. */
      export function str<K, D = any>(name: string, value: (k: K, d: D, index: number) => string, params?: ParamsBase<K, D>): Field<K, D> {
          const format: Format | undefined = params && params.encoder ? { encoder: params.encoder } : void 0;
          return make(name, Type.Str, value, params, format);
      }

      /** Creates an integer field. Whenever `params` is given, a default format holding its encoder and typed array is attached. */
      export function int<K, D = any>(name: string, value: (k: K, d: D, index: number) => number, params?: ParamsBase<K, D> & { typedArray?: ArrayEncoding.TypedArrayCtor }): Field<K, D> {
          const format: Format | undefined = params
              ? { encoder: params.encoder, typedArray: params.typedArray }
              : void 0;
          return make(name, Type.Int, value, params, format);
      }

      /** Creates a float field. Same as `int`, plus an optional `digitCount` in the default format. */
      export function float<K, D = any>(name: string, value: (k: K, d: D, index: number) => number, params?: ParamsBase<K, D> & { typedArray?: ArrayEncoding.TypedArrayCtor, digitCount?: number }): Field<K, D> {
          const format: Format | undefined = params
              ? { encoder: params.encoder, typedArray: params.typedArray, digitCount: params.digitCount }
              : void 0;
          return make(name, Type.Float, value, params, format);
      }

      /**
       * Creates an integer field whose value is the index argument plus one,
       * stored as Int32 and encoded with delta -> run-length -> integer packing.
       */
      export function index(name: string) {
          const encoder = ArrayEncoding.by(ArrayEncoding.delta).and(ArrayEncoding.runLength).and(ArrayEncoding.integerPacking);
          return int(name, (_key, _data, rowIndex) => rowIndex + 1, { typedArray: Int32Array, encoder });
      }

      /** Fluent helper that collects fields in the order they are added. */
      export class Builder<K = number, D = any, N extends string = string> {
          private fields: Field<K, D>[] = [];

          /** Appends a field created by `Field.index`. */
          index(name: N) {
              const field = Field.index(name);
              this.fields.push(field);
              return this;
          }

          /** Appends a string field. */
          str(name: N, value: (k: K, d: D, index: number) => string, params?: ParamsBase<K, D>) {
              const field = Field.str(name, value, params);
              this.fields.push(field);
              return this;
          }

          /** Appends an integer field. */
          int(name: N, value: (k: K, d: D, index: number) => number, params?: ParamsBase<K, D> & { typedArray?: ArrayEncoding.TypedArrayCtor }) {
              const field = Field.int(name, value, params);
              this.fields.push(field);
              return this;
          }

          /**
           * Appends one integer field per accessor in `values`; the fields are
           * named `name[1]`, `name[2]`, ... in accessor order.
           */
          vec(name: N, values: ((k: K, d: D, index: number) => number)[], params?: ParamsBase<K, D> & { typedArray?: ArrayEncoding.TypedArrayCtor }) {
              for (let component = 0; component < values.length; component++) {
                  const componentName = `${name}[${component + 1}]`;
                  this.fields.push(Field.int(componentName, values[component], params));
              }
              return this;
          }

          /** Appends a float field. */
          float(name: N, value: (k: K, d: D, index: number) => number, params?: ParamsBase<K, D> & { typedArray?: ArrayEncoding.TypedArrayCtor, digitCount?: number }) {
              const field = Field.float(name, value, params);
              this.fields.push(field);
              return this;
          }

          /** Appends every field of an existing array-like collection, in order. */
          many(fields: ArrayLike<Field<K, D>>) {
              for (let position = 0; position < fields.length; position++) {
                  this.fields.push(fields[position]);
              }
              return this;
          }

          /** Returns the internal field array (not a copy). */
          getFields() {
              return this.fields;
          }
      }

      /** Creates an empty `Builder`. */
      export function build<K = number, D = any, N extends string = string>() {
          return new Builder<K, D, N>();
      }
  }
  /**
   * A named category whose concrete content (fields + data sources) is produced
   * on demand from a context object.
   */
  export interface Category<Ctx = any> {
      /** Name of the category. */
      name: string,
      /** Builds the fields and data sources for the given context. */
      instance(ctx: Ctx): Category.Instance
  }

  export namespace Category {
      /** An instance with no fields and no data sources. */
      export const Empty: Instance = { fields: [], source: [] };

      /**
       * One chunk of rows for a category.
       * `keys` is optional; `ofTable` omits it when the whole table is written and
       * supplies an iterator over the selected indices otherwise.
       */
      export interface DataSource<Key = any, Data = any> {
          data?: Data,
          rowCount: number,
          keys?: () => Iterator<Key>
      }

      /** Concrete content of a category: the fields plus the sources they read from. */
      export interface Instance<Key = any, Data = any> {
          fields: Field[],
          source: DataSource<Key, Data>[]
      }

      /** Decides which categories and which of their fields are accepted. */
      export interface Filter {
          includeCategory(categoryName: string): boolean,
          includeField(categoryName: string, fieldName: string): boolean,
      }

      /**
       * Builds a filter from a line-based configuration string.
       *
       * Each non-empty line is one directive:
       * - `category`        whitelists a category,
       * - `category.field`  whitelists a field (and thereby its category),
       * - a leading `!`     turns the directive into a blacklist entry.
       *
       * Blacklist entries always win. If any category is whitelisted, categories
       * outside the whitelist are rejected. If a category has whitelisted fields,
       * only those fields of that category are accepted.
       *
       * A blacklisted field (`!category.field`) only removes that field. It does
       * not whitelist its category, so a configuration consisting solely of field
       * blacklist entries leaves all other categories included.
       *
       * @param directives configuration text, directives separated by line breaks
       * @returns the filter described by the directives
       */
      export function filterOf(directives: string): Filter {
          const categoryWhitelist = new Set<string>();
          const categoryBlacklist = new Set<string>();
          const fieldWhitelist = new Set<string>();
          const fieldBlacklist = new Set<string>();
          // categories for which at least one field has been whitelisted
          const categoriesWithFieldWhitelist = new Set<string>();

          for (const line of directives.split(/[\r\n]+/)) {
              let directive = line.trim();
              // allow for empty lines in config
              if (directive.length === 0) continue;

              // let ! denote blacklisted entries
              const blacklisted = directive.charAt(0) === '!';
              if (blacklisted) directive = directive.slice(1);

              const parts = directive.split('.');
              const category = parts[0];
              const field = parts[1];

              if (!field) {
                  (blacklisted ? categoryBlacklist : categoryWhitelist).add(directive);
                  continue;
              }

              if (blacklisted) {
                  // removing a single field must not restrict which categories are written
                  fieldBlacklist.add(directive);
              } else {
                  fieldWhitelist.add(directive);
                  // ensure categories are aware about whitelisted columns
                  categoryWhitelist.add(category);
                  categoriesWithFieldWhitelist.add(category);
              }
          }

          return {
              includeCategory(cat) {
                  // blacklist has higher priority
                  if (categoryBlacklist.has(cat)) return false;
                  // if there is a whitelist, the category has to be explicitly allowed
                  return categoryWhitelist.size === 0 || categoryWhitelist.has(cat);
              },
              includeField(cat, field) {
                  // column names are assumed to follow the pattern 'category_name.column_name'
                  const full = cat + '.' + field;
                  if (fieldBlacklist.has(full)) return false;
                  // without whitelist entries for this category every field passes,
                  // otherwise the field must be specifically allowed
                  return !categoriesWithFieldWhitelist.has(cat) || fieldWhitelist.has(full);
              }
          };
      }

      /** Filter that accepts every category and every field. */
      export const DefaultFilter: Filter = {
          includeCategory(cat) { return true; },
          includeField(cat, field) { return true; }
      }

      /** Supplies an optional format override for a field of a category. */
      export interface Formatter {
          getFormat(categoryName: string, fieldName: string): Field.Format | undefined
      }

      /** Formatter that never overrides anything (always returns `undefined`). */
      export const DefaultFormatter: Formatter = {
          getFormat(cat, field) { return void 0; }
      }

      /**
       * Creates a category instance from a table, with fields derived from the
       * table schema.
       *
       * @param table   the table to write
       * @param indices optional row selection; when given, the source iterates
       *                exactly these indices and reports `indices.length` rows,
       *                otherwise it reports `table._rowCount` rows and has no keys
       */
      export function ofTable(table: Table, indices?: ArrayLike<number>): Category.Instance {
          const fields = cifFieldsFromTableSchema(table._schema);
          if (indices) {
              return {
                  fields,
                  source: [{ data: table, rowCount: indices.length, keys: () => Iterator.Array(indices) }]
              };
          }
          return {
              fields,
              source: [{ data: table, rowCount: table._rowCount }]
          };
      }
  }
  /**
   * Encoder that writes categories into data blocks and returns the result as
   * `T` (a string or a byte array by default).
   */
  export interface Encoder<T = string | Uint8Array> extends EncoderBase {
      /** Sets the category/field filter; the argument may be omitted. */
      setFilter(filter?: Category.Filter): void,
      /** Tells whether the category with the given name is included. */
      isCategoryIncluded(name: string): boolean,
      /** Sets the formatter; the argument may be omitted. */
      setFormatter(formatter?: Category.Formatter): void,
      /** Starts a data block with the given header. */
      startDataBlock(header: string): void,
      /** Writes one category, optionally with a context and write options. */
      writeCategory<Ctx>(category: Category<Ctx>, context?: Ctx, options?: Encoder.WriteCategoryOptions): void,
      /** Returns the encoded data. */
      getData(): T
  }

  export namespace Encoder {
      /** Options for `Encoder.writeCategory`. */
      export interface WriteCategoryOptions {
          /** Presumably bypasses the encoder's filter for this category (confirm in the implementations). */
          ignoreFilter?: boolean
      }

      /**
       * Starts a data block called `name` and writes every table listed in
       * `database._tableNames` as a category of the same name.
       */
      export function writeDatabase(encoder: Encoder, name: string, database: Database<Database.Schema>) {
          encoder.startDataBlock(name);
          for (const tableName of database._tableNames) {
              // the instance is created lazily, only when the encoder asks for it
              const instance = () => Category.ofTable(database[tableName]);
              encoder.writeCategory({ name: tableName, instance });
          }
      }

      /** Writes every database of the collection as its own data block, named after its key. */
      export function writeDatabaseCollection(encoder: Encoder, collection: DatabaseCollection<Database.Schema>) {
          Object.keys(collection).forEach(name => {
              writeDatabase(encoder, name, collection[name]);
          });
      }
  }
  /**
   * Creates an accessor for column `k`: given a row index `i` and a data object
   * `d`, it returns `d[k].value(i)`.
   */
  function columnValue(k: string) {
      return function (i: number, d: any) {
          const column = d[k];
          return column.value(i);
      };
  }
  /**
   * Creates an accessor for list column `k`: the list stored at row `i` is
   * joined into one string using the column schema's `separator`.
   */
  function columnListValue(k: string) {
      return function (i: number, d: any) {
          const column = d[k];
          const items = column.value(i);
          return items.join(column.schema.separator);
      };
  }
  /**
   * Creates an accessor for tensor column `k`: the tensor stored at row `i` is
   * read at the fixed coordinates `coords` through the column schema's
   * `space.get`.
   */
  function columnTensorValue(k: string, ...coords: number[]) {
      return function (i: number, d: any) {
          const column = d[k];
          const space = column.schema.space;
          return space.get(column.value(i), ...coords);
      };
  }
  /**
   * Creates an accessor for column `k` that returns `d[k].valueKind(i)` for a
   * row index `i` and a data object `d`.
   */
  function columnValueKind(k: string) {
      return function (i: number, d: any) {
          const column = d[k];
          return column.valueKind(i);
      };
  }
  /**
   * Expands a tensor column into one float field per tensor component.
   *
   * Fields are named `field[i]`, `field[i][j]` or `field[i][j][k]` with 1-based
   * indices and are emitted with the last index varying fastest.
   *
   * @param field name of the tensor column
   * @param space tensor space providing `rank` and `dimensions`
   * @returns the generated field definitions
   * @throws Error when the rank is not 1, 2 or 3
   */
  function getTensorDefinitions(field: string, space: Tensor.Space) {
      const definitions: Field[] = [];
      const valueKind = columnValueKind(field);

      // Registers the field for one component, identified by its 0-based coordinates.
      const addComponent = (...coords: number[]) => {
          const suffix = coords.map(c => `[${c + 1}]`).join('');
          definitions.push({
              name: `${field}${suffix}`,
              type: Field.Type.Float,
              value: columnTensorValue(field, ...coords),
              valueKind
          });
      };

      switch (space.rank) {
          case 1: {
              const length = space.dimensions[0];
              for (let i = 0; i < length; i++) addComponent(i);
              break;
          }
          case 2: {
              const rowCount = space.dimensions[0];
              const colCount = space.dimensions[1];
              for (let i = 0; i < rowCount; i++) {
                  for (let j = 0; j < colCount; j++) addComponent(i, j);
              }
              break;
          }
          case 3: {
              const size0 = space.dimensions[0];
              const size1 = space.dimensions[1];
              const size2 = space.dimensions[2];
              for (let i = 0; i < size0; i++) {
                  for (let j = 0; j < size1; j++) {
                      for (let k = 0; k < size2; k++) addComponent(i, j, k);
                  }
              }
              break;
          }
          default:
              throw new Error('Tensors with rank > 3 or rank 0 are currently not supported.');
      }

      return definitions;
  }
  /**
   * Derives the output fields for a table from its schema.
   *
   * `int`, `float` and `str` columns map to one field of the matching type,
   * `list` columns to one string field holding the joined list, and `tensor`
   * columns to one float field per tensor component.
   *
   * @param schema the table schema, keyed by column name
   * @returns the fields, in the order of `Object.keys(schema)`
   * @throws Error when a column has a value type other than the ones above
   */
  function cifFieldsFromTableSchema(schema: Table.Schema) {
      const fields: Field[] = [];

      for (const columnName of Object.keys(schema)) {
          const column = schema[columnName];
          switch (column.valueType) {
              case 'int':
                  fields.push({ name: columnName, type: Field.Type.Int, value: columnValue(columnName), valueKind: columnValueKind(columnName) });
                  break;
              case 'float':
                  fields.push({ name: columnName, type: Field.Type.Float, value: columnValue(columnName), valueKind: columnValueKind(columnName) });
                  break;
              case 'str':
                  fields.push({ name: columnName, type: Field.Type.Str, value: columnValue(columnName), valueKind: columnValueKind(columnName) });
                  break;
              case 'list':
                  // lists are written as a single string column
                  fields.push({ name: columnName, type: Field.Type.Str, value: columnListValue(columnName), valueKind: columnValueKind(columnName) });
                  break;
              case 'tensor':
                  fields.push(...getTensorDefinitions(columnName, column.space));
                  break;
              default:
                  throw new Error(`Unknown valueType ${column.valueType}`);
          }
      }

      return fields;
  }