column.ts 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436
  1. /**
  2. * Copyright (c) 2017-2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * @author David Sehnal <david.sehnal@gmail.com>
  5. * @author Alexander Rose <alexander.rose@weirdbyte.de>
  6. */
  7. import * as ColumnHelpers from './column-helpers';
  8. import { Tensor as Tensors } from '../../mol-math/linear-algebra';
  9. import { Tokens } from '../../mol-io/reader/common/text/tokenizer';
  10. import { parseInt as fastParseInt, parseFloat as fastParseFloat } from '../../mol-io/reader/common/text/number-parser';
  /**
   * Read-only column of values addressed by row index.
   *
   * `T` is the type of the values returned by `value`.
   */
  interface Column<T> {
      /** Schema describing the values held by the column. */
      readonly schema: Column.Schema;
      /** Array backing the column, or `undefined` when there is none. */
      readonly __array: ArrayLike<any> | undefined;
      /** `false` for columns created by `Column.Undefined`. */
      readonly isDefined: boolean;
      /** Number of rows in the column. */
      readonly rowCount: number;
      /** Returns the value stored at `row`. */
      value(row: number): T;
      /** Returns the presence kind (`Present`, `NotPresent` or `Unknown`) of the value at `row`. */
      valueKind(row: number): Column.ValueKind;
      /** Returns the values, optionally restricted by `params`, as an array-like container. */
      toArray(params?: Column.ToArrayParams<T>): ArrayLike<T>;
      /** Tells whether the values at `rowA` and `rowB` are equal. */
      areValuesEqual(rowA: number, rowB: number): boolean;
  }
  /**
   * Schemas, factory functions and helpers that accompany the `Column` interface.
   */
  namespace Column {
      /** Constructor producing an array-like container of the requested size. */
      export type ArrayCtor<T> = { new(size: number): ArrayLike<T> };

      /** Any of the schema descriptors declared in the `Schema` namespace. */
      export type Schema<T = any> =
          | Schema.Str
          | Schema.Int
          | Schema.Float
          | Schema.Coordinate
          | Schema.Aliased<T>
          | Schema.Tensor
          | Schema.List<number|string>;

      export namespace Schema {
          // T also serves as a default value for undefined columns
          type Base<T extends string> = { valueType: T };

          export type Str = { '@type': 'str', T: string } & Base<'str'>;
          export type Int = { '@type': 'int', T: number } & Base<'int'>;
          export type Float = { '@type': 'float', T: number } & Base<'float'>;
          /** Tagged as `'coord'` but shares the `'float'` value type. */
          export type Coordinate = { '@type': 'coord', T: number } & Base<'float'>;
          export type Tensor = {
              '@type': 'tensor',
              T: Tensors.Data,
              space: Tensors.Space,
              baseType: Int | Float
          } & Base<'tensor'>;
          /** Value type is `'str'` when `T` extends `string`, `'int'` otherwise. */
          export type Aliased<T> = { '@type': 'aliased', T: T } & Base<T extends string ? 'str' : 'int'>;
          export type List<T extends number|string> = {
              '@type': 'list',
              T: T[],
              separator: string,
              itemParse: (x: string) => T
          } & Base<'list'>;

          /** Shared schema instances using the default values `''` and `0`. */
          export const str: Str = { '@type': 'str', T: '', valueType: 'str' };
          export const int: Int = { '@type': 'int', T: 0, valueType: 'int' };
          export const coord: Coordinate = { '@type': 'coord', T: 0, valueType: 'float' };
          export const float: Float = { '@type': 'float', T: 0, valueType: 'float' };

          /** Creates a string schema with the given default value. */
          export function Str(defaultValue = ''): Str {
              return { '@type': 'str', T: defaultValue, valueType: 'str' };
          }

          /** Creates an integer schema with the given default value. */
          export function Int(defaultValue = 0): Int {
              return { '@type': 'int', T: defaultValue, valueType: 'int' };
          }

          /** Creates a float schema with the given default value. */
          export function Float(defaultValue = 0): Float {
              return { '@type': 'float', T: defaultValue, valueType: 'float' };
          }

          /** Creates a tensor schema; the default value is obtained from `space.create()`. */
          export function Tensor(space: Tensors.Space, baseType: Int | Float = float): Tensor {
              return { '@type': 'tensor', T: space.create(), space, valueType: 'tensor', baseType };
          }

          /** Tensor schema over a vector space; `int` base types use `Int32Array`, others `Float64Array`. */
          export function Vector(dim: number, baseType: Int | Float = float): Tensor {
              const ctor = baseType['@type'] === 'int' ? Int32Array : Float64Array;
              return Tensor(Tensors.Vector(dim, ctor), baseType);
          }

          /** Tensor schema over a column-major matrix space; `int` base types use `Int32Array`, others `Float64Array`. */
          export function Matrix(rows: number, cols: number, baseType: Int | Float = float): Tensor {
              const ctor = baseType['@type'] === 'int' ? Int32Array : Float64Array;
              return Tensor(Tensors.ColumnMajorMatrix(rows, cols, ctor), baseType);
          }

          /**
           * Re-types a string or integer schema as an aliased schema.
           * Without a default value the very same schema object is returned;
           * otherwise a shallow copy with `T` replaced is created.
           */
          export function Aliased<T>(t: Str | Int, defaultValue?: T): Aliased<T> {
              if (typeof defaultValue === 'undefined') return t as any as Aliased<T>;
              return { ...t, T: defaultValue } as any as Aliased<T>;
          }

          /** Creates a list schema from a separator and a per-item parser. */
          export function List<T extends number|string>(separator: string, itemParse: (x: string) => T, defaultValue: T[] = []): List<T> {
              return { '@type': 'list', T: defaultValue, separator, itemParse, valueType: 'list' };
          }
      }

      /** Options accepted by `Column.toArray`. */
      export interface ToArrayParams<T> {
          /** Constructor of the container to create. */
          array?: ArrayCtor<T>,
          /** First row */
          start?: number,
          /** Last row (exclusive) */
          end?: number
      }

      /** Description of a column whose values are computed by a function. */
      export interface LambdaSpec<T extends Schema> {
          value: (row: number) => T['T'],
          rowCount: number,
          schema: T,
          valueKind?: (row: number) => ValueKind,
          areValuesEqual?: (rowA: number, rowB: number) => boolean
      }

      /** Description of a column backed by an array. */
      export interface ArraySpec<T extends Schema> {
          array: ArrayLike<T['T']>,
          schema: T,
          valueKind?: (row: number) => ValueKind
      }

      /** Description of a mapping from values of schema `S` to values of schema `T`. */
      export interface MapSpec<S extends Schema, T extends Schema> {
          f: (v: S['T']) => T['T'],
          schema: T,
          valueKind?: (row: number) => ValueKind,
      }

      /** Duck-typed check: truthy object with truthy `schema` and `value` members. */
      export function is(v: any): v is Column<any> {
          if (!v) return false;
          const candidate = v as Column<any>;
          return !!candidate.schema && !!candidate.value;
      }

      export const enum ValueKind {
          Present = 0,
          /** Expressed in CIF as `.` */
          NotPresent = 1,
          /** Expressed in CIF as `?` */
          Unknown = 2
      }

      /** Constant column holding the schema's default value, with every row flagged `NotPresent`. */
      export function Undefined<T extends Schema>(rowCount: number, schema: T): Column<T['T']> {
          return constColumn(schema['T'], rowCount, schema, ValueKind.NotPresent);
      }

      /** Constant column holding `v` in every row, flagged `Present`. */
      export function ofConst<T extends Schema>(v: T['T'], rowCount: number, type: T): Column<T['T']> {
          return constColumn(v, rowCount, type, ValueKind.Present);
      }

      /** Column whose values are computed on demand from `spec.value`. */
      export function ofLambda<T extends Schema>(spec: LambdaSpec<T>): Column<T['T']> {
          return lambdaColumn(spec);
      }

      /** values [min, max] (i.e. include both values) */
      export function range(min: number, max: number): Column<number> {
          // An inverted range (max < min) yields an empty column.
          const rowCount = Math.max(max - min + 1, 0);
          return ofLambda({ value: i => i + min, rowCount, schema: Schema.int });
      }

      /** Column backed by `spec.array`. */
      export function ofArray<T extends Column.Schema>(spec: Column.ArraySpec<T>): Column<T['T']> {
          return arrayColumn(spec);
      }

      /** Array-backed column using the shared `int` schema. */
      export function ofIntArray(array: ArrayLike<number>) {
          return arrayColumn({ array, schema: Schema.int });
      }

      /** Array-backed column using the shared `float` schema. */
      export function ofFloatArray(array: ArrayLike<number>) {
          return arrayColumn({ array, schema: Schema.float });
      }

      /** Array-backed column using the shared `str` schema. */
      export function ofStringArray(array: ArrayLike<string>) {
          return arrayColumn({ array, schema: Schema.str });
      }

      /** Array-backed column of string literals typed as an aliased `str` schema. */
      export function ofStringAliasArray<T extends string>(array: ArrayLike<T>) {
          return arrayColumn<Schema.Aliased<T>>({ array, schema: Schema.Aliased(Schema.str) });
      }

      /** Array-backed column of string lists; items are passed through unparsed. */
      export function ofStringListArray<T extends string>(array: ArrayLike<T[]>, separator = ',') {
          return arrayColumn<Schema.List<T>>({ array, schema: Schema.List<T>(separator, x => x as T) });
      }

      /** Lazily parsed integer column over tokens; falls back to 0 when the parser returns a falsy value. */
      export function ofIntTokens(tokens: Tokens) {
          const { count, data, indices } = tokens;
          const value = (row: number) => {
              const offset = 2 * row;
              return fastParseInt(data, indices[offset], indices[offset + 1]) || 0;
          };
          return lambdaColumn({ value, rowCount: count, schema: Schema.int });
      }

      /** Lazily parsed float column over tokens; falls back to 0 when the parser returns a falsy value. */
      export function ofFloatTokens(tokens: Tokens) {
          const { count, data, indices } = tokens;
          const value = (row: number) => {
              const offset = 2 * row;
              return fastParseFloat(data, indices[offset], indices[offset + 1]) || 0;
          };
          return lambdaColumn({ value, rowCount: count, schema: Schema.float });
      }

      /** Lazily sliced string column over tokens; the tokens `.` and `?` are reported as `''`. */
      export function ofStringTokens(tokens: Tokens) {
          const { count, data, indices } = tokens;
          const value = (row: number) => {
              const token = data.substring(indices[2 * row], indices[2 * row + 1]);
              return token === '.' || token === '?' ? '' : token;
          };
          return lambdaColumn({ value, rowCount: count, schema: Schema.str });
      }

      /** Column restricted to rows `start` (inclusive) to `end` (exclusive). */
      export function window<T>(column: Column<T>, start: number, end: number) {
          return windowColumn(column, start, end);
      }

      /** Column whose row `i` is row `indices[i]` of `column`. */
      export function view<T>(column: Column<T>, indices: ArrayLike<number>, checkIndentity = true) {
          return columnView(column, indices, checkIndentity);
      }

      /** A map of the 1st occurrence of each value. */
      export function createFirstIndexMap<T>(column: Column<T>) {
          return createFirstIndexMapOfColumn(column);
      }

      /** Function mapping a value to the row of its first occurrence, or -1 when absent. */
      export function createIndexer<T, R extends number = number>(column: Column<T>) {
          return createIndexerOfColumn(column) as ((e: T) => R);
      }

      /** Applies `f` to every value and stores the results in a new container (`Array` by default). */
      export function mapToArray<T, S>(column: Column<T>, f: (v: T) => S, ctor?: ArrayCtor<S>): ArrayLike<S> {
          return mapToArrayImpl<T, S>(column, f, ctor || Array);
      }

      /** Compares two columns for equality. */
      export function areEqual<T>(a: Column<T>, b: Column<T>) {
          return areColumnsEqual(a, b);
      }

      /** Rows whose value satisfies `test`. */
      export function indicesOf<T>(c: Column<T>, test: (e: T) => boolean) {
          return columnIndicesOf(c, test);
      }

      /** Makes the column backed by an array. Useful for columns that are accessed often. */
      export function asArrayColumn<T>(c: Column<T>, array?: ArrayCtor<T>): Column<T> {
          // Already array-backed: nothing to do.
          if (c.__array) return c;
          if (!c.isDefined) return Undefined(c.rowCount, c.schema) as any as Column<T>;
          const values = c.toArray({ array });
          return arrayColumn({ array: values, schema: c.schema, valueKind: c.valueKind });
      }

      /** Writes the values of a defined column into `array` starting at `offset`; undefined columns are skipped. */
      export function copyToArray<T extends number>(c: Column<T>, array: { [k: number]: T, length: number }, offset = 0) {
          if (!c.isDefined) return;
          const source = c.__array;
          if (source) {
              const n = source.length;
              for (let k = 0; k < n; k++) array[offset + k] = source[k];
              return;
          }
          const rows = c.rowCount;
          for (let k = 0; k < rows; k++) array[offset + k] = c.value(k);
      }

      /** Tells whether every row holds its own index. */
      export function isIdentity<T extends number>(c: Column<T>) {
          const rows = c.rowCount;
          let row = 0;
          while (row < rows) {
              if (row !== c.value(row)) return false;
              row++;
          }
          return true;
      }
  }
  192. export { Column };
  /**
   * Builds a map from each distinct value of the column to the row of its
   * first occurrence.
   *
   * Each row's value is read exactly once, so columns that compute values
   * lazily are not evaluated twice for previously unseen values.
   *
   * @param c column to index
   * @returns map of value -> first row holding that value
   */
  function createFirstIndexMapOfColumn<T>(c: Column<T>): Map<T, number> {
      const map = new Map<T, number>();
      for (let i = 0, _i = c.rowCount; i < _i; i++) {
          const v = c.value(i);
          // Keep the earliest row only; later duplicates are ignored.
          if (!map.has(v)) map.set(v, i);
      }
      return map;
  }
  /**
   * Creates a lookup function that maps a value to the row of its first
   * occurrence in the column, or -1 when the value does not occur.
   *
   * Each row's value is read exactly once while the index is built, and each
   * lookup performs a single map access.
   *
   * @param c column to index
   * @returns function value -> first row index or -1
   */
  function createIndexerOfColumn<T>(c: Column<T>): (value: T) => number {
      const map = new Map<T, number>();
      for (let i = 0, _i = c.rowCount; i < _i; i++) {
          const v = c.value(i);
          // Keep the earliest row only; later duplicates are ignored.
          if (!map.has(v)) map.set(v, i);
      }
      return v => {
          // Stored rows are always numbers, so `undefined` can only mean "absent".
          const row = map.get(v);
          return row === undefined ? -1 : row;
      };
  }
  /**
   * Creates a column that reports the same value and the same value kind for
   * every row.
   *
   * @param v value returned for every row
   * @param rowCount number of rows
   * @param schema schema of the column
   * @param valueKind kind reported for every row; the column counts as defined
   *        only when this is `ValueKind.Present`
   */
  function constColumn<T extends Column.Schema>(v: T['T'], rowCount: number, schema: T, valueKind: Column.ValueKind): Column<T['T']> {
      const isDefined = valueKind === Column.ValueKind.Present;
      const value: Column<T['T']>['value'] = row => v;
      return {
          schema,
          __array: void 0,
          isDefined,
          rowCount,
          value,
          valueKind: row => valueKind,
          toArray: params => {
              const { array } = ColumnHelpers.createArray(rowCount, params);
              const size = array.length;
              // Every slot receives the same constant.
              for (let k = 0; k < size; k++) array[k] = v;
              return array;
          },
          // All rows hold the same value by construction.
          areValuesEqual: (rowA, rowB) => true
      };
  }
  /**
   * Creates a column whose values are produced on demand by `value`.
   *
   * When the spec omits `valueKind`, every row is reported as `Present`;
   * when it omits `areValuesEqual`, rows are compared with `===` on their
   * computed values.
   */
  function lambdaColumn<T extends Column.Schema>({ value, valueKind, areValuesEqual, rowCount, schema }: Column.LambdaSpec<T>): Column<T['T']> {
      const kindOf: Column<T['T']>['valueKind'] = valueKind
          ? valueKind
          : row => Column.ValueKind.Present;
      const sameValue: Column<T['T']>['areValuesEqual'] = areValuesEqual
          ? areValuesEqual
          : (rowA, rowB) => value(rowA) === value(rowB);
      const toArray: Column<T['T']>['toArray'] = params => {
          // `start` is the first requested row; the container is already sized by the helper.
          const { array, start } = ColumnHelpers.createArray(rowCount, params);
          const size = array.length;
          for (let k = 0; k < size; k++) array[k] = value(k + start);
          return array;
      };
      return {
          schema,
          __array: void 0,
          isDefined: true,
          rowCount,
          value,
          valueKind: kindOf,
          toArray,
          areValuesEqual: sameValue
      };
  }
  /**
   * Creates a column backed by `array`.
   *
   * For schemas whose value type is `'str'`, values that are not already
   * strings are converted with `'' + v` both on access and in `toArray`.
   * Rows are reported as `Present` unless the spec supplies `valueKind`.
   */
  function arrayColumn<T extends Column.Schema>({ array, schema, valueKind }: Column.ArraySpec<T>): Column<T['T']> {
      const rowCount = array.length;
      const isStr = schema.valueType === 'str';

      let value: Column<T['T']>['value'];
      if (isStr) {
          value = row => { const v = array[row]; return typeof v === 'string' ? v : '' + v; };
      } else {
          value = row => array[row];
      }

      const isTyped = ColumnHelpers.isTypedArray(array);

      let toArray: Column<T['T']>['toArray'];
      if (isStr) {
          // String columns always build a new container so that every item is a string.
          toArray = params => {
              const { start, end } = ColumnHelpers.getArrayBounds(rowCount, params);
              const ret = new (params && typeof params.array !== 'undefined' ? params.array : (array as any).constructor)(end - start) as any;
              const size = end - start;
              for (let k = 0; k < size; k++) {
                  const v = array[start + k];
                  ret[k] = typeof v === 'string' ? v : '' + v;
              }
              return ret;
          };
      } else if (isTyped) {
          toArray = params => ColumnHelpers.typedArrayWindow(array, params) as any as ReadonlyArray<T>;
      } else {
          toArray = params => {
              const { start, end } = ColumnHelpers.getArrayBounds(rowCount, params);
              // The whole range was requested: hand back the backing array itself.
              if (start === 0 && end === array.length) return array as ReadonlyArray<T['T']>;
              const ret = new (params && typeof params.array !== 'undefined' ? params.array : (array as any).constructor)(end - start) as any;
              const size = end - start;
              for (let k = 0; k < size; k++) ret[k] = array[start + k];
              return ret;
          };
      }

      return {
          schema,
          __array: array,
          isDefined: true,
          rowCount,
          value,
          valueKind: valueKind ? valueKind : row => Column.ValueKind.Present,
          toArray,
          // Compares the raw array items (no string conversion).
          areValuesEqual: (rowA, rowB) => array[rowA] === array[rowB]
      };
  }
  /**
   * Restricts `column` to rows `start` (inclusive) to `end` (exclusive).
   *
   * Undefined columns yield a new undefined column of the window's size, a
   * window covering all rows returns the column itself, and columns backed by
   * a typed array are windowed through `windowTyped`; everything else goes
   * through `windowFull`.
   */
  function windowColumn<T>(column: Column<T>, start: number, end: number): Column<T> {
      if (!column.isDefined) return Column.Undefined(end - start, column.schema);
      const coversAllRows = start === 0 && end === column.rowCount;
      if (coversAllRows) return column;
      const backing = column.__array;
      const hasTypedBacking = !!backing && ColumnHelpers.isTypedArray(backing);
      return hasTypedBacking ? windowTyped(column, start, end) : windowFull(column, start, end);
  }
  /**
   * Windows a column by asking `ColumnHelpers.typedArrayWindow` for the
   * `[start, end)` part of its backing array and wrapping the result in a new
   * array column. Value kinds are looked up in the source column at
   * `start + row`.
   */
  function windowTyped<T>(c: Column<T>, start: number, end: number): Column<T> {
      const windowed = ColumnHelpers.typedArrayWindow(c.__array, { start, end });
      const kindOf = c.valueKind;
      const shiftedKind = (row: number) => kindOf(start + row);
      return arrayColumn({ array: windowed, schema: c.schema, valueKind: shiftedKind }) as any;
  }
  /**
   * Creates a lazy window over rows `start` (inclusive) to `end` (exclusive)
   * of `c`. Every accessor forwards to the source column with the row shifted
   * by `start`; when `start` is 0 the source accessors are reused directly.
   *
   * `toArray` honours the `start` of its params: the first element of the
   * result is the window row the caller asked for, consistent with the
   * lambda-backed columns in this file.
   *
   * @param c source column
   * @param start first source row of the window
   * @param end source row just past the window
   */
  function windowFull<T>(c: Column<T>, start: number, end: number): Column<T> {
      const v = c.value, vk = c.valueKind, ave = c.areValuesEqual;
      const value: Column<T>['value'] = start === 0 ? v : row => v(row + start);
      const rowCount = end - start;
      return {
          schema: c.schema,
          __array: void 0,
          isDefined: c.isDefined,
          rowCount,
          value,
          valueKind: start === 0 ? vk : row => vk(row + start),
          toArray: params => {
              // `offset` is the first requested row relative to this window;
              // `start` moves it into the source column's row space.
              const { array, start: offset } = ColumnHelpers.createArray(rowCount, params);
              for (let i = 0, _i = array.length; i < _i; i++) array[i] = v(start + offset + i);
              return array;
          },
          areValuesEqual: start === 0 ? ave : (rowA, rowB) => ave(rowA + start, rowB + start)
      };
  }
  /**
   * Tells whether `map` is the identity mapping over `rowCount` rows, i.e. it
   * has exactly `rowCount` entries and entry `i` equals `i`.
   */
  function isIdentity(map: ArrayLike<number>, rowCount: number) {
      const size = map.length;
      if (size !== rowCount) return false;
      let k = 0;
      // Advance while entries match their own index; stopping early means a mismatch.
      while (k < size && map[k] === k) k++;
      return k === size;
  }
  /**
   * Creates a view of `c` whose row `i` is row `map[i]` of the source.
   *
   * Empty columns are returned unchanged, as are columns viewed through an
   * identity map when `checkIdentity` is set. When the column has a backing
   * array whose first item has the same `typeof` as the first value, the view
   * is built by `arrayView`; otherwise by `viewFull`.
   */
  function columnView<T>(c: Column<T>, map: ArrayLike<number>, checkIdentity: boolean): Column<T> {
      if (c.rowCount === 0 || (checkIdentity && isIdentity(map, c.rowCount))) return c;
      const backing = c.__array;
      const canCopyItems = !!backing && typeof c.value(0) === typeof backing[0];
      return canCopyItems ? arrayView(c, map) : viewFull(c, map);
  }
  /**
   * Builds a view by copying the mapped items of the backing array into a new
   * container created with the backing array's own constructor, then wrapping
   * it in an array column. Value kinds are looked up in the source column at
   * `map[row]`.
   */
  function arrayView<T>(c: Column<T>, map: ArrayLike<number>): Column<T> {
      const source = c.__array!;
      const count = map.length;
      const picked = new (source as any).constructor(count);
      for (let k = 0; k < count; k++) picked[k] = source[map[k]];
      const kindOf = c.valueKind;
      const mappedKind = (row: number) => kindOf(map[row]);
      return arrayColumn({ array: picked, schema: c.schema, valueKind: mappedKind });
  }
  /**
   * Creates a lazy view of `c` whose row `i` is row `map[i]` of the source.
   * Every accessor forwards to the source column through `map`.
   *
   * `toArray` honours the `start` of its params: the first element of the
   * result is the view row the caller asked for, consistent with the
   * lambda-backed columns in this file.
   *
   * @param c source column
   * @param map source row for each row of the view
   */
  function viewFull<T>(c: Column<T>, map: ArrayLike<number>): Column<T> {
      const v = c.value, vk = c.valueKind, ave = c.areValuesEqual;
      const value: Column<T>['value'] = row => v(map[row]);
      const rowCount = map.length;
      return {
          schema: c.schema,
          __array: void 0,
          isDefined: c.isDefined,
          rowCount,
          value,
          valueKind: row => vk(map[row]),
          toArray: params => {
              // `offset` is the first requested view row; translate through `map` afterwards.
              const { array, start: offset } = ColumnHelpers.createArray(rowCount, params);
              for (let i = 0, _i = array.length; i < _i; i++) array[i] = v(map[offset + i]);
              return array;
          },
          areValuesEqual: (rowA, rowB) => ave(map[rowA], map[rowB])
      };
  }
  /**
   * Applies `f` to the value of every row and stores the results, in row
   * order, in a container created by `ctor` with one slot per row.
   */
  function mapToArrayImpl<T, S>(c: Column<T>, f: (v: T) => S, ctor: Column.ArrayCtor<S>): ArrayLike<S> {
      const out = new ctor(c.rowCount) as any;
      const rows = c.rowCount;
      let row = 0;
      while (row < rows) {
          out[row] = f(c.value(row));
          row++;
      }
      return out;
  }
  /**
   * Compares two columns. The same object is always equal; columns differing
   * in row count, definedness or schema value type are never equal. Otherwise
   * the backing arrays are compared when both columns have one, and the
   * per-row values when they do not.
   */
  function areColumnsEqual(a: Column<any>, b: Column<any>) {
      if (a === b) return true;
      const comparable = a.rowCount === b.rowCount
          && a.isDefined === b.isDefined
          && a.schema.valueType === b.schema.valueType;
      if (!comparable) return false;
      const bothBacked = !!a.__array && !!b.__array;
      return bothBacked ? areArraysEqual(a, b) : areValuesEqual(a, b);
  }
  /**
   * Compares the backing arrays of two columns item by item with `===` over
   * the first `a.rowCount` rows. Both columns must have a backing array.
   */
  function areArraysEqual(a: Column<any>, b: Column<any>) {
      const left = a.__array!, right = b.__array!;
      const rows = a.rowCount;
      let row = 0;
      while (row < rows && left[row] === right[row]) row++;
      // Reaching the end means no mismatch was found.
      return row >= rows;
  }
  /**
   * Compares two columns row by row with `===` on the results of their
   * `value` accessors, over the first `a.rowCount` rows.
   */
  function areValuesEqual(a: Column<any>, b: Column<any>) {
      const left = a.value, right = b.value;
      const rows = a.rowCount;
      for (let row = 0; row < rows; row++) {
          const same = left(row) === right(row);
          if (!same) return false;
      }
      return true;
  }
  /**
   * Collects, in ascending order, the rows whose value satisfies `test`.
   */
  function columnIndicesOf<T>(c: Column<T>, test: (e: T) => boolean) {
      const valueAt = c.value;
      const rows = c.rowCount;
      const hits: number[] = [];
      for (let row = 0; row < rows; row++) {
          if (!test(valueAt(row))) continue;
          hits.push(row);
      }
      return hits;
  }