data-model.ts 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345
  1. /**
  2. * Copyright (c) 2017-2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * @author David Sehnal <david.sehnal@gmail.com>
  5. * @author Alexander Rose <alexander.rose@weirdbyte.de>
  6. */
  7. import { Column, ColumnHelpers, Table } from '../../../mol-data/db';
  8. import { Tensor } from '../../../mol-math/linear-algebra';
  9. import { getNumberType, NumberType, parseInt as fastParseInt, parseFloat as fastParseFloat } from '../common/text/number-parser';
  10. import { Encoding } from '../../common/binary-cif';
  11. import { Tokens } from '../common/text/tokenizer';
  12. import { areValuesEqualProvider } from '../common/text/column/token';
  /** A CIF file: an optional name together with the data blocks it holds. */
  export interface CifFile {
      readonly name?: string;
      readonly blocks: ReadonlyArray<CifBlock>;
  }

  /**
   * Creates a `CifFile` from its blocks and an optional name.
   *
   * `blocks` is stored as passed in (no copy is made).
   */
  export function CifFile(blocks: ArrayLike<CifBlock>, name?: string): CifFile {
      const file: CifFile = { name, blocks: blocks as any };
      return file;
  }
  /** A header plus a set of categories and the ordered list of their names. */
  export interface CifFrame {
      readonly header: string;
      /** Category names, kept in a separate list so that their ordering is preserved. */
      readonly categoryNames: ReadonlyArray<string>;
      readonly categories: CifCategories;
  }
  /** A frame that additionally carries save frames and supports direct field lookup. */
  export interface CifBlock extends CifFrame {
      readonly saveFrames: CifFrame[],
      /**
       * Looks up a field by a `category.field` name.
       * Returns `undefined` when either the category or the field does not exist.
       */
      getField(name: string): CifField | undefined
  }

  /**
   * Creates a `CifBlock`.
   *
   * @param categoryNames Category names in the order they should be reported.
   * @param categories Categories keyed by name.
   * @param header Header of the block.
   * @param saveFrames Save frames belonging to the block (defaults to an empty list).
   */
  export function CifBlock(categoryNames: string[], categories: CifCategories, header: string, saveFrames: CifFrame[] = []): CifBlock {
      return {
          categoryNames, header, categories, saveFrames,
          getField(name: string) {
              // Only the first two dot-separated parts are used: `<category>.<field>`.
              const [categoryName, fieldName] = name.split('.');
              const category = categories[categoryName];
              // An unknown category yields `undefined`, as the return type promises,
              // instead of a TypeError from calling `getField` on `undefined`.
              if (!category) return void 0;
              return category.getField(fieldName || '');
          }
      };
  }
  /** Creates a save frame from its category names, categories and header. */
  export function CifSaveFrame(categoryNames: string[], categories: CifCategories, header: string): CifFrame {
      const frame: CifFrame = { categoryNames, header, categories };
      return frame;
  }
  /** Read-only dictionary from a name to a list of strings. */
  export type CifAliases = Readonly<Record<string, string[]>>
  /** Read-only dictionary from a category name to the category. */
  export type CifCategories = Readonly<Record<string, CifCategory>>
  /** A named category: a row count, the names of its fields and a field lookup. */
  export interface CifCategory {
      readonly rowCount: number;
      readonly name: string;
      readonly fieldNames: ReadonlyArray<string>;
      /** Returns the field with the given name, or `undefined` if there is none. */
      getField(name: string): CifField | undefined;
  }

  /**
   * Creates a category backed by the `fields` dictionary.
   *
   * `fieldNames` is copied, so later changes to the passed array do not affect the category.
   */
  export function CifCategory(name: string, rowCount: number, fieldNames: string[], fields: { [name: string]: CifField }): CifCategory {
      const ownFieldNames = Array.from(fieldNames);
      return {
          rowCount,
          name,
          fieldNames: ownFieldNames,
          getField: fieldName => fields[fieldName]
      };
  }
  export namespace CifCategory {
      /** Creates a category with zero rows and no fields; every field lookup yields `undefined`. */
      export function empty(name: string): CifCategory {
          return { rowCount: 0, name, fieldNames: [], getField(name: string) { return void 0; } };
      }

      export type SomeFields<S> = { [P in keyof S]?: CifField }
      export type Fields<S> = { [P in keyof S]: CifField }

      /**
       * Creates a category from a dictionary of fields.
       *
       * `fieldNames` lists every key of `fields`, including keys whose value is `undefined`.
       * `rowCount` is taken from the first field that is actually defined, or is 0 when
       * there is no defined field.
       */
      export function ofFields(name: string, fields: { [name: string]: CifField | undefined }): CifCategory {
          const fieldNames = Object.keys(fields);
          let rowCount = 0;
          for (const fieldName of fieldNames) {
              const field = fields[fieldName];
              // The parameter type allows `undefined` entries; skip them rather than
              // dereferencing one (which would throw).
              if (field) {
                  rowCount = field.rowCount;
                  break;
              }
          }
          return {
              rowCount,
              name,
              fieldNames,
              getField(name) { return fields[name]; }
          };
      }

      /** Creates a category from a table, wrapping each column named in `table._columns` via `CifField.ofColumn`. */
      export function ofTable(name: string, table: Table<any>) {
          const fields: { [name: string]: CifField | undefined } = {};
          for (const columnName of table._columns) {
              fields[columnName] = CifField.ofColumn(table[columnName]);
          }
          return ofFields(name, fields);
      }
  }
  /**
   * A single field (column) of a CIF category with per-row accessors.
   *
   * Implementation note:
   * Always implement without using "this." in any of the interface functions.
   * This is to ensure that the functions can be invoked without having to "bind" them.
   */
  export interface CifField {
      readonly __array: ArrayLike<any> | undefined;
      readonly binaryEncoding: Encoding[] | undefined;
      readonly isDefined: boolean;
      readonly rowCount: number;

      /** Value of the given row as a string. */
      str(row: number): string;
      /** Value of the given row as an integer number. */
      int(row: number): number;
      /** Value of the given row as a floating point number. */
      float(row: number): number;
      /** Kind of the value stored in the given row. */
      valueKind(row: number): Column.ValueKind;
      /** Compares the values stored in two rows. */
      areValuesEqual(rowA: number, rowB: number): boolean;

      toStringArray(params?: Column.ToArrayParams<string>): ReadonlyArray<string>;
      toIntArray(params?: Column.ToArrayParams<number>): ReadonlyArray<number>;
      toFloatArray(params?: Column.ToArrayParams<number>): ReadonlyArray<number>;
  }
  export namespace CifField {
      /**
       * Value kind of a one-character value, given its character code:
       * '.' is not present, '?' is unknown, anything else is present.
       */
      function kindOfSingleChar(code: number): Column.ValueKind {
          switch (code) {
              case 46 /* . */: return Column.ValueKind.NotPresent;
              case 63 /* ? */: return Column.ValueKind.Unknown;
              default: return Column.ValueKind.Present;
          }
      }

      /** Creates a single-row field holding `value`. */
      export function ofString(value: string) {
          return ofStrings([value]);
      }

      /** Creates a field backed by an array-like of strings. */
      export function ofStrings(values: ArrayLike<string>): CifField {
          const rowCount = values.length;

          const str: CifField['str'] = row => {
              const raw = values[row];
              // Empty values and the '.' / '?' placeholders all read as the empty string.
              return (!raw || raw === '.' || raw === '?') ? '' : raw;
          };
          const int: CifField['int'] = row => {
              const raw = values[row];
              return fastParseInt(raw, 0, raw.length) || 0;
          };
          const float: CifField['float'] = row => {
              const raw = values[row];
              return fastParseFloat(raw, 0, raw.length) || 0;
          };
          const valueKind: CifField['valueKind'] = row => {
              const raw = values[row];
              const length = raw.length;
              if (length === 0) return Column.ValueKind.NotPresent;
              if (length > 1) return Column.ValueKind.Present;
              return kindOfSingleChar(raw.charCodeAt(0));
          };

          const field: CifField = {
              __array: void 0,
              binaryEncoding: void 0,
              isDefined: true,
              rowCount,
              str,
              int,
              float,
              valueKind,
              areValuesEqual: (rowA, rowB) => values[rowA] === values[rowB],
              // Without params the backing values are handed out directly.
              toStringArray: params => {
                  if (!params) return values as string[];
                  return ColumnHelpers.createAndFillArray(rowCount, str, params);
              },
              toIntArray: params => ColumnHelpers.createAndFillArray(rowCount, int, params),
              toFloatArray: params => ColumnHelpers.createAndFillArray(rowCount, float, params)
          };
          return field;
      }

      /** Creates a field backed by an array-like of numbers; every row is reported as present. */
      export function ofNumbers(values: ArrayLike<number>): CifField {
          const rowCount = values.length;

          const str: CifField['str'] = row => '' + values[row];
          const float: CifField['float'] = row => values[row];
          const valueKind: CifField['valueKind'] = row => Column.ValueKind.Present;

          const toFloatArray = (params: Column.ToArrayParams<number>) => {
              // Hand out the backing values when no params are given or when they
              // already are an instance of the requested array type.
              const reuseValues = !params || params.array && values instanceof params.array;
              return reuseValues
                  ? values as number[]
                  : ColumnHelpers.createAndFillArray(rowCount, float, params);
          };

          const field: CifField = {
              __array: void 0,
              binaryEncoding: void 0,
              isDefined: true,
              rowCount,
              str,
              // Integer access uses the same accessors as float access.
              int: float,
              float,
              valueKind,
              areValuesEqual: (rowA, rowB) => values[rowA] === values[rowB],
              toStringArray: params => ColumnHelpers.createAndFillArray(rowCount, str, params),
              toIntArray: toFloatArray,
              toFloatArray
          };
          return field;
      }

      /** Creates a field backed by tokens: row `r` spans `indices[2r]` to `indices[2r + 1]` of `data`. */
      export function ofTokens(tokens: Tokens): CifField {
          const data = tokens.data, indices = tokens.indices, rowCount = tokens.count;

          const str: CifField['str'] = row => {
              const text = data.substring(indices[2 * row], indices[2 * row + 1]);
              return (text === '.' || text === '?') ? '' : text;
          };
          const int: CifField['int'] = row => fastParseInt(data, indices[2 * row], indices[2 * row + 1]) || 0;
          const float: CifField['float'] = row => fastParseFloat(data, indices[2 * row], indices[2 * row + 1]) || 0;
          const valueKind: CifField['valueKind'] = row => {
              const start = indices[2 * row];
              const length = indices[2 * row + 1] - start;
              if (length === 0) return Column.ValueKind.NotPresent;
              if (length > 1) return Column.ValueKind.Present;
              return kindOfSingleChar(data.charCodeAt(start));
          };

          const field: CifField = {
              __array: void 0,
              binaryEncoding: void 0,
              isDefined: true,
              rowCount,
              str,
              int,
              float,
              valueKind,
              areValuesEqual: areValuesEqualProvider(tokens),
              toStringArray: params => ColumnHelpers.createAndFillArray(rowCount, str, params),
              toIntArray: params => ColumnHelpers.createAndFillArray(rowCount, int, params),
              toFloatArray: params => ColumnHelpers.createAndFillArray(rowCount, float, params)
          };
          return field;
      }

      /**
       * Wraps a column as a field; the accessors depend on the column's `schema.valueType`.
       *
       * Throws for value types other than 'float', 'int', 'str' and 'list'.
       */
      export function ofColumn(column: Column<any>): CifField {
          const rowCount = column.rowCount, isDefined = column.isDefined;
          const valueKind = column.valueKind, areValuesEqual = column.areValuesEqual;

          let str: CifField['str'];
          let int: CifField['int'];
          let float: CifField['float'];

          switch (column.schema.valueType) {
              case 'float':
              case 'int': {
                  str = row => '' + column.value(row);
                  int = column.value;
                  float = column.value;
                  break;
              }
              case 'str': {
                  str = column.value;
                  int = row => {
                      const text = column.value(row);
                      return fastParseInt(text, 0, text.length) || 0;
                  };
                  float = row => {
                      const text = column.value(row);
                      return fastParseFloat(text, 0, text.length) || 0;
                  };
                  break;
              }
              case 'list': {
                  const { separator } = column.schema;
                  str = row => column.value(row).join(separator);
                  // Lists have no numeric representation.
                  int = row => NaN;
                  float = row => NaN;
                  break;
              }
              default:
                  throw new Error(`unsupported valueType '${column.schema.valueType}'`);
          }

          const field: CifField = {
              __array: void 0,
              binaryEncoding: void 0,
              isDefined,
              rowCount,
              str,
              int,
              float,
              valueKind,
              areValuesEqual,
              toStringArray: params => ColumnHelpers.createAndFillArray(rowCount, str, params),
              toIntArray: params => ColumnHelpers.createAndFillArray(rowCount, int, params),
              toFloatArray: params => ColumnHelpers.createAndFillArray(rowCount, float, params)
          };
          return field;
      }

      /** Creates a field from `Column.Undefined(rowCount, schema)`. */
      export function ofUndefined(rowCount: number, schema: Column.Schema): CifField {
          const undefinedColumn = Column.Undefined(rowCount, schema);
          return ofColumn(undefinedColumn);
      }
  }
  /**
   * Returns a function that builds the field name of a tensor element from its indices.
   *
   * @param field Base name of the tensor field.
   * @param rank Tensor rank; 1, 2 and 3 are supported.
   * @param zeroIndexed When false, 1 is added to every index in the generated name.
   * @param namingVariant 'brackets' gives `field[i][j]`, 'underscore' gives `field_ij`.
   * @throws Error for any other rank.
   */
  export function tensorFieldNameGetter(field: string, rank: number, zeroIndexed: boolean, namingVariant: 'brackets' | 'underscore') {
      const shift = zeroIndexed ? 0 : 1;
      const useBrackets = namingVariant === 'brackets';

      if (rank === 1) {
          if (useBrackets) return (i: number) => `${field}[${i + shift}]`;
          return (i: number) => `${field}_${i + shift}`;
      }
      if (rank === 2) {
          if (useBrackets) return (i: number, j: number) => `${field}[${i + shift}][${j + shift}]`;
          return (i: number, j: number) => `${field}_${i + shift}${j + shift}`;
      }
      if (rank === 3) {
          if (useBrackets) return (i: number, j: number, k: number) => `${field}[${i + shift}][${j + shift}][${k + shift}]`;
          return (i: number, j: number, k: number) => `${field}_${i + shift}${j + shift}${k + shift}`;
      }

      throw new Error('Tensors with rank > 3 or rank 0 are currently not supported.');
  }
  /**
   * Reads a tensor from one row of a category.
   *
   * For every index combination of `space.dimensions`, the field named by `getName(...indices)`
   * is looked up and its float value at `row` is stored in the tensor; a missing field
   * contributes 0.
   *
   * @throws Error when `space.rank` is not 1, 2 or 3.
   */
  export function getTensor(category: CifCategory, space: Tensor.Space, row: number, getName: (...args: number[]) => string): Tensor.Data {
      const tensor = space.create();

      // Float value of the named field at `row`, or 0 when the category has no such field.
      const valueOf = (fieldName: string) => {
          const field = category.getField(fieldName);
          return field ? field.float(row) : 0.0;
      };

      switch (space.rank) {
          case 1: {
              const [count] = space.dimensions;
              for (let i = 0; i < count; i++) {
                  space.set(tensor, i, valueOf(getName(i)));
              }
              break;
          }
          case 2: {
              const [rowCount, colCount] = space.dimensions;
              for (let i = 0; i < rowCount; i++) {
                  for (let j = 0; j < colCount; j++) {
                      space.set(tensor, i, j, valueOf(getName(i, j)));
                  }
              }
              break;
          }
          case 3: {
              const [size0, size1, size2] = space.dimensions;
              for (let i = 0; i < size0; i++) {
                  for (let j = 0; j < size1; j++) {
                      for (let k = 0; k < size2; k++) {
                          space.set(tensor, i, j, k, valueOf(getName(i, j, k)));
                      }
                  }
              }
              break;
          }
          default:
              throw new Error('Tensors with rank > 3 or rank 0 are currently not supported.');
      }

      return tensor;
  }
  /**
   * Guesses the schema type of a field from its values.
   *
   * Rows whose value kind is not `Present` are ignored. The result is:
   * - `str` if any present value is not classified as an int or float, or if no row is present,
   * - `float` if at least one present value is a float,
   * - `int` otherwise.
   */
  export function getCifFieldType(field: CifField): Column.Schema.Int | Column.Schema.Float | Column.Schema.Str {
      const rowCount = field.rowCount;
      let sawFloat = false;
      let missingCount = 0;

      for (let row = 0; row < rowCount; row++) {
          if (field.valueKind(row) !== Column.ValueKind.Present) {
              missingCount += 1;
              continue;
          }
          const numberType = getNumberType(field.str(row));
          if (numberType === NumberType.Int) continue;
          // numbers in scientific notation and plain text are not distinguishable
          if (numberType !== NumberType.Float) return Column.Schema.str;
          sawFloat = true;
      }

      if (missingCount === rowCount) return Column.Schema.str;
      return sawFloat ? Column.Schema.float : Column.Schema.int;
  }