data-model.ts 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250
  1. /**
  2. * Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * @author David Sehnal <david.sehnal@gmail.com>
  5. * @author Alexander Rose <alexander.rose@weirdbyte.de>
  6. */
  7. import { Column, ColumnHelpers } from 'mol-data/db'
  8. import { Tensor } from 'mol-math/linear-algebra'
  9. import { getNumberType, NumberType, parseInt as fastParseInt, parseFloat as fastParseFloat } from '../common/text/number-parser';
  10. import { Encoding } from '../../common/binary-cif';
  11. import { Tokens } from '../common/text/tokenizer';
  12. import { areValuesEqualProvider } from '../common/text/column/token';
  /**
   * A CIF file: an optional name together with an ordered list of data blocks.
   */
  export interface CifFile {
      /** Optional name of the file. */
      readonly name?: string;
      /** The data blocks contained in the file. */
      readonly blocks: ReadonlyArray<CifBlock>;
  }
  /**
   * Creates a {@link CifFile} from a list of blocks.
   *
   * @param blocks Blocks of the file. A real array is stored as-is (no copy);
   *               any other array-like is converted to an array so that the
   *               `ReadonlyArray` contract of `CifFile.blocks` actually holds.
   * @param name   Optional name of the file.
   * @returns The file object.
   */
  export function CifFile(blocks: ArrayLike<CifBlock>, name?: string): CifFile {
      // Previously `blocks as any` let non-array array-likes through, which then
      // failed on the first call to an array method such as `map`.
      const blockArray = Array.isArray(blocks) ? blocks : Array.from(blocks);
      return { name, blocks: blockArray };
  }
  /**
   * A frame of CIF data: a header plus a set of named categories.
   */
  export interface CifFrame {
      /** Header string of the frame. */
      readonly header: string;
      /** Category names stored separately so that the ordering can be preserved. */
      readonly categoryNames: ReadonlyArray<string>;
      /** Categories of the frame, keyed by name. */
      readonly categories: CifCategories;
  }
  /**
   * A data block: a frame that additionally carries a list of save frames.
   */
  export interface CifBlock extends CifFrame {
      /** Save frames belonging to this block. */
      readonly saveFrames: CifFrame[];
  }
  /**
   * Creates a {@link CifBlock}.
   *
   * @param categoryNames Names of the categories, in the order to preserve.
   * @param categories    Categories keyed by name.
   * @param header        Header string of the block.
   * @param saveFrames    Save frames of the block; defaults to a new empty array.
   * @returns The block object; the arguments are stored by reference.
   */
  export function CifBlock(categoryNames: string[], categories: CifCategories, header: string, saveFrames: CifFrame[] = []): CifBlock {
      return {
          categoryNames: categoryNames,
          header: header,
          categories: categories,
          saveFrames: saveFrames
      };
  }
  /**
   * Creates a plain {@link CifFrame} (a frame without nested save frames).
   *
   * @param categoryNames Names of the categories, in the order to preserve.
   * @param categories    Categories keyed by name.
   * @param header        Header string of the frame.
   * @returns The frame object; the arguments are stored by reference.
   */
  export function CifSafeFrame(categoryNames: string[], categories: CifCategories, header: string): CifFrame {
      return {
          categoryNames: categoryNames,
          header: header,
          categories: categories
      };
  }
  /** Read-only dictionary of categories, keyed by category name. */
  export type CifCategories = {
      readonly [name: string]: CifCategory;
  };
  /**
   * A named category: a set of fields that share a row count.
   */
  export interface CifCategory {
      /** Number of rows in the category. */
      readonly rowCount: number;
      /** Name of the category. */
      readonly name: string;
      /** Names of the fields of the category. */
      readonly fieldNames: ReadonlyArray<string>;
      /** Returns the field with the given name, or `undefined` if there is none. */
      getField(name: string): CifField | undefined;
  }
  /**
   * Creates a {@link CifCategory} backed by a dictionary of fields.
   *
   * @param name       Name of the category.
   * @param rowCount   Number of rows in the category.
   * @param fieldNames Names of the fields; copied, so later changes to the
   *                   passed array do not affect the category.
   * @param fields     Fields keyed by name; kept by reference.
   * @returns The category object.
   */
  export function CifCategory(name: string, rowCount: number, fieldNames: string[], fields: { [name: string]: CifField }): CifCategory {
      const hasOwnProperty = Object.prototype.hasOwnProperty;
      return {
          rowCount,
          name,
          fieldNames: [...fieldNames],
          getField(fieldName: string) {
              // Only own properties count as fields. A bare `fields[fieldName]`
              // would return inherited Object.prototype members (e.g. for
              // 'toString' or 'constructor') instead of undefined.
              return hasOwnProperty.call(fields, fieldName) ? fields[fieldName] : void 0;
          }
      };
  }
  /** Helpers for creating {@link CifCategory} instances. */
  export namespace CifCategory {
      /**
       * Creates a category with the given name, zero rows and no fields.
       * Its `getField` always returns `undefined`.
       */
      export function empty(name: string): CifCategory {
          return { rowCount: 0, name, fieldNames: [], getField(_fieldName: string) { return void 0; } };
      }

      /** Maps every key of `S` to an optional {@link CifField}. */
      export type SomeFields<S> = { [P in keyof S]?: CifField }
      /** Maps every key of `S` to a {@link CifField}. */
      export type Fields<S> = { [P in keyof S]: CifField }

      /**
       * Creates a category from a dictionary of (possibly undefined) fields.
       *
       * @param name   Name of the category.
       * @param fields Fields keyed by name; entries may be `undefined`.
       * @returns A category whose `fieldNames` are the own enumerable keys of
       *          `fields` and whose `rowCount` is the row count of the first
       *          defined field (0 when there is none).
       */
      export function ofFields(name: string, fields: { [name: string]: CifField | undefined }): CifCategory {
          const hasOwnProperty = Object.prototype.hasOwnProperty;
          const fieldNames = Object.keys(fields);

          // The parameter type allows undefined entries, so the first key is not
          // guaranteed to hold a field. Asserting it non-null threw a TypeError.
          let rowCount = 0;
          for (const key of fieldNames) {
              const candidate = fields[key];
              if (candidate) {
                  rowCount = candidate.rowCount;
                  break;
              }
          }

          return {
              rowCount,
              name,
              fieldNames,
              getField(fieldName: string) {
                  // Ignore members inherited from Object.prototype.
                  return hasOwnProperty.call(fields, fieldName) ? fields[fieldName] : void 0;
              }
          };
      }
  }
  /**
   * A single field (column) of a category with typed per-row accessors.
   *
   * Implementation note:
   * Always implement without using "this." in any of the interface functions.
   * This is to ensure that the functions can be invoked without having to "bind" them.
   */
  export interface CifField {
      // NOTE(review): presumably the backing array when one is available - confirm.
      // Every factory in this file sets it to undefined.
      readonly __array: ArrayLike<any> | undefined;
      // NOTE(review): presumably the binary CIF encodings of the source data - confirm.
      // Every factory in this file sets it to undefined.
      readonly binaryEncoding: Encoding[] | undefined;
      // Every factory in this file sets this to true.
      readonly isDefined: boolean;
      /** Number of rows in the field. */
      readonly rowCount: number;
      /** Value of the given row as a string. */
      str(row: number): string;
      /** Value of the given row as an integer. */
      int(row: number): number;
      /** Value of the given row as a floating point number. */
      float(row: number): number;
      /** Kind of the value stored in the given row. */
      valueKind(row: number): Column.ValueKind;
      /** Whether the values in the two rows are equal. */
      areValuesEqual(rowA: number, rowB: number): boolean;
      /** All (or, depending on `params`, some) values as strings. */
      toStringArray(params?: Column.ToArrayParams<string>): ReadonlyArray<string>;
      /** All (or, depending on `params`, some) values as integers. */
      toIntArray(params?: Column.ToArrayParams<number>): ReadonlyArray<number>;
      /** All (or, depending on `params`, some) values as floating point numbers. */
      toFloatArray(params?: Column.ToArrayParams<number>): ReadonlyArray<number>;
  }
  /** Factories for {@link CifField} instances backed by different kinds of data. */
  export namespace CifField {
      // Character codes of the single-character values that mark a missing value.
      const DOT_CHAR_CODE = 46; // '.'
      const QUESTION_MARK_CHAR_CODE = 63; // '?'

      /** Classifies a value that consists of exactly one character. */
      function kindOfSingleChar(charCode: number): Column.ValueKind {
          switch (charCode) {
              case DOT_CHAR_CODE: return Column.ValueKind.NotPresent;
              case QUESTION_MARK_CHAR_CODE: return Column.ValueKind.Unknown;
              default: return Column.ValueKind.Present;
          }
      }

      /** Creates a single-row field holding `value`. */
      export function ofString(value: string) {
          return ofStrings([value]);
      }

      /**
       * Creates a field backed by an array of strings.
       *
       * `str` returns '' for empty, '.' and '?' values; `int` and `float` return 0
       * whenever the parsed result is falsy. The row count is taken from
       * `values.length` at creation time.
       */
      export function ofStrings(values: string[]): CifField {
          const rowCount = values.length;

          const str: CifField['str'] = row => {
              const text = values[row];
              return (!text || text === '.' || text === '?') ? '' : text;
          };
          const int: CifField['int'] = row => {
              const text = values[row];
              return fastParseInt(text, 0, text.length) || 0;
          };
          const float: CifField['float'] = row => {
              const text = values[row];
              return fastParseFloat(text, 0, text.length) || 0;
          };
          const valueKind: CifField['valueKind'] = row => {
              const text = values[row];
              const length = text.length;
              if (length === 0) return Column.ValueKind.NotPresent;
              if (length > 1) return Column.ValueKind.Present;
              return kindOfSingleChar(text.charCodeAt(0));
          };

          return {
              __array: void 0,
              binaryEncoding: void 0,
              isDefined: true,
              rowCount,
              str,
              int,
              float,
              valueKind,
              areValuesEqual: (rowA, rowB) => values[rowA] === values[rowB],
              toStringArray: params => ColumnHelpers.createAndFillArray(rowCount, str, params),
              toIntArray: params => ColumnHelpers.createAndFillArray(rowCount, int, params),
              toFloatArray: params => ColumnHelpers.createAndFillArray(rowCount, float, params)
          };
      }

      /**
       * Creates a field backed by an array of numbers.
       *
       * Every row is reported as present. `int` is the same accessor as `float`,
       * i.e. the stored number is returned unchanged.
       */
      export function ofNumbers(values: number[]): CifField {
          const rowCount = values.length;

          const str: CifField['str'] = row => String(values[row]);
          const float: CifField['float'] = row => values[row];
          const valueKind: CifField['valueKind'] = _row => Column.ValueKind.Present;

          return {
              __array: void 0,
              binaryEncoding: void 0,
              isDefined: true,
              rowCount,
              str,
              int: float,
              float,
              valueKind,
              areValuesEqual: (rowA, rowB) => values[rowA] === values[rowB],
              toStringArray: params => ColumnHelpers.createAndFillArray(rowCount, str, params),
              toIntArray: params => ColumnHelpers.createAndFillArray(rowCount, float, params),
              toFloatArray: params => ColumnHelpers.createAndFillArray(rowCount, float, params)
          };
      }

      /**
       * Creates a field backed by tokens.
       *
       * Row `r` is the substring of `tokens.data` between `indices[2 * r]` and
       * `indices[2 * r + 1]`. `str` returns '' for '.' and '?' values; `int` and
       * `float` return 0 whenever the parsed result is falsy.
       */
      export function ofTokens(tokens: Tokens): CifField {
          const data = tokens.data;
          const indices = tokens.indices;
          const rowCount = tokens.count;

          const str: CifField['str'] = row => {
              const text = data.substring(indices[2 * row], indices[2 * row + 1]);
              return (text === '.' || text === '?') ? '' : text;
          };
          const int: CifField['int'] = row =>
              fastParseInt(data, indices[2 * row], indices[2 * row + 1]) || 0;
          const float: CifField['float'] = row =>
              fastParseFloat(data, indices[2 * row], indices[2 * row + 1]) || 0;
          const valueKind: CifField['valueKind'] = row => {
              const start = indices[2 * row];
              const length = indices[2 * row + 1] - start;
              if (length === 0) return Column.ValueKind.NotPresent;
              if (length > 1) return Column.ValueKind.Present;
              return kindOfSingleChar(data.charCodeAt(start));
          };

          return {
              __array: void 0,
              binaryEncoding: void 0,
              isDefined: true,
              rowCount,
              str,
              int,
              float,
              valueKind,
              areValuesEqual: areValuesEqualProvider(tokens),
              toStringArray: params => ColumnHelpers.createAndFillArray(rowCount, str, params),
              toIntArray: params => ColumnHelpers.createAndFillArray(rowCount, int, params),
              toFloatArray: params => ColumnHelpers.createAndFillArray(rowCount, float, params)
          };
      }
  }
  /**
   * Reads a tensor of rank 1, 2 or 3 from one row of a category.
   *
   * Components are looked up in fields named `field[i]`, `field[i][j]` or
   * `field[i][j][k]`. A component whose field does not exist is set to 0.
   *
   * @param category    Category to read the component fields from.
   * @param field       Common prefix of the component field names.
   * @param space       Tensor space used to create and fill the result.
   * @param row         Row to read the component values from.
   * @param zeroIndexed Whether the indices in the field names start at 0 (otherwise at 1).
   * @returns The tensor created by `space.create()` with its components filled in.
   * @throws Error if `space.rank` is not 1, 2 or 3.
   */
  export function getTensor(category: CifCategory, field: string, space: Tensor.Space, row: number, zeroIndexed: boolean): Tensor.Data {
      const tensor = space.create();
      const base = zeroIndexed ? 0 : 1;

      // Value of the named field at `row`, or 0 when the category has no such field.
      const component = (fieldName: string): number => {
          const f = category.getField(fieldName);
          return f ? f.float(row) : 0.0;
      };

      switch (space.rank) {
          case 1: {
              const count = space.dimensions[0];
              for (let i = 0; i < count; i++) {
                  space.set(tensor, i, component(`${field}[${i + base}]`));
              }
              break;
          }
          case 2: {
              const rowCount = space.dimensions[0], colCount = space.dimensions[1];
              for (let i = 0; i < rowCount; i++) {
                  const rowPrefix = `${field}[${i + base}]`;
                  for (let j = 0; j < colCount; j++) {
                      space.set(tensor, i, j, component(`${rowPrefix}[${j + base}]`));
                  }
              }
              break;
          }
          case 3: {
              const n0 = space.dimensions[0], n1 = space.dimensions[1], n2 = space.dimensions[2];
              for (let i = 0; i < n0; i++) {
                  const prefix0 = `${field}[${i + base}]`;
                  for (let j = 0; j < n1; j++) {
                      const prefix1 = `${prefix0}[${j + base}]`;
                      for (let k = 0; k < n2; k++) {
                          space.set(tensor, i, j, k, component(`${prefix1}[${k + base}]`));
                      }
                  }
              }
              break;
          }
          default:
              throw new Error('Tensors with rank > 3 or rank 0 are currently not supported.');
      }
      return tensor;
  }
  /**
   * Infers the column schema (int, float or str) that fits the values of a field.
   *
   * Rows whose value kind is not `Present` are ignored for classification. The
   * result is
   * - `str` if any present value is neither an int nor a float, or if no row has
   *   a present value (this includes a field without rows),
   * - `float` if at least one present value is a float,
   * - `int` otherwise.
   *
   * @param field Field to inspect.
   * @returns The inferred schema.
   */
  export function getCifFieldType(field: CifField): Column.Schema.Int | Column.Schema.Float | Column.Schema.Str {
      const total = field.rowCount;
      let sawFloat = false;
      let missing = 0;

      for (let row = 0; row < total; row++) {
          if (field.valueKind(row) !== Column.ValueKind.Present) {
              missing++;
              continue;
          }
          switch (getNumberType(field.str(row))) {
              case NumberType.Int:
                  break;
              case NumberType.Float:
                  sawFloat = true;
                  break;
              default:
                  // A single non-numeric value makes the whole field a string column.
                  return Column.Schema.str;
          }
      }

      if (missing === total) return Column.Schema.str;
      return sawFloat ? Column.Schema.float : Column.Schema.int;
  }