reader.ts 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527
/**
 * Copyright (c) 2022 mol* contributors, licensed under MIT, See LICENSE file for more info.
 *
 * Adapted from https://github.com/cheminfo-js/netcdfjs
 * MIT License, Copyright (c) 2016 cheminfo
 *
 * @author Alexander Rose <alexander.rose@weirdbyte.de>
 */
  9. import { IOBuffer } from '../io-buffer';
  10. export interface NetCDFRecordDimension {
  11. length: number,
  12. id?: number,
  13. name?: string,
  14. recordStep?: number
  15. }
  16. export interface NetCDFVariable {
  17. name: string
  18. dimensions: any[]
  19. attributes: any[]
  20. type: string
  21. size: number
  22. offset: number
  23. record: boolean
  24. }
  25. export interface NetCDFHeader {
  26. recordDimension: NetCDFRecordDimension,
  27. version: number,
  28. dimensions: { name: string, size: number }[],
  29. globalAttributes: { name: string, type: string, value: string | number }[],
  30. variables: NetCDFVariable[]
  31. }
  32. export interface NetCDFDimension {
  33. name: string,
  34. size: number
  35. }
  36. /**
  37. * Throws a non-valid NetCDF exception if the statement it's true
  38. */
  39. function notNetcdf(statement: boolean, reason: string) {
  40. if (statement) {
  41. throw new TypeError('Not a valid NetCDF v3.x file: ' + reason);
  42. }
  43. }
  44. /**
  45. * Moves 1, 2, or 3 bytes to next 4-byte boundary
  46. */
  47. function padding(buffer: IOBuffer) {
  48. if ((buffer.offset % 4) !== 0) {
  49. buffer.skip(4 - (buffer.offset % 4));
  50. }
  51. }
  52. /**
  53. * Reads the name
  54. */
  55. function readName(buffer: IOBuffer) {
  56. // Read name
  57. const nameLength = buffer.readUint32();
  58. const name = buffer.readChars(nameLength);
  59. // validate name
  60. // TODO
  61. // Apply padding
  62. padding(buffer);
  63. return name;
  64. }
  65. const types = {
  66. BYTE: 1,
  67. CHAR: 2,
  68. SHORT: 3,
  69. INT: 4,
  70. FLOAT: 5,
  71. DOUBLE: 6
  72. };
  73. /**
  74. * Parse a number into their respective type
  75. */
  76. function num2str(type: number) {
  77. switch (Number(type)) {
  78. case types.BYTE:
  79. return 'byte';
  80. case types.CHAR:
  81. return 'char';
  82. case types.SHORT:
  83. return 'short';
  84. case types.INT:
  85. return 'int';
  86. case types.FLOAT:
  87. return 'float';
  88. case types.DOUBLE:
  89. return 'double';
  90. default:
  91. return 'undefined';
  92. }
  93. }
  94. /**
  95. * Parse a number type identifier to his size in bytes
  96. */
  97. function num2bytes(type: number) {
  98. switch (Number(type)) {
  99. case types.BYTE:
  100. return 1;
  101. case types.CHAR:
  102. return 1;
  103. case types.SHORT:
  104. return 2;
  105. case types.INT:
  106. return 4;
  107. case types.FLOAT:
  108. return 4;
  109. case types.DOUBLE:
  110. return 8;
  111. default:
  112. return -1;
  113. }
  114. }
  115. /**
  116. * Reverse search of num2str
  117. */
  118. function str2num(type: string) {
  119. switch (String(type)) {
  120. case 'byte':
  121. return types.BYTE;
  122. case 'char':
  123. return types.CHAR;
  124. case 'short':
  125. return types.SHORT;
  126. case 'int':
  127. return types.INT;
  128. case 'float':
  129. return types.FLOAT;
  130. case 'double':
  131. return types.DOUBLE;
  132. default:
  133. return -1;
  134. }
  135. }
  136. /**
  137. * Auxiliary function to read numeric data
  138. */
  139. function readNumber(size: number, bufferReader: Function) {
  140. if (size !== 1) {
  141. const numbers = new Array(size);
  142. for (let i = 0; i < size; i++) {
  143. numbers[i] = bufferReader();
  144. }
  145. return numbers;
  146. } else {
  147. return bufferReader();
  148. }
  149. }
  150. /**
  151. * Given a type and a size reads the next element
  152. */
  153. function readType(buffer: IOBuffer, type: number, size: number) {
  154. switch (type) {
  155. case types.BYTE:
  156. return buffer.readBytes(size);
  157. case types.CHAR:
  158. return trimNull(buffer.readChars(size));
  159. case types.SHORT:
  160. return readNumber(size, buffer.readInt16.bind(buffer));
  161. case types.INT:
  162. return readNumber(size, buffer.readInt32.bind(buffer));
  163. case types.FLOAT:
  164. return readNumber(size, buffer.readFloat32.bind(buffer));
  165. case types.DOUBLE:
  166. return readNumber(size, buffer.readFloat64.bind(buffer));
  167. default:
  168. notNetcdf(true, 'non valid type ' + type);
  169. return undefined;
  170. }
  171. }
  172. /**
  173. * Removes null terminate value
  174. */
  175. function trimNull(value: string) {
  176. if (value.charCodeAt(value.length - 1) === 0) {
  177. return value.substring(0, value.length - 1);
  178. }
  179. return value;
  180. }
  181. // const STREAMING = 4294967295;
  182. /**
  183. * Read data for the given non-record variable
  184. */
  185. function nonRecord(buffer: IOBuffer, variable: { type: string, size: number }) {
  186. // variable type
  187. const type = str2num(variable.type);
  188. // size of the data
  189. const size = variable.size / num2bytes(type);
  190. // iterates over the data
  191. const data = new Array(size);
  192. for (let i = 0; i < size; i++) {
  193. data[i] = readType(buffer, type, 1);
  194. }
  195. return data;
  196. }
  197. /**
  198. * Read data for the given record variable
  199. */
  200. function record(buffer: IOBuffer, variable: { type: string, size: number }, recordDimension: NetCDFRecordDimension) {
  201. // variable type
  202. const type = str2num(variable.type);
  203. const width = variable.size ? variable.size / num2bytes(type) : 1;
  204. // size of the data
  205. // TODO streaming data
  206. const size = recordDimension.length;
  207. // iterates over the data
  208. const data = new Array(size);
  209. const step = recordDimension.recordStep;
  210. for (let i = 0; i < size; i++) {
  211. const currentOffset = buffer.offset;
  212. data[i] = readType(buffer, type, width);
  213. buffer.seek(currentOffset + step!);
  214. }
  215. return data;
  216. }
  217. // Grammar constants
  218. const ZERO = 0;
  219. const NC_DIMENSION = 10;
  220. const NC_VARIABLE = 11;
  221. const NC_ATTRIBUTE = 12;
  222. /**
  223. * Read the header of the file
  224. * Returns object with the fields:
  225. * - `recordDimension`: Number with the length of record dimension
  226. * - `dimensions`: List of dimensions
  227. * - `globalAttributes`: List of global attributes
  228. * - `variables`: List of variables
  229. */
  230. function header(buffer: IOBuffer, version: number) {
  231. // Length of record dimension
  232. // sum of the varSize's of all the record variables.
  233. const header: Partial<NetCDFHeader> = { recordDimension: { length: buffer.readUint32() } };
  234. // Version
  235. header.version = version;
  236. // List of dimensions
  237. const dimList = dimensionsList(buffer) as { dimensions: NetCDFDimension[], recordId: number, recordName: string };
  238. header.recordDimension!.id = dimList.recordId;
  239. header.recordDimension!.name = dimList.recordName;
  240. header.dimensions = dimList.dimensions;
  241. // List of global attributes
  242. header.globalAttributes = attributesList(buffer);
  243. // List of variables
  244. const variables = variablesList(buffer, dimList.recordId, version) as { variables: any[], recordStep: number };
  245. header.variables = variables.variables;
  246. header.recordDimension!.recordStep = variables.recordStep;
  247. return header;
  248. }
  249. /**
  250. * List of dimensions
  251. */
  252. function dimensionsList(buffer: IOBuffer) {
  253. let dimensions: NetCDFDimension[], recordId, recordName;
  254. const dimList = buffer.readUint32();
  255. if (dimList === ZERO) {
  256. notNetcdf((buffer.readUint32() !== ZERO), 'wrong empty tag for list of dimensions');
  257. return [];
  258. } else {
  259. notNetcdf((dimList !== NC_DIMENSION), 'wrong tag for list of dimensions');
  260. // Length of dimensions
  261. const dimensionSize = buffer.readUint32();
  262. dimensions = new Array(dimensionSize);
  263. for (let dim = 0; dim < dimensionSize; dim++) {
  264. // Read name
  265. const name = readName(buffer);
  266. // Read dimension size
  267. const size = buffer.readUint32();
  268. if (size === 0) {
  269. recordId = dim;
  270. recordName = name;
  271. }
  272. dimensions[dim] = {
  273. name: name,
  274. size: size
  275. };
  276. }
  277. return {
  278. dimensions: dimensions,
  279. recordId: recordId,
  280. recordName: recordName
  281. };
  282. }
  283. }
  284. /**
  285. * List of attributes
  286. */
  287. function attributesList(buffer: IOBuffer) {
  288. let attributes: { name: string, type: ReturnType<typeof num2str>, value: any }[];
  289. const gAttList = buffer.readUint32();
  290. if (gAttList === ZERO) {
  291. notNetcdf((buffer.readUint32() !== ZERO), 'wrong empty tag for list of attributes');
  292. return [];
  293. } else {
  294. notNetcdf((gAttList !== NC_ATTRIBUTE), 'wrong tag for list of attributes');
  295. // Length of attributes
  296. const attributeSize = buffer.readUint32();
  297. attributes = new Array(attributeSize);
  298. for (let gAtt = 0; gAtt < attributeSize; gAtt++) {
  299. // Read name
  300. const name = readName(buffer);
  301. // Read type
  302. const type = buffer.readUint32();
  303. notNetcdf(((type < 1) || (type > 6)), 'non valid type ' + type);
  304. // Read attribute
  305. const size = buffer.readUint32();
  306. const value = readType(buffer, type, size);
  307. // Apply padding
  308. padding(buffer);
  309. attributes[gAtt] = {
  310. name: name,
  311. type: num2str(type),
  312. value: value
  313. };
  314. }
  315. }
  316. return attributes;
  317. }
  318. /**
  319. * List of variables
  320. */
  321. function variablesList(buffer: IOBuffer, recordId: number, version: number) {
  322. const varList = buffer.readUint32();
  323. let recordStep = 0;
  324. let variables;
  325. if (varList === ZERO) {
  326. notNetcdf(
  327. (buffer.readUint32() !== ZERO),
  328. 'wrong empty tag for list of variables'
  329. );
  330. return [];
  331. } else {
  332. notNetcdf((varList !== NC_VARIABLE), 'wrong tag for list of variables');
  333. // Length of variables
  334. const variableSize = buffer.readUint32();
  335. variables = new Array(variableSize);
  336. for (let v = 0; v < variableSize; v++) {
  337. // Read name
  338. const name = readName(buffer);
  339. // Read dimensionality of the variable
  340. const dimensionality = buffer.readUint32();
  341. // Index into the list of dimensions
  342. const dimensionsIds = new Array(dimensionality);
  343. for (let dim = 0; dim < dimensionality; dim++) {
  344. dimensionsIds[dim] = buffer.readUint32();
  345. }
  346. // Read variables size
  347. const attributes = attributesList(buffer);
  348. // Read type
  349. const type = buffer.readUint32();
  350. notNetcdf(((type < 1) && (type > 6)), 'non valid type ' + type);
  351. // Read variable size
  352. // The 32-bit varSize field is not large enough to contain the
  353. // size of variables that require more than 2^32 - 4 bytes,
  354. // so 2^32 - 1 is used in the varSize field for such variables.
  355. const varSize = buffer.readUint32();
  356. // Read offset
  357. let offset = buffer.readUint32();
  358. if (version === 2) {
  359. notNetcdf((offset > 0), 'offsets larger than 4GB not supported');
  360. offset = buffer.readUint32();
  361. }
  362. // Count amount of record variables
  363. if (dimensionsIds[0] === recordId) {
  364. recordStep += varSize;
  365. }
  366. variables[v] = {
  367. name: name,
  368. dimensions: dimensionsIds,
  369. attributes: attributes,
  370. type: num2str(type),
  371. size: varSize,
  372. offset: offset,
  373. record: (dimensionsIds[0] === recordId)
  374. };
  375. }
  376. }
  377. return {
  378. variables: variables,
  379. recordStep: recordStep
  380. };
  381. }
  382. /**
  383. * Reads a NetCDF v3.x file
  384. * https://www.unidata.ucar.edu/software/netcdf/docs/file_format_specifications.html
  385. */
  386. export class NetcdfReader {
  387. header: Partial<NetCDFHeader>;
  388. buffer: IOBuffer;
  389. constructor(data: ArrayBuffer) {
  390. const buffer = new IOBuffer(data);
  391. buffer.setBigEndian();
  392. // Validate that it's a NetCDF file
  393. notNetcdf((buffer.readChars(3) !== 'CDF'), 'should start with CDF');
  394. // Check the NetCDF format
  395. const version = buffer.readByte();
  396. notNetcdf((version > 2), 'unknown version');
  397. // Read the header
  398. this.header = header(buffer, version);
  399. this.buffer = buffer;
  400. }
  401. /**
  402. * Version for the NetCDF format
  403. */
  404. get version() {
  405. if (this.header.version === 1) {
  406. return 'classic format';
  407. } else {
  408. return '64-bit offset format';
  409. }
  410. }
  411. get recordDimension() {
  412. return this.header.recordDimension;
  413. }
  414. get dimensions() {
  415. return this.header.dimensions;
  416. }
  417. get globalAttributes() {
  418. return this.header.globalAttributes;
  419. }
  420. get variables() {
  421. return this.header.variables;
  422. }
  423. /**
  424. * Checks if a variable is available
  425. * @param {string|object} variableName - Name of the variable to check
  426. * @return {Boolean} - Variable existence
  427. */
  428. hasDataVariable(variableName: string) {
  429. return this.header.variables && this.header.variables.findIndex(val => val.name === variableName) !== -1;
  430. }
  431. /**
  432. * Retrieves the data for a given variable
  433. * @param {string|object} variableName - Name of the variable to search or variable object
  434. * @return {Array} - List with the variable values
  435. */
  436. getDataVariable(variableName: string | NetCDFVariable) {
  437. let variable: NetCDFVariable | undefined;
  438. if (typeof variableName === 'string') {
  439. // search the variable
  440. variable = this.header.variables?.find((val) => val.name === variableName);
  441. } else {
  442. variable = variableName;
  443. }
  444. // throws if variable not found
  445. if (variable === undefined) throw new Error('variable not found');
  446. // go to the offset position
  447. this.buffer.seek(variable.offset);
  448. if (variable.record) {
  449. // record variable case
  450. return record(this.buffer, variable, this.header.recordDimension!);
  451. } else {
  452. // non-record variable case
  453. return nonRecord(this.buffer, variable);
  454. }
  455. }
  456. }