parser.ts 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320
  1. /**
  2. * Copyright (c) 2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * @author Schäfer, Marco <marco.schaefer@uni-tuebingen.de>
  5. * @author Alexander Rose <alexander.rose@weirdbyte.de>
  6. */
  7. import { Tokens, TokenBuilder, Tokenizer } from '../common/text/tokenizer'
  8. import * as Data from './schema'
  9. import{ ReaderResult } from '../result'
  10. import {Task, RuntimeContext, chunkedSubtask } from 'mol-task'
  11. import { parseInt as fastParseInt, parseFloat as fastParseFloat } from '../common/text/number-parser'
  /**
   * Kind of token the parser state is currently positioned on.
   *
   * Members are numbered consecutively starting at 0. In this file only
   * `Value`, `Comment` and `End` are assigned to the parser state;
   * `property` and `element` are declared but not referenced here.
   */
  const enum PlyTokenType {
      /** A delimited value token (0). */
      Value,
      /** A comment line that is skipped by the caller (1). */
      Comment,
      /** The tokenizer reached the end of the input (2). */
      End,
      /** Declared for property lines (3). */
      property,
      /** Declared for element lines (4). */
      element
  }
  /** Mutable parser state shared by all parsing steps in this file. */
  interface State {
      /** Complete input text. */
      data: string;
      /** Tokenizer created over `data`; holds the read position and the current token range. */
      tokenizer: Tokenizer;
      /** Kind of the most recently classified token. */
      tokenType: PlyTokenType;
      /** Runtime context used for chunking and progress updates. */
      runtimeCtx: RuntimeContext;
      /** One token collection per head entry; value tokens are distributed over them round-robin. */
      tokens: Tokens[];

      /** Number of value tokens added to `tokens` so far. */
      fieldCount: number;
      /** Number of head entries; used as the modulus when distributing tokens. */
      columnCount: number;

      /** Number of property lines counted before a face element was declared. */
      propertyCount: number;
      /** Count read from the `element vertex <n>` line. */
      vertexCount: number;
      /** Index of the vertex currently being filled. */
      currentVertex: number;
      /** Index of the property currently being filled for `currentVertex`. */
      currentProperty: number;
      /** Index of the face currently being filled. */
      currentFace: number;
      /** Index of the element currently being filled for `currentFace`. */
      currentFaceElement: number;
      /** Count read from the `element face <n>` line. */
      faceCount: number;
      /** Set to 1 once the `end_header` line has been read, 0 before. */
      endHeader: number;

      /** Token strings collected by `init` from the start of the file. */
      initialHead: string[];
      /** All vertex property values, laid out as `vertex * propertyCount + property`. */
      properties: number[];
      /** Values of properties 0..2 of every vertex, 3 entries per vertex. */
      vertices: number[];
      /** Values of properties 3..5 of every vertex, 3 entries per vertex. */
      colors: number[];
      /** Values of properties 7..9 of every vertex, 3 entries per vertex. */
      normals: number[];
      /** Face values, written with a stride of 4 entries per face. */
      faces: number[];
      /** Second and third column of every counted property line, pushed consecutively. */
      propertyNames: string[];
      /** Columns of the most recently read property/element line. */
      check: string[];

      /** Char code that marks a comment line. */
      commentCharCode: number;
      /** Char code that marks a property line. */
      propertyCharCode: number;
      /** Char code that marks an element line. */
      elementCharCode: number;
  }
  /**
   * Creates the initial parser state for the given input.
   *
   * @param data Complete input text.
   * @param runtimeCtx Runtime context stored on the state.
   * @param opts Marker strings; only the first character of each is used.
   * @returns A state with all counters at zero, all arrays empty and the token type set to `End`.
   */
  function State(data: string, runtimeCtx: RuntimeContext, opts: PlyOptions): State {
      const tokenizer = Tokenizer(data);
      const firstCharCode = (marker: string) => marker.charCodeAt(0);

      const initial: State = {
          data,
          tokenizer,
          tokenType: PlyTokenType.End,
          runtimeCtx,
          tokens: [],

          fieldCount: 0,
          columnCount: 0,

          propertyCount: 0,
          vertexCount: 0,
          currentVertex: 0,
          currentProperty: 0,
          currentFace: 0,
          currentFaceElement: 0,
          faceCount: 0,
          endHeader: 0,

          initialHead: [],
          properties: [],
          vertices: [],
          colors: [],
          normals: [],
          faces: [],
          propertyNames: [],
          check: [],

          commentCharCode: firstCharCode(opts.comment),
          propertyCharCode: firstCharCode(opts.property),
          elementCharCode: firstCharCode(opts.element)
      };
      return initial;
  }
  /**
   * Eats one value: advances `state.position` until a space or a line break
   * has been consumed, or the input ends.
   *
   * `state.tokenEnd` is incremented once for every consumed character that is
   * neither a tab, a space nor a line break, so tabs directly before the
   * terminator are trimmed from the token range.
   *
   * A "\r\n" pair is consumed as a single line break so that the "\n" is not
   * picked up as an empty value by the next call.
   *
   * @param state Tokenizer whose `position` and `tokenEnd` are advanced in place.
   * @returns `true` when the value was terminated by a space or a line break,
   *          `false` when the end of the input was reached first.
   */
  function eatValue(state: Tokenizer) {
      while (state.position < state.length) {
          const c = state.data.charCodeAt(state.position);
          ++state.position;
          switch (c) {
              case 13: // \r
                  // swallow the "\n" of a Windows line ending
                  if (state.position < state.length && state.data.charCodeAt(state.position) === 10) {
                      ++state.position;
                  }
                  return true;
              case 10: // \n
              case 32: // ' ' is the value delimiter in ASCII ply
                  return true;
              case 9: // \t is skipped without extending the token
                  break;
              default:
                  ++state.tokenEnd;
                  break;
          }
      }
      return false;
  }
  /**
   * Eats the rest of the current line, including its line break.
   *
   * `state.tokenEnd` is incremented for every consumed character except tabs
   * and the line break, so spaces stay part of the token. A "\r\n" pair is
   * consumed as a single line break so that the "\n" does not show up as an
   * empty token afterwards.
   *
   * @param state Tokenizer whose `position` and `tokenEnd` are advanced in place.
   * @returns `true` when a line break was consumed, `false` when the input
   *          ended before one was found.
   */
  function eatLine(state: Tokenizer) {
      while (state.position < state.length) {
          const c = state.data.charCodeAt(state.position);
          ++state.position;
          switch (c) {
              case 13: // \r
                  // swallow the "\n" of a Windows line ending
                  if (state.position < state.length && state.data.charCodeAt(state.position) === 10) {
                      ++state.position;
                  }
                  return true;
              case 10: // \n
                  return true;
              case 9: // \t is skipped without extending the token
                  break;
              default:
                  ++state.tokenEnd;
                  break;
          }
      }
      return false;
  }
  /**
   * Moves `state.position` forward to the next "\n" or "\r", or to the end of
   * the input when there is none. The line break itself is not consumed and
   * the token range is left untouched.
   */
  function skipLine(state: Tokenizer) {
      const { data, length } = state;
      let cursor = state.position;
      while (cursor < length) {
          const code = data.charCodeAt(cursor);
          if (code === 10 || code === 13) break; // \n or \r
          ++cursor;
      }
      state.position = cursor;
  }
  /**
   * Consumes the rest of the current line and splits its token string on
   * single spaces.
   *
   * @param numberOfColumns Maximum number of columns to return (the limit passed to `split`).
   * @returns At most `numberOfColumns` leading columns of the line.
   */
  function getColumns(state: State, numberOfColumns: number) {
      const { tokenizer } = state;
      eatLine(tokenizer);
      return Tokenizer.getTokenString(tokenizer).split(' ', numberOfColumns);
  }
  /**
   * Moves to the next token or header line and updates the parser state.
   *
   * What happens depends on the first character at the current position:
   * - end of input: the token type becomes `End`;
   * - comment char: the token type becomes `Comment` and the line is skipped
   *   up to (not including) its line break;
   * - property char: the whole line is consumed; unless its first column is
   *   'ply' or a face count has already been read, columns 2 and 3 are
   *   recorded in `propertyNames` and `propertyCount` is incremented;
   * - element char: the whole line is consumed; 'vertex' / 'face' in column 2
   *   set the respective count from column 3, and 'end_header' in column 1
   *   marks the end of the header;
   * - anything else: one value token is consumed. After the header, values
   *   first fill the vertex arrays and, once all vertices are read, the face
   *   array.
   *
   * @returns `true` at the end of the input, or when the consumed value token
   *          was terminated by a space or a line break; `false` otherwise
   *          (comment, property and element lines always yield `false`).
   */
  function moveNextInternal(state: State): boolean {
      const tokenizer = state.tokenizer;

      if (tokenizer.position >= tokenizer.length) {
          state.tokenType = PlyTokenType.End;
          return true;
      }

      tokenizer.tokenStart = tokenizer.position;
      tokenizer.tokenEnd = tokenizer.position;
      const c = state.data.charCodeAt(tokenizer.position);
      switch (c) {
          case state.commentCharCode:
              state.tokenType = PlyTokenType.Comment;
              skipLine(tokenizer);
              return false;
          case state.propertyCharCode: // every line beginning with the property char ('p' by default)
              state.check = getColumns(state, 3);
              if (state.check[0] !== 'ply' && state.faceCount === 0) {
                  state.propertyNames.push(state.check[1]);
                  state.propertyNames.push(state.check[2]);
                  state.propertyCount++;
              }
              return false;
          case state.elementCharCode: // every line beginning with the element char ('e' by default)
              state.check = getColumns(state, 3);
              if (state.check[1] === 'vertex') state.vertexCount = Number(state.check[2]);
              if (state.check[1] === 'face') state.faceCount = Number(state.check[2]);
              if (state.check[0] === 'end_header') state.endHeader = 1;
              return false;
          default: { // all other tokens
              state.tokenType = PlyTokenType.Value;
              const isRecordEnd = eatValue(tokenizer) === true;

              // Header values carry no vertex/face data. Zero-length tokens
              // (e.g. from a trailing space or a blank line) are not values
              // either and must not shift the vertex/face cursors.
              if (state.endHeader !== 1 || tokenizer.tokenStart === tokenizer.tokenEnd) {
                  return isRecordEnd;
              }

              const { data, tokenStart, tokenEnd } = tokenizer;

              if (state.currentVertex < state.vertexCount) {
                  const vertex = state.currentVertex;
                  const property = state.currentProperty;

                  // TODO the numbers are parsed twice
                  state.properties[vertex * state.propertyCount + property] = Number(Tokenizer.getTokenString(tokenizer));

                  // NOTE(review): the fixed index ranges below presumably assume
                  // the property order x y z, r g b, alpha, nx ny nz - confirm.
                  if (property < 3) {
                      state.vertices[vertex * 3 + property] = fastParseFloat(data, tokenStart, tokenEnd);
                  } else if (property < 6) {
                      state.colors[vertex * 3 + property - 3] = fastParseInt(data, tokenStart, tokenEnd);
                  } else if (property >= 7 && property < 10) {
                      state.normals[vertex * 3 + property - 7] = fastParseFloat(data, tokenStart, tokenEnd);
                  }

                  state.currentProperty++;
                  if (state.currentProperty === state.propertyCount) {
                      state.currentProperty = 0;
                      state.currentVertex++;
                  }
                  return isRecordEnd;
              }

              if (state.currentFace < state.faceCount && state.currentVertex === state.vertexCount) {
                  // NOTE(review): a stride of 4 presumably means one count plus
                  // three vertex indices per face (triangles only) - confirm.
                  state.faces[state.currentFace * 4 + state.currentFaceElement] = fastParseInt(data, tokenStart, tokenEnd);
                  state.currentFaceElement++;
                  // advance to the next face once all 4 entries of this one are stored
                  if (state.currentFaceElement === 4) {
                      state.currentFaceElement = 0;
                      state.currentFace++;
                  }
              }
              return isRecordEnd;
          }
      }
  }
  /**
   * Moves to the next token/line, repeating the step for as long as the
   * resulting token type is `Comment`.
   *
   * @returns The result of the last `moveNextInternal` call.
   */
  function moveNext(state: State) {
      let newRecord;
      do {
          newRecord = moveNextInternal(state);
      } while (state.tokenType === PlyTokenType.Comment); // skip comment lines
      return newRecord;
  }
  /**
   * Processes up to `chunkSize` value tokens.
   *
   * Every value token's range is added to the token collection selected by
   * `fieldCount % columnCount`, after which the parser moves to the next token.
   *
   * @param chunkSize Maximum number of tokens to process in this call.
   * @returns The number of tokens processed; 0 when the state is already at `End`.
   */
  function readRecordsChunk(chunkSize: number, state: State) {
      if (state.tokenType === PlyTokenType.End) return 0;

      moveNext(state);

      const { tokens, tokenizer } = state;
      let processed = 0;
      for (; state.tokenType === PlyTokenType.Value && processed < chunkSize; ++processed) {
          const column = tokens[state.fieldCount % state.columnCount];
          TokenBuilder.add(column, tokenizer.tokenStart, tokenizer.tokenEnd);
          ++state.fieldCount;
          moveNext(state);
      }
      return processed;
  }
  /**
   * Runs `readRecordsChunk` through `chunkedSubtask` with a chunk size of
   * 100000, reporting the tokenizer position against the input length.
   */
  function readRecordsChunks(state: State) {
      const reportProgress = (ctx: RuntimeContext, current: State) => ctx.update({
          message: 'Parsing...',
          current: current.tokenizer.position,
          max: current.data.length
      });
      return chunkedSubtask(state.runtimeCtx, 100000, state, readRecordsChunk, reportProgress);
  }
  /**
   * Records the current token string as a head entry and creates a matching
   * token collection for it (created with `data.length / 80` as second argument).
   */
  function addHeadEntry(state: State) {
      const { tokenizer, initialHead, tokens, data } = state;
      const entry = Tokenizer.getTokenString(tokenizer);
      initialHead.push(entry);
      tokens.push(TokenBuilder.create(entry, data.length / 80));
  }
  /**
   * Reads the head entries at the start of the file and validates them.
   *
   * Two rounds are read; in each round every token for which `moveNext`
   * reports a falsy result is recorded, followed by the first token for which
   * it reports a truthy one. The first entry must be 'ply' and the third
   * entry must be 'ascii'. On success `columnCount` is set to the number of
   * recorded entries.
   *
   * @returns Always 1; failures are reported by throwing.
   * @throws Error when the first entry is not 'ply' or the third is not 'ascii'.
   */
  function init(state: State) {
      for (let round = 0; round < 2; ++round) {
          while (!moveNext(state)) {
              addHeadEntry(state);
          }
          addHeadEntry(state);
      }

      if (state.initialHead[0] !== 'ply') {
          console.log('ERROR: this is not a .ply file!');
          throw new Error('this is not a .ply file!');
      }
      if (state.initialHead[2] !== 'ascii') {
          console.log('ERROR: only ASCII-DECODING is supported!');
          throw new Error('only ASCII-DECODING is supported!');
      }

      state.columnCount = state.initialHead.length;
      return 1;
  }
  /**
   * Validates the head of the file, parses all remaining tokens in chunks and
   * assembles the collected counts and arrays into a `Data.PlyData`.
   *
   * @throws Error when `init` throws or returns a falsy value.
   */
  async function handleRecords(state: State): Promise<Data.PlyData> {
      if (!init(state)) {
          console.log('ERROR: parsing file (PLY) failed!');
          throw new Error('parsing file (PLY) failed!');
      }
      await readRecordsChunks(state);

      return Data.PlyData(
          state.vertexCount,
          state.faceCount,
          state.propertyCount,
          state.initialHead,
          state.propertyNames,
          state.properties,
          state.vertices,
          state.colors,
          state.normals,
          state.faces
      );
  }
  /**
   * Creates a fresh parser state for `data`, posts an initial progress update
   * and wraps the parsed records in a successful `ReaderResult`.
   */
  async function parseInternal(data: string, ctx: RuntimeContext, opts: PlyOptions): Promise<ReaderResult<Data.PlyFile>> {
      const state = State(data, ctx, opts);
      ctx.update({ message: 'Parsing...', current: 0, max: data.length });

      const records = await handleRecords(state);
      return ReaderResult.success(Data.PlyFile(records));
  }
  /**
   * Marker strings recognised by the parser. Only the first character of each
   * string is used when the parser state is created.
   */
  interface PlyOptions {
      /** Marks comment lines. */
      comment: string;
      /** Marks property lines. */
      property: string;
      /** Marks element lines. */
      element: string;
  }
  /**
   * Creates a task that parses PLY text.
   *
   * @param data Complete input text.
   * @param opts Optional overrides for the marker strings; the defaults are
   *             'c' (comment), 'p' (property) and 'e' (element).
   * @returns A 'Parse PLY' task resolving to the reader result.
   */
  export function parse(data: string, opts?: Partial<PlyOptions>) {
      const defaults = { comment: 'c', property: 'p', element: 'e' };
      const completeOpts = Object.assign({}, defaults, opts);

      return Task.create<ReaderResult<Data.PlyFile>>('Parse PLY', async ctx => {
          const result = await parseInternal(data, ctx, completeOpts);
          return result;
      });
  }
  export default parse;