// parser.ts (8.8 KB)
  1. /**
  2. * Copyright (c) 2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * @author Alexander Rose <alexander.rose@weirdbyte.de>
  5. */
  6. import { ReaderResult as Result } from '../result'
  7. import { Task, RuntimeContext } from 'mol-task'
  8. import { PlyFile, PlyType, PlyElement } from './schema';
  9. import { Tokenizer, TokenBuilder, Tokens } from '../common/text/tokenizer';
  10. import { Column } from 'mol-data/db';
  11. import { TokenColumn } from '../common/text/column/token';
  12. interface State {
  13. data: string
  14. tokenizer: Tokenizer
  15. runtimeCtx: RuntimeContext
  16. comments: string[]
  17. elementSpecs: ElementSpec[]
  18. elements: PlyElement[]
  19. }
  20. function State(data: string, runtimeCtx: RuntimeContext): State {
  21. const tokenizer = Tokenizer(data)
  22. return {
  23. data,
  24. tokenizer,
  25. runtimeCtx,
  26. comments: [],
  27. elementSpecs: [],
  28. elements: []
  29. }
  30. }
  31. type ColumnProperty = { kind: 'column', type: PlyType, name: string }
  32. type ListProperty = { kind: 'list', countType: PlyType, dataType: PlyType, name: string }
  33. type Property = ColumnProperty | ListProperty
  34. type TableElementSpec = { kind: 'table', name: string, count: number, properties: ColumnProperty[] }
  35. type ListElementSpec = { kind: 'list', name: string, count: number, property: ListProperty }
  36. type ElementSpec = TableElementSpec | ListElementSpec
  37. function markHeader(tokenizer: Tokenizer) {
  38. const endHeaderIndex = tokenizer.data.indexOf('end_header', tokenizer.position)
  39. if (endHeaderIndex === -1) throw new Error(`no 'end_header' record found`)
  40. // TODO set `tokenizer.lineNumber` correctly
  41. tokenizer.tokenStart = tokenizer.position
  42. tokenizer.tokenEnd = endHeaderIndex
  43. tokenizer.position = endHeaderIndex
  44. Tokenizer.eatLine(tokenizer)
  45. }
  46. function parseHeader(state: State) {
  47. const { tokenizer, comments, elementSpecs } = state
  48. markHeader(tokenizer)
  49. const headerLines = Tokenizer.getTokenString(tokenizer).split(/\r?\n/)
  50. if (headerLines[0] !== 'ply') throw new Error(`data not starting with 'ply'`)
  51. if (headerLines[1] !== 'format ascii 1.0') throw new Error(`format not 'ascii 1.0'`)
  52. let currentName: string | undefined
  53. let currentCount: number | undefined
  54. let currentProperties: Property[] | undefined
  55. function addCurrentElementSchema() {
  56. if (currentName !== undefined && currentCount !== undefined && currentProperties !== undefined) {
  57. let isList = false
  58. for (let i = 0, il = currentProperties.length; i < il; ++i) {
  59. const p = currentProperties[i]
  60. if (p.kind === 'list') {
  61. isList = true
  62. break
  63. }
  64. }
  65. if (isList && currentProperties.length !== 1) throw new Error('expected single list property')
  66. if (isList) {
  67. elementSpecs.push({
  68. kind: 'list',
  69. name: currentName,
  70. count: currentCount,
  71. property: currentProperties[0] as ListProperty
  72. })
  73. } else {
  74. elementSpecs.push({
  75. kind: 'table',
  76. name: currentName,
  77. count: currentCount,
  78. properties: currentProperties as ColumnProperty[]
  79. })
  80. }
  81. }
  82. }
  83. for (let i = 2, il = headerLines.length; i < il; ++i) {
  84. const l = headerLines[i]
  85. const ls = l.split(' ')
  86. if (l.startsWith('comment')) {
  87. comments.push(l.substr(8))
  88. } else if (l.startsWith('element')) {
  89. addCurrentElementSchema()
  90. currentProperties = []
  91. currentName = ls[1]
  92. currentCount = parseInt(ls[2])
  93. } else if (l.startsWith('property')) {
  94. if (currentProperties === undefined) throw new Error(`properties outside of element`)
  95. if (ls[1] === 'list') {
  96. currentProperties.push({
  97. kind: 'list',
  98. countType: PlyType(ls[2]),
  99. dataType: PlyType(ls[3]),
  100. name: ls[4]
  101. })
  102. } else {
  103. currentProperties.push({
  104. kind: 'column',
  105. type: PlyType(ls[1]),
  106. name: ls[2]
  107. })
  108. }
  109. } else if (l.startsWith('end_header')) {
  110. addCurrentElementSchema()
  111. } else {
  112. console.warn('unknown header line')
  113. }
  114. }
  115. }
  116. function parseElements(state: State) {
  117. const { elementSpecs } = state
  118. for (let i = 0, il = elementSpecs.length; i < il; ++i) {
  119. const spec = elementSpecs[i]
  120. if (spec.kind === 'table') parseTableElement(state, spec)
  121. else if (spec.kind === 'list') parseListElement(state, spec)
  122. }
  123. }
  124. function getColumnSchema(type: PlyType): Column.Schema {
  125. switch (type) {
  126. case 'char': case 'uchar': case 'int8': case 'uint8':
  127. case 'short': case 'ushort': case 'int16': case 'uint16':
  128. case 'int': case 'uint': case 'int32': case 'uint32':
  129. return Column.Schema.int
  130. case 'float': case 'double': case 'float32': case 'float64':
  131. return Column.Schema.float
  132. }
  133. }
  134. function parseTableElement(state: State, spec: TableElementSpec) {
  135. const { elements, tokenizer } = state
  136. const { count, properties } = spec
  137. const propertyCount = properties.length
  138. const propertyNames: string[] = []
  139. const propertyTypes: PlyType[] = []
  140. const propertyTokens: Tokens[] = []
  141. const propertyColumns = new Map<string, Column<number>>()
  142. for (let i = 0, il = propertyCount; i < il; ++i) {
  143. const tokens = TokenBuilder.create(tokenizer.data, count * 2)
  144. propertyTokens.push(tokens)
  145. }
  146. for (let i = 0, il = count; i < il; ++i) {
  147. for (let j = 0, jl = propertyCount; j < jl; ++j) {
  148. Tokenizer.skipWhitespace(tokenizer)
  149. Tokenizer.markStart(tokenizer)
  150. Tokenizer.eatValue(tokenizer)
  151. TokenBuilder.addUnchecked(propertyTokens[j], tokenizer.tokenStart, tokenizer.tokenEnd)
  152. }
  153. }
  154. for (let i = 0, il = propertyCount; i < il; ++i) {
  155. const { type, name } = properties[i]
  156. const column = TokenColumn(propertyTokens[i], getColumnSchema(type))
  157. propertyNames.push(name)
  158. propertyTypes.push(type)
  159. propertyColumns.set(name, column)
  160. }
  161. elements.push({
  162. kind: 'table',
  163. rowCount: count,
  164. propertyNames,
  165. propertyTypes,
  166. getProperty: (name: string) => propertyColumns.get(name)
  167. })
  168. }
  169. function parseListElement(state: State, spec: ListElementSpec) {
  170. const { elements, tokenizer } = state
  171. const { count, property } = spec
  172. // initial tokens size assumes triangle index data
  173. const tokens = TokenBuilder.create(tokenizer.data, count * 2 * 3)
  174. const offsets = new Uint32Array(count + 1)
  175. let entryCount = 0
  176. for (let i = 0, il = count; i < il; ++i) {
  177. // skip over row entry count as it is determined by line break
  178. Tokenizer.skipWhitespace(tokenizer)
  179. Tokenizer.eatValue(tokenizer)
  180. while (Tokenizer.skipWhitespace(tokenizer) !== 10) {
  181. ++entryCount
  182. Tokenizer.markStart(tokenizer)
  183. Tokenizer.eatValue(tokenizer)
  184. TokenBuilder.addToken(tokens, tokenizer)
  185. }
  186. offsets[i + 1] = entryCount
  187. }
  188. // console.log(tokens.indices)
  189. // console.log(offsets)
  190. /** holds row value entries transiently */
  191. const listValue = {
  192. entries: [] as number[],
  193. count: 0
  194. }
  195. const column = TokenColumn(tokens, getColumnSchema(property.dataType))
  196. elements.push({
  197. kind: 'list',
  198. rowCount: count,
  199. name: property.name,
  200. type: property.dataType,
  201. value: (row: number) => {
  202. const start = offsets[row]
  203. const end = offsets[row + 1]
  204. for (let i = start; i < end; ++i) {
  205. listValue.entries[i - start] = column.value(i)
  206. }
  207. listValue.count = end - start
  208. return listValue
  209. }
  210. })
  211. }
  212. async function parseInternal(data: string, ctx: RuntimeContext): Promise<Result<PlyFile>> {
  213. const state = State(data, ctx);
  214. ctx.update({ message: 'Parsing...', current: 0, max: data.length });
  215. parseHeader(state)
  216. // console.log(state.comments)
  217. // console.log(JSON.stringify(state.elementSpecs, undefined, 4))
  218. parseElements(state)
  219. const { elements, elementSpecs, comments } = state
  220. const elementNames = elementSpecs.map(s => s.name)
  221. const result = PlyFile(elements, elementNames, comments)
  222. return Result.success(result);
  223. }
  224. export function parse(data: string) {
  225. return Task.create<Result<PlyFile>>('Parse PLY', async ctx => {
  226. return await parseInternal(data, ctx)
  227. })
  228. }
  229. export default parse;