parser.ts 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263
  1. /**
  2. * Copyright (c) 2019-2020 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * @author Alexander Rose <alexander.rose@weirdbyte.de>
  5. */
  6. import { ReaderResult as Result } from '../result'
  7. import { Task, RuntimeContext } from '../../../mol-task'
  8. import { PlyFile, PlyType, PlyElement } from './schema';
  9. import { Tokenizer, TokenBuilder, Tokens } from '../common/text/tokenizer';
  10. import { Column } from '../../../mol-data/db';
  11. import { TokenColumn } from '../common/text/column/token';
  12. // TODO add support for binary ply files
  13. // TODO parse elements asynchronously
  14. // TODO handle lists with appended properties
  /**
   * Mutable bookkeeping shared by all steps of parsing a single PLY input.
   */
  interface State {
    /** The complete input text. */
    data: string;
    /** Tokenizer created over `data`. */
    tokenizer: Tokenizer;
    /** Runtime context handed in by the caller. */
    runtimeCtx: RuntimeContext;
    /** Text of the `comment` header lines, in the order they were read. */
    comments: string[];
    /** Element descriptions collected while reading the header. */
    elementSpecs: ElementSpec[];
    /** Parsed elements, appended while the element specs are processed. */
    elements: PlyElement[];
  }

  /**
   * Creates a fresh parsing state: a new tokenizer over `data` plus empty
   * comment, element-spec and element lists.
   *
   * @param data the complete input text
   * @param runtimeCtx runtime context stored on the state
   */
  function State(data: string, runtimeCtx: RuntimeContext): State {
    return {
      data,
      tokenizer: Tokenizer(data),
      runtimeCtx,
      comments: [],
      elementSpecs: [],
      elements: [],
    };
  }
  /** Header description of a scalar property (`property <type> <name>`). */
  type ColumnProperty = {
    kind: 'column';
    type: PlyType;
    name: string;
  };

  /** Header description of a list property (`property list <countType> <dataType> <name>`). */
  type ListProperty = {
    kind: 'list';
    countType: PlyType;
    dataType: PlyType;
    name: string;
  };

  /** Any property that may follow an `element` header line. */
  type Property = ColumnProperty | ListProperty;

  /** An element made up of scalar properties only; parsed into one column per property. */
  type TableElementSpec = {
    kind: 'table';
    name: string;
    count: number;
    properties: ColumnProperty[];
  };

  /** An element that is read through a single list property. */
  type ListElementSpec = {
    kind: 'list';
    name: string;
    count: number;
    property: ListProperty;
  };

  /** Description of one element as declared in the header. */
  type ElementSpec = TableElementSpec | ListElementSpec;
  /**
   * Locates the `end_header` record and marks the header text as the
   * tokenizer's current token.
   *
   * The token start is set to the tokenizer's current position; token end and
   * position are set to the index of `end_header`, after which
   * `Tokenizer.eatLine` is invoked (presumably consuming the `end_header`
   * line itself — confirm against the tokenizer implementation).
   *
   * @param tokenizer tokenizer positioned at the start of the header
   * @throws Error when the data contains no `end_header` at or after the
   *   current position
   */
  function markHeader(tokenizer: Tokenizer) {
    const headerStart = tokenizer.position;
    const endHeaderIndex = tokenizer.data.indexOf('end_header', headerStart);
    if (endHeaderIndex < 0) {
      throw new Error(`no 'end_header' record found`);
    }

    // TODO set `tokenizer.lineNumber` correctly
    tokenizer.tokenStart = headerStart;
    tokenizer.tokenEnd = endHeaderIndex;
    tokenizer.position = endHeaderIndex;
    Tokenizer.eatLine(tokenizer);
  }
  /**
   * Reads the PLY header and fills `state.comments` and `state.elementSpecs`.
   *
   * The header must start with the lines `ply` and `format ascii 1.0`. Every
   * following line is dispatched on its leading keyword:
   * - `comment`: the text after the keyword is appended to `state.comments`
   * - `element <name> <count>`: finishes the previous element and starts a new one
   * - `property ...`: adds a scalar or list property to the current element
   * - `end_header`: finishes the last element
   * Any other line only triggers a console warning.
   *
   * An element containing a list property becomes a list spec built from that
   * list property; all other elements become table specs.
   *
   * @param state parsing state whose tokenizer is positioned at the start of the data
   * @throws Error if the magic/format lines are wrong, if an `element` line has
   *   no name or no valid non-negative count, or if a `property` line appears
   *   before any `element` line (plus whatever `markHeader` throws)
   */
  function parseHeader(state: State) {
    const { tokenizer, comments, elementSpecs } = state;

    markHeader(tokenizer);
    const headerLines = Tokenizer.getTokenString(tokenizer).split(/\r?\n/);

    if (headerLines[0] !== 'ply') throw new Error(`data not starting with 'ply'`);
    if (headerLines[1] !== 'format ascii 1.0') throw new Error(`format not 'ascii 1.0'`);

    let currentName: string | undefined;
    let currentCount: number | undefined;
    let currentProperties: Property[] | undefined;

    /** Turns the element collected so far (if any) into an element spec. */
    function addCurrentElementSchema() {
      if (currentName === undefined || currentCount === undefined || currentProperties === undefined) {
        return;
      }

      // Use the list property itself rather than blindly taking the first
      // property, so the spec never carries a scalar property typed as a list.
      const listProperty = currentProperties.find((p): p is ListProperty => p.kind === 'list');

      if (listProperty !== undefined) {
        // TODO handle lists with appended properties
        // currently only the list part will be accessible
        elementSpecs.push({
          kind: 'list',
          name: currentName,
          count: currentCount,
          property: listProperty,
        });
      } else {
        elementSpecs.push({
          kind: 'table',
          name: currentName,
          count: currentCount,
          // no list property was found, so every entry is a column property
          properties: currentProperties as ColumnProperty[],
        });
      }
    }

    for (let i = 2, il = headerLines.length; i < il; ++i) {
      const l = headerLines[i];
      // split on any run of whitespace so tabs or repeated spaces do not
      // produce empty or shifted fields
      const ls = l.trim().split(/\s+/);

      if (l.startsWith('comment')) {
        comments.push(l.slice(8));
      } else if (l.startsWith('element')) {
        addCurrentElementSchema();

        const name = ls[1];
        const count = Number.parseInt(ls[2], 10);
        if (name === undefined) {
          throw new Error(`'element' record without a name`);
        }
        if (Number.isNaN(count) || count < 0) {
          throw new Error(`invalid count for element '${name}'`);
        }

        currentProperties = [];
        currentName = name;
        currentCount = count;
      } else if (l.startsWith('property')) {
        if (currentProperties === undefined) throw new Error(`properties outside of element`);
        if (ls[1] === 'list') {
          currentProperties.push({
            kind: 'list',
            countType: PlyType(ls[2]),
            dataType: PlyType(ls[3]),
            name: ls[4],
          });
        } else {
          currentProperties.push({
            kind: 'column',
            type: PlyType(ls[1]),
            name: ls[2],
          });
        }
      } else if (l.startsWith('end_header')) {
        addCurrentElementSchema();
      } else {
        console.warn('unknown header line');
      }
    }
  }
  /**
   * Parses the body of every element declared in the header, in header order,
   * dispatching on the spec kind to the table or list parser.
   *
   * @param state parsing state whose `elementSpecs` have already been filled
   */
  function parseElements(state: State) {
    for (const spec of state.elementSpecs) {
      switch (spec.kind) {
        case 'table':
          parseTableElement(state, spec);
          break;
        case 'list':
          parseListElement(state, spec);
          break;
      }
    }
  }
  /**
   * Maps a PLY scalar type name to the column schema used to read its values:
   * the floating point type names map to `Column.Schema.float`, all integer
   * type names to `Column.Schema.int`.
   *
   * @param type PLY type name
   * @returns the matching column schema
   */
  function getColumnSchema(type: PlyType): Column.Schema {
    switch (type) {
      case 'float':
      case 'float32':
      case 'double':
      case 'float64':
        return Column.Schema.float;

      case 'char':
      case 'int8':
      case 'uchar':
      case 'uint8':
      case 'short':
      case 'int16':
      case 'ushort':
      case 'uint16':
      case 'int':
      case 'int32':
      case 'uint':
      case 'uint32':
        return Column.Schema.int;
    }
  }
  /**
   * Parses an element that consists of scalar properties only.
   *
   * For each of the `spec.count` rows, one value token per property is read
   * from the tokenizer, in property order. The tokens of each property are
   * then wrapped in a column, and a `table` element exposing those columns by
   * property name is appended to `state.elements`.
   *
   * @param state parsing state; its tokenizer is advanced past the element's values
   * @param spec description of the table element taken from the header
   */
  function parseTableElement(state: State, spec: TableElementSpec) {
    const { tokenizer, elements } = state;
    const { properties, count: rowCount } = spec;

    // one token list per property; each token is stored as a start/end pair
    const tokensPerProperty: Tokens[] = properties.map(
      () => TokenBuilder.create(tokenizer.data, rowCount * 2)
    );

    for (let row = 0; row < rowCount; ++row) {
      for (const tokens of tokensPerProperty) {
        Tokenizer.skipWhitespace(tokenizer);
        Tokenizer.markStart(tokenizer);
        Tokenizer.eatValue(tokenizer);
        TokenBuilder.addUnchecked(tokens, tokenizer.tokenStart, tokenizer.tokenEnd);
      }
    }

    const propertyNames: string[] = [];
    const propertyTypes: PlyType[] = [];
    const columnsByName = new Map<string, Column<number>>();

    // columns are created only after all tokens have been collected
    properties.forEach(({ name, type }, index) => {
      const column = TokenColumn(tokensPerProperty[index], getColumnSchema(type));
      propertyNames.push(name);
      propertyTypes.push(type);
      columnsByName.set(name, column);
    });

    elements.push({
      kind: 'table',
      rowCount,
      propertyNames,
      propertyTypes,
      getProperty: (name: string) => columnsByName.get(name),
    });
  }
  /**
   * Parses an element that is read through a single list property.
   *
   * Every row is tokenized value by value; all tokens of all rows go into one
   * token list and `offsets` records where each row starts. The first token
   * of a row is interpreted as the number of entries, the following tokens as
   * the entries themselves. A `list` element is appended to `state.elements`.
   *
   * The object returned by the element's `value(row)` is a single shared
   * instance that is overwritten on every call: only the first `count`
   * items of `entries` are valid, and callers must copy what they want to keep.
   *
   * @param state parsing state; its tokenizer is advanced past the element's rows
   * @param spec description of the list element taken from the header
   */
  function parseListElement(state: State, spec: ListElementSpec) {
    const { elements, tokenizer } = state;
    const { count, property } = spec;
    const dataLength = tokenizer.data.length;

    // initial tokens size assumes triangle index data
    const tokens = TokenBuilder.create(tokenizer.data, count * 2 * 3);
    // offsets[i] is the index of the first token of row i,
    // offsets[i + 1] the index one past its last token
    const offsets = new Uint32Array(count + 1);
    let entryCount = 0;

    for (let i = 0; i < count; ++i) {
      Tokenizer.skipWhitespace(tokenizer);
      Tokenizer.markStart(tokenizer);
      // A row ends when `skipWhitespace` reports a line feed (char code 10).
      // The position check additionally ends the row at the end of the data,
      // so input without a trailing newline cannot keep this loop running.
      while (Tokenizer.skipWhitespace(tokenizer) !== 10 && tokenizer.position < dataLength) {
        ++entryCount;
        Tokenizer.markStart(tokenizer);
        Tokenizer.eatValue(tokenizer);
        TokenBuilder.addToken(tokens, tokenizer);
      }
      offsets[i + 1] = entryCount;
    }

    /** holds row value entries transiently */
    const listValue = {
      entries: [] as number[],
      count: 0,
    };

    const column = TokenColumn(tokens, getColumnSchema(property.dataType));

    elements.push({
      kind: 'list',
      rowCount: count,
      name: property.name,
      type: property.dataType,
      value: (row: number) => {
        const rowStart = offsets[row];
        // the first token of a row holds the number of list entries
        const length = column.value(rowStart);
        const firstEntry = rowStart + 1;
        for (let i = 0; i < length; ++i) {
          listValue.entries[i] = column.value(firstEntry + i);
        }
        listValue.count = length;
        return listValue;
      },
    });
  }
  /**
   * Runs the complete parse: creates the state, reports progress once, reads
   * the header, reads all elements and wraps the resulting `PlyFile` in a
   * successful result.
   *
   * @param data the complete PLY text
   * @param ctx runtime context used for the progress update
   * @returns a successful result holding the parsed file; errors thrown by
   *   the header or element parsing reject the returned promise
   */
  async function parseInternal(data: string, ctx: RuntimeContext): Promise<Result<PlyFile>> {
    const state = State(data, ctx);
    ctx.update({ message: 'Parsing...', current: 0, max: data.length });

    parseHeader(state);
    parseElements(state);

    const elementNames = state.elementSpecs.map(spec => spec.name);
    const file = PlyFile(state.elements, elementNames, state.comments);
    return Result.success(file);
  }
  /**
   * Creates a task named 'Parse PLY' that parses the given PLY text.
   *
   * @param data the complete PLY text
   * @returns a task producing the parse result
   */
  export function parse(data: string) {
    return Task.create<Result<PlyFile>>(
      'Parse PLY',
      async (ctx) => parseInternal(data, ctx)
    );
  }

  export default parse;
  230. export default parse;