Forráskód Böngészése

better handle scientific number type in getCifFieldType

Alexander Rose 5 éve
szülő
commit
e46a8c4369

+ 10 - 2
src/mol-io/reader/_spec/common.spec.ts

@@ -1,10 +1,10 @@
 /**
- * Copyright (c) 2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ * Copyright (c) 2019-2020 mol* contributors, licensed under MIT, See LICENSE file for more info.
  *
  * @author Alexander Rose <alexander.rose@weirdbyte.de>
  */
 
-import { parseFloat as fastParseFloat, parseInt as fastParseInt } from '../../../mol-io/reader/common/text/number-parser';
+import { parseFloat as fastParseFloat, parseInt as fastParseInt, getNumberType, NumberType } from '../../../mol-io/reader/common/text/number-parser';
 
 describe('common', () => {
     it('number-parser fastParseFloat', () => {
@@ -14,4 +14,12 @@ describe('common', () => {
     it('number-parser fastParseInt', () => {
         expect(fastParseInt('11(23)', 0, 11)).toBe(11)
     });
+
+    it('number-parser getNumberType', () => {
+        expect(getNumberType('11')).toBe(NumberType.Int)
+        expect(getNumberType('5E93')).toBe(NumberType.Scientific)
+        expect(getNumberType('0.42')).toBe(NumberType.Float)
+        expect(getNumberType('Foo123')).toBe(NumberType.NaN)
+        expect(getNumberType('11.0829(23)')).toBe(NumberType.NaN)
+    });
 });

+ 6 - 2
src/mol-io/reader/cif/data-model.ts

@@ -300,7 +300,7 @@ export function getTensor(category: CifCategory, field: string, space: Tensor.Sp
 }
 
 export function getCifFieldType(field: CifField): Column.Schema.Int | Column.Schema.Float | Column.Schema.Str {
-    let floatCount = 0, hasString = false, undefinedCount = 0;
+    let floatCount = 0, scientificCount = 0, hasString = false, undefinedCount = 0;
     for (let i = 0, _i = field.rowCount; i < _i; i++) {
         const k = field.valueKind(i);
         if (k !== Column.ValueKind.Present) {
@@ -310,10 +310,14 @@ export function getCifFieldType(field: CifField): Column.Schema.Int | Column.Sch
         const type = getNumberType(field.str(i));
         if (type === NumberType.Int) continue;
         else if (type === NumberType.Float) floatCount++;
+        else if (type === NumberType.Scientific) scientificCount++;
         else { hasString = true; break; }
     }
 
     if (hasString || undefinedCount === field.rowCount) return Column.Schema.str;
-    if (floatCount > 0) return Column.Schema.float;
+    // a value in scientific notation (e.g. '5E93') cannot be told apart from plain text,
+    // so keep the column as text when every row is in scientific notation
+    if (scientificCount === field.rowCount) return Column.Schema.str;
+    if (floatCount > 0 || scientificCount > 0) return Column.Schema.float;
     return Column.Schema.int;
 }

+ 6 - 3
src/mol-io/reader/common/text/number-parser.ts

@@ -1,8 +1,10 @@
 /**
- * Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ * Copyright (c) 2017-2020 mol* contributors, licensed under MIT, See LICENSE file for more info.
  *
- * from https://github.com/dsehnal/CIFTools.js
  * @author David Sehnal <david.sehnal@gmail.com>
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ *
+ * based in part on https://github.com/dsehnal/CIFTools.js
  */
 
 /**
@@ -78,6 +80,7 @@ export function parseFloat(str: string, start: number, end: number) {
 export const enum NumberType {
     Int,
     Float,
+    Scientific,
     NaN
 }
 
@@ -94,7 +97,7 @@ function isInt(str: string, start: number, end: number) {
 function getNumberTypeScientific(str: string, start: number, end: number) {
     // handle + in '1e+1' separately.
     if (str.charCodeAt(start) === 43 /* + */) start++;
-    return isInt(str, start, end) ? NumberType.Float : NumberType.NaN;
+    return isInt(str, start, end) ? NumberType.Scientific : NumberType.NaN;
 }
 
 /** The whole range must match, otherwise returns NaN */