فهرست منبع

fix CIF text encoder edge cases & added test

dsehnal 3 سال پیش
والد
کامیت
40a4211e75
3فایلهای تغییر یافته به همراه60 افزوده شده و 22 حذف شده
  1. 5 0
      CHANGELOG.md
  2. 27 0
      src/mol-io/writer/_spec/cif.spec.ts
  3. 28 22
      src/mol-io/writer/cif/encoder/text.ts

+ 5 - 0
CHANGELOG.md

@@ -3,10 +3,15 @@ All notable changes to this project will be documented in this file, following t
 
 Note that since we don't clearly distinguish between a public and private interfaces there will be changes in non-major versions that are potentially breaking. If we make breaking changes to less used interfaces we will highlight it in here.
 
+
 ## [Unreleased]
 
+
+## [v2.0.7] - 2021-06-23
+
 - Add ability to specify ``volumeIndex`` in ``Viewer.loadVolumeFromUrl`` to better support Volume Server inputs.
 - Support in-place reordering for trajectory ``Frame.x/y/z`` arrays for better memory efficiency.
+- Fixed text CIF encoder edge cases (most notably single whitespace not being escaped).
 
 ## [v2.0.6] - 2021-06-01
 

+ 27 - 0
src/mol-io/writer/_spec/cif.spec.ts

@@ -3,7 +3,10 @@ import { CifWriter } from '../cif';
 import { decodeMsgPack } from '../../common/msgpack/decode';
 import { EncodedFile, EncodedCategory } from '../../common/binary-cif';
 import { Field } from '../../reader/cif/binary/field';
+import { TextEncoder } from '../cif/encoder/text';
 import * as C from '../cif/encoder';
+import { Column, Database, Table } from '../../../mol-data/db';
+import { parseCifText } from '../../reader/cif/text/parser';
 
 const cartn_x = Data.CifField.ofNumbers([1.001, 1.002, 1.003, 1.004, 1.005, 1.006, 1.007, 1.008, 1.009]);
 const cartn_y = Data.CifField.ofNumbers([-3.0, -2.666, -2.3333, -2.0, -1.666, -1.333, -1.0, -0.666, -0.333]);
@@ -38,6 +41,30 @@ const encoding_aware_encoder = CifWriter.createEncoder({
     ])
 });
 
+test('cif writer value escaping', async () => {
+    const values = ['1', ' ', '  ', ` ' `, `a'`, `b"`, `"`, '  a  ', `"'"`, `'"`, `\na`];
+    const table = Table.ofArrays({ values: Column.Schema.str }, { values });
+
+    const encoder = new TextEncoder();
+    C.Encoder.writeDatabase(encoder, 'test', Database.ofTables('test', { test: table._schema }, { test: table }));
+    encoder.encode();
+    const data = encoder.getData();
+
+    const result = await parseCifText(data).run();
+    if (result.isError) {
+        expect(false).toBe(true);
+        return;
+    }
+
+    const cat = result.result.blocks[0].categories['test'];
+    const parsed = cat.getField('values')?.toStringArray();
+
+    expect(values.length).toBe(parsed?.length);
+    for (let i = 0; i < values.length; i++) {
+        expect(values[i]).toBe(parsed?.[i]);
+    }
+});
+
 describe('encoding-config', () => {
     const decoded = process(encoding_aware_encoder);
 

+ 28 - 22
src/mol-io/writer/cif/encoder/text.ts

@@ -1,5 +1,5 @@
 /**
- * Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ * Copyright (c) 2017-2021 mol* contributors, licensed under MIT, See LICENSE file for more info.
  *
  * Adapted from CIFTools.js (https://github.com/dsehnal/CIFTools.js)
  *
@@ -208,55 +208,61 @@ function writeChecked(builder: StringBuilder, val: string) {
         return false;
     }
 
-    let escape = val.charCodeAt(0) === 95 /* _ */, escapeCharStart = '\'', escapeCharEnd = '\' ';
-    let hasWhitespace = false;
-    let hasSingle = false;
-    let hasDouble = false;
-    for (let i = 0, _l = val.length - 1; i < _l; i++) {
+    const fst = val.charCodeAt(0);
+    let escape = false;
+    let escapeKind = 0; // 0 => ', 1 => "
+    let hasSingleQuote = false, hasDoubleQuote = false;
+    for (let i = 0, _l = val.length - 1; i <= _l; i++) {
         const c = val.charCodeAt(i);
 
         switch (c) {
-            case 9: hasWhitespace = true; break; // \t
+            case 9: // \t
+                escape = true;
+                break;
             case 10: // \n
                 writeMultiline(builder, val);
                 return true;
-            case 32: hasWhitespace = true; break; // ' '
+            case 32: // ' '
+                escape = true;
+                break;
             case 34: // "
-                if (hasSingle) {
+                // no need to escape quote if it's the last char and the length is > 1
+                if (i && i === _l) break;
+
+                if (hasSingleQuote) {
+                    // the string already has a " => use multiline value
                     writeMultiline(builder, val);
                     return true;
                 }
 
-                hasDouble = true;
+                hasDoubleQuote = true;
                 escape = true;
-                escapeCharStart = '\'';
-                escapeCharEnd = '\' ';
+                escapeKind = 0;
                 break;
             case 39: // '
-                if (hasDouble) {
+                // no need to escape quote if it's the last char and the length is > 1
+                if (i && i === _l) break;
+
+                if (hasDoubleQuote) {
                     writeMultiline(builder, val);
                     return true;
                 }
 
+                hasSingleQuote = true;
                 escape = true;
-                hasSingle = true;
-                escapeCharStart = '"';
-                escapeCharEnd = '" ';
+                escapeKind = 1;
                 break;
         }
     }
 
-    const fst = val.charCodeAt(0);
-    if (!escape && (fst === 35 /* # */|| fst === 36 /* $ */ || fst === 59 /* ; */ || fst === 91 /* [ */ || fst === 93 /* ] */ || hasWhitespace)) {
-        escapeCharStart = '\'';
-        escapeCharEnd = '\' ';
+    if (!escape && (fst === 35 /* # */ || fst === 36 /* $ */ || fst === 59 /* ; */ || fst === 91 /* [ */ || fst === 93 /* ] */ || fst === 95 /* _ */)) {
         escape = true;
     }
 
     if (escape) {
-        StringBuilder.writeSafe(builder, escapeCharStart);
+        StringBuilder.writeSafe(builder, escapeKind ? '"' : '\'');
         StringBuilder.writeSafe(builder, val);
-        StringBuilder.writeSafe(builder, escapeCharEnd);
+        StringBuilder.writeSafe(builder, escapeKind ? '" ' : '\' ');
     } else {
         StringBuilder.writeSafe(builder, val);
         StringBuilder.writeSafe(builder, ' ');