Pārlūkot izejas kodu

mol-io: CIF triple quote support

David Sehnal 5 gadi atpakaļ
vecāks
revīzija
b864e992ef

+ 19 - 0
src/mol-io/reader/_spec/cif.spec.ts

@@ -8,6 +8,7 @@
 import * as Data from '../cif/data-model'
 import * as Schema from '../cif/schema'
 import { Column } from '../../../mol-data/db'
+import parse from '../cif/text/parser';
 
 const columnData = `123abc d,e,f '4 5 6'`;
 // 123abc d,e,f '4 5 6'
@@ -36,6 +37,24 @@ namespace TestSchema {
     export const schema = { test }
 }
 
+test('cif triple quote', async () => { // checks parsing of '''-delimited values, single-line and multi-line
+    const data = `data_test
+_test.field1 '''123 " '' 1'''
+_test.field2 ''' c glide reflection through the plane (x,1/4,z)
+chosen as one of the generators of the space group'''`;
+
+    const result = await parse(data).run(); // run the text parser on the inline CIF data
+    if (result.isError) { // fail the test explicitly when the parser reports an error
+        expect(false).toBe(true);
+        return;
+    }
+
+    const cat = result.result.blocks[0].categories['test']; // category 'test' of the first data block
+    expect(cat.getField('field1')!.str(0)).toBe(`123 " '' 1`); // delimiters stripped; embedded " and '' kept
+    expect(cat.getField('field2')!.str(0)).toBe(` c glide reflection through the plane (x,1/4,z)
+chosen as one of the generators of the space group`); // leading space and embedded newline are kept
+});
+
 describe('schema', () => {
     const db = Schema.toDatabase(TestSchema.schema, testBlock);
     it('property access', () => {

+ 2 - 2
src/mol-io/reader/cif.ts

@@ -14,7 +14,7 @@ import { CCD_Schema, CCD_Database } from './cif/schema/ccd'
 import { BIRD_Schema, BIRD_Database } from './cif/schema/bird'
 import { dic_Schema, dic_Database } from './cif/schema/dic'
 import { DensityServer_Data_Schema, DensityServer_Data_Database } from './cif/schema/density-server'
-import { cifCore_Database, cifCore_Schema, cifCore_Aliases } from './cif/schema/cif-core'
+import { CifCore_Database, CifCore_Schema, CifCore_Aliases } from './cif/schema/cif-core'
 
 export const CIF = {
     parse: (data: string|Uint8Array) => typeof data === 'string' ? parseText(data) : parseBinary(data),
@@ -27,7 +27,7 @@ export const CIF = {
         CCD: (frame: CifFrame) => toDatabase<CCD_Schema, CCD_Database>(CCD_Schema, frame),
         BIRD: (frame: CifFrame) => toDatabase<BIRD_Schema, BIRD_Database>(BIRD_Schema, frame),
         dic: (frame: CifFrame) => toDatabase<dic_Schema, dic_Database>(dic_Schema, frame),
-        cifCore: (frame: CifFrame) => toDatabase<cifCore_Schema, cifCore_Database>(cifCore_Schema, frame, cifCore_Aliases),
+        cifCore: (frame: CifFrame) => toDatabase<CifCore_Schema, CifCore_Database>(CifCore_Schema, frame, CifCore_Aliases),
         densityServer: (frame: CifFrame) => toDatabase<DensityServer_Data_Schema, DensityServer_Data_Database>(DensityServer_Data_Schema, frame),
     }
 }

+ 40 - 1
src/mol-io/reader/cif/text/parser.ts

@@ -131,6 +131,28 @@ function eatEscaped(state: TokenizerState, esc: number) {
     state.tokenEnd = state.position;
 }
 
+/**
+ * Eats a triple-quoted (''') value.
+ *
+ * Skips the three opening quote characters, then scans forward for the next
+ * run of three consecutive single quotes. When one is found, tokenStart is
+ * advanced by 3 and tokenEnd is set to the first closing quote, isEscaped is
+ * set to true, and the position is moved past the closing quotes.
+ *
+ * If the end of the data is reached without a closing delimiter, tokenEnd is
+ * set to the end of the data and tokenStart / isEscaped are left unchanged.
+ *
+ * NOTE(review): presumably the caller has set tokenStart to the first opening
+ * quote and state.position equals tokenStart on entry - confirm in the caller.
+ */
+function eatTripleQuote(state: TokenizerState) {
+    // skip the opening '''
+    state.position += 3;
+    while (state.position < state.length) {
+        if (state.data.charCodeAt(state.position) === 39 /* ' */ && isTripleQuoteAtPosition(state)) {
+            // closing delimiter found: exclude the quotes from the token.
+            state.tokenStart += 3;
+            state.tokenEnd = state.position; // token ends just before the closing quotes
+            state.isEscaped = true;
+            state.position += 3; // step past the closing quotes
+            return;
+        }
+
+        ++state.position;
+    }
+
+    state.tokenEnd = state.position; // no closing delimiter: token runs to the end of the data
+}
+
 /**
  * Eats a multiline token of the form NL;....NL;
  */
@@ -235,6 +257,18 @@ function skipWhitespace(state: TokenizerState): number {
     return prev;
 }
 
+
+/**
+ * Returns true if the characters at state.position + 1 and
+ * state.position + 2 are both single quotes (char code 39).
+ *
+ * The character at state.position itself is NOT checked here; the visible
+ * callers test it for 39 before calling.
+ *
+ * NOTE(review): the length guard only rejects fewer than 2 remaining
+ * characters, while reading position + 2 needs 3. This looks harmless if
+ * state.data is a string of length state.length (an out-of-range charCodeAt
+ * yields NaN, which fails the comparison) - confirm.
+ */
+function isTripleQuoteAtPosition(state: TokenizerState): boolean {
+    if (state.length - state.position < 2) return false; // too close to the end of the data
+    if (state.data.charCodeAt(state.position + 1) !== 39) return false; // '
+    if (state.data.charCodeAt(state.position + 2) !== 39) return false; // '
+
+    return true;
+}
+
 function isData(state: TokenizerState): boolean {
     // here we already assume the 5th char is _ and that the length >= 5
 
@@ -393,8 +427,13 @@ function moveNextInternal(state: TokenizerState) {
             skipCommentLine(state);
             state.tokenType = CifTokenType.Comment;
             break;
-        case 34: // ", escaped value
         case 39: // ', escaped value
+            if (isTripleQuoteAtPosition(state)) {
+                eatTripleQuote(state);
+                state.tokenType = CifTokenType.Value;
+                break;
+            }
+        case 34: // ", escaped value
             eatEscaped(state, c);
             state.tokenType = CifTokenType.Value;
             break;