Browse Source

guessElement in gro reader

Alexander Rose 6 years ago
parent
commit
6c2a6e3fa2

+ 1 - 1
src/mol-io/reader/cif/data-model.ts

@@ -101,7 +101,7 @@ export namespace CifField {
         return ofStrings([value]);
     }
 
-    export function ofStrings(values: string[]): CifField {
+    export function ofStrings(values: ArrayLike<string>): CifField {
         const rowCount = values.length;
         const str: CifField['str'] = row => { const ret = values[row]; if (!ret || ret === '.' || ret === '?') return ''; return ret; };
         const int: CifField['int'] = row => { const v = values[row]; return fastParseInt(v, 0, v.length) || 0; };

+ 2 - 2
src/mol-model-formats/structure/_spec/pdb.spec.ts

@@ -4,8 +4,8 @@
  * @author Alexander Rose <alexander.rose@weirdbyte.de>
  */
 
-import { guessElementSymbol } from '../pdb/to-cif';
 import { TokenBuilder } from 'mol-io/reader/common/text/tokenizer';
+import { guessElementSymbolTokens } from '../util';
 
 const records = [
     ['ATOM     19 HD23 LEU A   1     151.940 143.340 155.670  0.00  0.00', 'H'],
@@ -19,7 +19,7 @@ describe('PDB to-cif', () => {
         for (let i = 0, il = records.length; i < il; ++i) {
             const [ data, element ] = records[i]
             const tokens = TokenBuilder.create(data, 2)
-            guessElementSymbol(tokens, data, 12, 16)
+            guessElementSymbolTokens(tokens, data, 12, 16)
             expect(data.substring(tokens.indices[0], tokens.indices[1])).toBe(element)
         }
     });

+ 4 - 2
src/mol-model-formats/structure/gro.ts

@@ -12,6 +12,7 @@ import { GroFile, GroAtoms } from 'mol-io/reader/gro/schema';
 import { CifCategory, CifField } from 'mol-io/reader/cif';
 import { Column } from 'mol-data/db';
 import { mmCIF_Schema } from 'mol-io/reader/cif/schema/mmcif';
+import { guessElementSymbolString } from './util';
 
 // TODO multi model files
 // TODO separate chains
@@ -37,7 +38,7 @@ function _atom_site(atoms: GroAtoms): { [K in keyof mmCIF_Schema['atom_site']]?:
         auth_comp_id,
         auth_seq_id,
         B_iso_or_equiv: CifField.ofColumn(Column.Undefined(atoms.count, Column.Schema.float)),
-        Cartn_x: CifField.ofNumbers(Column.mapToArray(atoms.x, x => x * 10, Float32Array)), 
+        Cartn_x: CifField.ofNumbers(Column.mapToArray(atoms.x, x => x * 10, Float32Array)),
         Cartn_y: CifField.ofNumbers(Column.mapToArray(atoms.y, y => y * 10, Float32Array)),
         Cartn_z: CifField.ofNumbers(Column.mapToArray(atoms.z, z => z * 10, Float32Array)),
         group_PDB: CifField.ofColumn(Column.Undefined(atoms.count, Column.Schema.str)),
@@ -52,7 +53,8 @@ function _atom_site(atoms: GroAtoms): { [K in keyof mmCIF_Schema['atom_site']]?:
         label_entity_id: CifField.ofColumn(Column.ofConst('1', atoms.count, Column.Schema.str)),
 
         occupancy: CifField.ofColumn(Column.ofConst(1, atoms.count, Column.Schema.float)),
-        type_symbol: CifField.ofColumn(Column.Undefined(atoms.count, Column.Schema.str)),
+        type_symbol: CifField.ofStrings(Column.mapToArray(atoms.atomName, s => guessElementSymbolString(s))),
+        // type_symbol: CifField.ofColumn(Column.Undefined(atoms.count, Column.Schema.str)),
 
         pdbx_PDB_ins_code: CifField.ofColumn(Column.Undefined(atoms.count, Column.Schema.str)),
         pdbx_PDB_model_num: CifField.ofColumn(Column.ofConst('1', atoms.count, Column.Schema.str)),

+ 4 - 40
src/mol-model-formats/structure/pdb/to-cif.ts

@@ -8,11 +8,12 @@
 import { substringStartsWith } from 'mol-util/string';
 import { CifField, CifCategory, CifFrame } from 'mol-io/reader/cif';
 import { mmCIF_Schema } from 'mol-io/reader/cif/schema/mmcif';
-import { TokenBuilder, Tokenizer, Tokens } from 'mol-io/reader/common/text/tokenizer';
+import { TokenBuilder, Tokenizer } from 'mol-io/reader/common/text/tokenizer';
 import { PdbFile } from 'mol-io/reader/pdb/schema';
 import { parseCryst1, parseRemark350, parseMtrix } from './assembly';
 import { WaterNames } from 'mol-model/structure/model/types';
 import { parseHelix, parseSheet } from './secondary-structure';
+import { guessElementSymbolTokens } from '../util';
 
 function _entity(): { [K in keyof mmCIF_Schema['entity']]?: CifField } {
     return {
@@ -89,43 +90,6 @@ function getEntityId(residueName: string, isHet: boolean) {
     return '1';
 }
 
-export function guessElementSymbol(tokens: Tokens, str: string, start: number, end: number) {
-    let s = start, e = end - 1
-
-    // trim spaces and numbers
-    let c = str.charCodeAt(s)
-    while ((c === 32 || (c >= 48 && c <= 57)) && s <= e) c = str.charCodeAt(++s)
-    c = str.charCodeAt(e)
-    while ((c === 32 || (c >= 48 && c <= 57)) && e >= s) c = str.charCodeAt(--e)
-
-    ++e
-
-    if (s === e) return TokenBuilder.add(tokens, s, e) // empty
-    if (s + 1 === e) return TokenBuilder.add(tokens, s, e) // one char
-
-    c = str.charCodeAt(s)
-
-    if (s + 2 === e) { // two chars
-        const c2 = str.charCodeAt(s + 1)
-        if (
-            ((c === 78 || c === 110) && (c2 === 65 || c2 ===  97)) || // NA na Na nA
-            ((c === 67 || c ===  99) && (c2 === 76 || c2 === 108)) || // CL
-            ((c === 70 || c === 102) && (c2 === 69 || c2 === 101))    // FE
-        ) return TokenBuilder.add(tokens, s, s + 2)
-    }
-
-    if (
-        c === 67 || c ===  99 || // C c
-        c === 72 || c === 104 || // H h
-        c === 78 || c === 110 || // N n
-        c === 79 || c === 111 || // O o
-        c === 80 || c === 112 || // P p
-        c === 83 || c === 115    // S s
-    ) return TokenBuilder.add(tokens, s, s + 1)
-
-    TokenBuilder.add(tokens, s, s) // no reasonable guess, add empty token
-}
-
 function addAtom(sites: AtomSiteTemplate, model: string, data: Tokenizer, s: number, e: number, isHet: boolean) {
     const { data: str } = data;
     const length = e - s;
@@ -199,10 +163,10 @@ function addAtom(sites: AtomSiteTemplate, model: string, data: Tokenizer, s: num
         if (data.tokenStart < data.tokenEnd) {
             TokenBuilder.addToken(sites.type_symbol, data);
         } else {
-            guessElementSymbol(sites.type_symbol, str, s + 12, s + 16)
+            guessElementSymbolTokens(sites.type_symbol, str, s + 12, s + 16)
         }
     } else {
-        guessElementSymbol(sites.type_symbol, str, s + 12, s + 16)
+        guessElementSymbolTokens(sites.type_symbol, str, s + 12, s + 16)
     }
 
     sites.label_entity_id[sites.index] = getEntityId(residueName, isHet);

+ 62 - 0
src/mol-model-formats/structure/util.ts

@@ -0,0 +1,62 @@
+/**
+ * Copyright (c) 2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ */
+
+import { TokenBuilder, Tokens } from 'mol-io/reader/common/text/tokenizer';
+
+/**
+ * Guess an element symbol from the atom name stored in `str` between the
+ * offsets `start` (inclusive) and `end` (exclusive).
+ *
+ * Nothing is copied out of `str`: the guess is recorded by calling
+ * `TokenBuilder.add(tokens, from, to)` with offsets into `str`, so the
+ * original letter case of the input is kept.
+ *
+ * Rules, applied after spaces and digits are trimmed from both ends:
+ * - empty or single character names are added unchanged
+ * - the two character names NA, CL and FE (any case) are added as a whole
+ * - otherwise a leading C, H, N, O, P or S (any case) is added on its own
+ * - anything else is added as an empty token
+ *
+ * @param tokens token collection that receives the guess
+ * @param str string holding the atom name
+ * @param start offset of the first character of the atom name
+ * @param end offset one past the last character of the atom name
+ */
+export function guessElementSymbolTokens(tokens: Tokens, str: string, start: number, end: number) {
+    // char code 32 is a space, 48-57 are the digits '0'-'9'
+    const isSpaceOrDigit = (code: number) => code === 32 || (code >= 48 && code <= 57)
+
+    let first = start, last = end - 1
+
+    // move both ends inwards past spaces and digits
+    while (isSpaceOrDigit(str.charCodeAt(first)) && first <= last) ++first
+    while (isSpaceOrDigit(str.charCodeAt(last)) && last >= first) --last
+
+    const stop = last + 1 // exclusive end of the trimmed name
+    const length = stop - first
+
+    // nothing left or a single character: use the trimmed name as is
+    if (length === 0 || length === 1) return TokenBuilder.add(tokens, first, stop)
+
+    const c1 = str.charCodeAt(first)
+
+    if (length === 2) {
+        const c2 = str.charCodeAt(first + 1)
+        const isNa = (c1 === 78 || c1 === 110) && (c2 === 65 || c2 === 97) // N n + A a
+        const isCl = (c1 === 67 || c1 === 99) && (c2 === 76 || c2 === 108) // C c + L l
+        const isFe = (c1 === 70 || c1 === 102) && (c2 === 69 || c2 === 101) // F f + E e
+        if (isNa || isCl || isFe) return TokenBuilder.add(tokens, first, first + 2)
+    }
+
+    // fall back to the first character if it is a common one letter element
+    switch (c1) {
+        case 67: case 99: // C c
+        case 72: case 104: // H h
+        case 78: case 110: // N n
+        case 79: case 111: // O o
+        case 80: case 112: // P p
+        case 83: case 115: // S s
+            return TokenBuilder.add(tokens, first, first + 1)
+    }
+
+    TokenBuilder.add(tokens, first, first) // no reasonable guess, add empty token
+}
+
+/**
+ * Guess an element symbol from an atom name given as a plain string.
+ *
+ * String counterpart of `guessElementSymbolTokens`: whitespace and digits
+ * are stripped from both ends of the name (so '1HD2' is treated as 'HD')
+ * and the rest is upper-cased before the same rules are applied:
+ * - empty or single character names are returned unchanged
+ * - the two character names NA, CL and FE are returned as a whole
+ * - otherwise a leading C, H, N, O, P or S is returned on its own
+ *
+ * @param str atom name, e.g. 'CA', 'HD23' or '1HD2'
+ * @returns the guessed symbol in upper case, or '' if there is no reasonable guess
+ */
+export function guessElementSymbolString(str: string): string {
+    // trim spaces and numbers, convert to upper case
+    // (a plain `trim()` would keep digits, so '1HD2' would yield no guess and '1' would be returned as a symbol)
+    const name = str.replace(/^[\s\d]+|[\s\d]+$/g, '').toUpperCase()
+    const l = name.length
+
+    if (l === 0) return name // empty
+    if (l === 1) return name // one char
+
+    if (l === 2) { // two chars
+        if (name === 'NA' || name === 'CL' || name === 'FE') return name
+    }
+
+    const c = name[0]
+    if (c === 'C' || c === 'H' || c === 'N' || c === 'O' || c === 'P' || c === 'S') return c
+
+    return '' // no reasonable guess, return empty string
+}