Ver Fonte

wip, fixing mol2 parser (multi model still broken)

Alexander Rose há 7 anos atrás
pai
commit
923404c20e
2 ficheiros alterados com 189 adições e 167 exclusões
  1. 131 122
      src/mol-io/reader/_spec/mol2.spec.ts
  2. 58 45
      src/mol-io/reader/mol2/parser.ts

+ 131 - 122
src/mol-io/reader/_spec/mol2.spec.ts

@@ -1,66 +1,66 @@
 
 import Mol2 from '../mol2/parser'
 
-// const Mol2String = `@<TRIPOS>MOLECULE
-// 5816
-//  26 26 0 0 0
-// SMALL
-// GASTEIGER
+const Mol2String = `@<TRIPOS>MOLECULE
+5816
+ 26 26 0 0 0
+SMALL
+GASTEIGER
 
-// @<TRIPOS>ATOM
-//       1 O           1.7394   -2.1169   -1.0894 O.3     1  LIG1       -0.3859
-//       2 O          -2.2941    1.0781   -1.7979 O.3     1  LIG1       -0.5033
-//       3 O          -3.6584    0.5842    0.5722 O.3     1  LIG1       -0.5033
-//       4 N           2.6359    1.0243    0.7030 N.3     1  LIG1       -0.3162
-//       5 C           1.6787   -1.1447   -0.0373 C.3     1  LIG1        0.0927
-//       6 C           0.2684   -0.6866    0.1208 C.ar    1  LIG1       -0.0143
-//       7 C           2.6376    0.0193   -0.3576 C.3     1  LIG1        0.0258
-//       8 C          -0.3658   -0.0099   -0.9212 C.ar    1  LIG1       -0.0109
-//       9 C          -0.4164   -0.9343    1.3105 C.ar    1  LIG1       -0.0524
-//      10 C          -1.6849    0.4191   -0.7732 C.ar    1  LIG1        0.1586
-//      11 C          -1.7353   -0.5053    1.4585 C.ar    1  LIG1       -0.0162
-//      12 C          -2.3696    0.1713    0.4166 C.ar    1  LIG1        0.1582
-//      13 C           3.5645    2.1013    0.3950 C.3     1  LIG1       -0.0157
-//      14 H           2.0210   -1.6511    0.8741 H       1  LIG1        0.0656
-//      15 H           2.3808    0.4742   -1.3225 H       1  LIG1        0.0453
-//      16 H           3.6478   -0.3931   -0.4831 H       1  LIG1        0.0453
-//      17 H           0.1501    0.1801   -1.8589 H       1  LIG1        0.0659
-//      18 H           0.0640   -1.4598    2.1315 H       1  LIG1        0.0622
-//      19 H           2.9013    0.5888    1.5858 H       1  LIG1        0.1217
-//      20 H          -2.2571   -0.7050    2.3907 H       1  LIG1        0.0655
-//      21 H           2.6646   -2.4067   -1.1652 H       1  LIG1        0.2103
-//      22 H           3.2862    2.6124   -0.5325 H       1  LIG1        0.0388
-//      23 H           4.5925    1.7346    0.3078 H       1  LIG1        0.0388
-//      24 H           3.5401    2.8441    1.1985 H       1  LIG1        0.0388
-//      25 H          -3.2008    1.2997   -1.5231 H       1  LIG1        0.2923
-//      26 H          -3.9690    0.3259    1.4570 H       1  LIG1        0.2923
-// @<TRIPOS>BOND
-//      1     1     5    1
-//      2     1    21    1
-//      3     2    10    1
-//      4     2    25    1
-//      5     3    12    1
-//      6     3    26    1
-//      7     4     7    1
-//      8     4    13    1
-//      9     4    19    1
-//     10     5     6    1
-//     11     5     7    1
-//     12     5    14    1
-//     13     6     8   ar
-//     14     6     9   ar
-//     15     7    15    1
-//     16     7    16    1
-//     17     8    10   ar
-//     18     8    17    1
-//     19     9    11   ar
-//     20     9    18    1
-//     21    10    12   ar
-//     22    11    12   ar
-//     23    11    20    1
-//     24    13    22    1
-//     25    13    23    1
-//     26    13    24    1`
+@<TRIPOS>ATOM
+      1 O           1.7394   -2.1169   -1.0894 O.3     1  LIG1       -0.3859
+      2 O          -2.2941    1.0781   -1.7979 O.3     1  LIG1       -0.5033
+      3 O          -3.6584    0.5842    0.5722 O.3     1  LIG1       -0.5033
+      4 N           2.6359    1.0243    0.7030 N.3     1  LIG1       -0.3162
+      5 C           1.6787   -1.1447   -0.0373 C.3     1  LIG1        0.0927
+      6 C           0.2684   -0.6866    0.1208 C.ar    1  LIG1       -0.0143
+      7 C           2.6376    0.0193   -0.3576 C.3     1  LIG1        0.0258
+      8 C          -0.3658   -0.0099   -0.9212 C.ar    1  LIG1       -0.0109
+      9 C          -0.4164   -0.9343    1.3105 C.ar    1  LIG1       -0.0524
+     10 C          -1.6849    0.4191   -0.7732 C.ar    1  LIG1        0.1586
+     11 C          -1.7353   -0.5053    1.4585 C.ar    1  LIG1       -0.0162
+     12 C          -2.3696    0.1713    0.4166 C.ar    1  LIG1        0.1582
+     13 C           3.5645    2.1013    0.3950 C.3     1  LIG1       -0.0157
+     14 H           2.0210   -1.6511    0.8741 H       1  LIG1        0.0656
+     15 H           2.3808    0.4742   -1.3225 H       1  LIG1        0.0453
+     16 H           3.6478   -0.3931   -0.4831 H       1  LIG1        0.0453
+     17 H           0.1501    0.1801   -1.8589 H       1  LIG1        0.0659
+     18 H           0.0640   -1.4598    2.1315 H       1  LIG1        0.0622
+     19 H           2.9013    0.5888    1.5858 H       1  LIG1        0.1217
+     20 H          -2.2571   -0.7050    2.3907 H       1  LIG1        0.0655
+     21 H           2.6646   -2.4067   -1.1652 H       1  LIG1        0.2103
+     22 H           3.2862    2.6124   -0.5325 H       1  LIG1        0.0388
+     23 H           4.5925    1.7346    0.3078 H       1  LIG1        0.0388
+     24 H           3.5401    2.8441    1.1985 H       1  LIG1        0.0388
+     25 H          -3.2008    1.2997   -1.5231 H       1  LIG1        0.2923
+     26 H          -3.9690    0.3259    1.4570 H       1  LIG1        0.2923
+@<TRIPOS>BOND
+     1     1     5    1
+     2     1    21    1
+     3     2    10    1
+     4     2    25    1
+     5     3    12    1
+     6     3    26    1
+     7     4     7    1
+     8     4    13    1
+     9     4    19    1
+    10     5     6    1
+    11     5     7    1
+    12     5    14    1
+    13     6     8   ar
+    14     6     9   ar
+    15     7    15    1
+    16     7    16    1
+    17     8    10   ar
+    18     8    17    1
+    19     9    11   ar
+    20     9    18    1
+    21    10    12   ar
+    22    11    12   ar
+    23    11    20    1
+    24    13    22    1
+    25    13    23    1
+    26    13    24    1`
 
 // const Mol2StringMultiBlocks = `@<TRIPOS>MOLECULE
 // 5816
@@ -245,60 +245,63 @@ GASTEIGER
     26    13    24    1`
 
 describe('mol2 reader', () => {
-    // it('basic', async () => {
-    //     const parsed =  await Mol2(Mol2String)();
-    //     if (parsed.isError) {
-    //         console.log(parsed)
-    //         return;
-    //     }
-    //     const mol2File = parsed.result;
-    //     const data = mol2File.structures[0];
-    //     const { molecule, atoms, bonds } = data;
+    it('basic', async () => {
+        const parsed =  await Mol2(Mol2String)();
+        if (parsed.isError) {
+            throw new Error(parsed.message);
+        }
+        const mol2File = parsed.result;
+        const data = mol2File.structures[0];
+        const { molecule, atoms, bonds } = data;
 
-    //     expect(molecule.mol_name).toBe('5816')
-    //     expect(molecule.num_atoms).toBe(26)
-    //     expect(molecule.num_bonds).toBe(26);
-    //     expect(molecule.num_subst).toBe(0);
-    //     expect(molecule.num_feat).toBe(0);
-    //     expect(molecule.num_sets).toBe(0);
-    //     expect(molecule.mol_type).toBe("SMALL")
-    //     expect(molecule.charge_type).toBe("GASTEIGER");
-    //     expect(molecule.status_bits).toBe("");
-    //     expect(molecule.mol_comment).toBe("");
+        // molecule fields
+        expect(molecule.mol_name).toBe('5816')
+        expect(molecule.num_atoms).toBe(26)
+        expect(molecule.num_bonds).toBe(26);
+        expect(molecule.num_subst).toBe(0);
+        expect(molecule.num_feat).toBe(0);
+        expect(molecule.num_sets).toBe(0);
+        expect(molecule.mol_type).toBe("SMALL")
+        expect(molecule.charge_type).toBe("GASTEIGER");
+        expect(molecule.status_bits).toBe("");
+        expect(molecule.mol_comment).toBe("");
 
-    //     expect(atoms.count).toBe(26);
-    //     expect(atoms.atom_id.value(0)).toBe(1);
-    //     expect(atoms.atom_name.value(0)).toBe('O');
-    //     expect(atoms.x.value(0)).toBeCloseTo(1.7394, 0.001);
-    //     expect(atoms.y.value(0)).toBeCloseTo(-2.1169, 0.0001);
-    //     expect(atoms.z.value(0)).toBeCloseTo(-1.0893, 0.0001);
-    //     expect(atoms.atom_type.value(0)).toBe("O.3");
-    //     ///// optionals
-    //     expect(atoms.subst_id.value(0)).toBe(1);
-    //     expect(atoms.subst_name.value(0)).toBe('LIG1');
-    //     expect(atoms.charge.value(0)).toBeCloseTo(-0.3859);
-    //     expect(atoms.status_bit.value(0)).toBe('');
+        // required atom fields
+        expect(atoms.count).toBe(26);
+        expect(atoms.atom_id.value(0)).toBe(1);
+        expect(atoms.atom_name.value(0)).toBe('O');
+        expect(atoms.x.value(0)).toBeCloseTo(1.7394, 0.001);
+        expect(atoms.y.value(0)).toBeCloseTo(-2.1169, 0.0001);
+        expect(atoms.z.value(0)).toBeCloseTo(-1.0893, 0.0001);
+        expect(atoms.atom_type.value(0)).toBe("O.3");
 
-    //     expect(bonds.count).toBe(26);
-    //     expect(bonds.bond_id.value(0)).toBe(1);
-    //     expect(bonds.origin_atom_id.value(0)).toBe(1);
-    //     expect(bonds.target_atom_id.value(0)).toBe(5);
-    //     expect(bonds.bond_type.value(0)).toBe('1');
-    //     /////// optional
-    //     expect(bonds.status_bits.value(0)).toBe('');
+        // optional atom fields
+        expect(atoms.subst_id.value(0)).toBe(1);
+        expect(atoms.subst_name.value(0)).toBe('LIG1');
+        expect(atoms.charge.value(0)).toBeCloseTo(-0.3859);
+        expect(atoms.status_bit.value(0)).toBe('');
 
-    // });
+        // required bond fields
+        expect(bonds.count).toBe(26);
+        expect(bonds.bond_id.value(0)).toBe(1);
+        expect(bonds.origin_atom_id.value(0)).toBe(1);
+        expect(bonds.target_atom_id.value(0)).toBe(5);
+        expect(bonds.bond_type.value(0)).toBe('1');
+
+        // optional bond fields
+        expect(bonds.status_bits.value(0)).toBe('');
+    });
 
     // it('multiblocks', async () => {
     //     const parsed =  await Mol2(Mol2StringMultiBlocks)();
     //     if (parsed.isError) {
-    //         console.log(parsed)
-    //         return;
+    //         throw new Error(parsed.message);
     //     }
     //     const mol2File = parsed.result;
     //     const data = mol2File.structures[1];
     //     const { molecule, atoms, bonds } = data;
 
+    //     // molecule fields
     //     expect(molecule.mol_name).toBe('5816')
     //     expect(molecule.num_atoms).toBe(26)
     //     expect(molecule.num_bonds).toBe(26);
@@ -310,6 +313,7 @@ describe('mol2 reader', () => {
     //     expect(molecule.status_bits).toBe("");
     //     expect(molecule.mol_comment).toBe("");
 
+    //     // required atom fields
     //     expect(atoms.count).toBe(26);
     //     expect(atoms.atom_id.value(0)).toBe(1);
     //     expect(atoms.atom_name.value(0)).toBe('O');
@@ -317,32 +321,34 @@ describe('mol2 reader', () => {
     //     expect(atoms.y.value(0)).toBeCloseTo(-2.1169, 0.0001);
     //     expect(atoms.z.value(0)).toBeCloseTo(-1.0893, 0.0001);
     //     expect(atoms.atom_type.value(0)).toBe("O.3");
-    //     ///// optionals
+
+    //     // optional atom fields
     //     expect(atoms.subst_id.value(0)).toBe(1);
     //     expect(atoms.subst_name.value(0)).toBe('LIG1');
     //     expect(atoms.charge.value(0)).toBeCloseTo(-0.3859);
     //     expect(atoms.status_bit.value(0)).toBe('');
 
+    //     // required bond fields
     //     expect(bonds.count).toBe(26);
     //     expect(bonds.bond_id.value(0)).toBe(1);
     //     expect(bonds.origin_atom_id.value(0)).toBe(1);
     //     expect(bonds.target_atom_id.value(0)).toBe(5);
     //     expect(bonds.bond_type.value(0)).toBe('1');
-    //     /////// optional
-    //     expect(bonds.status_bits.value(0)).toBe('');
 
+    //     // optional bond fields
+    //     expect(bonds.status_bits.value(0)).toBe('');
     // });
 
     it('minimal', async () => {
         const parsed =  await Mol2(Mol2StringMinimal)();
         if (parsed.isError) {
-            console.log(parsed)
-            return;
+            throw new Error(parsed.message);
         }
         const mol2File = parsed.result;
         const data = mol2File.structures[0];
         const { molecule, atoms, bonds } = data;
 
+        // molecule fields
         expect(molecule.mol_name).toBe('5816')
         expect(molecule.num_atoms).toBe(26)
         expect(molecule.num_bonds).toBe(26);
@@ -354,26 +360,29 @@ describe('mol2 reader', () => {
         expect(molecule.status_bits).toBe("");
         expect(molecule.mol_comment).toBe("");
 
+        // required atom fields
         expect(atoms.count).toBe(26);
-        // expect(atoms.atom_id.value(0)).toBe(1);
-        // expect(atoms.atom_name.value(0)).toBe('O');
-        // expect(atoms.x.value(0)).toBeCloseTo(1.7394, 0.001);
-        // expect(atoms.y.value(0)).toBeCloseTo(-2.1169, 0.0001);
-        // expect(atoms.z.value(0)).toBeCloseTo(-1.0893, 0.0001);
-        // expect(atoms.atom_type.value(0)).toBe("O.3");
-        ///// optionals
-        // expect(atoms.subst_id.value(0)).toBe(0);
-        // expect(atoms.subst_name.value(0)).toBe('');
-        // expect(atoms.charge.value(0)).toBeCloseTo(0);
-        // expect(atoms.status_bit.value(0)).toBe('');
+        expect(atoms.atom_id.value(0)).toBe(1);
+        expect(atoms.atom_name.value(0)).toBe('O');
+        expect(atoms.x.value(0)).toBeCloseTo(1.7394, 0.001);
+        expect(atoms.y.value(0)).toBeCloseTo(-2.1169, 0.0001);
+        expect(atoms.z.value(0)).toBeCloseTo(-1.0893, 0.0001);
+        expect(atoms.atom_type.value(0)).toBe("O.3");
+
+        // optional atom fields
+        expect(atoms.subst_id.value(0)).toBe(0);
+        expect(atoms.subst_name.value(0)).toBe('');
+        expect(atoms.charge.value(0)).toBeCloseTo(0);
+        expect(atoms.status_bit.value(0)).toBe('');
 
+        // required bond fields
         expect(bonds.count).toBe(26);
-        // expect(bonds.bond_id.value(0)).toBe(1);
-        // expect(bonds.origin_atom_id.value(0)).toBe(1);
-        // expect(bonds.target_atom_id.value(0)).toBe(5);
-        // expect(bonds.bond_type.value(0)).toBe('1');
-        // /////// optional
-        // expect(bonds.status_bits.value(0)).toBe('');
+        expect(bonds.bond_id.value(0)).toBe(1);
+        expect(bonds.origin_atom_id.value(0)).toBe(1);
+        expect(bonds.target_atom_id.value(0)).toBe(5);
+        expect(bonds.bond_type.value(0)).toBe('1');
 
+        // optional bond fields
+        expect(bonds.status_bits.value(0)).toBe('');
     });
 });

+ 58 - 45
src/mol-io/reader/mol2/parser.ts

@@ -1,3 +1,10 @@
+/**
+ * Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Zepei Xu <xuzepei19950617@gmail.com>
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ */
+
 //               NOTES
 // When you want to create an undefined string column, you must use
 // undefStr = UndefinedColumn(molecule.num_atoms, ColumnType.str)
@@ -11,6 +18,8 @@ import * as Schema from './schema'
 import Result from '../result'
 import Computation from 'mol-util/computation'
 
+const { skipWhitespace, eatValue, markLine, getTokenString, readLine } = Tokenizer;
+
 interface State {
     tokenizer: Tokenizer,
     molecule: Schema.Molecule,
@@ -40,36 +49,37 @@ function State(tokenizer: Tokenizer, ctx: Computation.Context): State {
     };
 }
 
+const reWhitespace = /\s+/g;
+
 function handleMolecule(state: State) {
     const { tokenizer, molecule } = state;
-    Tokenizer.markLine(tokenizer);
-    Tokenizer.markLine(tokenizer);
-    molecule.mol_name = Tokenizer.getTokenString(tokenizer);
+    markLine(tokenizer);
+    markLine(tokenizer);
+    molecule.mol_name = getTokenString(tokenizer);
 
-    Tokenizer.markLine(tokenizer);
-    const values = Tokenizer.getTokenString(tokenizer).trim().split(/\s+/g);
-    molecule.num_atoms = parseInt(values[0]) ? parseInt(values[1]) : 0;
+    markLine(tokenizer);
+    const values = getTokenString(tokenizer).trim().split(reWhitespace);
+    molecule.num_atoms = parseInt(values[0]) ? parseInt(values[0]) : 0;
     molecule.num_bonds = parseInt(values[1]) ? parseInt(values[1]) : 0;
-    molecule.num_subst = parseInt(values[2]) ? parseInt(values[1]) : 0;
-    molecule.num_feat = parseInt(values[3]) ? parseInt(values[1]) : 0;
-    molecule.num_sets = parseInt(values[4]) ? parseInt(values[1]) : 0;
+    molecule.num_subst = parseInt(values[2]) ? parseInt(values[2]) : 0;
+    molecule.num_feat = parseInt(values[3]) ? parseInt(values[3]) : 0;
+    molecule.num_sets = parseInt(values[4]) ? parseInt(values[4]) : 0;
 
-    Tokenizer.markLine(tokenizer);
-    molecule.mol_type = Tokenizer.getTokenString(tokenizer);
+    markLine(tokenizer);
+    molecule.mol_type = getTokenString(tokenizer);
 
-    Tokenizer.markLine(tokenizer);
-    molecule.charge_type = Tokenizer.getTokenString(tokenizer);
+    markLine(tokenizer);
+    molecule.charge_type = getTokenString(tokenizer);
 
-    Tokenizer.markLine(tokenizer);
-    if (Tokenizer.getTokenString(tokenizer) == '') return
-    molecule.status_bits = Tokenizer.getTokenString(tokenizer)
+    markLine(tokenizer);
+    if (getTokenString(tokenizer) === '') return
+    molecule.status_bits = getTokenString(tokenizer)
 
-    Tokenizer.markLine(tokenizer);
-    if (Tokenizer.getTokenString(tokenizer) == '') return
-    molecule.mol_comment = Tokenizer.getTokenString(tokenizer)
+    markLine(tokenizer);
+    if (getTokenString(tokenizer) === '') return
+    molecule.mol_comment = getTokenString(tokenizer)
 }
 
-
 function isStatus_bit(aString: String): Boolean{
     if(aString.includes('DSPMOD') || aString.includes('TYPECOL') || aString.includes('CAP')
        || aString.includes('BACKBONE') || aString.includes('DICT') || aString.includes('ESSENTIAL')
@@ -79,7 +89,6 @@ function isStatus_bit(aString: String): Boolean{
     return false;
 }
 
-
 async function handleAtoms(state: State): Promise<Schema.Atoms> {
     const { tokenizer, molecule } = state;
     let hasSubst_id = false;
@@ -88,17 +97,17 @@ async function handleAtoms(state: State): Promise<Schema.Atoms> {
     let hasStatus_bit = false;
 
     // skip empty lines and '@<TRIPOS>ATOM'
-    while(Tokenizer.getTokenString(tokenizer) != '@<TRIPOS>ATOM'){
-        Tokenizer.markLine(tokenizer);
+    while(getTokenString(tokenizer) != '@<TRIPOS>ATOM'){
+        markLine(tokenizer);
     }
 
     const initialTokenizerPosition = tokenizer.position;
     const initialTokenizerLineNumber = tokenizer.lineNumber;
-    const firstLine = Tokenizer.readLine(tokenizer);
+    const firstLine = readLine(tokenizer);
     const firstLineArray = firstLine.trim().split(/\s+/g)
     const firstLineLength = firstLineArray.length;
 
-    // optionals are in order "integer string float string".
+    // optional columns are in order "integer string float string".
     // Use this to find out which column is missing or empty
     for(let i = 6; i < firstLineLength; i++){
         if(!isNaN(Number(firstLineArray[i]))){
@@ -116,17 +125,13 @@ async function handleAtoms(state: State): Promise<Schema.Atoms> {
         }
     }
 
+    // required columns
     const atom_idTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
     const atom_nameTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);;
     const xTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
     const yTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
     const zTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
     const atom_typeTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
-    // optionals
-    const subst_idTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
-    const subst_nameTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
-    const chargeTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
-    const status_bitTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
 
     const atom_idTokenColumn = TokenColumn(atom_idTokens);
     const atom_nameTokenColumn = TokenColumn(atom_nameTokens);
@@ -134,7 +139,13 @@ async function handleAtoms(state: State): Promise<Schema.Atoms> {
     const yTokenColumn = TokenColumn(yTokens);
     const zTokenColumn = TokenColumn(zTokens);
     const atom_typeColumn = TokenColumn(atom_typeTokens);
-    // optionals
+
+    // optional columns
+    const subst_idTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
+    const subst_nameTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
+    const chargeTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
+    const status_bitTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
+
     const subst_idTokenColumn = TokenColumn(subst_idTokens);
     const subst_nameTokenColumn = TokenColumn(subst_nameTokens);
     const chargeTokenColumn = TokenColumn(chargeTokens);
@@ -144,7 +155,7 @@ async function handleAtoms(state: State): Promise<Schema.Atoms> {
     const undefInt = Column.Undefined(molecule.num_atoms, Column.Schema.int);
     const undefStr = Column.Undefined(molecule.num_atoms, Column.Schema.str);
 
-    let numOfColumn = 5;
+    let numOfColumn = 6;
     if(hasSubst_id){numOfColumn++}
     if(hasSubst_name){numOfColumn++}
     if(hasCharge){numOfColumn++}
@@ -163,8 +174,9 @@ async function handleAtoms(state: State): Promise<Schema.Atoms> {
             let chargeWritten = false;
             let status_bitWritten = false;
             for(let j = 0; j < numOfColumn; j++){
-                Tokenizer.skipWhitespace(tokenizer);
-                Tokenizer.eatValue(tokenizer);
+                skipWhitespace(tokenizer);
+                tokenizer.tokenStart = tokenizer.position;
+                eatValue(tokenizer);
                 switch(j){
                     case 0:
                         TokenBuilder.addUnchecked(atom_idTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
@@ -185,16 +197,16 @@ async function handleAtoms(state: State): Promise<Schema.Atoms> {
                         TokenBuilder.addUnchecked(atom_typeTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
                         break;
                     default:
-                        if(hasSubst_id == true && subst_idWritten == false){
+                        if(hasSubst_id === true && subst_idWritten === false){
                             TokenBuilder.addUnchecked(subst_idTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
                             subst_idWritten = true;
-                        }else if(hasSubst_name == true && subst_nameWritten == false){
+                        }else if(hasSubst_name === true && subst_nameWritten === false){
                             TokenBuilder.addUnchecked(subst_nameTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
                             subst_nameWritten = true;
-                        }else if(hasCharge == true && chargeWritten == false){
+                        }else if(hasCharge === true && chargeWritten === false){
                             TokenBuilder.addUnchecked(chargeTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
                             chargeWritten = true;
-                        }else if(hasStatus_bit == true && status_bitWritten == false){
+                        }else if(hasStatus_bit === true && status_bitWritten === false){
                             TokenBuilder.addUnchecked(status_bitTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
                             status_bitWritten = true;
                         }
@@ -213,12 +225,12 @@ async function handleAtoms(state: State): Promise<Schema.Atoms> {
         y: yTokenColumn(Column.Schema.float),
         z: zTokenColumn(Column.Schema.float),
         atom_type: atom_typeColumn(Column.Schema.str),
-        // optional properties
+
+        // optional columns
         subst_id: hasSubst_id ? subst_idTokenColumn(Column.Schema.int) : undefInt,
         subst_name: hasSubst_name ? subst_nameTokenColumn(Column.Schema.str) : undefStr,
         charge: hasCharge ? chargeTokenColumn(Column.Schema.float) : undefFloat,
         status_bit: hasStatus_bit ? status_bitTokenColumn(Column.Schema.str) : undefStr,
-
     };
     return ret;
 }
@@ -227,13 +239,13 @@ async function handleBonds(state: State): Promise<Schema.Bonds> {
     const { tokenizer, molecule } = state;
     let hasStatus_bit = false;
 
-    while(Tokenizer.getTokenString(tokenizer) != '@<TRIPOS>BOND'){
-        Tokenizer.markLine(tokenizer);
+    while(getTokenString(tokenizer) !== '@<TRIPOS>BOND'){
+        markLine(tokenizer);
     }
 
     const initialTokenizerPosition = tokenizer.position;
     const initialTokenizerLineNumber = tokenizer.lineNumber;
-    const firstLine = Tokenizer.readLine(tokenizer);
+    const firstLine = readLine(tokenizer);
     const firstLineArray = firstLine.trim().split(/\s+/g)
     const firstLineLength = firstLineArray.length;
     if(firstLineLength === 5){
@@ -268,8 +280,9 @@ async function handleBonds(state: State): Promise<Schema.Bonds> {
         const linesToRead = Math.min(molecule.num_bonds - linesAlreadyRead, chunkSize);
         for(let i = 0; i < linesToRead; i++){
             for(let j = 0; j < numberOfColumn; j++){
-                Tokenizer.skipWhitespace(tokenizer);
-                Tokenizer.eatValue(tokenizer);
+                skipWhitespace(tokenizer);
+                tokenizer.tokenStart = tokenizer.position;
+                eatValue(tokenizer);
                 switch(j){
                     case 0:
                         TokenBuilder.addUnchecked(bond_idTokens, tokenizer.tokenStart, tokenizer.tokenEnd);