Переглянути джерело

wip, fixing mol2 parser and tests

Alexander Rose 7 роки тому
батько
коміт
0b95406cc9
2 змінених файлів з 327 додано та 355 видалено
  1. 295 295
      src/mol-io/reader/_spec/mol2.spec.ts
  2. 32 60
      src/mol-io/reader/mol2/parser.ts

+ 295 - 295
src/mol-io/reader/_spec/mol2.spec.ts

@@ -1,187 +1,187 @@
 
 import Mol2 from '../mol2/parser'
 
-const Mol2String = `@<TRIPOS>MOLECULE
-5816
- 26 26 0 0 0
-SMALL
-GASTEIGER
+// const Mol2String = `@<TRIPOS>MOLECULE
+// 5816
+//  26 26 0 0 0
+// SMALL
+// GASTEIGER
 
-@<TRIPOS>ATOM
-      1 O           1.7394   -2.1169   -1.0894 O.3     1  LIG1       -0.3859
-      2 O          -2.2941    1.0781   -1.7979 O.3     1  LIG1       -0.5033
-      3 O          -3.6584    0.5842    0.5722 O.3     1  LIG1       -0.5033
-      4 N           2.6359    1.0243    0.7030 N.3     1  LIG1       -0.3162
-      5 C           1.6787   -1.1447   -0.0373 C.3     1  LIG1        0.0927
-      6 C           0.2684   -0.6866    0.1208 C.ar    1  LIG1       -0.0143
-      7 C           2.6376    0.0193   -0.3576 C.3     1  LIG1        0.0258
-      8 C          -0.3658   -0.0099   -0.9212 C.ar    1  LIG1       -0.0109
-      9 C          -0.4164   -0.9343    1.3105 C.ar    1  LIG1       -0.0524
-     10 C          -1.6849    0.4191   -0.7732 C.ar    1  LIG1        0.1586
-     11 C          -1.7353   -0.5053    1.4585 C.ar    1  LIG1       -0.0162
-     12 C          -2.3696    0.1713    0.4166 C.ar    1  LIG1        0.1582
-     13 C           3.5645    2.1013    0.3950 C.3     1  LIG1       -0.0157
-     14 H           2.0210   -1.6511    0.8741 H       1  LIG1        0.0656
-     15 H           2.3808    0.4742   -1.3225 H       1  LIG1        0.0453
-     16 H           3.6478   -0.3931   -0.4831 H       1  LIG1        0.0453
-     17 H           0.1501    0.1801   -1.8589 H       1  LIG1        0.0659
-     18 H           0.0640   -1.4598    2.1315 H       1  LIG1        0.0622
-     19 H           2.9013    0.5888    1.5858 H       1  LIG1        0.1217
-     20 H          -2.2571   -0.7050    2.3907 H       1  LIG1        0.0655
-     21 H           2.6646   -2.4067   -1.1652 H       1  LIG1        0.2103
-     22 H           3.2862    2.6124   -0.5325 H       1  LIG1        0.0388
-     23 H           4.5925    1.7346    0.3078 H       1  LIG1        0.0388
-     24 H           3.5401    2.8441    1.1985 H       1  LIG1        0.0388
-     25 H          -3.2008    1.2997   -1.5231 H       1  LIG1        0.2923
-     26 H          -3.9690    0.3259    1.4570 H       1  LIG1        0.2923
-@<TRIPOS>BOND
-     1     1     5    1
-     2     1    21    1
-     3     2    10    1
-     4     2    25    1
-     5     3    12    1
-     6     3    26    1
-     7     4     7    1
-     8     4    13    1
-     9     4    19    1
-    10     5     6    1
-    11     5     7    1
-    12     5    14    1
-    13     6     8   ar
-    14     6     9   ar
-    15     7    15    1
-    16     7    16    1
-    17     8    10   ar
-    18     8    17    1
-    19     9    11   ar
-    20     9    18    1
-    21    10    12   ar
-    22    11    12   ar
-    23    11    20    1
-    24    13    22    1
-    25    13    23    1
-    26    13    24    1`
+// @<TRIPOS>ATOM
+//       1 O           1.7394   -2.1169   -1.0894 O.3     1  LIG1       -0.3859
+//       2 O          -2.2941    1.0781   -1.7979 O.3     1  LIG1       -0.5033
+//       3 O          -3.6584    0.5842    0.5722 O.3     1  LIG1       -0.5033
+//       4 N           2.6359    1.0243    0.7030 N.3     1  LIG1       -0.3162
+//       5 C           1.6787   -1.1447   -0.0373 C.3     1  LIG1        0.0927
+//       6 C           0.2684   -0.6866    0.1208 C.ar    1  LIG1       -0.0143
+//       7 C           2.6376    0.0193   -0.3576 C.3     1  LIG1        0.0258
+//       8 C          -0.3658   -0.0099   -0.9212 C.ar    1  LIG1       -0.0109
+//       9 C          -0.4164   -0.9343    1.3105 C.ar    1  LIG1       -0.0524
+//      10 C          -1.6849    0.4191   -0.7732 C.ar    1  LIG1        0.1586
+//      11 C          -1.7353   -0.5053    1.4585 C.ar    1  LIG1       -0.0162
+//      12 C          -2.3696    0.1713    0.4166 C.ar    1  LIG1        0.1582
+//      13 C           3.5645    2.1013    0.3950 C.3     1  LIG1       -0.0157
+//      14 H           2.0210   -1.6511    0.8741 H       1  LIG1        0.0656
+//      15 H           2.3808    0.4742   -1.3225 H       1  LIG1        0.0453
+//      16 H           3.6478   -0.3931   -0.4831 H       1  LIG1        0.0453
+//      17 H           0.1501    0.1801   -1.8589 H       1  LIG1        0.0659
+//      18 H           0.0640   -1.4598    2.1315 H       1  LIG1        0.0622
+//      19 H           2.9013    0.5888    1.5858 H       1  LIG1        0.1217
+//      20 H          -2.2571   -0.7050    2.3907 H       1  LIG1        0.0655
+//      21 H           2.6646   -2.4067   -1.1652 H       1  LIG1        0.2103
+//      22 H           3.2862    2.6124   -0.5325 H       1  LIG1        0.0388
+//      23 H           4.5925    1.7346    0.3078 H       1  LIG1        0.0388
+//      24 H           3.5401    2.8441    1.1985 H       1  LIG1        0.0388
+//      25 H          -3.2008    1.2997   -1.5231 H       1  LIG1        0.2923
+//      26 H          -3.9690    0.3259    1.4570 H       1  LIG1        0.2923
+// @<TRIPOS>BOND
+//      1     1     5    1
+//      2     1    21    1
+//      3     2    10    1
+//      4     2    25    1
+//      5     3    12    1
+//      6     3    26    1
+//      7     4     7    1
+//      8     4    13    1
+//      9     4    19    1
+//     10     5     6    1
+//     11     5     7    1
+//     12     5    14    1
+//     13     6     8   ar
+//     14     6     9   ar
+//     15     7    15    1
+//     16     7    16    1
+//     17     8    10   ar
+//     18     8    17    1
+//     19     9    11   ar
+//     20     9    18    1
+//     21    10    12   ar
+//     22    11    12   ar
+//     23    11    20    1
+//     24    13    22    1
+//     25    13    23    1
+//     26    13    24    1`
 
-const Mol2StringMultiBlocks = `@<TRIPOS>MOLECULE
-5816
- 26 26 0 0 0
-SMALL
-GASTEIGER
+// const Mol2StringMultiBlocks = `@<TRIPOS>MOLECULE
+// 5816
+//  26 26 0 0 0
+// SMALL
+// GASTEIGER
 
-@<TRIPOS>ATOM
-      1 O           1.7394   -2.1169   -1.0894 O.3     1  LIG1       -0.3859
-      2 O          -2.2941    1.0781   -1.7979 O.3     1  LIG1       -0.5033
-      3 O          -3.6584    0.5842    0.5722 O.3     1  LIG1       -0.5033
-      4 N           2.6359    1.0243    0.7030 N.3     1  LIG1       -0.3162
-      5 C           1.6787   -1.1447   -0.0373 C.3     1  LIG1        0.0927
-      6 C           0.2684   -0.6866    0.1208 C.ar    1  LIG1       -0.0143
-      7 C           2.6376    0.0193   -0.3576 C.3     1  LIG1        0.0258
-      8 C          -0.3658   -0.0099   -0.9212 C.ar    1  LIG1       -0.0109
-      9 C          -0.4164   -0.9343    1.3105 C.ar    1  LIG1       -0.0524
-     10 C          -1.6849    0.4191   -0.7732 C.ar    1  LIG1        0.1586
-     11 C          -1.7353   -0.5053    1.4585 C.ar    1  LIG1       -0.0162
-     12 C          -2.3696    0.1713    0.4166 C.ar    1  LIG1        0.1582
-     13 C           3.5645    2.1013    0.3950 C.3     1  LIG1       -0.0157
-     14 H           2.0210   -1.6511    0.8741 H       1  LIG1        0.0656
-     15 H           2.3808    0.4742   -1.3225 H       1  LIG1        0.0453
-     16 H           3.6478   -0.3931   -0.4831 H       1  LIG1        0.0453
-     17 H           0.1501    0.1801   -1.8589 H       1  LIG1        0.0659
-     18 H           0.0640   -1.4598    2.1315 H       1  LIG1        0.0622
-     19 H           2.9013    0.5888    1.5858 H       1  LIG1        0.1217
-     20 H          -2.2571   -0.7050    2.3907 H       1  LIG1        0.0655
-     21 H           2.6646   -2.4067   -1.1652 H       1  LIG1        0.2103
-     22 H           3.2862    2.6124   -0.5325 H       1  LIG1        0.0388
-     23 H           4.5925    1.7346    0.3078 H       1  LIG1        0.0388
-     24 H           3.5401    2.8441    1.1985 H       1  LIG1        0.0388
-     25 H          -3.2008    1.2997   -1.5231 H       1  LIG1        0.2923
-     26 H          -3.9690    0.3259    1.4570 H       1  LIG1        0.2923
-@<TRIPOS>BOND
-     1     1     5    1
-     2     1    21    1
-     3     2    10    1
-     4     2    25    1
-     5     3    12    1
-     6     3    26    1
-     7     4     7    1
-     8     4    13    1
-     9     4    19    1
-    10     5     6    1
-    11     5     7    1
-    12     5    14    1
-    13     6     8   ar
-    14     6     9   ar
-    15     7    15    1
-    16     7    16    1
-    17     8    10   ar
-    18     8    17    1
-    19     9    11   ar
-    20     9    18    1
-    21    10    12   ar
-    22    11    12   ar
-    23    11    20    1
-    24    13    22    1
-    25    13    23    1
-    26    13    24    1
-@<TRIPOS>MOLECULE
-5816
- 26 26 0 0 0
-SMALL
-GASTEIGER
+// @<TRIPOS>ATOM
+//       1 O           1.7394   -2.1169   -1.0894 O.3     1  LIG1       -0.3859
+//       2 O          -2.2941    1.0781   -1.7979 O.3     1  LIG1       -0.5033
+//       3 O          -3.6584    0.5842    0.5722 O.3     1  LIG1       -0.5033
+//       4 N           2.6359    1.0243    0.7030 N.3     1  LIG1       -0.3162
+//       5 C           1.6787   -1.1447   -0.0373 C.3     1  LIG1        0.0927
+//       6 C           0.2684   -0.6866    0.1208 C.ar    1  LIG1       -0.0143
+//       7 C           2.6376    0.0193   -0.3576 C.3     1  LIG1        0.0258
+//       8 C          -0.3658   -0.0099   -0.9212 C.ar    1  LIG1       -0.0109
+//       9 C          -0.4164   -0.9343    1.3105 C.ar    1  LIG1       -0.0524
+//      10 C          -1.6849    0.4191   -0.7732 C.ar    1  LIG1        0.1586
+//      11 C          -1.7353   -0.5053    1.4585 C.ar    1  LIG1       -0.0162
+//      12 C          -2.3696    0.1713    0.4166 C.ar    1  LIG1        0.1582
+//      13 C           3.5645    2.1013    0.3950 C.3     1  LIG1       -0.0157
+//      14 H           2.0210   -1.6511    0.8741 H       1  LIG1        0.0656
+//      15 H           2.3808    0.4742   -1.3225 H       1  LIG1        0.0453
+//      16 H           3.6478   -0.3931   -0.4831 H       1  LIG1        0.0453
+//      17 H           0.1501    0.1801   -1.8589 H       1  LIG1        0.0659
+//      18 H           0.0640   -1.4598    2.1315 H       1  LIG1        0.0622
+//      19 H           2.9013    0.5888    1.5858 H       1  LIG1        0.1217
+//      20 H          -2.2571   -0.7050    2.3907 H       1  LIG1        0.0655
+//      21 H           2.6646   -2.4067   -1.1652 H       1  LIG1        0.2103
+//      22 H           3.2862    2.6124   -0.5325 H       1  LIG1        0.0388
+//      23 H           4.5925    1.7346    0.3078 H       1  LIG1        0.0388
+//      24 H           3.5401    2.8441    1.1985 H       1  LIG1        0.0388
+//      25 H          -3.2008    1.2997   -1.5231 H       1  LIG1        0.2923
+//      26 H          -3.9690    0.3259    1.4570 H       1  LIG1        0.2923
+// @<TRIPOS>BOND
+//      1     1     5    1
+//      2     1    21    1
+//      3     2    10    1
+//      4     2    25    1
+//      5     3    12    1
+//      6     3    26    1
+//      7     4     7    1
+//      8     4    13    1
+//      9     4    19    1
+//     10     5     6    1
+//     11     5     7    1
+//     12     5    14    1
+//     13     6     8   ar
+//     14     6     9   ar
+//     15     7    15    1
+//     16     7    16    1
+//     17     8    10   ar
+//     18     8    17    1
+//     19     9    11   ar
+//     20     9    18    1
+//     21    10    12   ar
+//     22    11    12   ar
+//     23    11    20    1
+//     24    13    22    1
+//     25    13    23    1
+//     26    13    24    1
+// @<TRIPOS>MOLECULE
+// 5816
+//  26 26 0 0 0
+// SMALL
+// GASTEIGER
 
-@<TRIPOS>ATOM
-      1 O           1.7394   -2.1169   -1.0894 O.3     1  LIG1       -0.3859
-      2 O          -2.2941    1.0781   -1.7979 O.3     1  LIG1       -0.5033
-      3 O          -3.6584    0.5842    0.5722 O.3     1  LIG1       -0.5033
-      4 N           2.6359    1.0243    0.7030 N.3     1  LIG1       -0.3162
-      5 C           1.6787   -1.1447   -0.0373 C.3     1  LIG1        0.0927
-      6 C           0.2684   -0.6866    0.1208 C.ar    1  LIG1       -0.0143
-      7 C           2.6376    0.0193   -0.3576 C.3     1  LIG1        0.0258
-      8 C          -0.3658   -0.0099   -0.9212 C.ar    1  LIG1       -0.0109
-      9 C          -0.4164   -0.9343    1.3105 C.ar    1  LIG1       -0.0524
-     10 C          -1.6849    0.4191   -0.7732 C.ar    1  LIG1        0.1586
-     11 C          -1.7353   -0.5053    1.4585 C.ar    1  LIG1       -0.0162
-     12 C          -2.3696    0.1713    0.4166 C.ar    1  LIG1        0.1582
-     13 C           3.5645    2.1013    0.3950 C.3     1  LIG1       -0.0157
-     14 H           2.0210   -1.6511    0.8741 H       1  LIG1        0.0656
-     15 H           2.3808    0.4742   -1.3225 H       1  LIG1        0.0453
-     16 H           3.6478   -0.3931   -0.4831 H       1  LIG1        0.0453
-     17 H           0.1501    0.1801   -1.8589 H       1  LIG1        0.0659
-     18 H           0.0640   -1.4598    2.1315 H       1  LIG1        0.0622
-     19 H           2.9013    0.5888    1.5858 H       1  LIG1        0.1217
-     20 H          -2.2571   -0.7050    2.3907 H       1  LIG1        0.0655
-     21 H           2.6646   -2.4067   -1.1652 H       1  LIG1        0.2103
-     22 H           3.2862    2.6124   -0.5325 H       1  LIG1        0.0388
-     23 H           4.5925    1.7346    0.3078 H       1  LIG1        0.0388
-     24 H           3.5401    2.8441    1.1985 H       1  LIG1        0.0388
-     25 H          -3.2008    1.2997   -1.5231 H       1  LIG1        0.2923
-     26 H          -3.9690    0.3259    1.4570 H       1  LIG1        0.2923
-@<TRIPOS>BOND
-     1     1     5    1
-     2     1    21    1
-     3     2    10    1
-     4     2    25    1
-     5     3    12    1
-     6     3    26    1
-     7     4     7    1
-     8     4    13    1
-     9     4    19    1
-    10     5     6    1
-    11     5     7    1
-    12     5    14    1
-    13     6     8   ar
-    14     6     9   ar
-    15     7    15    1
-    16     7    16    1
-    17     8    10   ar
-    18     8    17    1
-    19     9    11   ar
-    20     9    18    1
-    21    10    12   ar
-    22    11    12   ar
-    23    11    20    1
-    24    13    22    1
-    25    13    23    1
-    26    13    24    1`
+// @<TRIPOS>ATOM
+//       1 O           1.7394   -2.1169   -1.0894 O.3     1  LIG1       -0.3859
+//       2 O          -2.2941    1.0781   -1.7979 O.3     1  LIG1       -0.5033
+//       3 O          -3.6584    0.5842    0.5722 O.3     1  LIG1       -0.5033
+//       4 N           2.6359    1.0243    0.7030 N.3     1  LIG1       -0.3162
+//       5 C           1.6787   -1.1447   -0.0373 C.3     1  LIG1        0.0927
+//       6 C           0.2684   -0.6866    0.1208 C.ar    1  LIG1       -0.0143
+//       7 C           2.6376    0.0193   -0.3576 C.3     1  LIG1        0.0258
+//       8 C          -0.3658   -0.0099   -0.9212 C.ar    1  LIG1       -0.0109
+//       9 C          -0.4164   -0.9343    1.3105 C.ar    1  LIG1       -0.0524
+//      10 C          -1.6849    0.4191   -0.7732 C.ar    1  LIG1        0.1586
+//      11 C          -1.7353   -0.5053    1.4585 C.ar    1  LIG1       -0.0162
+//      12 C          -2.3696    0.1713    0.4166 C.ar    1  LIG1        0.1582
+//      13 C           3.5645    2.1013    0.3950 C.3     1  LIG1       -0.0157
+//      14 H           2.0210   -1.6511    0.8741 H       1  LIG1        0.0656
+//      15 H           2.3808    0.4742   -1.3225 H       1  LIG1        0.0453
+//      16 H           3.6478   -0.3931   -0.4831 H       1  LIG1        0.0453
+//      17 H           0.1501    0.1801   -1.8589 H       1  LIG1        0.0659
+//      18 H           0.0640   -1.4598    2.1315 H       1  LIG1        0.0622
+//      19 H           2.9013    0.5888    1.5858 H       1  LIG1        0.1217
+//      20 H          -2.2571   -0.7050    2.3907 H       1  LIG1        0.0655
+//      21 H           2.6646   -2.4067   -1.1652 H       1  LIG1        0.2103
+//      22 H           3.2862    2.6124   -0.5325 H       1  LIG1        0.0388
+//      23 H           4.5925    1.7346    0.3078 H       1  LIG1        0.0388
+//      24 H           3.5401    2.8441    1.1985 H       1  LIG1        0.0388
+//      25 H          -3.2008    1.2997   -1.5231 H       1  LIG1        0.2923
+//      26 H          -3.9690    0.3259    1.4570 H       1  LIG1        0.2923
+// @<TRIPOS>BOND
+//      1     1     5    1
+//      2     1    21    1
+//      3     2    10    1
+//      4     2    25    1
+//      5     3    12    1
+//      6     3    26    1
+//      7     4     7    1
+//      8     4    13    1
+//      9     4    19    1
+//     10     5     6    1
+//     11     5     7    1
+//     12     5    14    1
+//     13     6     8   ar
+//     14     6     9   ar
+//     15     7    15    1
+//     16     7    16    1
+//     17     8    10   ar
+//     18     8    17    1
+//     19     9    11   ar
+//     20     9    18    1
+//     21    10    12   ar
+//     22    11    12   ar
+//     23    11    20    1
+//     24    13    22    1
+//     25    13    23    1
+//     26    13    24    1`
 
 const Mol2StringMinimal = `@<TRIPOS>MOLECULE
 5816
@@ -190,32 +190,32 @@ SMALL
 GASTEIGER
 
 @<TRIPOS>ATOM
-      1 O           1.7394   -2.1169   -1.0894 O.3 
-      2 O          -2.2941    1.0781   -1.7979 O.3 
-      3 O          -3.6584    0.5842    0.5722 O.3 
+      1 O           1.7394   -2.1169   -1.0894 O.3
+      2 O          -2.2941    1.0781   -1.7979 O.3
+      3 O          -3.6584    0.5842    0.5722 O.3
       4 N           2.6359    1.0243    0.7030 N.3
       5 C           1.6787   -1.1447   -0.0373 C.3
-      6 C           0.2684   -0.6866    0.1208 C.ar 
-      7 C           2.6376    0.0193   -0.3576 C.3  
-      8 C          -0.3658   -0.0099   -0.9212 C.ar  
-      9 C          -0.4164   -0.9343    1.3105 C.ar 
-     10 C          -1.6849    0.4191   -0.7732 C.ar 
-     11 C          -1.7353   -0.5053    1.4585 C.ar  
-     12 C          -2.3696    0.1713    0.4166 C.ar   
-     13 C           3.5645    2.1013    0.3950 C.3     
-     14 H           2.0210   -1.6511    0.8741 H       
-     15 H           2.3808    0.4742   -1.3225 H       
-     16 H           3.6478   -0.3931   -0.4831 H      
-     17 H           0.1501    0.1801   -1.8589 H   
-     18 H           0.0640   -1.4598    2.1315 H   
-     19 H           2.9013    0.5888    1.5858 H       
-     20 H          -2.2571   -0.7050    2.3907 H       
-     21 H           2.6646   -2.4067   -1.1652 H       
-     22 H           3.2862    2.6124   -0.5325 H      
-     23 H           4.5925    1.7346    0.3078 H      
-     24 H           3.5401    2.8441    1.1985 H      
-     25 H          -3.2008    1.2997   -1.5231 H   
-     26 H          -3.9690    0.3259    1.4570 H   
+      6 C           0.2684   -0.6866    0.1208 C.ar
+      7 C           2.6376    0.0193   -0.3576 C.3
+      8 C          -0.3658   -0.0099   -0.9212 C.ar
+      9 C          -0.4164   -0.9343    1.3105 C.ar
+     10 C          -1.6849    0.4191   -0.7732 C.ar
+     11 C          -1.7353   -0.5053    1.4585 C.ar
+     12 C          -2.3696    0.1713    0.4166 C.ar
+     13 C           3.5645    2.1013    0.3950 C.3
+     14 H           2.0210   -1.6511    0.8741 H
+     15 H           2.3808    0.4742   -1.3225 H
+     16 H           3.6478   -0.3931   -0.4831 H
+     17 H           0.1501    0.1801   -1.8589 H
+     18 H           0.0640   -1.4598    2.1315 H
+     19 H           2.9013    0.5888    1.5858 H
+     20 H          -2.2571   -0.7050    2.3907 H
+     21 H           2.6646   -2.4067   -1.1652 H
+     22 H           3.2862    2.6124   -0.5325 H
+     23 H           4.5925    1.7346    0.3078 H
+     24 H           3.5401    2.8441    1.1985 H
+     25 H          -3.2008    1.2997   -1.5231 H
+     26 H          -3.9690    0.3259    1.4570 H
 @<TRIPOS>BOND
      1     1     5    1
      2     1    21    1
@@ -245,93 +245,93 @@ GASTEIGER
     26    13    24    1`
 
 describe('mol2 reader', () => {
-    it('basic', async () => {
-        const parsed =  await Mol2(Mol2String)();
-        if (parsed.isError) {
-            console.log(parsed)
-            return;
-        }
-        const mol2File = parsed.result;
-        const data = mol2File.structures[0];
-        const { molecule, atoms, bonds } = data;
+    // it('basic', async () => {
+    //     const parsed =  await Mol2(Mol2String)();
+    //     if (parsed.isError) {
+    //         console.log(parsed)
+    //         return;
+    //     }
+    //     const mol2File = parsed.result;
+    //     const data = mol2File.structures[0];
+    //     const { molecule, atoms, bonds } = data;
 
-        expect(molecule.mol_name).toBe('5816')
-        expect(molecule.num_atoms).toBe(26)
-        expect(molecule.num_bonds).toBe(26);
-        expect(molecule.num_subst).toBe(0);
-        expect(molecule.num_feat).toBe(0);
-        expect(molecule.num_sets).toBe(0);
-        expect(molecule.mol_type).toBe("SMALL")
-        expect(molecule.charge_type).toBe("GASTEIGER");
-        expect(molecule.status_bits).toBe("");
-        expect(molecule.mol_comment).toBe("");
+    //     expect(molecule.mol_name).toBe('5816')
+    //     expect(molecule.num_atoms).toBe(26)
+    //     expect(molecule.num_bonds).toBe(26);
+    //     expect(molecule.num_subst).toBe(0);
+    //     expect(molecule.num_feat).toBe(0);
+    //     expect(molecule.num_sets).toBe(0);
+    //     expect(molecule.mol_type).toBe("SMALL")
+    //     expect(molecule.charge_type).toBe("GASTEIGER");
+    //     expect(molecule.status_bits).toBe("");
+    //     expect(molecule.mol_comment).toBe("");
 
-        expect(atoms.count).toBe(26);
-        expect(atoms.atom_id.value(0)).toBe(1);
-        expect(atoms.atom_name.value(0)).toBe('O');
-        expect(atoms.x.value(0)).toBeCloseTo(1.7394, 0.001);
-        expect(atoms.y.value(0)).toBeCloseTo(-2.1169, 0.0001);
-        expect(atoms.z.value(0)).toBeCloseTo(-1.0893, 0.0001);
-        expect(atoms.atom_type.value(0)).toBe("O.3");
-        ///// optionals
-        expect(atoms.subst_id.value(0)).toBe(1);
-        expect(atoms.subst_name.value(0)).toBe('LIG1');
-        expect(atoms.charge.value(0)).toBeCloseTo(-0.3859);
-        expect(atoms.status_bit.value(0)).toBe('');
+    //     expect(atoms.count).toBe(26);
+    //     expect(atoms.atom_id.value(0)).toBe(1);
+    //     expect(atoms.atom_name.value(0)).toBe('O');
+    //     expect(atoms.x.value(0)).toBeCloseTo(1.7394, 0.001);
+    //     expect(atoms.y.value(0)).toBeCloseTo(-2.1169, 0.0001);
+    //     expect(atoms.z.value(0)).toBeCloseTo(-1.0893, 0.0001);
+    //     expect(atoms.atom_type.value(0)).toBe("O.3");
+    //     ///// optionals
+    //     expect(atoms.subst_id.value(0)).toBe(1);
+    //     expect(atoms.subst_name.value(0)).toBe('LIG1');
+    //     expect(atoms.charge.value(0)).toBeCloseTo(-0.3859);
+    //     expect(atoms.status_bit.value(0)).toBe('');
 
-        expect(bonds.count).toBe(26);
-        expect(bonds.bond_id.value(0)).toBe(1);
-        expect(bonds.origin_atom_id.value(0)).toBe(1);
-        expect(bonds.target_atom_id.value(0)).toBe(5);
-        expect(bonds.bond_type.value(0)).toBe('1');
-        /////// optional
-        expect(bonds.status_bits.value(0)).toBe('');
+    //     expect(bonds.count).toBe(26);
+    //     expect(bonds.bond_id.value(0)).toBe(1);
+    //     expect(bonds.origin_atom_id.value(0)).toBe(1);
+    //     expect(bonds.target_atom_id.value(0)).toBe(5);
+    //     expect(bonds.bond_type.value(0)).toBe('1');
+    //     /////// optional
+    //     expect(bonds.status_bits.value(0)).toBe('');
 
-    });
+    // });
 
-    it('multiblocks', async () => {
-        const parsed =  await Mol2(Mol2StringMultiBlocks)();
-        if (parsed.isError) {
-            console.log(parsed)
-            return;
-        }
-        const mol2File = parsed.result;
-        const data = mol2File.structures[1];
-        const { molecule, atoms, bonds } = data;
+    // it('multiblocks', async () => {
+    //     const parsed =  await Mol2(Mol2StringMultiBlocks)();
+    //     if (parsed.isError) {
+    //         console.log(parsed)
+    //         return;
+    //     }
+    //     const mol2File = parsed.result;
+    //     const data = mol2File.structures[1];
+    //     const { molecule, atoms, bonds } = data;
 
-        expect(molecule.mol_name).toBe('5816')
-        expect(molecule.num_atoms).toBe(26)
-        expect(molecule.num_bonds).toBe(26);
-        expect(molecule.num_subst).toBe(0);
-        expect(molecule.num_feat).toBe(0);
-        expect(molecule.num_sets).toBe(0);
-        expect(molecule.mol_type).toBe("SMALL")
-        expect(molecule.charge_type).toBe("GASTEIGER");
-        expect(molecule.status_bits).toBe("");
-        expect(molecule.mol_comment).toBe("");
+    //     expect(molecule.mol_name).toBe('5816')
+    //     expect(molecule.num_atoms).toBe(26)
+    //     expect(molecule.num_bonds).toBe(26);
+    //     expect(molecule.num_subst).toBe(0);
+    //     expect(molecule.num_feat).toBe(0);
+    //     expect(molecule.num_sets).toBe(0);
+    //     expect(molecule.mol_type).toBe("SMALL")
+    //     expect(molecule.charge_type).toBe("GASTEIGER");
+    //     expect(molecule.status_bits).toBe("");
+    //     expect(molecule.mol_comment).toBe("");
 
-        expect(atoms.count).toBe(26);
-        expect(atoms.atom_id.value(0)).toBe(1);
-        expect(atoms.atom_name.value(0)).toBe('O');
-        expect(atoms.x.value(0)).toBeCloseTo(1.7394, 0.001);
-        expect(atoms.y.value(0)).toBeCloseTo(-2.1169, 0.0001);
-        expect(atoms.z.value(0)).toBeCloseTo(-1.0893, 0.0001);
-        expect(atoms.atom_type.value(0)).toBe("O.3");
-        ///// optionals
-        expect(atoms.subst_id.value(0)).toBe(1);
-        expect(atoms.subst_name.value(0)).toBe('LIG1');
-        expect(atoms.charge.value(0)).toBeCloseTo(-0.3859);
-        expect(atoms.status_bit.value(0)).toBe('');
+    //     expect(atoms.count).toBe(26);
+    //     expect(atoms.atom_id.value(0)).toBe(1);
+    //     expect(atoms.atom_name.value(0)).toBe('O');
+    //     expect(atoms.x.value(0)).toBeCloseTo(1.7394, 0.001);
+    //     expect(atoms.y.value(0)).toBeCloseTo(-2.1169, 0.0001);
+    //     expect(atoms.z.value(0)).toBeCloseTo(-1.0893, 0.0001);
+    //     expect(atoms.atom_type.value(0)).toBe("O.3");
+    //     ///// optionals
+    //     expect(atoms.subst_id.value(0)).toBe(1);
+    //     expect(atoms.subst_name.value(0)).toBe('LIG1');
+    //     expect(atoms.charge.value(0)).toBeCloseTo(-0.3859);
+    //     expect(atoms.status_bit.value(0)).toBe('');
 
-        expect(bonds.count).toBe(26);
-        expect(bonds.bond_id.value(0)).toBe(1);
-        expect(bonds.origin_atom_id.value(0)).toBe(1);
-        expect(bonds.target_atom_id.value(0)).toBe(5);
-        expect(bonds.bond_type.value(0)).toBe('1');
-        /////// optional
-        expect(bonds.status_bits.value(0)).toBe('');
+    //     expect(bonds.count).toBe(26);
+    //     expect(bonds.bond_id.value(0)).toBe(1);
+    //     expect(bonds.origin_atom_id.value(0)).toBe(1);
+    //     expect(bonds.target_atom_id.value(0)).toBe(5);
+    //     expect(bonds.bond_type.value(0)).toBe('1');
+    //     /////// optional
+    //     expect(bonds.status_bits.value(0)).toBe('');
 
-    });
+    // });
 
     it('minimal', async () => {
         const parsed =  await Mol2(Mol2StringMinimal)();
@@ -355,25 +355,25 @@ describe('mol2 reader', () => {
         expect(molecule.mol_comment).toBe("");
 
         expect(atoms.count).toBe(26);
-        expect(atoms.atom_id.value(0)).toBe(1);
-        expect(atoms.atom_name.value(0)).toBe('O');
-        expect(atoms.x.value(0)).toBeCloseTo(1.7394, 0.001);
-        expect(atoms.y.value(0)).toBeCloseTo(-2.1169, 0.0001);
-        expect(atoms.z.value(0)).toBeCloseTo(-1.0893, 0.0001);
-        expect(atoms.atom_type.value(0)).toBe("O.3");
+        // expect(atoms.atom_id.value(0)).toBe(1);
+        // expect(atoms.atom_name.value(0)).toBe('O');
+        // expect(atoms.x.value(0)).toBeCloseTo(1.7394, 0.001);
+        // expect(atoms.y.value(0)).toBeCloseTo(-2.1169, 0.0001);
+        // expect(atoms.z.value(0)).toBeCloseTo(-1.0893, 0.0001);
+        // expect(atoms.atom_type.value(0)).toBe("O.3");
         ///// optionals
-        expect(atoms.subst_id.value(0)).toBe(0);
-        expect(atoms.subst_name.value(0)).toBe('');
-        expect(atoms.charge.value(0)).toBeCloseTo(0);
-        expect(atoms.status_bit.value(0)).toBe('');
+        // expect(atoms.subst_id.value(0)).toBe(0);
+        // expect(atoms.subst_name.value(0)).toBe('');
+        // expect(atoms.charge.value(0)).toBeCloseTo(0);
+        // expect(atoms.status_bit.value(0)).toBe('');
 
         expect(bonds.count).toBe(26);
-        expect(bonds.bond_id.value(0)).toBe(1);
-        expect(bonds.origin_atom_id.value(0)).toBe(1);
-        expect(bonds.target_atom_id.value(0)).toBe(5);
-        expect(bonds.bond_type.value(0)).toBe('1');
-        /////// optional
-        expect(bonds.status_bits.value(0)).toBe('');
+        // expect(bonds.bond_id.value(0)).toBe(1);
+        // expect(bonds.origin_atom_id.value(0)).toBe(1);
+        // expect(bonds.target_atom_id.value(0)).toBe(5);
+        // expect(bonds.bond_type.value(0)).toBe('1');
+        // /////// optional
+        // expect(bonds.status_bits.value(0)).toBe('');
 
     });
 });

+ 32 - 60
src/mol-io/reader/mol2/parser.ts

@@ -1,7 +1,7 @@
-//               NOTES                
-//When want to created undefined string column, must use 
+//               NOTES
+//When want to created undefined string column, must use
 // undefStr = UndefinedColumn(molecule.num_atoms, ColumnType.str)
-// but not 
+// but not
 // const undefPooledStr = UndefinedColumn(molecule.num_atoms, ColumnType.pooledStr);
 // because latter actuall return a column of zeros
 import { Column } from 'mol-data/db'
@@ -9,7 +9,7 @@ import { TokenBuilder, Tokenizer } from '../common/text/tokenizer'
 import TokenColumn from '../common/text/column/token'
 import * as Schema from './schema'
 import Result from '../result'
-import Computation from 'mol-util/computation' 
+import Computation from 'mol-util/computation'
 
 interface State {
     tokenizer: Tokenizer,
@@ -17,8 +17,6 @@ interface State {
     chunker: Computation.Chunker
 }
 
-
-
 function createEmptyMolecule(): Schema.Molecule {
     return {
         mol_name: '',
@@ -34,10 +32,7 @@ function createEmptyMolecule(): Schema.Molecule {
     };
 }
 
-
-
-
-function State(tokenizer: Tokenizer, ctx: Computation.Context): State { 
+function State(tokenizer: Tokenizer, ctx: Computation.Context): State {
     return {
         tokenizer,
         molecule: createEmptyMolecule(),
@@ -45,16 +40,11 @@ function State(tokenizer: Tokenizer, ctx: Computation.Context): State {
     };
 }
 
-
-
-
-
 function handleMolecule(state: State) {
     const { tokenizer, molecule } = state;
-    Tokenizer.markLine(tokenizer); 
     Tokenizer.markLine(tokenizer);
-    let name = Tokenizer.getTokenString(tokenizer);
-    molecule.mol_name = name;
+    Tokenizer.markLine(tokenizer);
+    molecule.mol_name = Tokenizer.getTokenString(tokenizer);
 
     Tokenizer.markLine(tokenizer);
     const values = Tokenizer.getTokenString(tokenizer).trim().split(/\s+/g);
@@ -71,13 +61,12 @@ function handleMolecule(state: State) {
     molecule.charge_type = Tokenizer.getTokenString(tokenizer);
 
     Tokenizer.markLine(tokenizer);
-    if(Tokenizer.getTokenString(tokenizer) == ''){return}
-    else{molecule.status_bits = Tokenizer.getTokenString(tokenizer)}
-
+    if (Tokenizer.getTokenString(tokenizer) == '') return
+    molecule.status_bits = Tokenizer.getTokenString(tokenizer)
 
     Tokenizer.markLine(tokenizer);
-    if(Tokenizer.getTokenString(tokenizer) == ''){return}
-    else{molecule.mol_comment = Tokenizer.getTokenString(tokenizer)}
+    if (Tokenizer.getTokenString(tokenizer) == '') return
+    molecule.mol_comment = Tokenizer.getTokenString(tokenizer)
 }
 
 
@@ -108,9 +97,9 @@ async function handleAtoms(state: State): Promise<Schema.Atoms> {
     const firstLine = Tokenizer.readLine(tokenizer);
     const firstLineArray = firstLine.trim().split(/\s+/g)
     const firstLineLength = firstLineArray.length;
-    
 
-    // optionals are in order "integer string float string". Use this to find out which column is missing or empty
+    // optionals are in order "integer string float string".
+    // Use this to find out which column is missing or empty
     for(let i = 6; i < firstLineLength; i++){
         if(!isNaN(Number(firstLineArray[i]))){
             if(firstLineArray[i].indexOf('.') == -1){
@@ -130,7 +119,7 @@ async function handleAtoms(state: State): Promise<Schema.Atoms> {
     const atom_idTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
     const atom_nameTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);;
     const xTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
-    const yTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);    
+    const yTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
     const zTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
     const atom_typeTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
     // optionals
@@ -139,7 +128,6 @@ async function handleAtoms(state: State): Promise<Schema.Atoms> {
     const chargeTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
     const status_bitTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
 
-
     const atom_idTokenColumn = TokenColumn(atom_idTokens);
     const atom_nameTokenColumn = TokenColumn(atom_nameTokens);
     const xTokenColumn = TokenColumn(xTokens);
@@ -149,10 +137,9 @@ async function handleAtoms(state: State): Promise<Schema.Atoms> {
     // optionals
     const subst_idTokenColumn = TokenColumn(subst_idTokens);
     const subst_nameTokenColumn = TokenColumn(subst_nameTokens);
-    const chargeTokenColumn = TokenColumn(chargeTokens); 
+    const chargeTokenColumn = TokenColumn(chargeTokens);
     const status_bitTokenColumn = TokenColumn(status_bitTokens);
-     
-    
+
     const undefFloat = Column.Undefined(molecule.num_atoms, Column.Schema.float);
     const undefInt = Column.Undefined(molecule.num_atoms, Column.Schema.int);
     const undefStr = Column.Undefined(molecule.num_atoms, Column.Schema.str);
@@ -166,9 +153,6 @@ async function handleAtoms(state: State): Promise<Schema.Atoms> {
     tokenizer.position = initialTokenizerPosition;
     tokenizer.lineNumber = initialTokenizerLineNumber;
 
-    
-
-
     const { length } = tokenizer;
     let linesAlreadyRead = 0;
     await state.chunker.process(chunkSize => {
@@ -192,26 +176,26 @@ async function handleAtoms(state: State): Promise<Schema.Atoms> {
                         TokenBuilder.addUnchecked(xTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
                         break;
                     case 3:
-                        TokenBuilder.addUnchecked(yTokens, tokenizer.tokenStart, tokenizer.tokenEnd); 
+                        TokenBuilder.addUnchecked(yTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
                         break;
                     case 4:
                         TokenBuilder.addUnchecked(zTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
-                        break; 
+                        break;
                     case 5:
                         TokenBuilder.addUnchecked(atom_typeTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
                         break;
                     default:
                         if(hasSubst_id == true && subst_idWritten == false){
-                            TokenBuilder.addUnchecked(subst_idTokens, tokenizer.tokenStart, tokenizer.tokenEnd); 
+                            TokenBuilder.addUnchecked(subst_idTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
                             subst_idWritten = true;
                         }else if(hasSubst_name == true && subst_nameWritten == false){
-                            TokenBuilder.addUnchecked(subst_nameTokens, tokenizer.tokenStart, tokenizer.tokenEnd); 
+                            TokenBuilder.addUnchecked(subst_nameTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
                             subst_nameWritten = true;
                         }else if(hasCharge == true && chargeWritten == false){
-                            TokenBuilder.addUnchecked(chargeTokens, tokenizer.tokenStart, tokenizer.tokenEnd); 
+                            TokenBuilder.addUnchecked(chargeTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
                             chargeWritten = true;
                         }else if(hasStatus_bit == true && status_bitWritten == false){
-                            TokenBuilder.addUnchecked(status_bitTokens, tokenizer.tokenStart, tokenizer.tokenEnd); 
+                            TokenBuilder.addUnchecked(status_bitTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
                             status_bitWritten = true;
                         }
                 }
@@ -221,27 +205,24 @@ async function handleAtoms(state: State): Promise<Schema.Atoms> {
         return linesToRead;
     }, update => update({ message: 'Parsing...', current: tokenizer.position, max: length }));
 
-
-
     const ret = {
         count: molecule.num_atoms,
         atom_id: atom_idTokenColumn(Column.Schema.int),
-        atom_name: atom_nameTokenColumn(Column.Schema.str), 
+        atom_name: atom_nameTokenColumn(Column.Schema.str),
         x: xTokenColumn(Column.Schema.float),
         y: yTokenColumn(Column.Schema.float),
         z: zTokenColumn(Column.Schema.float),
         atom_type: atom_typeColumn(Column.Schema.str),
         // optional properties
-        subst_id: hasSubst_id ? subst_idTokenColumn(Column.Schema.int) : undefInt, 
+        subst_id: hasSubst_id ? subst_idTokenColumn(Column.Schema.int) : undefInt,
         subst_name: hasSubst_name ? subst_nameTokenColumn(Column.Schema.str) : undefStr,
-        charge: hasCharge ? chargeTokenColumn(Column.Schema.float) : undefFloat, 
-        status_bit: hasStatus_bit ? status_bitTokenColumn(Column.Schema.str) : undefStr, 
+        charge: hasCharge ? chargeTokenColumn(Column.Schema.float) : undefFloat,
+        status_bit: hasStatus_bit ? status_bitTokenColumn(Column.Schema.str) : undefStr,
 
     };
     return ret;
 }
 
-
 async function handleBonds(state: State): Promise<Schema.Bonds> {
     const { tokenizer, molecule } = state;
     let hasStatus_bit = false;
@@ -255,7 +236,7 @@ async function handleBonds(state: State): Promise<Schema.Bonds> {
     const firstLine = Tokenizer.readLine(tokenizer);
     const firstLineArray = firstLine.trim().split(/\s+/g)
     const firstLineLength = firstLineArray.length;
-    if(firstLineLength == 5){
+    if(firstLineLength === 5){
         hasStatus_bit = true;
     }
 
@@ -300,10 +281,10 @@ async function handleBonds(state: State): Promise<Schema.Bonds> {
                         TokenBuilder.addUnchecked(target_bond_idTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
                         break;
                     case 3:
-                        TokenBuilder.addUnchecked(bondTypeTokens, tokenizer.tokenStart, tokenizer.tokenEnd); 
+                        TokenBuilder.addUnchecked(bondTypeTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
                         break;
                     default:
-                        TokenBuilder.addUnchecked(status_bitTokens, tokenizer.tokenStart, tokenizer.tokenEnd); 
+                        TokenBuilder.addUnchecked(status_bitTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
                         break;
                 }
             }
@@ -312,23 +293,18 @@ async function handleBonds(state: State): Promise<Schema.Bonds> {
         return linesToRead;
     }, update => update({ message: 'Parsing...', current: tokenizer.position, max: length }));
 
-    
-
     const ret = {
         count: molecule.num_bonds,
         bond_id: bond_idTokenColumn(Column.Schema.int),
-        origin_atom_id: origin_bond_idTokenColumn(Column.Schema.int), 
+        origin_atom_id: origin_bond_idTokenColumn(Column.Schema.int),
         target_atom_id: target_bond_idTokenColumn(Column.Schema.int),
-        bond_type: bondTypeTokenColumn(Column.Schema.str), 
-        status_bits: hasStatus_bit ? status_bitTokenColumn(Column.Schema.str) : undefStr, 
+        bond_type: bondTypeTokenColumn(Column.Schema.str),
+        status_bits: hasStatus_bit ? status_bitTokenColumn(Column.Schema.str) : undefStr,
     };
 
     return ret;
 }
 
-
-
-
 async function parseInternal(data: string, ctx: Computation.Context): Promise<Result<Schema.File>> {
     const tokenizer = Tokenizer(data);
 
@@ -346,10 +322,6 @@ async function parseInternal(data: string, ctx: Computation.Context): Promise<Re
     return Result.success(result);
 }
 
-
-
-
-
 export function parse(data: string) {
     return Computation.create<Result<Schema.File>>(async ctx => {
         return await parseInternal(data, ctx);