Browse Source

Merge pull request #1 from arose/mol2-2

Mol2 parser
David Sehnal 7 years ago
parent
commit
6dbc1b3560

+ 60 - 0
examples/adrenalin.mol2

@@ -0,0 +1,60 @@
+@<TRIPOS>MOLECULE
+5816
+ 26 26 0 0 0
+SMALL
+GASTEIGER
+
+@<TRIPOS>ATOM
+      1 O           1.7394   -2.1169   -1.0894 O.3     1  LIG1       -0.3859
+      2 O          -2.2941    1.0781   -1.7979 O.3     1  LIG1       -0.5033
+      3 O          -3.6584    0.5842    0.5722 O.3     1  LIG1       -0.5033
+      4 N           2.6359    1.0243    0.7030 N.3     1  LIG1       -0.3162
+      5 C           1.6787   -1.1447   -0.0373 C.3     1  LIG1        0.0927
+      6 C           0.2684   -0.6866    0.1208 C.ar    1  LIG1       -0.0143
+      7 C           2.6376    0.0193   -0.3576 C.3     1  LIG1        0.0258
+      8 C          -0.3658   -0.0099   -0.9212 C.ar    1  LIG1       -0.0109
+      9 C          -0.4164   -0.9343    1.3105 C.ar    1  LIG1       -0.0524
+     10 C          -1.6849    0.4191   -0.7732 C.ar    1  LIG1        0.1586
+     11 C          -1.7353   -0.5053    1.4585 C.ar    1  LIG1       -0.0162
+     12 C          -2.3696    0.1713    0.4166 C.ar    1  LIG1        0.1582
+     13 C           3.5645    2.1013    0.3950 C.3     1  LIG1       -0.0157
+     14 H           2.0210   -1.6511    0.8741 H       1  LIG1        0.0656
+     15 H           2.3808    0.4742   -1.3225 H       1  LIG1        0.0453
+     16 H           3.6478   -0.3931   -0.4831 H       1  LIG1        0.0453
+     17 H           0.1501    0.1801   -1.8589 H       1  LIG1        0.0659
+     18 H           0.0640   -1.4598    2.1315 H       1  LIG1        0.0622
+     19 H           2.9013    0.5888    1.5858 H       1  LIG1        0.1217
+     20 H          -2.2571   -0.7050    2.3907 H       1  LIG1        0.0655
+     21 H           2.6646   -2.4067   -1.1652 H       1  LIG1        0.2103
+     22 H           3.2862    2.6124   -0.5325 H       1  LIG1        0.0388
+     23 H           4.5925    1.7346    0.3078 H       1  LIG1        0.0388
+     24 H           3.5401    2.8441    1.1985 H       1  LIG1        0.0388
+     25 H          -3.2008    1.2997   -1.5231 H       1  LIG1        0.2923
+     26 H          -3.9690    0.3259    1.4570 H       1  LIG1        0.2923
+@<TRIPOS>BOND
+     1     1     5    1
+     2     1    21    1
+     3     2    10    1
+     4     2    25    1
+     5     3    12    1
+     6     3    26    1
+     7     4     7    1
+     8     4    13    1
+     9     4    19    1
+    10     5     6    1
+    11     5     7    1
+    12     5    14    1
+    13     6     8   ar
+    14     6     9   ar
+    15     7    15    1
+    16     7    16    1
+    17     8    10   ar
+    18     8    17    1
+    19     9    11   ar
+    20     9    18    1
+    21    10    12   ar
+    22    11    12   ar
+    23    11    20    1
+    24    13    22    1
+    25    13    23    1
+    26    13    24    1

+ 71 - 0
examples/sp-after.mol2

@@ -0,0 +1,71 @@
+@<TRIPOS>MOLECULE
+ZINC12921206_1_1
+   30    33     0     0     0
+SMALL
+USER_CHARGES
+
+@<TRIPOS>ATOM
+      1 C1         10.8630   31.3859   32.2468           1 CAN         0.0000
+      2 C2         11.5701   31.4080   33.4817           1 CAN         0.0000
+      3 C3         17.5821   22.4473   38.5639           1 CAN         0.0000
+      4 C4         17.9273   24.7114   37.0200           1 CAN         0.0000
+      5 C5         10.5243   30.1190   31.8528           1 CAN         0.0000
+      6 C6         12.3942   31.2627   36.0033           1 CAN         0.0000
+      7 C7         12.3703   30.0954   35.2910           1 CAN         0.0000
+      8 C8         12.7652   28.9134   36.0072           1 CAN         0.0000
+      9 C9         16.4753   23.1064   38.0609           1 CAN         0.0000
+     10 C10        16.6480   24.2419   37.2829           1 CAN         0.0000
+     11 C11        18.8581   22.9199   38.3189           1 CAN         0.0000
+     12 C12        19.0369   24.0561   37.5370           1 CAN         0.0000
+     13 C13        11.8010   30.1533   33.9700           1 CAN         0.0000
+     14 C14        12.9774   29.2537   37.3208           1 CAN         0.0000
+     15 C15        13.3581   27.0671   37.7950           1 CAN         0.0000
+     16 C16        13.0045   27.5630   35.4958           1 CAN         0.0000
+     17 C17        14.5101   24.5014   36.0984           1 CAN         0.0000
+     18 C18        15.0909   22.6148   38.4220           1 CAN         0.0000
+     19 C19        19.6116   21.0974   39.7116           1 CAN         0.0000
+     20 C20        21.4990   23.9541   37.7136           1 CAN         0.0000
+     21 C21        13.2073   25.2763   36.0792           1 CAN         0.0000
+     22 N1         13.3019   28.3134   38.2813           1 CAN         0.0000
+     23 N2         13.2604   26.6887   36.4706           1 CAN         0.0000
+     24 N3         15.5062   24.9862   36.8657           1 CAN         0.0000
+     25 O1         13.0317   27.1909   34.3441           1 CAN         0.0000
+     26 O2         14.6486   23.4801   35.4573           1 CAN         0.0000
+     27 O3         19.9073   22.2497   38.8981           1 CAN         0.0000
+     28 O4         20.2810   24.5742   37.2617           1 CAN         0.0000
+     29 S1         11.0122   28.9270   33.0152           1 CAN         0.0000
+     30 S2         12.7971   30.9513   37.6624           1 CAN         0.0000
+@<TRIPOS>BOND
+     1    1    2 0
+     2    1    5 0
+     3    2   13 0
+     4    3   11 0
+     5    3    9 0
+     6    4   12 0
+     7    4   10 0
+     8    5   29 0
+     9    6    7 0
+    10    6   30 0
+    11    7    8 0
+    12    7   13 0
+    13    8   14 0
+    14    8   16 0
+    15    9   10 0
+    16    9   18 0
+    17   10   24 0
+    18   11   27 0
+    19   11   12 0
+    20   12   28 0
+    21   13   29 0
+    22   14   22 0
+    23   14   30 0
+    24   15   22 0
+    25   15   23 0
+    26   16   23 0
+    27   16   25 0
+    28   17   21 0
+    29   17   24 0
+    30   17   26 0
+    31   19   27 0
+    32   20   28 0
+    33   21   23 0

+ 109 - 0
examples/sp-ido40.mol2

@@ -0,0 +1,109 @@
+@<TRIPOS>MOLECULE
+ZINC12921206_1
+ 49 52 0 0 0
+SMALL
+GASTEIGER
+
+@<TRIPOS>ATOM
+      1 C1          0.6216   -0.2760   -6.8683 C.ar    1  <1>        -0.0541
+      2 C2          0.3683   -0.8994   -5.6124 C.ar    1  <1>        -0.0469
+      3 C3         -0.4563   -9.3380   -7.8499 C.ar    1  <1>        -0.0116
+      4 C4         -1.3323   -7.7730   -5.7131 C.ar    1  <1>         0.0052
+      5 C5          1.6788    0.6049   -6.8049 C.ar    1  <1>        -0.0237
+      6 C6          1.0293   -0.0678   -2.1894 C.ar    1  <1>        -0.0133
+      7 C7          1.2229   -0.9407   -3.2374 C.ar    1  <1>         0.0102
+      8 C8          1.3908   -2.2806   -2.7857 C.ar    1  <1>         0.0803
+      9 C9          0.4711   -8.6946   -7.0304 C.ar    1  <1>        -0.0256
+     10 C10         0.0332   -7.9122   -5.9620 C.ar    1  <1>         0.0401
+     11 C11        -1.8217   -9.1987   -7.6011 C.ar    1  <1>         0.1619
+     12 C12        -2.2597   -8.4163   -6.5327 C.ar    1  <1>         0.1633
+     13 C13         1.2418   -0.4712   -4.6368 C.ar    1  <1>         0.0206
+     14 C14         1.3196   -2.3849   -1.4088 C.ar    1  <1>         0.1157
+     15 C15         1.6389   -4.5865   -1.3837 C.ar    1  <1>         0.1003
+     16 C16         1.6164   -3.4830   -3.5741 C.ar    1  <1>         0.2632
+     17 C17         0.6789   -6.4424   -4.0211 C.2     1  <1>         0.2371
+     18 C18         1.9274   -8.8602   -7.3213 C.3     1  <1>        -0.0378
+     19 C19        -2.1874  -10.6131   -9.4777 C.3     1  <1>         0.0790
+     20 C20        -3.9546   -7.4559   -5.1686 C.3     1  <1>         0.0790
+     21 C21         1.9636   -5.9327   -3.3970 C.3     1  <1>         0.1040
+     22 N1          1.4391   -3.5205   -0.6696 N.ar    1  <1>        -0.2298
+     23 N2          1.7329   -4.6371   -2.7763 N.ar    1  <1>        -0.2898
+     24 N3          0.9585   -7.2517   -5.1191 N.am    1  <1>        -0.2837
+     25 O1          1.6923   -3.4619   -4.7990 O.2     1  <1>        -0.2669
+     26 O2         -0.4256   -6.1563   -3.5700 O.2     1  <1>        -0.2735
+     27 O3         -2.7176   -9.8330   -8.4079 O.3     1  <1>        -0.4914
+     28 O4         -3.5906   -8.2727   -6.2796 O.3     1  <1>        -0.4914
+     29 S1          2.3612    0.6778   -5.2360 S.2     1  <1>        -0.0966
+     30 S2          1.0512   -0.8647   -0.6747 S.2     1  <1>        -0.0763
+     31 H1          0.0574   -0.4648   -7.7724 H       1  <1>         0.0623
+     32 H2         -0.4177   -1.6249   -5.4418 H       1  <1>         0.0629
+     33 H3         -0.1187   -9.9489   -8.6838 H       1  <1>         0.0658
+     34 H4         -1.6794   -7.1646   -4.8809 H       1  <1>         0.0674
+     35 H5          2.0837    1.2088   -7.6047 H       1  <1>         0.0693
+     36 H6          0.8746    1.0019   -2.2311 H       1  <1>         0.0700
+     37 H7          1.7520   -5.5673   -0.8940 H       1  <1>         0.1030
+     38 H8          2.2679   -8.0781   -8.0093 H       1  <1>         0.0278
+     39 H9          2.5235   -8.8023   -6.4025 H       1  <1>         0.0278
+     40 H10         2.1317   -9.8382   -7.7740 H       1  <1>         0.0278
+     41 H11        -3.0074  -11.0637  -10.0422 H       1  <1>         0.0660
+     42 H12        -1.5373  -11.3932   -9.0735 H       1  <1>         0.0660
+     43 H13        -1.6073   -9.9638  -10.1376 H       1  <1>         0.0660
+     44 H14        -5.0433   -7.4258   -5.0787 H       1  <1>         0.0660
+     45 H15        -3.5623   -6.4462   -5.3135 H       1  <1>         0.0660
+     46 H16        -3.5278   -7.8834   -4.2582 H       1  <1>         0.0660
+     47 H17         2.7361   -5.8372   -4.1665 H       1  <1>         0.0589
+     48 H18         2.3025   -6.6506   -2.6435 H       1  <1>         0.0589
+     49 H19         1.9385   -7.3938   -5.3543 H       1  <1>         0.1549
+@<TRIPOS>BOND
+     1     1     2   ar
+     2     1     5   ar
+     3     2    13   ar
+     4     3     9   ar
+     5     3    11   ar
+     6     4    10   ar
+     7     4    12   ar
+     8     5    29   ar
+     9     6     7   ar
+    10     6    30   ar
+    11     7     8   ar
+    12     7    13    1
+    13     8    14   ar
+    14     8    16   ar
+    15     9    10   ar
+    16     9    18    1
+    17    10    24    1
+    18    11    12   ar
+    19    11    27    1
+    20    12    28    1
+    21    13    29   ar
+    22    14    22   ar
+    23    14    30   ar
+    24    15    22   ar
+    25    15    23   ar
+    26    16    23   ar
+    27    16    25    2
+    28    17    21    1
+    29    17    24   am
+    30    17    26    2
+    31    19    27    1
+    32    20    28    1
+    33    21    23    1
+    34     1    31    1
+    35     2    32    1
+    36     3    33    1
+    37     4    34    1
+    38     5    35    1
+    39     6    36    1
+    40    15    37    1
+    41    18    38    1
+    42    18    39    1
+    43    18    40    1
+    44    19    41    1
+    45    19    42    1
+    46    19    43    1
+    47    20    44    1
+    48    20    45    1
+    49    20    46    1
+    50    21    47    1
+    51    21    48    1
+    52    24    49    1

+ 0 - 1
src/mol-io/reader/_spec/gro.spec.ts

@@ -32,7 +32,6 @@ describe('gro reader', () => {
             console.log(parsed)
             return;
         }
-
         const groFile = parsed.result;
         const data = groFile.structures[0];
 

+ 379 - 0
src/mol-io/reader/_spec/mol2.spec.ts

@@ -0,0 +1,379 @@
+
+import Mol2 from '../mol2/parser'
+
+// Fixture: one TRIPOS mol2 record (molecule '5816', 26 atoms, 26 bonds) in
+// which every ATOM line also carries the optional subst_id, subst_name and
+// charge columns. The text is the same as examples/adrenalin.mol2.
+const Mol2String = `@<TRIPOS>MOLECULE
+5816
+ 26 26 0 0 0
+SMALL
+GASTEIGER
+
+@<TRIPOS>ATOM
+      1 O           1.7394   -2.1169   -1.0894 O.3     1  LIG1       -0.3859
+      2 O          -2.2941    1.0781   -1.7979 O.3     1  LIG1       -0.5033
+      3 O          -3.6584    0.5842    0.5722 O.3     1  LIG1       -0.5033
+      4 N           2.6359    1.0243    0.7030 N.3     1  LIG1       -0.3162
+      5 C           1.6787   -1.1447   -0.0373 C.3     1  LIG1        0.0927
+      6 C           0.2684   -0.6866    0.1208 C.ar    1  LIG1       -0.0143
+      7 C           2.6376    0.0193   -0.3576 C.3     1  LIG1        0.0258
+      8 C          -0.3658   -0.0099   -0.9212 C.ar    1  LIG1       -0.0109
+      9 C          -0.4164   -0.9343    1.3105 C.ar    1  LIG1       -0.0524
+     10 C          -1.6849    0.4191   -0.7732 C.ar    1  LIG1        0.1586
+     11 C          -1.7353   -0.5053    1.4585 C.ar    1  LIG1       -0.0162
+     12 C          -2.3696    0.1713    0.4166 C.ar    1  LIG1        0.1582
+     13 C           3.5645    2.1013    0.3950 C.3     1  LIG1       -0.0157
+     14 H           2.0210   -1.6511    0.8741 H       1  LIG1        0.0656
+     15 H           2.3808    0.4742   -1.3225 H       1  LIG1        0.0453
+     16 H           3.6478   -0.3931   -0.4831 H       1  LIG1        0.0453
+     17 H           0.1501    0.1801   -1.8589 H       1  LIG1        0.0659
+     18 H           0.0640   -1.4598    2.1315 H       1  LIG1        0.0622
+     19 H           2.9013    0.5888    1.5858 H       1  LIG1        0.1217
+     20 H          -2.2571   -0.7050    2.3907 H       1  LIG1        0.0655
+     21 H           2.6646   -2.4067   -1.1652 H       1  LIG1        0.2103
+     22 H           3.2862    2.6124   -0.5325 H       1  LIG1        0.0388
+     23 H           4.5925    1.7346    0.3078 H       1  LIG1        0.0388
+     24 H           3.5401    2.8441    1.1985 H       1  LIG1        0.0388
+     25 H          -3.2008    1.2997   -1.5231 H       1  LIG1        0.2923
+     26 H          -3.9690    0.3259    1.4570 H       1  LIG1        0.2923
+@<TRIPOS>BOND
+     1     1     5    1
+     2     1    21    1
+     3     2    10    1
+     4     2    25    1
+     5     3    12    1
+     6     3    26    1
+     7     4     7    1
+     8     4    13    1
+     9     4    19    1
+    10     5     6    1
+    11     5     7    1
+    12     5    14    1
+    13     6     8   ar
+    14     6     9   ar
+    15     7    15    1
+    16     7    16    1
+    17     8    10   ar
+    18     8    17    1
+    19     9    11   ar
+    20     9    18    1
+    21    10    12   ar
+    22    11    12   ar
+    23    11    20    1
+    24    13    22    1
+    25    13    23    1
+    26    13    24    1`
+
+// Fixture: two back-to-back copies of the single-molecule record, separated
+// by one newline, so the input holds two @<TRIPOS>MOLECULE blocks. Built from
+// Mol2String instead of repeating the 60-line literal twice.
+const Mol2StringMultiBlocks = `${Mol2String}\n${Mol2String}`
+
+// Fixture: the same molecule as Mol2String, but every ATOM line stops after
+// the atom_type column (no subst_id, subst_name or charge). Several ATOM
+// lines end in trailing blanks; they are part of the fixture text.
+const Mol2StringMinimal = `@<TRIPOS>MOLECULE
+5816
+ 26 26 0 0 0
+SMALL
+GASTEIGER
+
+@<TRIPOS>ATOM
+      1 O           1.7394   -2.1169   -1.0894 O.3 
+      2 O          -2.2941    1.0781   -1.7979 O.3 
+      3 O          -3.6584    0.5842    0.5722 O.3 
+      4 N           2.6359    1.0243    0.7030 N.3
+      5 C           1.6787   -1.1447   -0.0373 C.3
+      6 C           0.2684   -0.6866    0.1208 C.ar 
+      7 C           2.6376    0.0193   -0.3576 C.3  
+      8 C          -0.3658   -0.0099   -0.9212 C.ar  
+      9 C          -0.4164   -0.9343    1.3105 C.ar 
+     10 C          -1.6849    0.4191   -0.7732 C.ar 
+     11 C          -1.7353   -0.5053    1.4585 C.ar  
+     12 C          -2.3696    0.1713    0.4166 C.ar   
+     13 C           3.5645    2.1013    0.3950 C.3     
+     14 H           2.0210   -1.6511    0.8741 H       
+     15 H           2.3808    0.4742   -1.3225 H       
+     16 H           3.6478   -0.3931   -0.4831 H      
+     17 H           0.1501    0.1801   -1.8589 H   
+     18 H           0.0640   -1.4598    2.1315 H   
+     19 H           2.9013    0.5888    1.5858 H       
+     20 H          -2.2571   -0.7050    2.3907 H       
+     21 H           2.6646   -2.4067   -1.1652 H       
+     22 H           3.2862    2.6124   -0.5325 H      
+     23 H           4.5925    1.7346    0.3078 H      
+     24 H           3.5401    2.8441    1.1985 H      
+     25 H          -3.2008    1.2997   -1.5231 H   
+     26 H          -3.9690    0.3259    1.4570 H   
+@<TRIPOS>BOND
+     1     1     5    1
+     2     1    21    1
+     3     2    10    1
+     4     2    25    1
+     5     3    12    1
+     6     3    26    1
+     7     4     7    1
+     8     4    13    1
+     9     4    19    1
+    10     5     6    1
+    11     5     7    1
+    12     5    14    1
+    13     6     8   ar
+    14     6     9   ar
+    15     7    15    1
+    16     7    16    1
+    17     8    10   ar
+    18     8    17    1
+    19     9    11   ar
+    20     9    18    1
+    21    10    12   ar
+    22    11    12   ar
+    23    11    20    1
+    24    13    22    1
+    25    13    23    1
+    26    13    24    1`
+
+describe('mol2 reader', () => {
+    /** Values of the optional ATOM columns expected for the first atom. */
+    interface ExpectedOptionals {
+        subst_id: number,
+        subst_name: string,
+        charge: number
+    }
+
+    /**
+     * Parses `source`, selects the structure at `structureIndex` and checks it
+     * against the values every fixture in this file shares (molecule '5816',
+     * 26 atoms, 26 bonds). Only the optional ATOM columns differ between
+     * fixtures, so they are passed in via `optionals`.
+     *
+     * Throws when the parser reports an error, so that a failed parse fails
+     * the test instead of returning early and passing silently.
+     */
+    async function expectAdrenalin(source: string, structureIndex: number, optionals: ExpectedOptionals) {
+        const parsed = await Mol2(source)();
+        if (parsed.isError) {
+            console.log(parsed)
+            throw new Error('mol2 parsing failed');
+        }
+        const mol2File = parsed.result;
+        const data = mol2File.structures[structureIndex];
+        const { molecule, atoms, bonds } = data;
+
+        expect(molecule.mol_name).toBe('5816');
+        expect(molecule.num_atoms).toBe(26);
+        expect(molecule.num_bonds).toBe(26);
+        expect(molecule.num_subst).toBe(0);
+        expect(molecule.num_feat).toBe(0);
+        expect(molecule.num_sets).toBe(0);
+        expect(molecule.mol_type).toBe('SMALL');
+        expect(molecule.charge_type).toBe('GASTEIGER');
+        expect(molecule.status_bits).toBe('');
+        expect(molecule.mol_comment).toBe('');
+
+        expect(atoms.count).toBe(26);
+        expect(atoms.atom_id.value(0)).toBe(1);
+        expect(atoms.atom_name.value(0)).toBe('O');
+        // The second argument of toBeCloseTo is a number of decimal digits,
+        // not a tolerance; 4 matches the precision written in the fixtures.
+        expect(atoms.x.value(0)).toBeCloseTo(1.7394, 4);
+        expect(atoms.y.value(0)).toBeCloseTo(-2.1169, 4);
+        expect(atoms.z.value(0)).toBeCloseTo(-1.0894, 4);
+        expect(atoms.atom_type.value(0)).toBe('O.3');
+        // optional ATOM columns
+        expect(atoms.subst_id.value(0)).toBe(optionals.subst_id);
+        expect(atoms.subst_name.value(0)).toBe(optionals.subst_name);
+        expect(atoms.charge.value(0)).toBeCloseTo(optionals.charge, 4);
+        expect(atoms.status_bit.value(0)).toBe('');
+
+        expect(bonds.count).toBe(26);
+        expect(bonds.bond_id.value(0)).toBe(1);
+        expect(bonds.origin_atom_id.value(0)).toBe(1);
+        expect(bonds.target_atom_id.value(0)).toBe(5);
+        expect(bonds.bond_type.value(0)).toBe('1');
+        // optional BOND column
+        expect(bonds.status_bits.value(0)).toBe('');
+    }
+
+    it('basic', async () => {
+        await expectAdrenalin(Mol2String, 0, { subst_id: 1, subst_name: 'LIG1', charge: -0.3859 });
+    });
+
+    it('multiblocks', async () => {
+        // Index 1 selects the second MOLECULE block of the input.
+        await expectAdrenalin(Mol2StringMultiBlocks, 1, { subst_id: 1, subst_name: 'LIG1', charge: -0.3859 });
+    });
+
+    it('minimal', async () => {
+        // Optional columns are absent from the input; the expected values are
+        // the defaults the reader reports for them.
+        await expectAdrenalin(Mol2StringMinimal, 0, { subst_id: 0, subst_name: '', charge: 0 });
+    });
+});

+ 359 - 0
src/mol-io/reader/mol2/parser.ts

@@ -0,0 +1,359 @@
+//               NOTES
+// To create an undefined string column, use
+// undefStr = UndefinedColumn(molecule.num_atoms, ColumnType.str)
+// and not
+// const undefPooledStr = UndefinedColumn(molecule.num_atoms, ColumnType.pooledStr);
+// because the latter actually returns a column of zeros.
+import { Column } from 'mol-data/db'
+import { TokenBuilder, Tokenizer } from '../common/text/tokenizer'
+import TokenColumn from '../common/text/column/token'
+import * as Schema from './schema'
+import Result from '../result'
+import Computation from 'mol-util/computation' 
+
+/** Parsing state handed to the record handlers while one molecule is being read. */
+interface State {
+    // tokenizer the handlers read from and advance
+    tokenizer: Tokenizer,
+    // molecule header record; its fields are assigned by handleMolecule
+    molecule: Schema.Molecule,
+    // chunker whose process() drives the row loops in handleAtoms and handleBonds
+    chunker: Computation.Chunker
+}
+
+
+
+/**
+ * Builds a blank molecule header record: every count is 0 and every text
+ * field is the empty string.
+ */
+function createEmptyMolecule(): Schema.Molecule {
+    const blank: Schema.Molecule = {
+        mol_name: '',
+        num_atoms: 0,
+        num_bonds: 0,
+        num_subst: 0,
+        num_feat: 0,
+        num_sets: 0,
+        mol_type: '',
+        charge_type: '',
+        status_bits: '',
+        mol_comment: ''
+    };
+    return blank;
+}
+
+
+
+
+/**
+ * Creates the parsing state for one molecule: the given tokenizer, a blank
+ * molecule record and a chunker built from ctx with a chunk size of 100000.
+ */
+function State(tokenizer: Tokenizer, ctx: Computation.Context): State {
+    const molecule = createEmptyMolecule();
+    const chunker = Computation.chunker(ctx, 100000);
+    return { tokenizer, molecule, chunker };
+}
+
+
+
+
+
+/**
+ * Reads the molecule header into state.molecule.
+ *
+ * Lines are consumed in this order: one skipped line (the record type
+ * indicator), mol_name, the counts line (num_atoms num_bonds num_subst
+ * num_feat num_sets), mol_type, charge_type and then the optional
+ * status_bits and mol_comment lines.
+ */
+function handleMolecule(state: State) {
+    const { tokenizer, molecule } = state;
+
+    // the first marked line is discarded, the second one holds the molecule name
+    Tokenizer.markLine(tokenizer);
+    Tokenizer.markLine(tokenizer);
+    molecule.mol_name = Tokenizer.getTokenString(tokenizer);
+
+    Tokenizer.markLine(tokenizer);
+    const values = Tokenizer.getTokenString(tokenizer).trim().split(/\s+/g);
+    // each count is read from its own position; a missing or non-numeric entry becomes 0
+    const countAt = (index: number): number => {
+        const parsed = parseInt(values[index], 10);
+        return isNaN(parsed) ? 0 : parsed;
+    };
+    molecule.num_atoms = countAt(0);
+    molecule.num_bonds = countAt(1);
+    molecule.num_subst = countAt(2);
+    molecule.num_feat = countAt(3);
+    molecule.num_sets = countAt(4);
+
+    Tokenizer.markLine(tokenizer);
+    molecule.mol_type = Tokenizer.getTokenString(tokenizer);
+
+    Tokenizer.markLine(tokenizer);
+    molecule.charge_type = Tokenizer.getTokenString(tokenizer);
+
+    // The two remaining lines are optional. An empty line ends the header; so
+    // does the next record indicator, which stays the current token so that
+    // handleAtoms (which tests the current token before advancing) still sees it.
+    Tokenizer.markLine(tokenizer);
+    const statusBits = Tokenizer.getTokenString(tokenizer);
+    if (statusBits === '' || statusBits.indexOf('@<TRIPOS>') === 0) {
+        return;
+    }
+    molecule.status_bits = statusBits;
+
+    Tokenizer.markLine(tokenizer);
+    const comment = Tokenizer.getTokenString(tokenizer);
+    if (comment === '' || comment.indexOf('@<TRIPOS>') === 0) {
+        return;
+    }
+    molecule.mol_comment = comment;
+}
+
+
+// Atom status flags recognised by isStatus_bit.
+const ATOM_STATUS_BITS = new Set<string>([
+    'DSPMOD', 'TYPECOL', 'CAP', 'BACKBONE', 'DICT', 'ESSENTIAL', 'WATER', 'DIRECT'
+]);
+
+/**
+ * Tells whether a whitespace-delimited field of an atom line is a status_bit
+ * value rather than a substructure name.
+ *
+ * The field is split on '|' and matches when at least one part is a known
+ * flag. Whole-flag comparison keeps names that merely contain a flag as a
+ * substring (e.g. 'CAPPED1') from being classified as status bits.
+ *
+ * @param aString a single field taken from an atom line
+ * @returns true when the field contains a known status flag
+ */
+function isStatus_bit(aString: string): boolean {
+    return aString.split('|').some(flag => ATOM_STATUS_BITS.has(flag));
+}
+
+
+/**
+ * Reads the '@<TRIPOS>ATOM' record into token-backed columns.
+ *
+ * Every atom line carries six required fields (atom_id, atom_name, x, y, z,
+ * atom_type). Which of the optional fields (subst_id, subst_name, charge,
+ * status_bit) are present is decided from the first atom line only, and that
+ * layout is then assumed for all molecule.num_atoms lines. Optional columns
+ * that are absent are returned as undefined columns.
+ *
+ * @param state parsing state; state.molecule.num_atoms must already be set
+ * @returns the atom columns together with the atom count
+ */
+async function handleAtoms(state: State): Promise<Schema.Atoms> {
+    const { tokenizer, molecule } = state;
+    let hasSubst_id = false;
+    let hasSubst_name = false;
+    let hasCharge = false;
+    let hasStatus_bit = false;
+
+    // Advance line by line until the current token is the ATOM record indicator.
+    // The position check stops the search at the end of the input so that a
+    // file without an ATOM record cannot loop forever.
+    while (Tokenizer.getTokenString(tokenizer) !== '@<TRIPOS>ATOM' && tokenizer.position < tokenizer.length) {
+        Tokenizer.markLine(tokenizer);
+    }
+
+    // remember where the atom lines start; the first line is only peeked at here
+    const initialTokenizerPosition = tokenizer.position;
+    const initialTokenizerLineNumber = tokenizer.lineNumber;
+    const firstLine = Tokenizer.readLine(tokenizer);
+    const firstLineArray = firstLine.trim().split(/\s+/g);
+    const firstLineLength = firstLineArray.length;
+
+    // optionals are in order "integer string float string". Use this to find out which column is missing or empty
+    for (let i = 6; i < firstLineLength; i++) {
+        const field = firstLineArray[i];
+        if (!isNaN(Number(field))) {
+            // numeric field: no decimal point means subst_id, otherwise charge
+            if (field.indexOf('.') === -1) {
+                hasSubst_id = true;
+            } else {
+                hasCharge = true;
+            }
+        } else if (isStatus_bit(field)) {
+            hasStatus_bit = true;
+        } else {
+            hasSubst_name = true;
+        }
+    }
+
+    const atom_idTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
+    const atom_nameTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
+    const xTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
+    const yTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
+    const zTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
+    const atom_typeTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
+    // optionals
+    const subst_idTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
+    const subst_nameTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
+    const chargeTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
+    const status_bitTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
+
+    const atom_idTokenColumn = TokenColumn(atom_idTokens);
+    const atom_nameTokenColumn = TokenColumn(atom_nameTokens);
+    const xTokenColumn = TokenColumn(xTokens);
+    const yTokenColumn = TokenColumn(yTokens);
+    const zTokenColumn = TokenColumn(zTokens);
+    const atom_typeColumn = TokenColumn(atom_typeTokens);
+    // optionals
+    const subst_idTokenColumn = TokenColumn(subst_idTokens);
+    const subst_nameTokenColumn = TokenColumn(subst_nameTokens);
+    const chargeTokenColumn = TokenColumn(chargeTokens);
+    const status_bitTokenColumn = TokenColumn(status_bitTokens);
+
+    const undefFloat = Column.Undefined(molecule.num_atoms, Column.Schema.float);
+    const undefInt = Column.Undefined(molecule.num_atoms, Column.Schema.int);
+    const undefStr = Column.Undefined(molecule.num_atoms, Column.Schema.str);
+
+    // Six required fields (switch cases 0..5 below) plus one per detected
+    // optional field. Starting from 5 would leave case 5 (atom_type) and the
+    // last optional field unreachable.
+    let numOfColumn = 6;
+    if (hasSubst_id) { numOfColumn++; }
+    if (hasSubst_name) { numOfColumn++; }
+    if (hasCharge) { numOfColumn++; }
+    if (hasStatus_bit) { numOfColumn++; }
+
+    // rewind to the first atom line before the real pass
+    tokenizer.position = initialTokenizerPosition;
+    tokenizer.lineNumber = initialTokenizerLineNumber;
+
+    const { length } = tokenizer;
+    let linesAlreadyRead = 0;
+    await state.chunker.process(chunkSize => {
+        const linesToRead = Math.min(molecule.num_atoms - linesAlreadyRead, chunkSize);
+        for (let i = 0; i < linesToRead; i++) {
+            // per-line markers so each optional field is filled at most once, in order
+            let subst_idWritten = false;
+            let subst_nameWritten = false;
+            let chargeWritten = false;
+            let status_bitWritten = false;
+            for (let j = 0; j < numOfColumn; j++) {
+                Tokenizer.skipWhitespace(tokenizer);
+                Tokenizer.eatValue(tokenizer);
+                switch (j) {
+                    case 0:
+                        TokenBuilder.addUnchecked(atom_idTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
+                        break;
+                    case 1:
+                        TokenBuilder.addUnchecked(atom_nameTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
+                        break;
+                    case 2:
+                        TokenBuilder.addUnchecked(xTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
+                        break;
+                    case 3:
+                        TokenBuilder.addUnchecked(yTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
+                        break;
+                    case 4:
+                        TokenBuilder.addUnchecked(zTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
+                        break;
+                    case 5:
+                        TokenBuilder.addUnchecked(atom_typeTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
+                        break;
+                    default:
+                        // the token goes to the first present optional column not yet written on this line
+                        if (hasSubst_id && !subst_idWritten) {
+                            TokenBuilder.addUnchecked(subst_idTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
+                            subst_idWritten = true;
+                        } else if (hasSubst_name && !subst_nameWritten) {
+                            TokenBuilder.addUnchecked(subst_nameTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
+                            subst_nameWritten = true;
+                        } else if (hasCharge && !chargeWritten) {
+                            TokenBuilder.addUnchecked(chargeTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
+                            chargeWritten = true;
+                        } else if (hasStatus_bit && !status_bitWritten) {
+                            TokenBuilder.addUnchecked(status_bitTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
+                            status_bitWritten = true;
+                        }
+                }
+            }
+        }
+        linesAlreadyRead += linesToRead;
+        return linesToRead;
+    }, update => update({ message: 'Parsing...', current: tokenizer.position, max: length }));
+
+    const ret = {
+        count: molecule.num_atoms,
+        atom_id: atom_idTokenColumn(Column.Schema.int),
+        atom_name: atom_nameTokenColumn(Column.Schema.str),
+        x: xTokenColumn(Column.Schema.float),
+        y: yTokenColumn(Column.Schema.float),
+        z: zTokenColumn(Column.Schema.float),
+        atom_type: atom_typeColumn(Column.Schema.str),
+        // optional properties
+        subst_id: hasSubst_id ? subst_idTokenColumn(Column.Schema.int) : undefInt,
+        subst_name: hasSubst_name ? subst_nameTokenColumn(Column.Schema.str) : undefStr,
+        charge: hasCharge ? chargeTokenColumn(Column.Schema.float) : undefFloat,
+        status_bit: hasStatus_bit ? status_bitTokenColumn(Column.Schema.str) : undefStr,
+
+    };
+    return ret;
+}
+
+
+/**
+ * Reads the '@<TRIPOS>BOND' record into token-backed columns.
+ *
+ * Every bond line carries four required fields (bond_id, origin_atom_id,
+ * target_atom_id, bond_type). A fifth field, status_bits, is read when the
+ * first bond line has exactly five fields; otherwise that column is returned
+ * as an undefined column.
+ *
+ * @param state parsing state; state.molecule.num_bonds must already be set
+ * @returns the bond columns together with the bond count
+ */
+async function handleBonds(state: State): Promise<Schema.Bonds> {
+    const { tokenizer, molecule } = state;
+    let hasStatus_bit = false;
+
+    // Advance line by line until the current token is the BOND record indicator.
+    // The position check stops the search at the end of the input so that a
+    // file without a BOND record cannot loop forever.
+    while (Tokenizer.getTokenString(tokenizer) !== '@<TRIPOS>BOND' && tokenizer.position < tokenizer.length) {
+        Tokenizer.markLine(tokenizer);
+    }
+
+    // remember where the bond lines start; the first line is only peeked at here
+    const initialTokenizerPosition = tokenizer.position;
+    const initialTokenizerLineNumber = tokenizer.lineNumber;
+    const firstLine = Tokenizer.readLine(tokenizer);
+    const firstLineArray = firstLine.trim().split(/\s+/g);
+    const firstLineLength = firstLineArray.length;
+    if (firstLineLength === 5) {
+        hasStatus_bit = true;
+    }
+
+    const bond_idTokens = TokenBuilder.create(tokenizer, molecule.num_bonds * 2);
+    const origin_atom_idTokens = TokenBuilder.create(tokenizer, molecule.num_bonds * 2);
+    const target_atom_idTokens = TokenBuilder.create(tokenizer, molecule.num_bonds * 2);
+    const bondTypeTokens = TokenBuilder.create(tokenizer, molecule.num_bonds * 2);
+    // optional
+    const status_bitTokens = TokenBuilder.create(tokenizer, molecule.num_bonds * 2);
+
+    const bond_idTokenColumn = TokenColumn(bond_idTokens);
+    const origin_atom_idTokenColumn = TokenColumn(origin_atom_idTokens);
+    const target_atom_idTokenColumn = TokenColumn(target_atom_idTokens);
+    const bondTypeTokenColumn = TokenColumn(bondTypeTokens);
+    // optional
+    const status_bitTokenColumn = TokenColumn(status_bitTokens);
+
+    const undefStr = Column.Undefined(molecule.num_bonds, Column.Schema.str);
+
+    // four required fields (switch cases 0..3 below) plus the optional status_bits
+    let numberOfColumn = 4;
+    if (hasStatus_bit) { numberOfColumn++; }
+
+    // rewind to the first bond line before the real pass
+    tokenizer.position = initialTokenizerPosition;
+    tokenizer.lineNumber = initialTokenizerLineNumber;
+
+    const { length } = tokenizer;
+    let linesAlreadyRead = 0;
+    await state.chunker.process(chunkSize => {
+        const linesToRead = Math.min(molecule.num_bonds - linesAlreadyRead, chunkSize);
+        for (let i = 0; i < linesToRead; i++) {
+            for (let j = 0; j < numberOfColumn; j++) {
+                Tokenizer.skipWhitespace(tokenizer);
+                Tokenizer.eatValue(tokenizer);
+                switch (j) {
+                    case 0:
+                        TokenBuilder.addUnchecked(bond_idTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
+                        break;
+                    case 1:
+                        TokenBuilder.addUnchecked(origin_atom_idTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
+                        break;
+                    case 2:
+                        TokenBuilder.addUnchecked(target_atom_idTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
+                        break;
+                    case 3:
+                        TokenBuilder.addUnchecked(bondTypeTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
+                        break;
+                    default:
+                        TokenBuilder.addUnchecked(status_bitTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
+                        break;
+                }
+            }
+        }
+        linesAlreadyRead += linesToRead;
+        return linesToRead;
+    }, update => update({ message: 'Parsing...', current: tokenizer.position, max: length }));
+
+    const ret = {
+        count: molecule.num_bonds,
+        bond_id: bond_idTokenColumn(Column.Schema.int),
+        origin_atom_id: origin_atom_idTokenColumn(Column.Schema.int),
+        target_atom_id: target_atom_idTokenColumn(Column.Schema.int),
+        bond_type: bondTypeTokenColumn(Column.Schema.str),
+        status_bits: hasStatus_bit ? status_bitTokenColumn(Column.Schema.str) : undefStr,
+    };
+
+    return ret;
+}
+
+
+
+
+/**
+ * Parses every molecule found in a Mol2 string.
+ *
+ * For each molecule the header, the atom record and the bond record are read
+ * in that order and collected into one structure.
+ *
+ * @param data complete Mol2 file content
+ * @param ctx computation context used for progress updates and chunking
+ * @returns a successful Result wrapping the list of parsed structures
+ */
+async function parseInternal(data: string, ctx: Computation.Context): Promise<Result<Schema.File>> {
+    const tokenizer = Tokenizer(data);
+
+    ctx.update({ message: 'Parsing...', current: 0, max: data.length });
+    const structures: Schema.Structure[] = [];
+
+    // Whitespace is skipped before each molecule so that handleMolecule always
+    // starts on the first character of a record line, and so that trailing
+    // blank lines after the last record do not start another, empty molecule.
+    // NOTE(review): assumes Tokenizer.skipWhitespace also steps over line
+    // breaks, as the row loops in handleAtoms/handleBonds rely on - confirm.
+    Tokenizer.skipWhitespace(tokenizer);
+    while (tokenizer.position < data.length) {
+        const state = State(tokenizer, ctx);
+        handleMolecule(state);
+        const atoms = await handleAtoms(state);
+        const bonds = await handleBonds(state);
+        structures.push({ molecule: state.molecule, atoms, bonds });
+        Tokenizer.skipWhitespace(tokenizer);
+    }
+
+    const result: Schema.File = { structures };
+    return Result.success(result);
+}
+
+
+
+
+
+/**
+ * Wraps Mol2 parsing in a Computation.
+ *
+ * @param data complete Mol2 file content
+ * @returns a computation that resolves to the parse Result once it is run
+ */
+export function parse(data: string) {
+    const run = async (ctx: Computation.Context): Promise<Result<Schema.File>> => {
+        const outcome = await parseInternal(data, ctx);
+        return outcome;
+    };
+    return Computation.create<Result<Schema.File>>(run);
+}
+
+export default parse;