Browse Source

improve jmol transpiler

- add basic within function
- add basic backbone and protein keyword
- allow whitespace in parens
Alexander Rose 2 năm trước cách đây
mục cha
commit
e3f6dfad5b

+ 7 - 4
src/mol-script/transpilers/_spec/jmol.spec.ts

@@ -1,5 +1,5 @@
 /**
- * Copyright (c) 2020-2021 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ * Copyright (c) 2020-2022 mol* contributors, licensed under MIT, See LICENSE file for more info.
  *
  * @author Koya Sakuma <koya.sakuma.work@gmail.com>
  * @author Alexander Rose <alexander.rose@weirdbyte.de>
@@ -62,14 +62,17 @@ const general = {
         '100, 42, ALA',
         // residue numbering
         '(1-10,15,21-30)',
+        // within
+        'within(5,[HEM])',
         // within with parentheses
-        // '( within(5,[HEM]) ) and backbone',
+        '(within(5,[HEM])) and backbone',
+        '( within(5,[HEM]) ) and backbone',
         // trimming
         '[ALA] and [VAL]  ',
         ' [ALA] and [VAL] ',
         '  [ALA] and [VAL]',
         // within with whitespaces
-        // 'within (   5 ,  [HEM] ) ',
+        'within (   5 ,  [HEM] ) ',
         // un-braketed residue name
         'LEU and ILE',
         // un-parenthesized residue index range
@@ -77,7 +80,7 @@ const general = {
         // un-parenthesized residue index
         '20',
         // within in the head or the middle of sentence
-        // 'within (   5 ,  [HEM] ) and backbone',
+        'within (   5 ,  [HEM] ) and backbone',
     ],
     unsupported: [
         // values outside of comparisons

+ 75 - 19
src/mol-script/transpilers/jmol/keywords.ts

@@ -1,8 +1,11 @@
 /**
- * Copyright (c) 2017-2021 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ * Copyright (c) 2017-2022 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
  * @author Koya Sakuma <koya.sakuma.work@gmail.com>
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ *
  * Adapted from MolQL project
-**/
+ */
 
 
 import { MolScriptBuilder } from '../../../mol-script/language/builder';
@@ -10,12 +13,34 @@ const B = MolScriptBuilder;
 import * as h from '../helper';
 import { KeywordDict } from '../types';
 
+const ResDict = { // residue-name lists; key names mirror Jmol keywords such as `acidic`
+    acidic: ['ASP', 'GLU'],
+    aliphatic: ['ALA', 'GLY', 'ILE', 'LEU', 'VAL'],
+    amino: ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL', 'ASX', 'GLX', 'UNK'], // same names as list (a) of keywords.protein['@desc']; read by proteinExpr and backboneExpr
+    aromatic: ['HIS', 'PHE', 'TRP', 'TYR'],
+    basic: ['ARG', 'HIS', 'LYS'],
+    buried: ['ALA', 'CYS', 'ILE', 'LEU', 'MET', 'PHE', 'TRP', 'VAL'],
+    cg: ['CYT', 'C', 'GUA', 'G'],
+    cyclic: ['HIS', 'PHE', 'PRO', 'TRP', 'TYR'],
+    hydrophobic: ['ALA', 'GLY', 'ILE', 'LEU', 'MET', 'PHE', 'PRO', 'TRP', 'TYR', 'VAL'],
+    large: ['ARG', 'GLU', 'GLN', 'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'TRP', 'TYR'],
+    medium: ['ASN', 'ASP', 'CYS', 'PRO', 'THR', 'VAL'],
+    small: ['ALA', 'GLY', 'SER'],
+
+    nucleic: ['G', 'C', 'A', 'T', 'U', 'I', 'DG', 'DC', 'DA', 'DT', 'DU', 'DI', '+G', '+C', '+A', '+T', '+U', '+I'] // read by nucleicExpr and backboneExpr
+};
+
+const Backbone = { // atom names that backboneExpr tests against `label_atom_id`
+    nucleic: ['P', "O3'", "O5'", "C5'", "C4'", "C3'", 'OP1', 'OP2', 'O3*', 'O5*', 'C5*', 'C4*', 'C3*', // lists both primed (O3') and starred (O3*) spellings
+	      "C2'", "C1'", "O4'", "O2'"],
+    protein: ['C', 'N', 'CA']
+};
 
 function nucleicExpr() {
     return B.struct.combinator.merge([
         B.struct.generator.atomGroups({
             'residue-test': B.core.set.has([
-                B.set(...['G', 'C', 'A', 'T', 'U', 'I', 'DG', 'DC', 'DA', 'DT', 'DU', 'DI', '+G', '+C', '+A', '+T', '+U', '+I']),
+                B.set(...ResDict.nucleic),
                 B.ammp('label_comp_id')
             ])
         }),
@@ -46,20 +71,49 @@ function nucleicExpr() {
     ]);
 }
 
-const ResDict = {
-    acidic: ['ASP', 'GLU'],
-    aliphatic: ['ALA', 'GLY', 'ILE', 'LEU', 'VAL'],
-    amino: ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL', 'ASX', 'GLX', 'UNK'],
-    aromatic: ['HIS', 'PHE', 'TRP', 'TYR'],
-    basic: ['ARG', 'HIS', 'LYS'],
-    buried: ['ALA', 'CYS', 'ILE', 'LEU', 'MET', 'PHE', 'TRP', 'VAL'],
-    cg: ['CYT', 'C', 'GUA', 'G'],
-    cyclic: ['HIS', 'PHE', 'PRO', 'TRP', 'TYR'],
-    hydrophobic: ['ALA', 'GLY', 'ILE', 'LEU', 'MET', 'PHE', 'PRO', 'TRP', 'TYR', 'VAL'],
-    large: ['ARG', 'GLU', 'GLN', 'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'TRP', 'TYR'],
-    medium: ['ASN', 'ASP', 'CYS', 'PRO', 'THR', 'VAL'],
-    small: ['ALA', 'GLY', 'SER'],
-};
+/**
+ * Builds the selection behind the `protein` keyword: all atom groups whose
+ * `label_comp_id` is one of the names in `ResDict.amino`.
+ *
+ * TODO: improve, see keywords.protein['@desc'] below (only the residue-name
+ * criterion (a) of that description is covered here).
+ */
+function proteinExpr() {
+    const aminoNames = B.set(...ResDict.amino);
+    const isAmino = B.core.set.has([aminoNames, B.ammp('label_comp_id')]);
+    return B.struct.generator.atomGroups({ 'residue-test': isAmino });
+}
+
+/**
+ * Builds the selection behind the `backbone` keyword by merging two parts:
+ * residues named in `ResDict.amino` intersected by atoms named in `Backbone.protein`,
+ * and residues named in `ResDict.nucleic` intersected by atoms named in `Backbone.nucleic`.
+ *
+ * TODO: improve, see keywords.backbone['@desc'] below
+ */
+function backboneExpr() {
+    // residues named in `residueNames`, intersected by the atoms named in `atomNames`
+    const backboneOf = (residueNames: string[], atomNames: string[]) => {
+        const residues = B.struct.generator.atomGroups({
+            'residue-test': B.core.set.has([
+                B.core.type.set(residueNames),
+                B.ammp('label_comp_id')
+            ])
+        });
+        const atoms = B.struct.generator.atomGroups({
+            'atom-test': B.core.set.has([
+                B.core.type.set(atomNames),
+                B.ammp('label_atom_id')
+            ])
+        });
+        return B.struct.modifier.intersectBy({ 0: residues, by: atoms });
+    };
+
+    return B.struct.combinator.merge([
+        backboneOf(ResDict.amino, Backbone.protein),
+        backboneOf(ResDict.nucleic, Backbone.nucleic),
+    ]);
+}
 
 export const keywords: KeywordDict = {
     // general terms
@@ -336,7 +390,8 @@ export const keywords: KeywordDict = {
         })
     },
     protein: {
-        '@desc': 'defined as a group that (a) has one of the following group names: ALA, ARG, ASN, ASP, CYS, GLN, GLU, GLY, HIS, ILE, LEU, LYS, MET, PHE, PRO, SER, THR, TRP, TYR, VAL, ASX, GLX, or UNK; or (b) contains PDB atom designations [C, O, CA, and N] bonded correctly; or (c) does not contain "O" but contains [C, CA, and N] bonded correctly; or (d) has only one atom, which has name CA and does not have the group name CA (indicating a calcium atom).'
+        '@desc': 'defined as a group that (a) has one of the following group names: ALA, ARG, ASN, ASP, CYS, GLN, GLU, GLY, HIS, ILE, LEU, LYS, MET, PHE, PRO, SER, THR, TRP, TYR, VAL, ASX, GLX, or UNK; or (b) contains PDB atom designations [C, O, CA, and N] bonded correctly; or (c) does not contain "O" but contains [C, CA, and N] bonded correctly; or (d) has only one atom, which has name CA and does not have the group name CA (indicating a calcium atom).',
+        map: () => proteinExpr()
     },
     acidic: {
         '@desc': 'ASP GLU',
@@ -496,7 +551,8 @@ export const keywords: KeywordDict = {
     },
     backbone: {
         '@desc': '(*.C, *.CA, *.N, and all nucleic other than the bases themselves)',
-        abbr: ['mainchain']
+        abbr: ['mainchain'],
+        map: () => backboneExpr()
     },
     sidechain: {
         '@desc': '((protein or nucleic) and not backbone)'

+ 14 - 3
src/mol-script/transpilers/jmol/parser.ts

@@ -1,5 +1,5 @@
 /**
- * Copyright (c) 2017-2021 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ * Copyright (c) 2017-2022 mol* contributors, licensed under MIT, See LICENSE file for more info.
  *
  * @author Koya Sakuma < koya.sakuma.work@gmail.com>
  * @author Alexander Rose <alexander.rose@weirdbyte.de>
@@ -18,6 +18,7 @@ import { keywords } from './keywords';
 import { AtomGroupArgs } from '../types';
 import { Transpiler } from '../transpiler';
 import { OperatorList } from '../types';
+import { Expression } from '../../language/expression';
 
 // <, <=, =, >=, >, !=, and LIKE
 const valueOperators: OperatorList = [
@@ -116,7 +117,7 @@ const lang = P.MonadicParser.createLanguage({
             r.Parens,
             r.Operator,
             r.Expression
-        ).wrap(P.MonadicParser.string('('), P.MonadicParser.string(')'));
+        ).wrap(P.MonadicParser.regexp(/\(\s*/), P.MonadicParser.regexp(/\s*\)/));
     },
 
     Expression: function (r: any) {
@@ -129,11 +130,12 @@ const lang = P.MonadicParser.createLanguage({
                     B.core.rel.lte([B.ammp('auth_seq_id'), x[1]])
                 ])
             })),
-            r.Resno.lookahead(P.MonadicParser.regexp(/\s*(?!(LIKE|>=|<=|!=|[\[:^%/.=><]))/i)).map((x: any) => B.struct.generator.atomGroups({
+            r.Resno.lookahead(P.MonadicParser.regexp(/\s*(?!(LIKE|>=|<=|!=|[\[:^%/.=><]))/i)).map((x: number) => B.struct.generator.atomGroups({
                 'residue-test': B.core.rel.eq([B.ammp('auth_seq_id'), x])
             })),
             r.AtomExpression.map(atomExpressionQuery),
 
+            r.Within.map((x: [number, Expression]) => B.struct.modifier.includeSurroundings({ 0: x[1], radius: x[0] })),
             r.ValueQuery,
 
             r.Element.map((x: string) => B.struct.generator.atomGroups({
@@ -186,6 +188,15 @@ const lang = P.MonadicParser.createLanguage({
             r.Integer
         ).desc('resno-range');
     },
+    /**
+     * Parses `within(<integer>, <query>)`: the keyword is matched case-insensitively,
+     * and optional whitespace is accepted around the opening parenthesis and the comma.
+     * The closing pattern matches a bare `)` only, so this rule does not itself
+     * consume whitespace in front of it.
+     * Produces the pair [integer, query result].
+     */
+    Within: (r: any) => {
+        const open = P.MonadicParser.regexp(/\s*\(\s*/);
+        const comma = P.MonadicParser.regexp(/\s*,\s*/);
+        const close = P.MonadicParser.regexp(/\)/);
+        const args = P.MonadicParser.seq(r.Integer.skip(comma), r.Query);
+        return P.MonadicParser.regexp(/within/i).skip(open).then(args).skip(close);
+    },
 
     Keywords: () => P.MonadicParser.alt(...h.getKeywordRules(keywords)).desc('keyword'),