فهرست منبع

Rasmol parser WIP

yakomaxa 2 سال پیش
والد
کامیت
03cce830bc

+ 1 - 1
src/mol-plugin-state/transforms/model.ts

@@ -786,7 +786,7 @@ const StructureSelectionFromScript = PluginStateTransform.BuiltIn({
     from: SO.Molecule.Structure,
     to: SO.Molecule.Structure,
     params: () => ({
-        script: PD.Script({ language: 'pymol', expression: 'all' }),
+        script: PD.Script({ language: 'rasmol', expression: 'all' }),
         label: PD.Optional(PD.Text(''))
     })
 })({

+ 105 - 17
src/mol-script/transpilers/rasmol/parser.ts

@@ -12,7 +12,9 @@ import * as h from '../helper';
 import { MolScriptBuilder } from '../../../mol-script/language/builder';
 const B = MolScriptBuilder;
 import { properties, structureMap, structureDict } from './properties';
-import { special } from './special';
+import { special_properties } from './special_properties';
+import { special_keywords } from './special_keywords';
+import { special_operators } from './special_operators';
 import { operators } from './operators';
 import { keywords } from './keywords';
 import { AtomGroupArgs } from '../types';
@@ -23,6 +25,38 @@ import { OperatorList } from '../types';
 
 // const slash = P.MonadicParser.string('/');
 
+const propertiesDict = h.getPropertyRules(special_properties); // rules looked up by property name below (propertiesDict.segi, .chain, .resi, .name); presumably one rule per entry of special_properties — confirm against h.getPropertyRules
+
+const slash = P.MonadicParser.string('/'); // matches the '/' that separates the fields of a selection macro
+
+/**
+ * Makes a rule optional: the returned parser tries `rule` and, when it does
+ * not match, falls back to a parser built with `P.MonadicParser.of(null)`.
+ *
+ * Open question kept from the original author: is the Parser -> MonadicParser
+ * substitution correct?
+ *
+ * @param rule parser to make optional
+ * @returns `rule` combined (via `or`) with the null fallback
+ */
+function orNull(rule: P.MonadicParser<any>) {
+    const nothing = P.MonadicParser.of(null);
+    return rule.or(nothing);
+}
+
+
+/**
+ * Builds an atom-groups query from the fields captured by `AtomSelectionMacro`.
+ *
+ * Every field that carries a value is filed under the `level` of its entry in
+ * `special_properties`; the values collected for one level are combined with
+ * `h.andExpr` and handed to `B.struct.generator.atomGroups` as that level's test.
+ *
+ * @param x object mapping macro field names (e.g. `segi`, `chain`, `resi`, `name`)
+ *          to the parsed value of the field, or to `null` when the field was left empty
+ * @returns the expression produced by `B.struct.generator.atomGroups`
+ * @throws Error when a field that carries a value has no entry in `special_properties`
+ */
+function atomSelectionQuery2(x: any) {
+    const tests: AtomGroupArgs = {};
+    const props: { [k: string]: any[] } = {};
+
+    for (const k in x) {
+        const value = x[k];
+        // An empty macro slot is parsed as null by `orNull`. Skip it BEFORE the
+        // support check, so that leaving out a field that has no entry in
+        // `special_properties` (e.g. the object slot) does not abort the whole selection.
+        if (value === null) continue;
+
+        const ps = special_properties[k];
+        if (!ps) {
+            throw new Error(`property '${k}' not supported, value '${value}'`);
+        }
+        if (!props[ps.level]) props[ps.level] = [];
+        props[ps.level].push(value);
+    }
+
+    // One combined test per level.
+    for (const p in props) {
+        tests[p] = h.andExpr(props[p]);
+    }
+
+    return B.struct.generator.atomGroups(tests);
+}
+
+
 
 // <, <=, =, >=, >, !=, and LIKE
 const valueOperators: OperatorList = [
@@ -127,23 +161,77 @@ const lang = P.MonadicParser.createLanguage({
     Expression: function (r: any) {
         return P.MonadicParser.alt(
 	    //	    r.NamedAtomProperties,
-	    r.RangeListProperty,
-	    r.Keywords,
-	    r.Resno.lookahead(P.MonadicParser.regexp(/\s*(?!(LIKE|>=|<=|!=|[:^%/.=><]))/i)).map((x: any) => B.struct.generator.atomGroups({
-                'residue-test': B.core.rel.eq([B.ammp('auth_seq_id'), x])
-	    })),
-	    r.AtomExpression.map(atomExpressionQuery),
-
-	    r.ValueQuery,
-
-	    r.Element.map((x: string) => B.struct.generator.atomGroups({
-                'atom-test': B.core.rel.eq([B.acp('elementSymbol'), B.struct.type.elementSymbol(x)])
-	    })),
-	    r.Resname.map((x: string) => B.struct.generator.atomGroups({
-                'residue-test': B.core.rel.eq([B.ammp('label_comp_id'), x])
-	    })),
+	    r.AtomSelectionMacro.map(atomSelectionQuery2), // WIP: the only alternative left; the rules removed above are no longer tried by Expression
+        );
+    },
+
+        AtomSelectionMacro: function (r: any) {
+        // Optional parsers for the individual macro fields; an empty field yields null.
+        const object = orNull(r.ObjectProperty);
+        const segi = orNull(propertiesDict.segi);
+        const chain = orNull(propertiesDict.chain);
+        const resi = orNull(propertiesDict.resi);
+        const name = orNull(propertiesDict.name);
+
+        // Forms tried after a leading '/': fields are read left to right starting
+        // with the object field, the form with the most fields first.
+        const afterLeadingSlash = P.MonadicParser.alt(
+            P.MonadicParser.seq(object.skip(slash), segi.skip(slash), chain.skip(slash), resi.skip(slash), name)
+                .map(f => ({ object: f[0], segi: f[1], chain: f[2], resi: f[3], name: f[4] })),
+            P.MonadicParser.seq(object.skip(slash), segi.skip(slash), chain.skip(slash), resi)
+                .map(f => ({ object: f[0], segi: f[1], chain: f[2], resi: f[3] })),
+            P.MonadicParser.seq(object.skip(slash), segi.skip(slash), chain)
+                .map(f => ({ object: f[0], segi: f[1], chain: f[2] })),
+            P.MonadicParser.seq(object.skip(slash), segi)
+                .map(f => ({ object: f[0], segi: f[1] })),
+            P.MonadicParser.seq(object)
+                .map(f => ({ object: f[0] })),
+        );
+
+        // Forms without a leading '/': every form ends with the name field and
+        // each later form drops one more field from the left.
+        const withoutLeadingSlash = P.MonadicParser.alt(
+            P.MonadicParser.seq(object.skip(slash), segi.skip(slash), chain.skip(slash), resi.skip(slash), name)
+                .map(f => ({ object: f[0], segi: f[1], chain: f[2], resi: f[3], name: f[4] })),
+            P.MonadicParser.seq(segi.skip(slash), chain.skip(slash), resi.skip(slash), name)
+                .map(f => ({ segi: f[0], chain: f[1], resi: f[2], name: f[3] })),
+            P.MonadicParser.seq(chain.skip(slash), resi.skip(slash), name)
+                .map(f => ({ chain: f[0], resi: f[1], name: f[2] })),
+            P.MonadicParser.seq(resi.skip(slash), name)
+                .map(f => ({ resi: f[0], name: f[1] })),
+        );
+
+        return P.MonadicParser.alt(
+            slash.then(afterLeadingSlash),
+            withoutLeadingSlash
         );
+    },
+
+    ObjectProperty: () => {
+        // Words collected from the property, keyword and operator tables are excluded
+        // from object names by the negative lookahead in the pattern below.
+        const reserved = h.getReservedWords(special_properties, special_keywords, special_operators);
+        reserved.sort(h.strLenSortFn);
+        const alternation = reserved.map(h.escapeRegExp).join('|');
+        const objectName = new RegExp(`(?!(${alternation}))[A-Z0-9_]+`, 'i');
+        return P.MonadicParser.regexp(objectName);
     },
+    Object: (r: any) => {
+        // An object name that is not followed by '/' is rejected outright.
+        const rejectObject = (x: any) => {
+            throw new Error(`property 'object' not supported, value '${x}'`);
+        };
+        const bareObjectName = r.ObjectProperty.notFollowedBy(slash);
+        return bareObjectName.map(rejectObject);
+    },
+
+
 
     NamedAtomProperties: function () {
         return P.MonadicParser.alt(...h.getNamedPropertyRules(properties));
@@ -159,7 +247,7 @@ const lang = P.MonadicParser.createLanguage({
 
     RangeListProperty: function (r: any) {
         return P.MonadicParser.seq(
-            P.MonadicParser.alt(...h.getPropertyNameRules(special, /\s/))
+            P.MonadicParser.alt(...h.getPropertyNameRules(special_properties, /\s/))
                 .skip(P.MonadicParser.whitespace),
             P.MonadicParser.alt(
                 r.ValueRange,

+ 0 - 64
src/mol-script/transpilers/rasmol/special.ts

@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2017-2021 mol* contributors, licensed under MIT, See LICENSE file for more info.
- * @author Alexander Rose <alexander.rose@weirdbyte.de>
- * @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com>
- *
- * @author Koya Sakuma
- * This module was taken from jmol transpiler from MolQL and modified in similar manner as pymol and vmd tranpilers.                                             \
-*/
-
-import { MolScriptBuilder } from '../../../mol-script/language/builder';
-const B = MolScriptBuilder;
-import { PropertyDict } from '../types';
-
-const reFloat = /[-+]?[0-9]*\.?[0-9]+/;
-const rePosInt = /[0-9]+/;
-
-function str(x: string) { return x; }
-
-export function sstrucMap(x: string) {
-    return B.struct.type.secondaryStructureFlags(
-        [structureDict[x.toUpperCase()] || 'none']
-    );
-}
-
-
-export const structureDict: {[key: string]: string} = {
-    none: 'none',
-    turn: 'turn',
-    sheet: 'beta',
-    helix: 'helix',
-    dna: 'dna',
-    rna: 'rna',
-
-    0: 'none',
-    1: 'turn',
-    2: 'beta',
-    3: 'helix',
-    4: 'dna',
-    5: 'rna',
-    6: 'carbohydrate',
-    7: '3-10',
-    8: 'alpha',
-    9: 'pi',
-};
-export function structureMap(x: any) {
-    if (x.head && x.head === 'core.type.regex') x = x.args[0].replace(/^\^|\$$/g, '');
-    x = structureDict[x.toString().toLowerCase()] || 'none';
-    if (['dna', 'rna', 'carbohydrate'].indexOf(x) !== -1) {
-        throw new Error("values 'dna', 'rna', 'carbohydrate' not yet supported for 'structure' property");
-    } else {
-        return B.struct.type.secondaryStructureFlags([x]);
-    }
-}
-
-export const special: PropertyDict = {
-    hoge: {
-        '@desc': 'PDB residue number, not including insertion code (see also seqcode, below)',
-        '@examples': ['resno = 100'],
-//        isNumeric: true,
-        regex: /-?[0-9]+/, map: x => parseInt(x),
-        level: 'residue-test', property: B.ammp('auth_seq_id')
-    },
-};
-

+ 232 - 0
src/mol-script/transpilers/rasmol/special_keywords.ts

@@ -0,0 +1,232 @@
+/**
+ * Copyright (c) 2017-2021 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ * @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com>
+ */
+
+import { MolScriptBuilder } from '../../../mol-script/language/builder';
+const B = MolScriptBuilder;
+import * as h from '../helper';
+import { KeywordDict } from '../types';
+
+const ResDict = { // residue names grouped by molecule class; the keyword table below compares them against label_comp_id
+    nucleic: ['A', 'C', 'T', 'G', 'U', 'DA', 'DC', 'DT', 'DG', 'DU'],
+    protein: ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'CYX', 'GLN', 'GLU', 'GLY', 'HIS', 'HID', 'HIE', 'HIP', 'ILE', 'LEU', 'LYS', 'MET', 'MSE', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL'],
+    solvent: ['HOH', 'WAT', 'H20', 'TIP', 'SOL'] // NOTE(review): 'H20' is written with a zero, as in the 'solvent' keyword description — confirm that 'H2O' is not what was meant
+};
+
+const Backbone = { // backbone atom names grouped by polymer type
+    nucleic: ['P', "O3'", "O5'", "C5'", "C4'", "C3'", 'OP1', 'OP2', 'O3*', 'O5*', 'C5*', 'C4*', 'C3*'], // sugar atoms are listed in both the primed (') and the starred (*) spelling
+    protein: ['C', 'N', 'CA', 'O']
+};
+
+
+/**
+ * Keyword table of the selection language.
+ *
+ * Each entry is keyed by the keyword and may carry
+ *  - '@desc': a human-readable description,
+ *  - abbr:    alternative spellings of the keyword,
+ *  - map:     a function building the query expression for the keyword.
+ * Entries without `map` only carry a description (and possibly abbreviations).
+ *
+ * NOTE(review): most descriptions still mention PyMOL — presumably carried over
+ * from the PyMOL keyword table; confirm before this leaves WIP state.
+ */
+export const special_keywords: KeywordDict = {
+    all: {
+        '@desc': 'All atoms currently loaded into PyMOL',
+        abbr: ['*'],
+        map: () => B.struct.generator.all()
+    },
+    none: {
+        '@desc': 'No atoms (empty selection)',
+        map: () => B.struct.generator.empty()
+    },
+    hydrogens: {
+        '@desc': 'All hydrogen atoms currently loaded into PyMOL',
+        abbr: ['hydro', 'h.'],
+        map: () => B.struct.generator.atomGroups({
+            'atom-test': B.core.rel.eq([
+                B.acp('elementSymbol'),
+                B.es('H')
+            ])
+        })
+    },
+    hetatm: {
+        '@desc': 'All atoms loaded from Protein Data Bank HETATM records',
+        abbr: ['het'],
+        map: () => B.struct.generator.atomGroups({
+            'atom-test': B.core.rel.eq([B.ammp('isHet'), true])
+        })
+    },
+    visible: {
+        '@desc': 'All atoms in enabled objects with at least one visible representation',
+        abbr: ['v.']
+    },
+    polymer: {
+        '@desc': 'All atoms on the polymer (not het). Finds atoms with residue identifiers matching a known polymer, such a peptide and DNA.',
+        abbr: ['pol.'],
+        map: () => B.struct.generator.atomGroups({
+            'residue-test': B.core.set.has([
+                B.core.type.set(ResDict.nucleic.concat(ResDict.protein)),
+                B.ammp('label_comp_id')
+            ])
+        })
+    },
+    sidechain: {
+        '@desc': 'Polymer non-backbone atoms (new in PyMOL 1.6.1)',
+    },
+    present: {
+        '@desc': 'All atoms with defined coordinates in the current state (used in creating movies)',
+        abbr: ['pr.']
+    },
+    center: {
+        '@desc': 'Pseudo-atom at the center of the scene'
+    },
+    origin: {
+        '@desc': 'Pseudo-atom at the origin of rotation',
+    },
+    enabled: {
+        '@desc': 'All enabled objects or selections from the object list.',
+    },
+    masked: {
+        '@desc': 'All masked atoms.',
+        abbr: ['msk.']
+    },
+    protected: {
+        '@desc': 'All protected atoms.',
+        // NOTE(review): 'pr.' is also the abbreviation of `present` above — confirm which keyword should own it.
+        abbr: ['pr.']
+    },
+    bonded: {
+        '@desc': 'All bonded atoms',
+        map: () => B.struct.generator.atomGroups({
+            'atom-test': B.core.rel.gr([B.struct.atomProperty.core.bondCount({
+                flags: B.struct.type.bondFlags(['covalent', 'metallic', 'sulfide'])
+            }), 0])
+        })
+    },
+    donors: {
+        '@desc': 'All hydrogen bond donor atoms.',
+        abbr: ['don.']
+    },
+    acceptors: {
+        '@desc': 'All hydrogen bond acceptor atoms.',
+        abbr: ['acc.']
+    },
+    fixed: {
+        '@desc': 'All fixed atoms.',
+        abbr: ['fxd.']
+    },
+    restrained: {
+        '@desc': 'All restrained atoms.',
+        abbr: ['rst.']
+    },
+    organic: {
+        '@desc': 'All atoms in non-polymer organic compounds (e.g. ligands, buffers). Finds carbon-containing molecules that do not match known polymers.',
+        abbr: ['org.'],
+        // Carbon atoms inside residues that are neither nucleic nor protein, expanded by residueKey.
+        map: () => h.asAtoms(B.struct.modifier.expandProperty({
+            '0': B.struct.modifier.union([
+                B.struct.generator.queryInSelection({
+                    '0': B.struct.generator.atomGroups({
+                        'residue-test': B.core.logic.not([
+                            B.core.set.has([
+                                B.core.type.set(ResDict.nucleic.concat(ResDict.protein)),
+                                B.ammp('label_comp_id')
+                            ])
+                        ])
+                    }),
+                    query: B.struct.generator.atomGroups({
+                        'atom-test': B.core.rel.eq([
+                            B.es('C'),
+                            B.acp('elementSymbol')
+                        ])
+                    })
+                })
+            ]),
+            property: B.ammp('residueKey')
+        }))
+    },
+    inorganic: {
+        '@desc': 'All non-polymer inorganic atoms/ions. Finds atoms in molecules that do not contain carbon and do not match any known solvent residues.',
+        abbr: ['ino.'],
+        // Residues that are neither nucleic, protein nor solvent and whose element set lacks carbon.
+        map: () => h.asAtoms(B.struct.modifier.expandProperty({
+            '0': B.struct.modifier.union([
+                B.struct.filter.pick({
+                    '0': B.struct.generator.atomGroups({
+                        'residue-test': B.core.logic.not([
+                            B.core.set.has([
+                                B.core.type.set(ResDict.nucleic.concat(ResDict.protein).concat(ResDict.solvent)),
+                                B.ammp('label_comp_id')
+                            ])
+                        ]),
+                        'group-by': B.ammp('residueKey')
+                    }),
+                    test: B.core.logic.not([
+                        B.core.set.has([
+                            B.struct.atomSet.propertySet([B.acp('elementSymbol')]),
+                            B.es('C')
+                        ])
+                    ])
+                })
+            ]),
+            property: B.ammp('residueKey')
+        }))
+    },
+    solvent: {
+        '@desc': 'All water molecules. The hardcoded solvent residue identifiers are currently: HOH, WAT, H20, TIP, SOL.',
+        abbr: ['sol.'],
+        map: () => B.struct.generator.atomGroups({
+            'residue-test': B.core.set.has([
+                B.core.type.set(ResDict.solvent),
+                B.ammp('label_comp_id')
+            ])
+        })
+    },
+    guide: {
+        '@desc': 'All protein CA and nucleic acid C4*/C4',
+        // Merge of two queries: CA atoms in protein residues, and C4*/C4 atoms in nucleic residues.
+        map: () => B.struct.combinator.merge([
+            B.struct.generator.atomGroups({
+                'atom-test': B.core.rel.eq([
+                    B.atomName('CA'),
+                    B.ammp('label_atom_id')
+                ]),
+                'residue-test': B.core.set.has([
+                    B.core.type.set(ResDict.protein),
+                    B.ammp('label_comp_id')
+                ])
+            }),
+            B.struct.generator.atomGroups({
+                'atom-test': B.core.set.has([
+                    h.atomNameSet(['C4*', 'C4']),
+                    B.ammp('label_atom_id')
+                ]),
+                'residue-test': B.core.set.has([
+                    B.core.type.set(ResDict.nucleic),
+                    B.ammp('label_comp_id')
+                ])
+            })
+        ]),
+    },
+    metals: {
+        '@desc': 'All metal atoms (new in PyMOL 1.6.1)'
+    },
+    backbone: {
+        '@desc': 'the C, N, CA, and O atoms of a protein and the equivalent atoms in a nucleic acid.',
+        map: () => B.struct.generator.atomGroups({
+            'atom-test': B.core.set.has([
+                // The test is on atom names (label_atom_id), so the set must hold the
+                // protein AND nucleic backbone atom names; residue names do not belong here.
+                B.core.type.set(Backbone.protein.concat(Backbone.nucleic)),
+                B.ammp('label_atom_id')
+            ])
+        }),
+    },
+    // NOTE(review): the two keys below look like WIP placeholders ('xxxxxx' suffix); the usable spellings are the abbreviations.
+    proteinxxxxxx: {
+        '@desc': 'protein................',
+        abbr: ['polymer.protein'],
+        map: () => B.struct.generator.atomGroups({
+            'residue-test': B.core.set.has([
+                B.core.type.set(ResDict.protein),
+                B.ammp('label_comp_id')
+            ])
+        })
+    },
+    nucleicxxxxx: {
+        '@desc': 'nucleic................',
+        abbr: ['polymer.nucleic'],
+        map: () => B.struct.generator.atomGroups({
+            'residue-test': B.core.set.has([
+                B.core.type.set(ResDict.nucleic),
+                B.ammp('label_comp_id')
+            ])
+        })
+    }
+};

+ 369 - 0
src/mol-script/transpilers/rasmol/special_operators.ts

@@ -0,0 +1,369 @@
+/**
+ * Copyright (c) 2017-2021 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ * @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com>
+ */
+
+import * as P from '../../../mol-util/monadic-parser';
+import * as h from '../helper';
+import { MolScriptBuilder } from '../../../mol-script/language/builder';
+const B = MolScriptBuilder;
+import { OperatorList } from '../types';
+import { Expression } from '../../language/expression';
+
+export const special_operators: OperatorList = [ // operator table: each entry gives a name, a fixity (`type`), a parser `rule` and a `map` that builds the resulting expression
+    {
+        '@desc': 'Selects atoms that are not included in s1.',
+        '@examples': [
+            'NOT resn ALA',
+            'not (resi 42 or chain A)',
+            '!resi 42 or chain A',
+        ],
+        name: 'not',
+        type: h.prefix,
+        rule: P.MonadicParser.alt(
+            P.MonadicParser.regexp(/NOT/i).skip(P.MonadicParser.whitespace), // the word form must be followed by whitespace
+            P.MonadicParser.string('!').skip(P.MonadicParser.optWhitespace) // after '!' whitespace is optional
+        ),
+        map: (op, selection) => h.invertExpr(selection),
+    },
+    {
+        '@desc': 'Selects atoms included in both s1 and s2.',
+        '@examples': ['chain A AND name CA'],
+        name: 'and',
+        type: h.binaryLeft,
+        rule: h.infixOp(/AND|&/i),
+        map: (op, selection, by) =>
+            B.struct.modifier.intersectBy({ 0: selection, by }),
+    },
+    {
+        '@desc': 'Selects atoms included in either s1 or s2.',
+        '@examples': ['chain A OR chain B'],
+        name: 'or',
+        type: h.binaryLeft,
+        rule: h.infixOp(/OR|\|/i),
+        map: (op: string, s1: Expression, s2: Expression) => B.struct.combinator.merge([s1, s2]),
+    },
+    {
+        '@desc':
+            'Selects atoms in s1 whose identifiers name, resi, resn, chain and segi all match atoms in s2.',
+        '@examples': ['chain A IN chain B'],
+        name: 'in',
+        type: h.binaryLeft,
+        rule: h.infixOp(/IN/i),
+        map: (op: string, selection: Expression, source: Expression) => {
+            return B.struct.filter.withSameAtomProperties({
+                0: selection,
+                source,
+                property: B.core.type.compositeKey([ // atoms are compared on the combination of these five properties
+                    B.ammp('label_atom_id'),
+                    B.ammp('label_seq_id'),
+                    B.ammp('label_comp_id'),
+                    B.ammp('auth_asym_id'),
+                    B.ammp('label_asym_id'),
+                ]),
+            });
+        },
+    },
+    {
+        '@desc':
+            'Selects atoms in s1 whose identifiers name and resi match atoms in s2.',
+        '@examples': ['chain A LIKE chain B'],
+        name: 'like',
+        type: h.binaryLeft,
+        rule: h.infixOp(/LIKE|l\./i),
+        map: (op: string, selection: Expression, source: Expression) => {
+            return B.struct.filter.withSameAtomProperties({
+                0: selection,
+                source,
+                property: B.core.type.compositeKey([ // same as 'in', but restricted to atom name and sequence id
+                    B.ammp('label_atom_id'),
+                    B.ammp('label_seq_id'),
+                ]),
+            });
+        },
+    },
+    {
+        '@desc':
+            'Selects all atoms whose van der Waals radii are separated from the van der Waals radii of s1 by a minimum of X Angstroms.',
+        '@examples': ['solvent GAP 2'],
+        name: 'gap',
+        type: h.postfix,
+        rule: h
+            .postfixOp(/GAP\s+([-+]?[0-9]*\.?[0-9]+)/i, 1) // presumably the second argument picks the capture group holding the number — confirm against h.postfixOp
+            .map((x: any) => parseFloat(x)),
+        map: (distance: number, target: Expression) => {
+            return B.struct.filter.within({
+                '0': B.struct.generator.all(),
+                target,
+                'atom-radius': B.acp('vdw'),
+                'max-radius': distance,
+                invert: true,
+            });
+        },
+    },
+    {
+        '@desc':
+            'Selects atoms with centers within X Angstroms of the center of any atom in s1.',
+        '@examples': ['resname LIG AROUND 1'],
+        name: 'around',
+        abbr: ['a.'],
+        type: h.postfix,
+        rule: h
+            .postfixOp(/(AROUND|a\.)\s+([-+]?[0-9]*\.?[0-9]+)/i, 2)
+            .map((x: any) => parseFloat(x)),
+        map: (radius: number, target: Expression) => {
+            return B.struct.modifier.exceptBy({ // everything within the radius of the target, minus the target itself
+                '0': B.struct.filter.within({
+                    '0': B.struct.generator.all(),
+                    target,
+                    'max-radius': radius,
+                }),
+                by: target,
+            });
+        },
+    },
+    {
+        '@desc':
+            'Expands s1 by all atoms within X Angstroms of the center of any atom in s1.',
+        '@examples': ['chain A EXPAND 3'],
+        name: 'expand',
+        abbr: ['x.'],
+        type: h.postfix,
+        rule: h
+            .postfixOp(/(EXPAND|x\.)\s+([-+]?[0-9]*\.?[0-9]+)/i, 2)
+            .map((x: any) => parseFloat(x)),
+        map: (radius: number, selection: Expression) => {
+            return B.struct.modifier.includeSurroundings({ 0: selection, radius });
+        },
+    },
+    {
+        '@desc':
+            'Selects atoms in s1 that are within X Angstroms of any atom in s2.',
+        '@examples': ['chain A WITHIN 3 OF chain B'],
+        name: 'within',
+        abbr: ['w.'],
+        type: h.binaryLeft,
+        rule: h.ofOp('WITHIN', 'w.'),
+        map: (radius: number, selection: Expression, target: Expression) => {
+            return B.struct.filter.within({
+                0: selection,
+                target,
+                'max-radius': radius,
+            });
+        },
+    },
+    {
+        '@desc':
+            'Same as within, but excludes s2 from the selection (and thus is identical to s1 and s2 around X).',
+        '@examples': ['chain A NEAR_TO 3 OF chain B'],
+        name: 'near_to',
+        abbr: ['nto.'],
+        type: h.binaryLeft,
+        rule: h.ofOp('NEAR_TO', 'nto.'),
+        map: (radius: number, selection: Expression, target: Expression) => {
+            return B.struct.modifier.exceptBy({
+                '0': B.struct.filter.within({
+                    '0': selection,
+                    target,
+                    'max-radius': radius,
+                }),
+                by: target,
+            });
+        },
+    },
+    {
+        '@desc': 'Selects atoms in s1 that are at least X Anstroms away from s2.',
+        '@examples': ['solvent BEYOND 2 OF chain A'],
+        name: 'beyond',
+        abbr: ['be.'],
+        type: h.binaryLeft,
+        rule: h.ofOp('BEYOND', 'be.'),
+        map: (radius: number, selection: Expression, target: Expression) => {
+            return B.struct.modifier.exceptBy({
+                '0': B.struct.filter.within({
+                    '0': selection,
+                    target,
+                    'max-radius': radius,
+                    invert: true, // inverted 'within' filter, i.e. the counterpart of near_to
+                }),
+                by: target,
+            });
+        },
+    },
+    {
+        '@desc': 'Expands selection to complete residues.',
+        '@examples': ['BYRESIDUE name N'],
+        name: 'byresidue',
+        abbr: ['byresi', 'byres', 'br.'],
+        type: h.prefix,
+        rule: h.prefixOp(/BYRESIDUE|byresi|byres|br\./i),
+        map: (op: string, selection: Expression) => {
+            return h.asAtoms(
+                B.struct.modifier.expandProperty({
+                    '0': B.struct.modifier.union({ 0: selection }),
+                    property: B.ammp('residueKey'),
+                })
+            );
+        },
+    },
+    {
+        '@desc':
+            'Completely selects all alpha carbons in all residues covered by a selection.',
+        '@examples': ['BYCALPHA chain A'],
+        name: 'bycalpha',
+        abbr: ['bca.'],
+        type: h.prefix,
+        rule: h.prefixOp(/BYCALPHA|bca\./i),
+        map: (op: string, selection: Expression) => {
+            return B.struct.generator.queryInSelection({
+                '0': B.struct.modifier.expandProperty({ // first expand the selection by residueKey ...
+                    '0': B.struct.modifier.union({ 0: selection }),
+                    property: B.ammp('residueKey'),
+                }),
+                query: B.struct.generator.atomGroups({ // ... then query atoms named CA inside it
+                    'atom-test': B.core.rel.eq([
+                        B.atomName('CA'),
+                        B.ammp('label_atom_id'),
+                    ]),
+                }),
+            });
+        },
+    },
+    {
+        '@desc': 'Expands selection to complete molecules.',
+        '@examples': ['BYMOLECULE resi 20-30'],
+        name: 'bymolecule',
+        isUnsupported: true, // structure-query.atom-property.topology.connected-component-key' is not implemented
+        abbr: ['bymol', 'bm.'],
+        type: h.prefix,
+        rule: h.prefixOp(/BYMOLECULE|bymol|bm\./i),
+        map: (op: string, selection: Expression) => {
+            return h.asAtoms(
+                B.struct.modifier.expandProperty({
+                    '0': B.struct.modifier.union({ 0: selection }),
+                    property: B.atp('connectedComponentKey'),
+                })
+            );
+        },
+    },
+    {
+        '@desc': 'Expands selection to complete fragments.',
+        '@examples': ['BYFRAGMENT resi 10'],
+        name: 'byfragment',
+        abbr: ['byfrag', 'bf.'],
+        isUnsupported: true,
+        type: h.prefix,
+        rule: h.prefixOp(/BYFRAGMENT|byfrag|bf\./i),
+        map: (op: string, selection: Expression) => [op, selection], // flagged unsupported: map only echoes its arguments
+    },
+    {
+        '@desc': 'Expands selection to complete segments.',
+        '@examples': ['BYSEGMENT resn CYS'],
+        name: 'bysegment',
+        abbr: ['bysegi', 'byseg', 'bs.'],
+        type: h.prefix,
+        rule: h.prefixOp(/BYSEGMENT|bysegi|byseg|bs\./i),
+        map: (op: string, selection: Expression) => {
+            return h.asAtoms(
+                B.struct.modifier.expandProperty({
+                    '0': B.struct.modifier.union({ 0: selection }),
+                    property: B.ammp('chainKey'),
+                })
+            );
+        },
+    },
+    {
+        '@desc': 'Expands selection to complete objects.',
+        '@examples': ['BYOBJECT chain A'],
+        name: 'byobject',
+        abbr: ['byobj', 'bo.'],
+        isUnsupported: true,
+        type: h.prefix,
+        rule: h.prefixOp(/BYOBJECT|byobj|bo\./i),
+        map: (op: string, selection: Expression) => [op, selection], // flagged unsupported: map only echoes its arguments
+    },
+    {
+        '@desc': 'Expands selection to unit cell.',
+        '@examples': ['BYCELL chain A'],
+        name: 'bycell',
+        isUnsupported: true,
+        type: h.prefix,
+        rule: h.prefixOp(/BYCELL/i),
+        map: (op: string, selection: Expression) => [op, selection], // flagged unsupported: map only echoes its arguments
+    },
+    {
+        '@desc': 'All rings of size ≤ 7 which have at least one atom in s1.',
+        '@examples': ['BYRING resn HEM'],
+        name: 'byring',
+        // isUnsupported: true, // structure-query.atom-set.atom-count' is not implemented.
+        type: h.prefix,
+        rule: h.prefixOp(/BYRING/i),
+        map: (op: string, selection: Expression) => {
+            return h.asAtoms(
+                B.struct.modifier.intersectBy({
+                    '0': B.struct.filter.pick({
+                        '0': B.struct.generator.rings(),
+                        test: B.core.logic.and([
+                            B.core.rel.lte([B.struct.atomSet.atomCount(), 7]),
+                            B.core.rel.gr([B.struct.atomSet.countQuery([selection]), 1]), // NOTE(review): 'gr ..., 1' demands a count above 1, while '@desc' says "at least one atom" — confirm which is intended
+                        ]),
+                    }),
+                    by: selection,
+                })
+            );
+        },
+    },
+    {
+        '@desc': 'Selects atoms directly bonded to s1, excludes s1.',
+        '@examples': ['NEIGHBOR resn CYS'],
+        name: 'neighbor',
+        type: h.prefix,
+        abbr: ['nbr.'],
+        rule: h.prefixOp(/NEIGHBOR|nbr\./i),
+        map: (op: string, selection: Expression) => {
+            return B.struct.modifier.exceptBy({
+                '0': h.asAtoms(
+                    B.struct.modifier.includeConnected({
+                        '0': B.struct.modifier.union({ 0: selection }),
+                        'bond-test': true,
+                    })
+                ),
+                by: selection,
+            });
+        },
+    },
+    {
+        '@desc': 'Selects atoms directly bonded to s1, may include s1.',
+        '@examples': ['BOUND_TO name CA'],
+        name: 'bound_to',
+        abbr: ['bto.'],
+        type: h.prefix,
+        rule: h.prefixOp(/BOUND_TO|bto\./i),
+        map: (op: string, selection: Expression) => {
+            return h.asAtoms(
+                B.struct.modifier.includeConnected({
+                    '0': B.struct.modifier.union({ 0: selection }),
+                })
+            );
+        },
+    },
+    {
+        '@desc': 'Extends s1 by X bonds connected to atoms in s1.',
+        '@examples': ['resname LIG EXTEND 3'],
+        name: 'extend',
+        abbr: ['xt.'],
+        type: h.postfix,
+        rule: h.postfixOp(/(EXTEND|xt\.)\s+([0-9]+)/i, 2).map((x: any) => parseInt(x)),
+        map: (count: number, selection: Expression) => {
+            return h.asAtoms(
+                B.struct.modifier.includeConnected({
+                    '0': B.struct.modifier.union({ 0: selection }),
+                    'bond-test': true,
+                    'layer-count': count, // the parsed bond count is passed on as the number of layers
+                })
+            );
+        },
+    },
+];

+ 153 - 0
src/mol-script/transpilers/rasmol/special_properties.ts

@@ -0,0 +1,153 @@
+/**
+ * Copyright (c) 2017-2021 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ * @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com>
+ */
+
+import { MolScriptBuilder } from '../../../mol-script/language/builder';
+const B = MolScriptBuilder;
+import { PropertyDict } from '../types';
+
+const reFloat = /[-+]?[0-9]*\.?[0-9]+/; // decimal number: optional sign, optional integer digits, optional '.', then at least one digit (no exponent form)
+// const rePosInt = /[0-9]+/;
+
+/** Splits `x` on '+' and applies `B.atomName` to every item. */
+function atomNameListMap(x: string) {
+    const names = x.split('+');
+    return names.map(B.atomName);
+}
+/** Splits `x` on '+' and strips one leading and one trailing quote character (' or ") from every item. */
+function listMap(x: string) {
+    const items = x.split('+');
+    return items.map(item => item.replace(/^["']|["']$/g, ''));
+}
+/**
+ * Parses an inclusive integer range written as `min-max`.
+ *
+ * Either bound may carry a leading minus sign (`-5-10`, `-10--2`); the separator is the '-'
+ * between the two integers. Splitting on every '-' (the previous behaviour) turned such input
+ * into `{ min: NaN, ... }`.
+ *
+ * @param x range text, e.g. `1-10`
+ * @returns `{ min, max }`; for text that is not two integers joined by '-', the text is split
+ *          on every '-' and a bound that cannot be parsed is NaN
+ */
+function rangeMap(x: string) {
+    const signed = /^\s*(-?[0-9]+)\s*-\s*(-?[0-9]+)\s*$/.exec(x);
+    if (signed) {
+        return { min: parseInt(signed[1], 10), max: parseInt(signed[2], 10) };
+    }
+    // Fallback keeps the original lenient parsing for anything that is not `int-int`.
+    const [min, max] = x.split('-').map(part => parseInt(part));
+    return { min, max };
+}
+/**
+ * Parses either an inclusive range (`1-10`) or a '+'-separated list of integers (`1+10+100`).
+ *
+ * A '-' marks a range only when it follows a digit. A '-' at the start of an item is a sign,
+ * so `-5` and `-5+-3` are read as lists of negative numbers rather than as ranges with a
+ * missing lower bound.
+ *
+ * @param x value text of a list-or-range property
+ * @returns the result of `rangeMap` for a range, otherwise an array of parsed integers
+ */
+function listOrRangeMap(x: string) {
+    const isRange = /[0-9]\s*-/.test(x);
+    if (isRange) return rangeMap(x);
+    return listMap(x).map(item => parseInt(item));
+}
+/** Splits `x` on '+' and applies `B.struct.type.elementSymbol` to every item. */
+function elementListMap(x: string) {
+    const symbols = x.split('+');
+    return symbols.map(B.struct.type.elementSymbol);
+}
+
+const sstrucDict: { [k: string]: string } = { // secondary-structure letter -> flag name; sstrucListMap uses 'none' for letters not listed here
+    H: 'helix',
+    S: 'beta',
+    L: 'none'
+};
+/**
+ * Upper-cases `x`, splits it on '+', looks every item up in `sstrucDict` (falling back to
+ * 'none') and wraps the resulting names with `B.struct.type.secondaryStructureFlags`.
+ *
+ * @returns an object with a single `flags` field
+ */
+function sstrucListMap(x: string) {
+    const flagNames = x
+        .toUpperCase()
+        .split('+')
+        .map(code => sstrucDict[code] || 'none');
+    return { flags: B.struct.type.secondaryStructureFlags(flagNames) };
+}
+
+export const special_properties: PropertyDict = { // one entry per selector property: abbreviations, value regex, value mapper, test level and, where present, the property accessor
+    symbol: {
+        '@desc': 'chemical-symbol-list: list of 1- or 2-letter chemical symbols from the periodic table',
+        '@examples': ['symbol O+N'],
+        abbr: ['e.'], regex: /[a-zA-Z'"+]+/, map: elementListMap,
+        level: 'atom-test', property: B.acp('elementSymbol')
+    },
+    name: {
+        '@desc': 'atom-name-list: list of up to 4-letter codes for atoms in proteins or nucleic acids',
+        '@examples': ['name CA+CB+CG+CD'],
+        abbr: ['n.'], regex: /[a-zA-Z0-9'"+]+/, map: atomNameListMap,
+        level: 'atom-test', property: B.ammp('label_atom_id')
+    },
+    resn: {
+        '@desc': 'residue-name-list: list of 3-letter codes for amino acids or list of up to 2-letter codes for nucleic acids',
+        '@examples': ['resn ASP+GLU+ASN+GLN', 'resn A+G'],
+        abbr: ['resname', 'r.'], regex: /[a-zA-Z0-9'"+]+/, map: listMap,
+        level: 'residue-test', property: B.ammp('label_comp_id')
+    },
+    resi: {
+        '@desc': 'residue-identifier-list list of up to 4-digit residue numbers or residue-identifier-range',
+        '@examples': ['resi 1+10+100+1000', 'resi 1-10'],
+        abbr: ['resident', 'residue', 'resid', 'i.'], regex: /[0-9+-]+/, map: listOrRangeMap, // value is either a '+' list or a 'min-max' range
+        level: 'residue-test', property: B.ammp('auth_seq_id')
+    },
+    alt: {
+        '@desc': 'alternate-conformation-identifier-list list of single letters',
+        '@examples': ['alt A+B', 'alt ""', 'alt ""+A'],
+        abbr: [], regex: /[a-zA-Z0-9'"+]+/, map: listMap,
+        level: 'atom-test', property: B.ammp('label_alt_id')
+    },
+    chain: {
+        '@desc': 'chain-identifier-list list of single letters or sometimes numbers',
+        '@examples': ['chain A'],
+        abbr: ['c.'], regex: /[a-zA-Z0-9'"+]+/, map: listMap,
+        level: 'chain-test', property: B.ammp('auth_asym_id')
+    },
+    segi: {
+        '@desc': 'segment-identifier-list list of up to 4 letter identifiers',
+        '@examples': ['segi lig'],
+        abbr: ['segid', 's.'], regex: /[a-zA-Z0-9'"+]+/, map: listMap,
+        level: 'chain-test', property: B.ammp('label_asym_id')
+    },
+    flag: {
+        '@desc': 'flag-number a single integer from 0 to 31',
+        '@examples': ['flag 0'],
+        isUnsupported: true, // marked unsupported; this entry defines no 'property' accessor
+        abbr: ['f.'], regex: /[0-9]+/, map: x => parseInt(x),
+        level: 'atom-test'
+    },
+    numeric_type: {
+        '@desc': 'type-number a single integer',
+        '@examples': ['nt. 5'],
+        isUnsupported: true, // marked unsupported; this entry defines no 'property' accessor
+        abbr: ['nt.'], regex: /[0-9]+/, map: x => parseInt(x),
+        level: 'atom-test'
+    },
+    text_type: {
+        '@desc': 'type-string a list of up to 4 letter codes',
+        '@examples': ['text_type HA+HC'],
+        isUnsupported: true, // marked unsupported; this entry defines no 'property' accessor
+        abbr: ['tt.'], regex: /[a-zA-Z0-9'"+]+/, map: listMap,
+        level: 'atom-test'
+    },
+    id: {
+        '@desc': 'external-index-number a single integer',
+        '@examples': ['id 23'],
+        regex: /[0-9+-]+/, map: listOrRangeMap,
+        level: 'atom-test', property: B.ammp('id')
+    },
+    index: {
+        '@desc': 'internal-index-number a single integer',
+        '@examples': ['index 11'],
+        regex: /[0-9+-]+/, map: listOrRangeMap,
+        level: 'atom-test', property: B.ammp('id') // NOTE(review): same B.ammp('id') accessor as 'id' although '@desc' says internal index — confirm
+    },
+    ss: {
+        '@desc': 'secondary-structure-type list of single letters. Helical regions should be assigned H and sheet regions S. Loop regions can either be assigned L or be blank.',
+        '@examples': ['ss H+S+L', 'ss S+""'],
+        abbr: [], regex: /[a-zA-Z'"+]+/, map: sstrucListMap,
+        level: 'residue-test', property: B.ammp('secondaryStructureFlags')
+    },
+
+    b: {
+        '@desc': 'comparison-operator b-factor-value a real number',
+        '@examples': ['b > 10'],
+        isNumeric: true, // NOTE(review): presumably selects the comparison-operator syntax shown in '@examples' — confirm against the parser
+        abbr: [], regex: reFloat, map: x => parseFloat(x),
+        level: 'atom-test', property: B.ammp('B_iso_or_equiv')
+    },
+    q: {
+        '@desc': 'comparison-operator occupancy-value a real number',
+        '@examples': ['q <0.50'],
+        isNumeric: true,
+        abbr: [], regex: reFloat, map: x => parseFloat(x),
+        level: 'atom-test', property: B.ammp('occupancy')
+    },
+    formal_charge: {
+        '@desc': 'comparison-operator formal charge-value an integer',
+        '@examples': ['fc. = -1'],
+        isNumeric: true,
+        abbr: ['fc.'], regex: reFloat, map: x => parseFloat(x), // NOTE(review): '@desc' says integer, but the value is matched with reFloat and parsed with parseFloat — confirm
+        level: 'atom-test', property: B.ammp('pdbx_formal_charge')
+    },
+    partial_charge: {
+        '@desc': 'comparison-operator partial charge-value a real number',
+        '@examples': ['pc. > 1'],
+        isUnsupported: true, // marked unsupported; this entry defines no 'property' accessor
+        isNumeric: true,
+        abbr: ['pc.'], regex: reFloat, map: x => parseFloat(x),
+        level: 'atom-test'
+    }
+};