Explorar el Código

Merge pull request #73 from JonStargaryen/chem-comp

Support export of charges, stereo flag and SYBYL atom types for ModelServer ligand queries
David Sehnal hace 4 años
padre
commit
3ecd305adf

+ 1 - 1
README.md

@@ -106,7 +106,7 @@ and navigate to `build/viewer`
 ### Other scripts
 **Create chem comp bond table**
 
-    node --max-old-space-size=4096 lib/commonjs/cli/chem-comp-bond/create-table.js build/data/ccb.bcif -b
+    node --max-old-space-size=4096 lib/commonjs/cli/chem-comp-dict/create-table.js build/data/ccb.bcif -b
 
 **Test model server**
 

+ 59 - 10
src/cli/chem-comp-bond/create-table.ts → src/cli/chem-comp-dict/create-table.ts

@@ -23,6 +23,7 @@ import { CCD_Schema } from '../../mol-io/reader/cif/schema/ccd';
 import { SetUtils } from '../../mol-util/set';
 import { DefaultMap } from '../../mol-util/map';
 import { mmCIF_chemCompBond_schema } from '../../mol-io/reader/cif/schema/mmcif-extras';
+import { ccd_chemCompAtom_schema } from '../../mol-io/reader/cif/schema/ccd-extras';
 
 export async function ensureAvailable(path: string, url: string) {
     if (FORCE_DOWNLOAD || !fs.existsSync(path)) {
@@ -135,7 +136,7 @@ function checkAddingBondsFromPVCD(pvcd: DatabaseCollection<CCD_Schema>) {
     }
 }
 
-async function createBonds() {
+async function createBonds(atomsRequested: boolean) {
     await ensureDataAvailable();
     const ccd = await readCCD();
     const pvcd = await readPVCD();
@@ -200,25 +201,68 @@ async function createBonds() {
     });
 
     const bondDatabase =  Database.ofTables(
-        TABLE_NAME,
+        CCB_TABLE_NAME,
         { chem_comp_bond: mmCIF_chemCompBond_schema },
         { chem_comp_bond: bondTable }
     );
 
-    return bondDatabase;
+    return { bonds: bondDatabase, atoms: atomsRequested ? createAtoms(ccd) : void 0 };
 }
 
-async function run(out: string, binary = false) {
-    const bonds = await createBonds();
+function createAtoms(ccd: DatabaseCollection<CCD_Schema>) {
+    const comp_id: string[] = [];
+    const atom_id: string[] = [];
+    const charge: number[] = [];
+    const pdbx_stereo_config: typeof CCD_Schema.chem_comp_atom['pdbx_stereo_config']['T'][] = [];
+
+    function addAtoms(compId: string, cca: CCA) {
+        for (let i = 0, il = cca._rowCount; i < il; ++i) {
+            atom_id.push(cca.atom_id.value(i));
+            comp_id.push(compId);
+            charge.push(cca.charge.value(i));
+            pdbx_stereo_config.push(cca.pdbx_stereo_config.value(i));
+        }
+    }
+
+    // add atoms from CCD
+    for (const k in ccd) {
+        const { chem_comp, chem_comp_atom } = ccd[k];
+        if (chem_comp_atom._rowCount) {
+            addAtoms(chem_comp.id.value(0), chem_comp_atom);
+        }
+    }
+
+    const atomTable = Table.ofArrays(ccd_chemCompAtom_schema, {
+        comp_id, atom_id, charge, pdbx_stereo_config
+    });
 
-    const cif = getEncodedCif(TABLE_NAME, bonds, binary);
+    return Database.ofTables(
+        CCA_TABLE_NAME,
+        { chem_comp_atom: ccd_chemCompAtom_schema },
+        { chem_comp_atom: atomTable }
+    );
+}
+
+async function run(out: string, binary = false, ccaOut?: string) {
+    const { bonds, atoms } = await createBonds(!!ccaOut);
+
+    const ccbCif = getEncodedCif(CCB_TABLE_NAME, bonds, binary);
     if (!fs.existsSync(path.dirname(out))) {
         fs.mkdirSync(path.dirname(out));
     }
-    writeFile(out, cif);
+    writeFile(out, ccbCif);
+
+    if (!!ccaOut) {
+        const ccaCif = getEncodedCif(CCA_TABLE_NAME, atoms, binary);
+        if (!fs.existsSync(path.dirname(ccaOut))) {
+            fs.mkdirSync(path.dirname(ccaOut));
+        }
+        writeFile(ccaOut, ccaCif);
+    }
 }
 
-const TABLE_NAME = 'CHEM_COMP_BONDS';
+const CCB_TABLE_NAME = 'CHEM_COMP_BONDS';
+const CCA_TABLE_NAME = 'CHEM_COMP_ATOMS';
 
 const DATA_DIR = path.join(__dirname, '..', '..', '..', '..', 'build/data');
 const CCD_PATH = path.join(DATA_DIR, 'components.cif');
@@ -241,13 +285,18 @@ parser.addArgument([ '--binary', '-b' ], {
     action: 'storeTrue',
     help: 'Output as BinaryCIF.'
 });
+parser.addArgument(['--ccaOut', '-a'], {
+    help: 'Optional generated file output path for chem_comp_atom data.',
+    required: false
+});
 interface Args {
     out: string
     forceDownload?: boolean
-    binary?: boolean
+    binary?: boolean,
+    ccaOut?: string
 }
 const args: Args = parser.parseArgs();
 
 const FORCE_DOWNLOAD = args.forceDownload;
 
-run(args.out, args.binary);
+run(args.out, args.binary, args.ccaOut);

+ 16 - 0
src/mol-io/reader/cif/schema/ccd-extras.ts

@@ -0,0 +1,16 @@
+/**
+ * Copyright (c) 2020 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ */
+
+import { CCD_Schema } from './ccd';
+
+// a reduced chem_comp_atom schema that provides charge and stereo_config information
+export const ccd_chemCompAtom_schema = {
+    comp_id: CCD_Schema.chem_comp_atom.comp_id,
+    atom_id: CCD_Schema.chem_comp_atom.atom_id,
+    charge: CCD_Schema.chem_comp_atom.charge,
+    pdbx_stereo_config: CCD_Schema.chem_comp_atom.pdbx_stereo_config
+};

+ 26 - 21
src/mol-io/writer/ligand-encoder.ts

@@ -7,14 +7,15 @@
 import { StringBuilder } from '../../mol-util';
 import Writer from './writer';
 import { Encoder, Category, Field } from './cif/encoder';
-import { ComponentBond } from '../../mol-model-formats/structure/property/bonds/comp';
+import { ComponentAtom } from '../../mol-model-formats/structure/property/atoms/chem_comp';
+import { ComponentBond } from '../../mol-model-formats/structure/property/bonds/chem_comp';
 
 interface Atom {
-    label_atom_id: string,
     Cartn_x: number,
     Cartn_y: number,
     Cartn_z: number,
-    type_symbol: string
+    type_symbol: string,
+    index: number
 }
 
 function Atom(partial: any): Atom {
@@ -24,7 +25,8 @@ function Atom(partial: any): Atom {
 export abstract class LigandEncoder implements Encoder<string> {
     protected builder: StringBuilder;
     protected meta: StringBuilder;
-    protected componentData: ComponentBond;
+    protected componentAtomData: ComponentAtom;
+    protected componentBondData: ComponentBond;
     protected error = false;
     protected encoded = false;
     readonly isBinary = false;
@@ -61,8 +63,12 @@ export abstract class LigandEncoder implements Encoder<string> {
         this._writeCategory(category, context);
     }
 
-    setComponentBondData(componentData: ComponentBond) {
-        this.componentData = componentData;
+    setComponentAtomData(componentAtomData: ComponentAtom) {
+        this.componentAtomData = componentAtomData;
+    }
+
+    setComponentBondData(componentBondData: ComponentBond) {
+        this.componentBondData = componentBondData;
     }
 
     writeTo(stream: Writer) {
@@ -80,14 +86,15 @@ export abstract class LigandEncoder implements Encoder<string> {
         return StringBuilder.getString(this.builder);
     }
 
-    protected getAtoms<Ctx>(instance: Category.Instance<Ctx>, source: any): Atom[] {
-        const sortedFields = this.getSortedFields(instance, ['Cartn_x', 'Cartn_y', 'Cartn_z', 'type_symbol']);
+    protected getAtoms<Ctx>(instance: Category.Instance<Ctx>, source: any): Map<string, Atom> {
+        const sortedFields = this.getSortedFields(instance, ['Cartn_x', 'Cartn_y', 'Cartn_z']);
         const label_atom_id = this.getField(instance, 'label_atom_id');
-        return this._getAtoms(source, sortedFields, label_atom_id);
+        const type_symbol = this.getField(instance, 'type_symbol');
+        return this._getAtoms(source, sortedFields, label_atom_id, type_symbol);
     }
 
-    private _getAtoms(source: any, fields: Field<any, any>[], label_atom_id: Field<any, any>): Atom[] {
-        const atoms: Atom[] = [];
+    private _getAtoms(source: any, fields: Field<any, any>[], label_atom_id: Field<any, any>, type_symbol: Field<any, any>): Map<string, Atom> {
+        const atoms = new Map<string, Atom>();
         let index = 0;
 
         // is outer loop even needed?
@@ -102,19 +109,21 @@ export abstract class LigandEncoder implements Encoder<string> {
                 const key = it.move();
 
                 const lai = label_atom_id.value(key, data, index) as string;
-                const label = this.getLabel(lai);
-                if (this.skipHydrogen(label)) {
+                const ts = type_symbol.value(key, data, index) as string;
+                if (this.skipHydrogen(ts)) {
                     index++;
                     continue;
                 }
-                const a: { [k: string]: (string | number) } = { 'label_atom_id': lai };
+                const a: { [k: string]: (string | number) } = {};
 
                 for (let _f = 0, _fl = fields.length; _f < _fl; _f++) {
                     const f: Field<any, any> = fields[_f]!;
                     a[f.name] = f.value(key, data, index);
                 }
+                a[type_symbol.name] = ts;
+                a['index'] = index;
 
-                atoms.push(Atom(a));
+                atoms.set(lai, Atom(a));
                 index++;
             }
         }
@@ -122,15 +131,11 @@ export abstract class LigandEncoder implements Encoder<string> {
         return atoms;
     }
 
-    protected skipHydrogen(label: string) {
+    protected skipHydrogen(type_symbol: string) {
         if (this.hydrogens) {
             return false;
         }
-        return label.startsWith('H');
-    }
-
-    protected getLabel(s: string) {
-        return s.replace(/[^A-Z]+/g, '');
+        return type_symbol === 'H';
     }
 
     private getSortedFields<Ctx>(instance: Category.Instance<Ctx>, names: string[]) {

+ 38 - 18
src/mol-io/writer/mol/encoder.ts

@@ -11,7 +11,6 @@ import { LigandEncoder } from '../ligand-encoder';
 
 // specification: http://c4.cabrillo.edu/404/ctfile.pdf
 // SDF wraps MOL and allows for multiple molecules per file as well as additional properties
-// TODO add support for stereo/chiral flags, add charges
 export class MolEncoder extends LigandEncoder {
     _writeCategory<Ctx>(category: Category<Ctx>, context?: Ctx) {
         // use separate builder because we still need to write Counts and Bonds line
@@ -25,25 +24,33 @@ export class MolEncoder extends LigandEncoder {
         // 3rd lines must be present and can contain comments
         StringBuilder.writeSafe(this.builder, `${name}\n  ${this.encoder}\n\n`);
 
-        const bondMap = this.componentData.entries.get(name)!;
+        const atomMap = this.componentAtomData.entries.get(name)!;
+        const bondMap = this.componentBondData.entries.get(name)!;
         let bondCount = 0;
+        let chiral = false;
 
         // traverse once to determine all actually present atoms
         const atoms = this.getAtoms(instance, source);
-        for (let i1 = 0, il = atoms.length; i1 < il; i1++) {
-            const atom = atoms[i1];
-            StringBuilder.writePadLeft(ctab, atom.Cartn_x.toFixed(4), 10);
-            StringBuilder.writePadLeft(ctab, atom.Cartn_y.toFixed(4), 10);
-            StringBuilder.writePadLeft(ctab, atom.Cartn_z.toFixed(4), 10);
+        atoms.forEach((atom1, label_atom_id1) => {
+            const { index: i1 } = atom1;
+            const { charge, stereo_config } = atomMap.map.get(label_atom_id1)!;
+            StringBuilder.writePadLeft(ctab, atom1.Cartn_x.toFixed(4), 10);
+            StringBuilder.writePadLeft(ctab, atom1.Cartn_y.toFixed(4), 10);
+            StringBuilder.writePadLeft(ctab, atom1.Cartn_z.toFixed(4), 10);
             StringBuilder.whitespace1(ctab);
-            StringBuilder.writePadRight(ctab, atom.type_symbol, 2);
-            StringBuilder.writeSafe(ctab, '  0  0  0  0  0  0  0  0  0  0  0  0\n');
-
-            bondMap.map.get(atom.label_atom_id)!.forEach((v, k) => {
-                const i2 = atoms.findIndex(e => e.label_atom_id === k);
-                const label2 = this.getLabel(k);
-                if (i1 < i2 && atoms.findIndex(e => e.label_atom_id === k) > -1 && !this.skipHydrogen(label2)) {
-                    const { order } = v;
+            StringBuilder.writePadRight(ctab, atom1.type_symbol, 2);
+            StringBuilder.writeSafe(ctab, '  0');
+            StringBuilder.writeIntegerPadLeft(ctab, this.mapCharge(charge), 3);
+            StringBuilder.writeSafe(ctab, '  0  0  0  0  0  0  0  0  0  0\n');
+            if (stereo_config !== 'N') chiral = true;
+
+            bondMap.map.get(label_atom_id1)!.forEach((bond, label_atom_id2) => {
+                const atom2 = atoms.get(label_atom_id2);
+                if (!atom2) return;
+
+                const { index: i2, type_symbol: type_symbol2 } = atom2;
+                if (i1 < i2 && !this.skipHydrogen(type_symbol2)) {
+                    const { order } = bond;
                     StringBuilder.writeIntegerPadLeft(bonds, i1 + 1, 3);
                     StringBuilder.writeIntegerPadLeft(bonds, i2 + 1, 3);
                     StringBuilder.writeIntegerPadLeft(bonds, order, 3);
@@ -51,12 +58,12 @@ export class MolEncoder extends LigandEncoder {
                     bondCount++;
                 }
             });
-        }
+        });
 
         // write counts line
-        StringBuilder.writeIntegerPadLeft(this.builder, atoms.length, 3);
+        StringBuilder.writeIntegerPadLeft(this.builder, atoms.size, 3);
         StringBuilder.writeIntegerPadLeft(this.builder, bondCount, 3);
-        StringBuilder.writeSafe(this.builder, '  0  0  0  0  0  0  0  0  0\n');
+        StringBuilder.writeSafe(this.builder, `  0  0  ${chiral ? 1 : 0}  0  0  0  0  0  0\n`);
 
         StringBuilder.writeSafe(this.builder, StringBuilder.getString(ctab));
         StringBuilder.writeSafe(this.builder, StringBuilder.getString(bonds));
@@ -64,6 +71,19 @@ export class MolEncoder extends LigandEncoder {
         StringBuilder.writeSafe(this.builder, 'M  END\n');
     }
 
+    private mapCharge(raw: number): number {
+        // 0 = uncharged or value other than these, 1 = +3, 2 = +2, 3 = +1, 4 = doublet radical, 5 = -1, 6 = -2, 7 = -3
+        switch (raw) {
+            case 3: return 1;
+            case 2: return 2;
+            case 1: return 3;
+            case -1: return 5;
+            case -2: return 6;
+            case -3: return 7;
+            default: return 0;
+        }
+    }
+
     protected writeFullCategory<Ctx>(sb: StringBuilder, category: Category<Ctx>, context?: Ctx) {
         const { instance, source } = getCategoryInstanceData(category, context);
         const fields = instance.fields;

+ 245 - 19
src/mol-io/writer/mol2/encoder.ts

@@ -9,10 +9,14 @@ import { LigandEncoder } from '../ligand-encoder';
 import { StringBuilder } from '../../../mol-util';
 import { getCategoryInstanceData } from '../cif/encoder/util';
 import { BondType } from '../../../mol-model/structure/model/types';
+import { ComponentBond } from '../../../mol-model-formats/structure/property/bonds/chem_comp';
+
+// type MOL_TYPE = 'SMALL' | 'BIOPOLYMER' | 'PROTEIN' | 'NUCLEIC_ACID' | 'SACCHARIDE';
+// type CHARGE_TYPE = 'NO_CHARGES' | 'DEL_RE' | 'GASTEIGER' | 'GAST_HUCK' | 'HUCKEL' | 'PULLMAN' | 'GAUSS80_CHARGES' | 'AMPAC_CHARGES' | 'MULLIKEN_CHARGES' | 'DICT_ CHARGES' | 'MMFF94_CHARGES' | 'USER_CHARGES';
+const NON_METAL_ATOMS = 'H D B C N O F Si P S Cl As Se Br Te I At He Ne Ar Kr Xe Rn'.split(' ');
+type BondMap = Map<string, { order: number, flags: number }>;
 
 // specification: http://chemyang.ccnu.edu.cn/ccb/server/AIMMS/mol2.pdf
-// TODO amide (and real sp/sp2/sp3) support for bonds and SYBYL atom types: see https://www.sdsc.edu/CCMS/Packages/cambridge/pluto/atom_types.html
-// TODO support charges
 export class Mol2Encoder extends LigandEncoder {
     private out: StringBuilder;
 
@@ -25,36 +29,258 @@ export class Mol2Encoder extends LigandEncoder {
         const name = this.getName(instance, source);
         StringBuilder.writeSafe(this.builder, `# Name: ${name}\n# Created by ${this.encoder}\n\n`);
 
-        const bondMap = this.componentData.entries.get(name)!;
+        const bondMap = this.componentBondData.entries.get(name)!;
         let bondCount = 0;
 
         const atoms = this.getAtoms(instance, source);
         StringBuilder.writeSafe(a, '@<TRIPOS>ATOM\n');
         StringBuilder.writeSafe(b, '@<TRIPOS>BOND\n');
-        for (let i1 = 0, il = atoms.length; i1 < il; i1++) {
-            const atom = atoms[i1];
-
-            let aromatic = false;
-            bondMap.map.get(atom.label_atom_id)!.forEach((v, k) => {
-                const i2 = atoms.findIndex(e => e.label_atom_id === k);
-                const label2 = this.getLabel(k);
-                if (i1 < i2 && atoms.findIndex(e => e.label_atom_id === k) > -1 && !this.skipHydrogen(label2)) {
-                    const { order, flags } = v;
-                    const ar = flags === BondType.Flag.Aromatic;
-                    if (ar) aromatic = true;
+        atoms.forEach((atom1, label_atom_id1) => {
+            const { index: i1 } = atom1;
+            bondMap.map.get(label_atom_id1)!.forEach((bond, label_atom_id2) => {
+                const atom2 = atoms.get(label_atom_id2);
+                if (!atom2) return;
+
+                const { index: i2, type_symbol: type_symbol2 } = atom2;
+                if (i1 < i2 && !this.skipHydrogen(type_symbol2)) {
+                    const { order, flags } = bond;
+                    const ar = BondType.is(BondType.Flag.Aromatic, flags);
                     StringBuilder.writeSafe(b, `${++bondCount} ${i1 + 1} ${i2 + 1} ${ar ? 'ar' : order}`);
                     StringBuilder.newline(b);
                 }
             });
 
-            const sub = aromatic ? '.ar' : '';
-            StringBuilder.writeSafe(a, `${i1 + 1} ${atom.type_symbol} ${atom.Cartn_x.toFixed(3)} ${atom.Cartn_y.toFixed(3)} ${atom.Cartn_z.toFixed(3)} ${atom.type_symbol}${sub} 1 ${name} 0.000\n`);
-        }
+            const sybyl = this.mapToSybyl(label_atom_id1, atom1.type_symbol, bondMap);
+            StringBuilder.writeSafe(a, `${i1 + 1} ${label_atom_id1} ${atom1.Cartn_x.toFixed(3)} ${atom1.Cartn_y.toFixed(3)} ${atom1.Cartn_z.toFixed(3)} ${sybyl} 1 ${name} 0.000\n`);
+        });
 
-        StringBuilder.writeSafe(this.out, `@<TRIPOS>MOLECULE\n${name}\n${atoms.length} ${bondCount} 0 0 0\nSMALL\nNO_CHARGES\n\n`);
+        // could write something like 'SMALL\nNO_CHARGES', for now let's write **** indicating non-optional, yet missing, string values
+        StringBuilder.writeSafe(this.out, `@<TRIPOS>MOLECULE\n${name}\n${atoms.size} ${bondCount} 1\n****\n****\n\n`);
         StringBuilder.writeSafe(this.out, StringBuilder.getString(a));
         StringBuilder.writeSafe(this.out, StringBuilder.getString(b));
-        StringBuilder.writeSafe(this.out, `@<TRIPOS>SUBSTRUCTURE\n${name} ${name} 1\n`);
+        StringBuilder.writeSafe(this.out, `@<TRIPOS>SUBSTRUCTURE\n1 ${name} 1\n`);
+    }
+
+    private count<K, V, C>(map: Map<K, V>, ctx: C, predicate: (k: K, v: V, ctx: C) => boolean): number {
+        let count = 0;
+        const iter = map.entries();
+        let result = iter.next();
+        while (!result.done) {
+            if (predicate(result.value[0], result.value[1], ctx)) {
+                count++;
+            }
+            result = iter.next();
+        }
+        return count;
+    }
+
+    private orderSum(map: BondMap): number {
+        let sum = 0;
+        const iter = map.values();
+        let result = iter.next();
+        while (!result.done) {
+            sum += result.value.order;
+            result = iter.next();
+        }
+        return sum;
+    }
+
+    private isNonMetalBond(label_atom_id: string): boolean {
+        for (const a of NON_METAL_ATOMS) {
+            if (label_atom_id.startsWith(a)) return true;
+        }
+        return false;
+    }
+
+    private extractNonmets(map: BondMap): BondMap {
+        const ret = new Map<string, { order: number, flags: number }>();
+        const iter = map.entries();
+        let result = iter.next();
+        while (!result.done) {
+            const [k, v] = result.value;
+            if (this.isNonMetalBond(k)) {
+                ret.set(k, v);
+            }
+            result = iter.next();
+        }
+        return ret;
+    }
+
+    // see https://www.sdsc.edu/CCMS/Packages/cambridge/pluto/atom_types.html
+    // cannot account for covalently bound amino acids etc
+    private mapToSybyl(label_atom_id1: string, type_symbol1: string, bondMap: ComponentBond.Entry) {
+        // TODO if altLoc: 'Du' // 1.1
+        // TODO if end of polymeric bond: 'Du' // 1.2
+        if (type_symbol1 === 'D') return 'H'; // 1.3
+        if (type_symbol1 === 'P') return 'P.3'; // 1.4, 4mpo/ligand?encoding=mol2&auth_seq_id=203 (PO4)
+        if (type_symbol1 === 'Co' || type_symbol1 === 'Ru') return type_symbol1 + '.oh'; // 1.5
+
+        const bonds = bondMap.map.get(label_atom_id1)!;
+        const numBonds = bonds.size;
+
+        if (type_symbol1 === 'Ti' || type_symbol1 === 'Cr') { // 1.10
+            return type_symbol1 + (numBonds <= 4 ? '.th' : '.oh'); // 1.10.1 & 1.10.2
+        }
+        if (type_symbol1 === 'C') { // 1.6
+            if (numBonds >= 4 && this.count(bonds, this, (_k, v) => v.order === 1) >= 4) return 'C.3'; // 1.6.1, 3rga/ligand?encoding=mol2&auth_seq_id=307 (MOH)
+            if (numBonds === 3 && this.isCat(bonds, bondMap)) return 'C.cat'; // 1.6.2, 1acj/ligand?encoding=mol2&auth_seq_id=44 (ARG), 5vjb/ligand?encoding=mol2&auth_seq_id=101 (GAI)
+            if (numBonds >= 2 && this.count(bonds, this, (_k, v) => BondType.is(BondType.Flag.Aromatic, v.flags)) >= 2) return 'C.ar'; // 1.6.3, 1acj/ligand?encoding=mol2&auth_seq_id=30 (PHE), 1acj/ligand?encoding=mol2&auth_seq_id=63 (TYR), 1acj/ligand?encoding=mol2&auth_seq_id=84 (TRP), 1acj/ligand?encoding=mol2&auth_seq_id=999 (THA)
+            if ((numBonds === 1 || numBonds === 2) && this.count(bonds, this, (_k, v) => v.order === 3)) return 'C.1'; // 1.6.4, 3i04/ligand?encoding=mol2&auth_asym_id=C&auth_seq_id=900 (CYN)
+            return 'C.2'; // 1.6.5
+        }
+
+        // most of the time, bonds will equal non-metal bonds
+        const nonmets = this.count(bonds, this, (k, _v, ctx) => ctx.isNonMetalBond(k)) === bonds.size ? bonds : this.extractNonmets(bonds);
+        const numNonmets = nonmets.size;
+
+        if (type_symbol1 === 'O') { // 1.7
+            if (numNonmets === 1) { // 1.7.1
+                if (this.isOC(nonmets, bondMap)) return 'O.co2'; // 1.7.1.1, 4h2v/ligand?encoding=mol2&auth_seq_id=403 (ACT)
+                if (this.isOP(nonmets, bondMap)) return 'O.co2'; // 1.7.1.2, 4mpo/ligand?encoding=mol2&auth_seq_id=203 (PO4)
+            }
+            if (numNonmets >= 2 && this.count(bonds, this, (_k, v) => v.order === 1) === bonds.size) return 'O.3'; // 1.7.2, 1acj/ligand?encoding=mol2&auth_seq_id=601 (HOH), 3rga/ligand?encoding=mol2&auth_seq_id=307 (MOH)
+            return 'O.2'; // 1.7.3, 1acj/ligand?encoding=mol2&auth_seq_id=4 (SER)
+        }
+        if (type_symbol1 === 'N') { // 1.8
+            if (numNonmets === 4 && this.count(nonmets, this, (_k, v) => v.order === 1) === 4) return 'N.4'; // 1.8.1, 4ikf/ligand?encoding=mol2&auth_seq_id=403 (NH4)
+            if (numBonds >= 2 && this.count(bonds, this, (_k, v) => BondType.is(BondType.Flag.Aromatic, v.flags)) >= 2) return 'N.ar'; // 1.8.2, 1acj/ligand?encoding=mol2&auth_seq_id=84 (TRP), 1acj/ligand?encoding=mol2&auth_seq_id=999 (THA)
+            if (numNonmets === 1 && this.count(nonmets, this, (_k, v) => v.order === 3)) return 'N.1'; // 1.8.3, 3i04/ligand?encoding=mol2&auth_asym_id=C&auth_seq_id=900 (CYN)
+            if (numNonmets === 2 && this.orderSum(nonmets) === 4) return 'N.1'; // 1.8.4, 3sbr/ligand?encoding=mol2&auth_seq_id=640&auth_asym_id=D (N2O)
+            if (numNonmets === 3 && this.hasCOCS(nonmets, bondMap)) return 'N.am'; // 1.8.5, 3zfz/ligand?encoding=mol2&auth_seq_id=1669 (1W8)
+            if (numNonmets === 3) { // 1.8.6
+                if (this.count(nonmets, this, (_k, v) => v.order > 1) === 1) return 'N.pl3'; // 1.8.6.1, 4hon/ligand?encoding=mol2&auth_seq_id=407 (NO3)
+                if (this.count(nonmets, this, (_k, v) => v.order === 1) === 3) {
+                    if (this.isNpl3(nonmets, bondMap)) return 'N.pl3'; // 1.8.6.1.1 & 1.8.6.1.2, 1acj/ligand?encoding=mol2&auth_seq_id=44 (ARG), 5vjb/ligand?encoding=mol2&auth_seq_id=101 (GAI)
+                }
+                return 'N.3';
+            }
+            return 'N.2'; // 1.8.7, 1acj/ligand?encoding=mol2&auth_seq_id=4 (SER)
+        }
+        if (type_symbol1 === 'S') { // 1.9
+            if (numNonmets === 3 && this.countOfOxygenWithSingleNonmet(nonmets, bondMap) === 1) return 'S.o'; // 1.9.1, 4i03/ligand?encoding=mol2&auth_seq_id=312 (DMS)
+            if (numNonmets === 4 && this.countOfOxygenWithSingleNonmet(nonmets, bondMap) === 2) return 'S.o2'; // 1.9.2, 1udt/ligand?encoding=mol2&auth_seq_id=1000 (VIA)
+            if (numNonmets >= 2 && this.count(bonds, this, (_k, v) => v.order === 1) >= 2) return 'S.3'; // 1.9.3, 3zfz/ligand?encoding=mol2&auth_seq_id=1669 (1W8), 4gpc/ligand?encoding=mol2&auth_seq_id=902 (SO4)
+            return 'S.2'; // 1.9.4
+        }
+        return type_symbol1; // 1.11
+    }
+
+    // 1.8.6.2.1: If one single bond is to an atom that forms a bond of type double, triple, aromatic or
+    // delocalised .AND. one other single bond is to H then atom_type is N.pl3
+    // 1.8.6.2.2: If one single bond is to an atom that forms a bond of type double, triple, aromatic or
+    // delocalised .AND. neither of the other single bonds are to H .AND. sum_of_angles around N .ge. 350 deg then atom_type is N.pl3
+    // TODO cannot check accurately for delocalized bonds
+    private isNpl3(nonmets: BondMap, bondMap: ComponentBond.Entry): boolean {
+        const iter = nonmets.keys();
+        let result = iter.next();
+        while (!result.done) {
+            const label_atom_id = result.value;
+            const adjacentBonds = bondMap.map.get(label_atom_id)!;
+            if (this.count(adjacentBonds, this, (_k, v) => v.order > 1 || BondType.is(BondType.Flag.Aromatic, v.flags))) {
+                // TODO check accurately for 2nd criterion with coordinates
+                return true;
+            }
+            result = iter.next();
+        }
+        return false;
+    }
+
+    // If bond is to carbon .AND. carbon forms a total of 3 bonds, 2 of which are to an oxygen
+    // forming only 1 non-metal bond then atom_type is O.co2
+    private isOC(nonmets: BondMap, bondMap: ComponentBond.Entry): boolean {
+        const nonmet = nonmets.entries().next()!.value as [string, { order: number, flags: number }];
+        if (!nonmet[0].startsWith('C')) return false;
+        const carbonBonds = bondMap.map.get(nonmet[0])!;
+        if (carbonBonds.size !== 3) return false;
+
+        let count = 0;
+        const iter = carbonBonds.keys();
+        let result = iter.next();
+        while (!result.done) {
+            const label_atom_id = result.value;
+            if (label_atom_id.startsWith('O')) {
+                const adjacentBonds = bondMap.map.get(label_atom_id)!;
+                if (this.count(adjacentBonds, this, (k, _v, ctx) => ctx.isNonMetalBond(k)) === 1) count++;
+            }
+            result = iter.next();
+        }
+        return count === 2;
+    }
+
+    // If bond is to phosphorus .AND. phosphorus forms at least 2 bonds to an oxygen forming
+    // only 1 non-metal bond then atom_type is O.co2
+    private isOP(nonmets: BondMap, bondMap: ComponentBond.Entry): boolean {
+        const nonmet = nonmets.entries().next()!.value as [string, { order: number, flags: number }];
+        if (!nonmet[0].startsWith('P')) return false;
+        const phosphorusBonds = bondMap.map.get(nonmet[0])!;
+        if (phosphorusBonds.size < 2) return false;
+
+        let count = 0;
+        const iter = phosphorusBonds.keys();
+        let result = iter.next();
+        while (!result.done) {
+            const label_atom_id = result.value;
+            if (label_atom_id.startsWith('O')) {
+                const adjacentBonds = bondMap.map.get(label_atom_id)!;
+                if (this.count(adjacentBonds, this, (k, _v, ctx) => ctx.isNonMetalBond(k)) === 1) count++;
+            }
+            result = iter.next();
+        }
+        return count >= 2;
+    }
+
+    // If num_bond .eq. 3 .AND. all bonds are acyclic .AND. all bonds are to nitrogen .AND. each
+    // nitrogen forms bonds to 2 other atoms both of which are not oxygen then atom_type is C.cat.
+    private isCat(currentBondMap: BondMap, bondMap: ComponentBond.Entry): boolean {
+        const iter1 = currentBondMap.keys();
+        let result1 = iter1.next();
+        while (!result1.done) {
+            const label_atom_id = result1.value;
+            if (!label_atom_id.startsWith('N')) return false;
+
+            const adjacentBonds = bondMap.map.get(label_atom_id)!;
+            if (adjacentBonds.size < 2) return false;
+
+            const iter2 = adjacentBonds.keys();
+            let result2 = iter2.next();
+            while (!result2.done) {
+                if (result2.value.startsWith('O')) return false;
+                result2 = iter2.next();
+            }
+            result1 = iter1.next();
+        }
+        // TODO ensure no cycles
+        return true;
+    }
+
+    private countOfOxygenWithSingleNonmet(nonmets: BondMap, bondMap: ComponentBond.Entry): number {
+        let count = 0;
+        const iter = nonmets.keys();
+        let result = iter.next();
+        while (!result.done) {
+            const label_atom_id = result.value;
+            if (label_atom_id.startsWith('O')) {
+                const adjacentBonds = bondMap.map.get(label_atom_id)!;
+                if (this.count(adjacentBonds, this, (k, _v, ctx) => ctx.isNonMetalBond(k))) count++;
+            }
+            result = iter.next();
+        }
+        return count;
+    }
+
+    // If num_nonmet .eq. 3 .AND. one bond is to C=O or C=S then atom_type is N.am
+    private hasCOCS(nonmets: BondMap, bondMap: ComponentBond.Entry): boolean {
+        const iter = nonmets.keys();
+        let result = iter.next();
+        while (!result.done) {
+            const label_atom_id = result.value;
+            if (label_atom_id.startsWith('C')) {
+                const adjacentBonds = bondMap.map.get(label_atom_id)!;
+                if (this.count(adjacentBonds, this, (k, v) => k.startsWith('O') || k.startsWith('S') && v.order === 2)) return true;
+            }
+            result = iter.next();
+        }
+        return false;
     }
 
     protected writeFullCategory<Ctx>(sb: StringBuilder, category: Category<Ctx>, context?: Ctx) {

+ 1 - 1
src/mol-model-formats/structure/mmcif.ts

@@ -15,7 +15,7 @@ import { ModelSymmetry } from './property/symmetry';
 import { ModelSecondaryStructure } from './property/secondary-structure';
 import { Table } from '../../mol-data/db';
 import { AtomSiteAnisotrop } from './property/anisotropic';
-import { ComponentBond } from './property/bonds/comp';
+import { ComponentBond } from './property/bonds/chem_comp';
 import { StructConn } from './property/bonds/struct_conn';
 
 function modelSymmetryFromMmcif(model: Model) {

+ 92 - 0
src/mol-model-formats/structure/property/atoms/chem_comp.ts

@@ -0,0 +1,92 @@
+/**
+ * Copyright (c) 2020 Mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ * @author Sebastian Bittrich <sebastian.bittrich@rcsb.org>
+ */
+
+import { Model } from '../../../../mol-model/structure/model/model';
+import { CustomPropertyDescriptor } from '../../../../mol-model/custom-property';
+import { CifWriter } from '../../../../mol-io/writer/cif';
+import { Table } from '../../../../mol-data/db';
+import { FormatPropertyProvider } from '../../common/property';
+import { CCD_Schema } from '../../../../mol-io/reader/cif/schema/ccd';
+
+export interface ComponentAtom {
+    readonly data: Table<CCD_Schema['chem_comp_atom']>
+    readonly entries: ReadonlyMap<string, ComponentAtom.Entry>
+}
+
+export namespace ComponentAtom {
+    export const Descriptor: CustomPropertyDescriptor = {
+        name: 'chem_comp_atom',
+        cifExport: {
+            prefix: '',
+            categories: [{
+                name: 'chem_comp_atom',
+                instance(ctx) {
+                    const p = Provider.get(ctx.firstModel);
+                    if (!p) return CifWriter.Category.Empty;
+                    const chem_comp_atom = p.data;
+                    if (!chem_comp_atom) return CifWriter.Category.Empty;
+
+                    const comp_names = ctx.structures[0].uniqueResidueNames;
+                    const { comp_id, _rowCount } = chem_comp_atom;
+                    const indices: number[] = [];
+                    for (let i = 0; i < _rowCount; i++) {
+                        if (comp_names.has(comp_id.value(i))) indices[indices.length] = i;
+                    }
+
+                    return CifWriter.Category.ofTable(chem_comp_atom, indices);
+                }
+            }]
+        }
+    };
+
+    export const Provider = FormatPropertyProvider.create<ComponentAtom>(Descriptor);
+
+    export function chemCompAtomFromTable(model: Model, table: Table<CCD_Schema['chem_comp_atom']>): Table<CCD_Schema['chem_comp_atom']> {
+        return Table.pick(table, CCD_Schema.chem_comp_atom, (i: number) => {
+            return model.properties.chemicalComponentMap.has(table.comp_id.value(i));
+        });
+    }
+
+    export function getEntriesFromChemCompAtom(data: Table<CCD_Schema['chem_comp_atom']>) {
+        const entries: Map<string, Entry> = new Map();
+
+        function addEntry(id: string) {
+            let e = new Entry(id);
+            entries.set(id, e);
+            return e;
+        }
+
+        const { comp_id, atom_id, charge, pdbx_stereo_config, _rowCount } = data;
+
+        let entry = addEntry(comp_id.value(0)!);
+        for (let i = 0; i < _rowCount; i++) {
+            const name = atom_id.value(i)!;
+            const id = comp_id.value(i)!;
+            const ch = charge.value(i)!;
+            const stereo = pdbx_stereo_config.value(i)!;
+
+            if (entry.id !== id) {
+                entry = addEntry(id);
+            }
+
+            entry.add(name, ch, stereo);
+        }
+
+        return entries;
+    }
+
+    export class Entry {
+        readonly map: Map<string, { charge: number, stereo_config: CCD_Schema['chem_comp_atom']['pdbx_stereo_config']['T'] }> = new Map();
+
+        add(a: string, charge: number, stereo_config: CCD_Schema['chem_comp_atom']['pdbx_stereo_config']['T']) {
+            this.map.set(a, { charge, stereo_config });
+        }
+
+        constructor(public readonly id: string) { }
+    }
+}

+ 5 - 1
src/mol-model-formats/structure/property/bonds/comp.ts → src/mol-model-formats/structure/property/bonds/chem_comp.ts

@@ -56,7 +56,11 @@ export namespace ComponentBond {
         const entries: Map<string, Entry> = new Map();
 
         function addEntry(id: string) {
-            let e = new Entry(id);
+            // weird behavior when 'PRO' is requested - will report a single bond between N and H because a later operation would override real content
+            if (entries.has(id)) {
+                return entries.get(id)!;
+            }
+            const e = new Entry(id);
             entries.set(id, e);
             return e;
         }

+ 1 - 1
src/mol-model/structure/structure/unit/bonds/intra-compute.ts

@@ -14,7 +14,7 @@ import { SortedArray } from '../../../../../mol-data/int';
 import { getIntraBondOrderFromTable } from '../../../model/properties/atomic/bonds';
 import StructureElement from '../../element';
 import { IndexPairBonds } from '../../../../../mol-model-formats/structure/property/bonds/index-pair';
-import { ComponentBond } from '../../../../../mol-model-formats/structure/property/bonds/comp';
+import { ComponentBond } from '../../../../../mol-model-formats/structure/property/bonds/chem_comp';
 import { StructConn } from '../../../../../mol-model-formats/structure/property/bonds/struct_conn';
 
 function getGraph(atomA: StructureElement.UnitIndex[], atomB: StructureElement.UnitIndex[], _order: number[], _flags: number[], atomCount: number): IntraUnitBonds {

+ 2 - 1
src/servers/model/config.ts

@@ -82,7 +82,8 @@ const DefaultModelServerConfig = {
             //     }
             // },
             // wwPDB: {
-            //     chemCompBondTablePath: ''
+            //     chemCompBondTablePath: '',
+            //     chemCompAtomTablePath: ''
             // }
         }
     },

+ 29 - 3
src/servers/model/properties/providers/wwpdb.ts

@@ -10,13 +10,15 @@ import { AttachModelProperty } from '../../property-provider';
 import { CIF } from '../../../../mol-io/reader/cif';
 import { getParam } from '../../../common/util';
 import { mmCIF_Database, mmCIF_Schema } from '../../../../mol-io/reader/cif/schema/mmcif';
-import { ComponentBond } from '../../../../mol-model-formats/structure/property/bonds/comp';
+import { ComponentBond } from '../../../../mol-model-formats/structure/property/bonds/chem_comp';
+import { ComponentAtom } from '../../../../mol-model-formats/structure/property/atoms/chem_comp';
+import { CCD_Database, CCD_Schema } from '../../../../mol-io/reader/cif/schema/ccd';
 
 require('util.promisify').shim();
 const readFile = util.promisify(fs.readFile);
 
 export const wwPDB_chemCompBond: AttachModelProperty = async ({ model, params }) => {
-    const table = await getChemCompBondTable(getTablePath(params));
+    const table = await getChemCompBondTable(getBondTablePath(params));
     const data = ComponentBond.chemCompBondFromTable(model, table);
     const entries = ComponentBond.getEntriesFromChemCompBond(data);
     return ComponentBond.Provider.set(model, { entries, data });
@@ -37,8 +39,32 @@ async function getChemCompBondTable(path: string): Promise<mmCIF_Database['chem_
     return chemCompBondTable;
 }
 
-function getTablePath(params: any) {
+function getBondTablePath(params: any) {
     const path = getParam<string>(params, 'wwPDB', 'chemCompBondTablePath');
     if (!path) throw new Error(`wwPDB 'chemCompBondTablePath' not set!`);
     return path;
+}
+
+export const wwPDB_chemCompAtom: AttachModelProperty = async ({ model, params }) => {
+    const table = await getChemCompAtomTable(getAtomTablePath(params));
+    const data = ComponentAtom.chemCompAtomFromTable(model, table);
+    const entries = ComponentAtom.getEntriesFromChemCompAtom(data);
+    return ComponentAtom.Provider.set(model, { entries, data });
+};
+
+let chemCompAtomTable: CCD_Database['chem_comp_atom'];
+async function getChemCompAtomTable(path: string): Promise<CCD_Database['chem_comp_atom']> {
+    if (!chemCompAtomTable) {
+        const parsed = await CIF.parse(await read(path)).run();
+        if (parsed.isError) throw new Error(parsed.toString());
+        const table = CIF.toDatabase(CCD_Schema, parsed.result.blocks[0]);
+        chemCompAtomTable = table.chem_comp_atom;
+    }
+    return chemCompAtomTable;
+}
+
+function getAtomTablePath(params: any) {
+    const path = getParam<string>(params, 'wwPDB', 'chemCompAtomTablePath');
+    if (!path) throw new Error(`wwPDB 'chemCompAtomTablePath' not set!`);
+    return path;
 }

+ 3 - 2
src/servers/model/properties/wwpdb.ts

@@ -5,12 +5,13 @@
  */
 
 import { AttachModelProperties } from '../property-provider';
-import { wwPDB_chemCompBond } from './providers/wwpdb';
+import { wwPDB_chemCompBond, wwPDB_chemCompAtom } from './providers/wwpdb';
 
 export const attachModelProperties: AttachModelProperties = (args) => {
     // return a list of promises that start attaching the props in parallel
     // (if there are downloads etc.)
     return [
-        wwPDB_chemCompBond(args)
+        wwPDB_chemCompBond(args),
+        wwPDB_chemCompAtom(args)
     ];
 };

+ 12 - 2
src/servers/model/server/query.ts

@@ -22,12 +22,13 @@ import CifField = CifWriter.Field
 import { splitCamelCase } from '../../../mol-util/string';
 import { Encoder } from '../../../mol-io/writer/cif/encoder';
 import { Encoding } from './api';
-import { ComponentBond } from '../../../mol-model-formats/structure/property/bonds/comp';
+import { ComponentBond } from '../../../mol-model-formats/structure/property/bonds/chem_comp';
 import { SdfWriter } from '../../../mol-io/writer/sdf';
 import { MolWriter } from '../../../mol-io/writer/mol';
 import { Mol2Writer } from '../../../mol-io/writer/mol2';
 import { MolEncoder } from '../../../mol-io/writer/mol/encoder';
 import { Mol2Encoder } from '../../../mol-io/writer/mol2/encoder';
+import { ComponentAtom } from '../../../mol-model-formats/structure/property/atoms/chem_comp';
 
 export interface Stats {
     structure: StructureWrapper,
@@ -227,7 +228,16 @@ async function resolveJobEntry(entry: JobEntry, structure: StructureWrapper, enc
         encoder.writeCategory(_model_server_result, entry);
         encoder.writeCategory(_model_server_params, entry);
 
-        if (encoder instanceof MolEncoder || encoder instanceof Mol2Encoder) encoder.setComponentBondData(ComponentBond.Provider.get(structure.models[0])!);
+        if (entry.queryDefinition.niceName === 'Ligand') {
+            if (encoder instanceof MolEncoder) {
+                encoder.setComponentAtomData(ComponentAtom.Provider.get(structure.models[0])!);
+            }
+            if (encoder instanceof MolEncoder || encoder instanceof Mol2Encoder) {
+                encoder.setComponentBondData(ComponentBond.Provider.get(structure.models[0])!);
+            }
+        }
+        // TODO propagate data for cif/bcif as well?
+
         if (!entry.copyAllCategories && entry.queryDefinition.filter) encoder.setFilter(entry.queryDefinition.filter);
         if (result.length > 0) encode_mmCIF_categories(encoder, result, { copyAllCategories: entry.copyAllCategories });
         if (!entry.copyAllCategories && entry.queryDefinition.filter) encoder.setFilter();