Browse Source

iterators

JonStargaryen 4 years ago
parent
commit
b8f168ebf5
1 changed file with 221 additions and 144 deletions
  1. 221 144
      src/mol-io/writer/mol2/encoder.ts

+ 221 - 144
src/mol-io/writer/mol2/encoder.ts

@@ -9,12 +9,12 @@ import { LigandEncoder } from '../ligand-encoder';
 import { StringBuilder } from '../../../mol-util';
 import { getCategoryInstanceData } from '../cif/encoder/util';
 import { BondType } from '../../../mol-model/structure/model/types';
-// import { ComponentBond } from '../../../mol-model-formats/structure/property/bonds/comp';
+import { ComponentBond } from '../../../mol-model-formats/structure/property/bonds/comp';
 
-// // type MOL_TYPE = 'SMALL' | 'BIOPOLYMER' | 'PROTEIN' | 'NUCLEIC_ACID' | 'SACCHARIDE';
-// // type CHARGE_TYPE = 'NO_CHARGES' | 'DEL_RE' | 'GASTEIGER' | 'GAST_HUCK' | 'HUCKEL' | 'PULLMAN' | 'GAUSS80_CHARGES' | 'AMPAC_CHARGES' | 'MULLIKEN_CHARGES' | 'DICT_ CHARGES' | 'MMFF94_CHARGES' | 'USER_CHARGES';
-// const NON_METAL_ATOMS = 'H D B C N O F Si P S Cl As Se Br Te I At He Ne Ar Kr Xe Rn'.split(' ');
-// type BondData = { label_atom_id: string, order: number, aromatic: boolean };
+// type MOL_TYPE = 'SMALL' | 'BIOPOLYMER' | 'PROTEIN' | 'NUCLEIC_ACID' | 'SACCHARIDE';
+// type CHARGE_TYPE = 'NO_CHARGES' | 'DEL_RE' | 'GASTEIGER' | 'GAST_HUCK' | 'HUCKEL' | 'PULLMAN' | 'GAUSS80_CHARGES' | 'AMPAC_CHARGES' | 'MULLIKEN_CHARGES' | 'DICT_CHARGES' | 'MMFF94_CHARGES' | 'USER_CHARGES';
+// Element symbols treated as non-metals; atom labels are prefix-matched against these entries.
+const NON_METAL_ATOMS = 'H D B C N O F Si P S Cl As Se Br Te I At He Ne Ar Kr Xe Rn'.split(' ');
+// Bonds of a single atom: maps the label_atom_id of each bonded partner to the order and BondType flags of that bond.
+type BondMap = Map<string, { order: number, flags: number }>;
 
 // specification: http://chemyang.ccnu.edu.cn/ccb/server/AIMMS/mol2.pdf
 export class Mol2Encoder extends LigandEncoder {
@@ -50,8 +50,7 @@ export class Mol2Encoder extends LigandEncoder {
                 }
             });
 
-            // const sybyl = this.mapToSybyl(label_atom_id1, atom1.type_symbol, bondMap);
-            const sybyl = '?';
+            const sybyl = this.mapToSybyl(label_atom_id1, atom1.type_symbol, bondMap);
             StringBuilder.writeSafe(a, `${i1 + 1} ${label_atom_id1} ${atom1.Cartn_x.toFixed(3)} ${atom1.Cartn_y.toFixed(3)} ${atom1.Cartn_z.toFixed(3)} ${sybyl} 1 ${name} 0.000\n`);
         });
 
@@ -62,146 +61,224 @@ export class Mol2Encoder extends LigandEncoder {
         StringBuilder.writeSafe(this.out, `@<TRIPOS>SUBSTRUCTURE\n1 ${name} 1\n`);
     }
 
-    // private toArray(map: Map<string, { order: number, flags: number }>): BondData[] {
-    //     const array: BondData[] = [];
-    //     map.forEach((v, k) => array.push({ label_atom_id: k, order: v.order, aromatic: BondType.is(BondType.Flag.Aromatic, v.flags) }));
-    //     return array;
-    // }
+    /**
+     * Counts the entries of `map` that are accepted by `predicate`.
+     * @param map map whose entries are tested in iteration order
+     * @param predicate invoked with the key and the value of every entry
+     * @returns the number of entries for which the predicate returned a truthy result
+     */
+    private count<K, V>(map: Map<K, V>, predicate: (k: K, v: V) => boolean): number {
+        let matches = 0;
+        for (let it = map.entries(), step = it.next(); !step.done; step = it.next()) {
+            const [key, value] = step.value;
+            if (predicate(key, value)) matches++;
+        }
+        return matches;
+    }
+
+    /**
+     * Adds up the `order` of every bond stored in `map`.
+     * @param map bonds of one atom
+     * @returns the sum of all bond orders, 0 for an empty map
+     */
+    private orderSum(map: BondMap): number {
+        let total = 0;
+        map.forEach(bond => {
+            total += bond.order;
+        });
+        return total;
+    }
+
+    /**
+     * Tells whether an atom label starts with one of the symbols in NON_METAL_ATOMS.
+     * The check is a pure prefix match on the label, so any label that merely begins with
+     * a listed symbol (for instance one starting with 'C') is accepted as well.
+     * @param label_atom_id label of the bonded atom
+     * @returns true if the label starts with a non-metal symbol
+     */
+    private isNonMetalBond(label_atom_id: string): boolean {
+        for (const symbol of NON_METAL_ATOMS) {
+            if (label_atom_id.startsWith(symbol)) return true;
+        }
+        return false;
+    }
+
+    /**
+     * Builds a new map holding only the bonds whose partner label starts with a non-metal symbol.
+     * The input map is left untouched; the bond objects themselves are shared, not copied.
+     * @param map bonds of one atom
+     * @returns the subset of `map` that refers to non-metal partners
+     */
+    private extractNonmets(map: BondMap): BondMap {
+        const nonmets: BondMap = new Map();
+        map.forEach((bond, label_atom_id) => {
+            if (this.isNonMetalBond(label_atom_id)) nonmets.set(label_atom_id, bond);
+        });
+        return nonmets;
+    }
 
     // see https://www.sdsc.edu/CCMS/Packages/cambridge/pluto/atom_types.html
     // cannot account for covalently bound amino acids etc
-    // private mapToSybyl(label_atom_id: string, type_symbol: string, bondMap: ComponentBond.Entry) {
-    //     const partialBondMap = bondMap.map.get(label_atom_id)!;
-    //     const bond = this.toArray(partialBondMap);
-
-    //     const num_bond = bond.length;
-    //     const nonmet = bond.filter(b => this.isNonMetalBond(b));
-    //     const num_nonmet = nonmet.length;
-    //     const arom = bond.filter(e => e.aromatic);
-    //     const num_arom = arom.length;
-
-    //     // TODO if altLoc: 'Du' // 1.1
-    //     // TODO if end of polymeric bond: 'Du' // 1.2
-    //     if (type_symbol === 'D') return 'H'; // 1.3
-    //     if (type_symbol === 'P') return 'P.3'; // 1.4, 4mpo/ligand?encoding=mol2&auth_seq_id=203 (PO4)
-    //     if (type_symbol === 'Co' || type_symbol === 'Ru') return type_symbol + '.oh'; // 1.5
-    //     if (type_symbol === 'C') { // 1.6
-    //         if (num_bond >= 4 && bond.every(b => b.order === 1)) return 'C.3'; // 1.6.1, 3rga/ligand?encoding=mol2&auth_seq_id=307 (MOH)
-    //         if (num_bond === 3 && this.isCat(label_atom_id, bond, bondMap)) return 'C.cat'; // 1.6.2, 1acj/ligand?encoding=mol2&auth_seq_id=44 (ARG), 5vjb/ligand?encoding=mol2&auth_seq_id=101 (GAI)
-    //         if (num_bond >= 2 && num_arom >= 2) return 'C.ar'; // 1.6.3, 1acj/ligand?encoding=mol2&auth_seq_id=30 (PHE), 1acj/ligand?encoding=mol2&auth_seq_id=63 (TYR), 1acj/ligand?encoding=mol2&auth_seq_id=84 (TRP), 1acj/ligand?encoding=mol2&auth_seq_id=999 (THA)
-    //         if ((num_bond === 1 || num_bond === 2) && bond.filter(b => b.order === 3).length === 1) return 'C.1'; // 1.6.4, 3i04/ligand?encoding=mol2&auth_asym_id=C&auth_seq_id=900 (CYN)
-    //         return 'C.2'; // 1.6.5
-    //     }
-    //     if (type_symbol === 'O') { // 1.7
-    //         if (num_nonmet === 1) { // 1.7.1
-    //             if (this.isOC(nonmet[0], bondMap)) return 'O.co2'; // 1.7.1.1, 4h2v/ligand?encoding=mol2&auth_seq_id=403 (ACT)
-    //             if (this.isOP(nonmet[0], bondMap)) return 'O.co2'; // 1.7.1.2, 4mpo/ligand?encoding=mol2&auth_seq_id=203 (PO4)
-    //         }
-    //         if (num_nonmet >= 2 && bond.every(b => b.order === 1)) return 'O.3'; // 1.7.2, 1acj/ligand?encoding=mol2&auth_seq_id=601 (HOH), 3rga/ligand?encoding=mol2&auth_seq_id=307 (MOH)
-    //         return 'O.2'; // 1.7.3, 1acj/ligand?encoding=mol2&auth_seq_id=4 (SER)
-    //     }
-    //     if (type_symbol === 'N') { // 1.8
-    //         if (num_nonmet === 4 && bond.every(b => b.order === 1)) return 'N.4'; // 1.8.1, 4ikf/ligand?encoding=mol2&auth_seq_id=403 (NH4)
-    //         if (num_bond >= 2 && num_arom === 2) return 'N.ar'; // 1.8.2, 1acj/ligand?encoding=mol2&auth_seq_id=84 (TRP), 1acj/ligand?encoding=mol2&auth_seq_id=999 (THA)
-    //         if (num_nonmet === 1 && nonmet.some(b => b.order === 3)) return 'N.1'; // 1.8.3, 3i04/ligand?encoding=mol2&auth_asym_id=C&auth_seq_id=900 (CYN)
-    //         if (num_nonmet === 2 && (nonmet[0].order + nonmet[1].order === 4)) return 'N.1'; // 1.8.4, 3sbr/ligand?encoding=mol2&auth_seq_id=640&auth_asym_id=D (N2O)
-    //         if (num_nonmet === 3 && this.hasCOCS(nonmet, bondMap)) return 'N.am'; // 1.8.5, 3zfz/ligand?encoding=mol2&auth_seq_id=1669 (1W8)
-    //         if (num_nonmet === 3) { // 1.8.6
-    //             if (nonmet.filter(b => b.order > 1).length === 1) return 'N.pl3'; // 1.8.6.1, 4hon/ligand?encoding=mol2&auth_seq_id=407 (NO3)
-    //             if (nonmet.every(b => b.order === 1)) {
-    //                 if (this.isNpl3(nonmet, bondMap)) return 'N.pl3'; // 1.8.6.1.1 & 1.8.6.1.2, 1acj/ligand?encoding=mol2&auth_seq_id=44 (ARG), 5vjb/ligand?encoding=mol2&auth_seq_id=101 (GAI)
-    //             }
-    //             return 'N.3';
-    //         }
-    //         return 'N.2'; // 1.8.7, 1acj/ligand?encoding=mol2&auth_seq_id=4 (SER)
-    //     }
-    //     if (type_symbol === 'S') { // 1.9
-    //         if (num_nonmet === 3 && this.countOfOxygenWithSingleNonmet(nonmet, bondMap) === 1) return 'S.o'; // 1.9.1, 4i03/ligand?encoding=mol2&auth_seq_id=312 (DMS)
-    //         if (num_nonmet === 4 && this.countOfOxygenWithSingleNonmet(nonmet, bondMap) === 2) return 'S.o2'; // 1.9.2, 1udt/ligand?encoding=mol2&auth_seq_id=1000 (VIA)
-    //         if (num_nonmet >= 2 && bond.every(b => b.order === 1)) return 'S.3'; // 1.9.3, 3zfz/ligand?encoding=mol2&auth_seq_id=1669 (1W8)
-    //         return 'S.2'; // 1.9.4, 4gpc/ligand?encoding=mol2&auth_seq_id=902 (SO4)
-    //     }
-    //     if (type_symbol === 'Ti' || type_symbol === 'Cr') { // 1.10
-    //         return type_symbol + (num_bond <= 4 ? '.th' : '.oh'); // 1.10.1 & 1.10.2
-    //     }
-    //     return type_symbol; // 1.11
-    // }
-
-    // private isNonMetalBond(b: BondData): boolean {
-    //     // would be nice to have type_symbol here...
-    //     return NON_METAL_ATOMS.some(a => this.getLabel(b.label_atom_id).startsWith(a));
-    // }
-
-    // // 1.8.6.2.1: If one single bond is to an atom that forms a bond of type double, triple, aromatic or
-    // // delocalised .AND. one other single bond is to H then atom_type is N.pl3
-    // // 1.8.6.2.2: If one single bond is to an atom that forms a bond of type double, triple, aromatic or
-    // // delocalised .AND. neither of the other single bonds are to H .AND. sum_of_angles around N .ge. 350 deg then atom_type is N.pl3
-    // // TODO cannot check accurately for delocalized bonds
-    // // TODO cannot check accurately for 2nd criterion without coordinates
-    // private isNpl3(nonmet: BondData[], bondMap: ComponentBond.Entry): boolean {
-    //     for (let i = 0, il = nonmet.length; i < il; i++) {
-    //         const consumed = nonmet[i];
-    //         // determine index that fulfills 1st criterion
-    //         if (this.toArray(bondMap.map.get(consumed.label_atom_id)!).some(b => b.order > 1 || b.aromatic)) {
-    //             if (nonmet.filter(b => b !== consumed).filter(b => this.getLabel(b.label_atom_id).startsWith('H')).length === 1) return true; // 1.8.6.2.1
-    //             if (nonmet.filter(b => b !== consumed).every(b => !this.getLabel(b.label_atom_id).startsWith('H'))) return true; // 1.8.6.2.2
-    //         }
-    //     }
-    //     return false;
-    // }
-
-    // // If bond is to carbon .AND. carbon forms a total of 3 bonds, 2 of which are to an oxygen
-    // // forming only 1 non-metal bond then atom_type is O.co2
-    // private isOC(nonmet: BondData, bondMap: ComponentBond.Entry): boolean {
-    //     if (!this.getLabel(nonmet.label_atom_id).startsWith('C')) return false;
-    //     const carbonBonds = this.toArray(bondMap.map.get(nonmet.label_atom_id)!);
-    //     if (carbonBonds.length !== 3) return false;
-    //     return carbonBonds.filter(b => this.getLabel(b.label_atom_id).startsWith('O') &&
-    //         this.toArray(bondMap.map.get(b.label_atom_id)!).filter(ob => this.isNonMetalBond(ob)).length === 1).length === 2;
-    // }
-
-    // // If bond is to phosphorus .AND. phosphorus forms at least 2 bonds to an oxygen forming
-    // // only 1 non-metal bond then atom_type is O.co2
-    // private isOP(nonmet: BondData, bondMap: ComponentBond.Entry): boolean {
-    //     if (!this.getLabel(nonmet.label_atom_id).startsWith('P')) return false;
-    //     const phosphorusBonds = this.toArray(bondMap.map.get(nonmet.label_atom_id)!);
-    //     if (phosphorusBonds.length < 2) return false;
-    //     return phosphorusBonds.filter(b => this.getLabel(b.label_atom_id).startsWith('O') &&
-    //         this.toArray(bondMap.map.get(b.label_atom_id)!).filter(ob => this.isNonMetalBond(ob)).length === 1).length >= 2;
-    // }
-
-    // // If num_bond .eq. 3 .AND. all bonds are acyclic .AND. all bonds are to nitrogen .AND. each
-    // // nitrogen forms bonds to 2 other atoms both of which are not oxygen then atom_type is C.cat.
-    // private isCat(root: string, bond: BondData[], bondMap: ComponentBond.Entry): boolean {
-    //     if (bond.some(b => !this.getLabel(b.label_atom_id).startsWith('N'))) return false;
-    //     const nitrogenBonds = bond.map(b => b.label_atom_id).map(label_atom_id => this.toArray(bondMap.map.get(label_atom_id)!));
-
-    //     // ensure no cycles
-    //     const all = [];
-    //     const unique = new Set();
-    //     nitrogenBonds.forEach(a => a.map(b => b.label_atom_id).filter(lai => lai !== root).forEach(lai => { all.push(lai); unique.add(lai); }));
-    //     if (all.length !== unique.size) return false;
-
-    //     return nitrogenBonds.every(a => a.length >= 2 && a.every(b => b.label_atom_id !== 'O'));
-    // }
-
-    // private countOfOxygenWithSingleNonmet(nonmet: BondData[], bondMap: ComponentBond.Entry): number {
-    //     return nonmet.map(b => b.label_atom_id)
-    //         .filter(label_atom_id => this.getLabel(label_atom_id).startsWith('O'))
-    //         .map(label_atom_id => this.toArray(bondMap.map.get(label_atom_id)!)
-    //             .filter(b => this.isNonMetalBond(b)).length === 1)
-    //         .length;
-    // }
-
-    // // If num_nonmet .eq. 3 .AND. one bond is to C=O or C=S then atom_type is N.am
-    // private hasCOCS(nonmet: BondData[], bondMap: ComponentBond.Entry): boolean {
-    //     return nonmet.map(b => b.label_atom_id)
-    //         .filter(label_atom_id => this.getLabel(label_atom_id).startsWith('C'))
-    //         .filter(label_atom_id => this.toArray(bondMap.map.get(label_atom_id)!)
-    //             .filter(b => b.order === 2)
-    //             .filter(b => this.getLabel(b.label_atom_id).startsWith('O') || this.getLabel(b.label_atom_id).startsWith('S')))
-    //         .length === 1;
-    // }
+    /**
+     * Derives the atom-type string for one atom from its element symbol and the bonds registered
+     * for it in `bondMap`. The numbered trailing comments (1.x) name the rule of the typing scheme
+     * that the respective line implements; the rules are evaluated top to bottom and the first match wins.
+     * @param label_atom_id1 label of the atom, used as key into `bondMap.map`
+     * @param type_symbol1 element symbol of the atom
+     * @param bondMap bond information; `bondMap.map` yields the bonds of an atom by its label
+     * @returns the atom type, falling back to `type_symbol1` when no specific rule applies
+     */
+    private mapToSybyl(label_atom_id1: string, type_symbol1: string, bondMap: ComponentBond.Entry) {
+        // TODO if altLoc: 'Du' // 1.1
+        // TODO if end of polymeric bond: 'Du' // 1.2
+        if (type_symbol1 === 'D') return 'H'; // 1.3
+        if (type_symbol1 === 'P') return 'P.3'; // 1.4, 4mpo/ligand?encoding=mol2&auth_seq_id=203 (PO4)
+        if (type_symbol1 === 'Co' || type_symbol1 === 'Ru') return type_symbol1 + '.oh'; // 1.5
+
+        // NOTE(review): the non-null assertion assumes every atom label has an entry in bondMap.map - confirm for atoms without any bond
+        const bonds = bondMap.map.get(label_atom_id1)!;
+        const numBonds = bonds.size;
+
+        if (type_symbol1 === 'Ti' || type_symbol1 === 'Cr') { // 1.10
+            return type_symbol1 + (numBonds <= 4 ? '.th' : '.oh'); // 1.10.1 & 1.10.2
+        }
+        if (type_symbol1 === 'C') { // 1.6
+            if (numBonds >= 4 && this.count(bonds, (_k, v) => v.order === 1) >= 4) return 'C.3'; // 1.6.1, 3rga/ligand?encoding=mol2&auth_seq_id=307 (MOH)
+            if (numBonds === 3 && this.isCat(bonds, bondMap)) return 'C.cat'; // 1.6.2, 1acj/ligand?encoding=mol2&auth_seq_id=44 (ARG), 5vjb/ligand?encoding=mol2&auth_seq_id=101 (GAI)
+            if (numBonds >= 2 && this.count(bonds, (_k, v) => BondType.is(BondType.Flag.Aromatic, v.flags)) >= 2) return 'C.ar'; // 1.6.3, 1acj/ligand?encoding=mol2&auth_seq_id=30 (PHE), 1acj/ligand?encoding=mol2&auth_seq_id=63 (TYR), 1acj/ligand?encoding=mol2&auth_seq_id=84 (TRP), 1acj/ligand?encoding=mol2&auth_seq_id=999 (THA)
+            if ((numBonds === 1 || numBonds === 2) && this.count(bonds, (_k, v) => v.order === 3)) return 'C.1'; // 1.6.4, 3i04/ligand?encoding=mol2&auth_asym_id=C&auth_seq_id=900 (CYN)
+            return 'C.2'; // 1.6.5
+        }
+
+        // most of the time, bonds will equal non-metal bonds
+        // so `bonds` is reused as-is when every partner is a non-metal and a filtered copy is only built otherwise
+        const nonmets = this.count(bonds, (k, _v) => this.isNonMetalBond(k)) === bonds.size ? bonds : this.extractNonmets(bonds);
+        const numNonmets = nonmets.size;
+
+        if (type_symbol1 === 'O') { // 1.7
+            if (numNonmets === 1) { // 1.7.1
+                if (this.isOC(nonmets, bondMap)) return 'O.co2'; // 1.7.1.1, 4h2v/ligand?encoding=mol2&auth_seq_id=403 (ACT)
+                if (this.isOP(nonmets, bondMap)) return 'O.co2'; // 1.7.1.2, 4mpo/ligand?encoding=mol2&auth_seq_id=203 (PO4)
+            }
+            if (numNonmets >= 2 && this.count(bonds, (_k, v) => v.order === 1) === bonds.size) return 'O.3'; // 1.7.2, 1acj/ligand?encoding=mol2&auth_seq_id=601 (HOH), 3rga/ligand?encoding=mol2&auth_seq_id=307 (MOH)
+            return 'O.2'; // 1.7.3, 1acj/ligand?encoding=mol2&auth_seq_id=4 (SER)
+        }
+        if (type_symbol1 === 'N') { // 1.8
+            if (numNonmets === 4 && this.count(nonmets, (_k, v) => v.order === 1) === 4) return 'N.4'; // 1.8.1, 4ikf/ligand?encoding=mol2&auth_seq_id=403 (NH4)
+            if (numBonds >= 2 && this.count(bonds, (_k, v) => BondType.is(BondType.Flag.Aromatic, v.flags)) >= 2) return 'N.ar'; // 1.8.2, 1acj/ligand?encoding=mol2&auth_seq_id=84 (TRP), 1acj/ligand?encoding=mol2&auth_seq_id=999 (THA)
+            if (numNonmets === 1 && this.count(nonmets, (_k, v) => v.order === 3)) return 'N.1'; // 1.8.3, 3i04/ligand?encoding=mol2&auth_asym_id=C&auth_seq_id=900 (CYN)
+            if (numNonmets === 2 && this.orderSum(nonmets) === 4) return 'N.1'; // 1.8.4, 3sbr/ligand?encoding=mol2&auth_seq_id=640&auth_asym_id=D (N2O)
+            if (numNonmets === 3 && this.hasCOCS(nonmets, bondMap)) return 'N.am'; // 1.8.5, 3zfz/ligand?encoding=mol2&auth_seq_id=1669 (1W8)
+            if (numNonmets === 3) { // 1.8.6
+                if (this.count(nonmets, (_k, v) => v.order > 1) === 1) return 'N.pl3'; // 1.8.6.1, 4hon/ligand?encoding=mol2&auth_seq_id=407 (NO3)
+                if (this.count(nonmets, (_k, v) => v.order === 1) === 3) {
+                    if (this.isNpl3(nonmets, bondMap)) return 'N.pl3'; // 1.8.6.2.1 & 1.8.6.2.2, 1acj/ligand?encoding=mol2&auth_seq_id=44 (ARG), 5vjb/ligand?encoding=mol2&auth_seq_id=101 (GAI)
+                }
+                return 'N.3';
+            }
+            return 'N.2'; // 1.8.7, 1acj/ligand?encoding=mol2&auth_seq_id=4 (SER)
+        }
+        if (type_symbol1 === 'S') { // 1.9
+            if (numNonmets === 3 && this.countOfOxygenWithSingleNonmet(nonmets, bondMap) === 1) return 'S.o'; // 1.9.1, 4i03/ligand?encoding=mol2&auth_seq_id=312 (DMS)
+            if (numNonmets === 4 && this.countOfOxygenWithSingleNonmet(nonmets, bondMap) === 2) return 'S.o2'; // 1.9.2, 1udt/ligand?encoding=mol2&auth_seq_id=1000 (VIA)
+            if (numNonmets >= 2 && this.count(bonds, (_k, v) => v.order === 1) >= 2) return 'S.3'; // 1.9.3, 3zfz/ligand?encoding=mol2&auth_seq_id=1669 (1W8), 4gpc/ligand?encoding=mol2&auth_seq_id=902 (SO4)
+            return 'S.2'; // 1.9.4
+        }
+        return type_symbol1; // 1.11
+    }
+
+    /**
+     * Approximates rules 1.8.6.2.1 and 1.8.6.2.2 for a nitrogen with three single non-metal bonds.
+     *
+     * 1.8.6.2.1: If one single bond is to an atom that forms a bond of type double, triple, aromatic or
+     * delocalised .AND. one other single bond is to H then atom_type is N.pl3
+     * 1.8.6.2.2: If one single bond is to an atom that forms a bond of type double, triple, aromatic or
+     * delocalised .AND. neither of the other single bonds are to H .AND. sum_of_angles around N .ge. 350 deg
+     * then atom_type is N.pl3
+     *
+     * Only the first half of both rules is evaluated here: the method reports true as soon as one
+     * bonded partner itself has a bond with order > 1 or with the aromatic flag set.
+     * TODO cannot check accurately for delocalized bonds
+     * @param nonmets non-metal bonds of the nitrogen
+     * @param bondMap bond information used to look up the bonds of each partner
+     */
+    private isNpl3(nonmets: BondMap, bondMap: ComponentBond.Entry): boolean {
+        const isMultipleOrAromatic = (_k: string, v: { order: number, flags: number }) => v.order > 1 || BondType.is(BondType.Flag.Aromatic, v.flags);
+        for (let it = nonmets.keys(), step = it.next(); !step.done; step = it.next()) {
+            const partnerBonds = bondMap.map.get(step.value)!;
+            // TODO check accurately for 2nd criterion with coordinates
+            if (this.count(partnerBonds, isMultipleOrAromatic) > 0) return true;
+        }
+        return false;
+    }
+
+    /**
+     * Rule 1.7.1.1: If bond is to carbon .AND. carbon forms a total of 3 bonds, 2 of which are to an
+     * oxygen forming only 1 non-metal bond then atom_type is O.co2
+     * @param nonmets non-metal bonds of the oxygen; only the first entry is inspected
+     * @param bondMap bond information used to look up the bonds of the carbon and of its oxygens
+     * @returns true if the partner is a carbon with exactly 3 bonds, exactly 2 of them to such oxygens
+     */
+    private isOC(nonmets: BondMap, bondMap: ComponentBond.Entry): boolean {
+        const partner = nonmets.keys().next().value as string;
+        if (!partner.startsWith('C')) return false;
+
+        const carbonBonds = bondMap.map.get(partner)!;
+        if (carbonBonds.size !== 3) return false;
+
+        // oxygens on the carbon that have exactly one non-metal bond themselves
+        const terminalOxygens = this.count(carbonBonds, label_atom_id => {
+            if (!label_atom_id.startsWith('O')) return false;
+            const oxygenBonds = bondMap.map.get(label_atom_id)!;
+            return this.count(oxygenBonds, k => this.isNonMetalBond(k)) === 1;
+        });
+        return terminalOxygens === 2;
+    }
+
+    /**
+     * Rule 1.7.1.2: If bond is to phosphorus .AND. phosphorus forms at least 2 bonds to an oxygen
+     * forming only 1 non-metal bond then atom_type is O.co2
+     * @param nonmets non-metal bonds of the oxygen; only the first entry is inspected
+     * @param bondMap bond information used to look up the bonds of the phosphorus and of its oxygens
+     * @returns true if the partner is a phosphorus bonded to at least 2 such oxygens
+     */
+    private isOP(nonmets: BondMap, bondMap: ComponentBond.Entry): boolean {
+        const partner = nonmets.keys().next().value as string;
+        if (!partner.startsWith('P')) return false;
+
+        const phosphorusBonds = bondMap.map.get(partner)!;
+        if (phosphorusBonds.size < 2) return false;
+
+        // oxygens on the phosphorus that have exactly one non-metal bond themselves
+        const terminalOxygens = this.count(phosphorusBonds, label_atom_id => {
+            if (!label_atom_id.startsWith('O')) return false;
+            const oxygenBonds = bondMap.map.get(label_atom_id)!;
+            return this.count(oxygenBonds, k => this.isNonMetalBond(k)) === 1;
+        });
+        return terminalOxygens >= 2;
+    }
+
+    /**
+     * Rule 1.6.2: If num_bond .eq. 3 .AND. all bonds are acyclic .AND. all bonds are to nitrogen .AND.
+     * each nitrogen forms bonds to 2 other atoms both of which are not oxygen then atom_type is C.cat.
+     *
+     * The bond count of the carbon is checked by the caller; the acyclic criterion is not checked yet.
+     * @param currentBondMap bonds of the carbon under consideration
+     * @param bondMap bond information used to look up the bonds of each bonded nitrogen
+     * @returns true if every partner is a nitrogen with at least 2 bonds, none of them to an oxygen
+     */
+    private isCat(currentBondMap: BondMap, bondMap: ComponentBond.Entry): boolean {
+        for (let it = currentBondMap.keys(), step = it.next(); !step.done; step = it.next()) {
+            const partner = step.value;
+            if (!partner.startsWith('N')) return false;
+
+            const nitrogenBonds = bondMap.map.get(partner)!;
+            if (nitrogenBonds.size < 2) return false;
+            if (this.count(nitrogenBonds, k => k.startsWith('O')) > 0) return false;
+        }
+        // TODO ensure no cycles
+        return true;
+    }
+
+    /**
+     * Counts the bonded oxygens that form exactly one non-metal bond themselves
+     * (used by rules 1.9.1 and 1.9.2 to recognise S.o and S.o2).
+     * @param nonmets non-metal bonds of the atom under consideration
+     * @param bondMap bond information used to look up the bonds of each bonded oxygen
+     * @returns number of partners whose label starts with 'O' and that have exactly 1 non-metal bond
+     */
+    private countOfOxygenWithSingleNonmet(nonmets: BondMap, bondMap: ComponentBond.Entry): number {
+        let count = 0;
+        const iter = nonmets.keys();
+        let result = iter.next();
+        while (!result.done) {
+            const label_atom_id = result.value;
+            if (label_atom_id.startsWith('O')) {
+                const adjacentBonds = bondMap.map.get(label_atom_id)!;
+                // must be exactly 1: a mere truthiness check would accept every oxygen,
+                // because each one has at least the bond that led here
+                if (this.count(adjacentBonds, (k, _v) => this.isNonMetalBond(k)) === 1) count++;
+            }
+            result = iter.next();
+        }
+        return count;
+    }
+
+    /**
+     * Rule 1.8.5: If num_nonmet .eq. 3 .AND. one bond is to C=O or C=S then atom_type is N.am
+     *
+     * The number of non-metal bonds is checked by the caller.
+     * @param nonmets non-metal bonds of the nitrogen under consideration
+     * @param bondMap bond information used to look up the bonds of each bonded carbon
+     * @returns true if a bonded carbon has a bond of order 2 to a partner whose label starts with 'O' or 'S'
+     */
+    private hasCOCS(nonmets: BondMap, bondMap: ComponentBond.Entry): boolean {
+        const iter = nonmets.keys();
+        let result = iter.next();
+        while (!result.done) {
+            const label_atom_id = result.value;
+            if (label_atom_id.startsWith('C')) {
+                const adjacentBonds = bondMap.map.get(label_atom_id)!;
+                // parentheses are required: && binds tighter than ||, and the double-bond
+                // requirement has to apply to oxygen as well as to sulfur
+                if (this.count(adjacentBonds, (k, v) => (k.startsWith('O') || k.startsWith('S')) && v.order === 2)) return true;
+            }
+            result = iter.next();
+        }
+        return false;
+    }
 
     protected writeFullCategory<Ctx>(sb: StringBuilder, category: Category<Ctx>, context?: Ctx) {
         const { instance, source } = getCategoryInstanceData(category, context);