123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319 |
- /**
- * Copyright (c) 2020 mol* contributors, licensed under MIT, See LICENSE file for more info.
- *
- * @author Sebastian Bittrich <sebastian.bittrich@rcsb.org>
- */
- import { Category } from '../cif/encoder';
- import { LigandEncoder } from '../ligand-encoder';
- import { StringBuilder } from '../../../mol-util';
- import { getCategoryInstanceData } from '../cif/encoder/util';
- import { BondType } from '../../../mol-model/structure/model/types';
- import { ComponentBond } from '../../../mol-model-formats/structure/property/bonds/chem_comp';
// type MOL_TYPE = 'SMALL' | 'BIOPOLYMER' | 'PROTEIN' | 'NUCLEIC_ACID' | 'SACCHARIDE';
// type CHARGE_TYPE = 'NO_CHARGES' | 'DEL_RE' | 'GASTEIGER' | 'GAST_HUCK' | 'HUCKEL' | 'PULLMAN' | 'GAUSS80_CHARGES' | 'AMPAC_CHARGES' | 'MULLIKEN_CHARGES' | 'DICT_CHARGES' | 'MMFF94_CHARGES' | 'USER_CHARGES';

/**
 * Element symbols that are treated as non-metals. The encoder matches these as
 * prefixes of atom identifiers to decide whether a bond partner is a non-metal.
 */
const NON_METAL_ATOMS = 'H D B C N O F Si P S Cl As Se Br Te I At He Ne Ar Kr Xe Rn'.split(' ');

/** Bonds of a single atom: partner atom identifier -> bond order and bond-type flags. */
type BondMap = Map<string, { order: number, flags: number }>;
// specification: http://chemyang.ccnu.edu.cn/ccb/server/AIMMS/mol2.pdf
/**
 * Ligand encoder that writes a component as a Tripos MOL2 record consisting of
 * MOLECULE, ATOM, BOND and SUBSTRUCTURE sections. SYBYL atom types are derived
 * from the element symbol and the component's bond table.
 */
export class Mol2Encoder extends LigandEncoder {
    /** Collects the MOL2 sections; appended to the main builder by `encode()`. */
    private out: StringBuilder;

    /**
     * Writes one category as a MOL2 molecule. A '#'-prefixed header goes to the main
     * builder, the MOL2 sections are buffered in `out`.
     *
     * @param category category to encode
     * @param context optional context passed on to `getCategoryInstanceData`
     */
    _writeCategory<Ctx>(category: Category<Ctx>, context?: Ctx): void {
        const atomSection = StringBuilder.create();
        const bondSection = StringBuilder.create();
        const { instance, source } = getCategoryInstanceData(category, context);

        // write header
        const name = this.getName(instance, source);
        StringBuilder.writeSafe(this.builder, `# Name: ${name}\n# Created by ${this.encoder}\n\n`);

        // may be undefined if no bond table exists for this component: atoms are still written, bonds are not
        const bondMap = this.componentBondData.entries.get(name);
        let bondCount = 0;

        const atoms = this.getAtoms(instance, source);
        StringBuilder.writeSafe(atomSection, '@<TRIPOS>ATOM\n');
        StringBuilder.writeSafe(bondSection, '@<TRIPOS>BOND\n');
        atoms.forEach((atom1, label_atom_id1) => {
            const { index: i1 } = atom1;

            // an atom without any bond (e.g. a single-atom ligand) has no entry here
            const bonds = bondMap ? bondMap.map.get(label_atom_id1) : undefined;
            if (bonds) {
                bonds.forEach((bond, label_atom_id2) => {
                    const atom2 = atoms.get(label_atom_id2);
                    if (!atom2) return;

                    const { index: i2, type_symbol: type_symbol2 } = atom2;
                    // i1 < i2 emits each atom pair at most once
                    // NOTE(review): presumably the bond table lists every bond under both of its atoms - confirm
                    if (i1 < i2 && !this.skipHydrogen(type_symbol2)) {
                        const { order, flags } = bond;
                        const ar = BondType.is(BondType.Flag.Aromatic, flags);
                        StringBuilder.writeSafe(bondSection, `${++bondCount} ${i1 + 1} ${i2 + 1} ${ar ? 'ar' : order}`);
                        StringBuilder.newline(bondSection);
                    }
                });
            }

            const sybyl = this.mapToSybyl(label_atom_id1, atom1.type_symbol, bondMap);
            StringBuilder.writeSafe(atomSection, `${i1 + 1} ${label_atom_id1} ${atom1.Cartn_x.toFixed(3)} ${atom1.Cartn_y.toFixed(3)} ${atom1.Cartn_z.toFixed(3)} ${sybyl} 1 ${name} 0.000\n`);
        });

        // could write something like 'SMALL\nNO_CHARGES', for now let's write **** indicating non-optional, yet missing, string values
        StringBuilder.writeSafe(this.out, `@<TRIPOS>MOLECULE\n${name}\n${atoms.size} ${bondCount} 1\n****\n****\n\n`);
        StringBuilder.writeSafe(this.out, StringBuilder.getString(atomSection));
        StringBuilder.writeSafe(this.out, StringBuilder.getString(bondSection));
        StringBuilder.writeSafe(this.out, `@<TRIPOS>SUBSTRUCTURE\n1 ${name} 1\n`);
    }

    /** Number of map entries for which `predicate(key, value)` holds. */
    private count<K, V>(map: Map<K, V>, predicate: (k: K, v: V) => boolean): number {
        let count = 0;
        map.forEach((v, k) => {
            if (predicate(k, v)) count++;
        });
        return count;
    }

    /** Sum of the bond orders of all bonds in `map`. */
    private orderSum(map: BondMap): number {
        let sum = 0;
        map.forEach(bond => {
            sum += bond.order;
        });
        return sum;
    }

    /**
     * True if the atom identifier starts with one of the non-metal element symbols.
     * This is a prefix heuristic on the atom name, not a lookup of the actual element.
     */
    private isNonMetalBond(label_atom_id: string): boolean {
        return NON_METAL_ATOMS.some(a => label_atom_id.startsWith(a));
    }

    /** New map holding only the bonds whose partner is considered a non-metal. */
    private extractNonmets(map: BondMap): BondMap {
        const ret = new Map<string, { order: number, flags: number }>();
        map.forEach((v, k) => {
            if (this.isNonMetalBond(k)) ret.set(k, v);
        });
        return ret;
    }

    // see https://www.sdsc.edu/CCMS/Packages/cambridge/pluto/atom_types.html
    // cannot account for covalently bound amino acids etc
    /**
     * Derives the SYBYL atom type of an atom. Trailing comments give the rule number of
     * the reference above.
     *
     * @param label_atom_id1 identifier of the atom within its component
     * @param type_symbol1 element symbol of the atom
     * @param bondMap bond table of the component, if there is one
     * @returns the SYBYL atom type; the bare element symbol if no rule applies or if no
     * bond information is available for the atom
     */
    private mapToSybyl(label_atom_id1: string, type_symbol1: string, bondMap: ComponentBond.Entry | undefined): string {
        // TODO if altLoc: 'Du' // 1.1
        // TODO if end of polymeric bond: 'Du' // 1.2
        if (type_symbol1 === 'D') return 'H'; // 1.3
        if (type_symbol1 === 'P') return 'P.3'; // 1.4, 4mpo/ligand?encoding=mol2&auth_seq_id=203 (PO4)
        if (type_symbol1 === 'Co' || type_symbol1 === 'Ru') return type_symbol1 + '.oh'; // 1.5

        // all remaining rules depend on connectivity
        if (!bondMap) return type_symbol1;
        const bonds = bondMap.map.get(label_atom_id1);
        if (!bonds) return type_symbol1;

        if (type_symbol1 === 'Ti' || type_symbol1 === 'Cr') { // 1.10
            return type_symbol1 + (bonds.size <= 4 ? '.th' : '.oh'); // 1.10.1 & 1.10.2
        }
        if (type_symbol1 === 'C') return this.carbonType(bonds, bondMap); // 1.6

        if (type_symbol1 === 'O' || type_symbol1 === 'N' || type_symbol1 === 'S') {
            // most of the time, bonds will equal non-metal bonds
            const nonmets = this.count(bonds, (k, _v) => this.isNonMetalBond(k)) === bonds.size ? bonds : this.extractNonmets(bonds);
            if (type_symbol1 === 'O') return this.oxygenType(bonds, nonmets, bondMap); // 1.7
            if (type_symbol1 === 'N') return this.nitrogenType(bonds, nonmets, bondMap); // 1.8
            return this.sulfurType(bonds, nonmets, bondMap); // 1.9
        }
        return type_symbol1; // 1.11
    }

    /** SYBYL type of a carbon atom with the given bonds (rule 1.6). */
    private carbonType(bonds: BondMap, bondMap: ComponentBond.Entry): string {
        const numBonds = bonds.size;
        if (numBonds >= 4 && this.count(bonds, (_k, v) => v.order === 1) >= 4) return 'C.3'; // 1.6.1, 3rga/ligand?encoding=mol2&auth_seq_id=307 (MOH)
        if (numBonds === 3 && this.isCat(bonds, bondMap)) return 'C.cat'; // 1.6.2, 1acj/ligand?encoding=mol2&auth_seq_id=44 (ARG), 5vjb/ligand?encoding=mol2&auth_seq_id=101 (GAI)
        if (numBonds >= 2 && this.count(bonds, (_k, v) => BondType.is(BondType.Flag.Aromatic, v.flags)) >= 2) return 'C.ar'; // 1.6.3, 1acj/ligand?encoding=mol2&auth_seq_id=30 (PHE), 1acj/ligand?encoding=mol2&auth_seq_id=63 (TYR), 1acj/ligand?encoding=mol2&auth_seq_id=84 (TRP), 1acj/ligand?encoding=mol2&auth_seq_id=999 (THA)
        if ((numBonds === 1 || numBonds === 2) && this.count(bonds, (_k, v) => v.order === 3) > 0) return 'C.1'; // 1.6.4, 3i04/ligand?encoding=mol2&auth_asym_id=C&auth_seq_id=900 (CYN)
        return 'C.2'; // 1.6.5
    }

    /** SYBYL type of an oxygen atom (rule 1.7); `nonmets` are its bonds to non-metals. */
    private oxygenType(bonds: BondMap, nonmets: BondMap, bondMap: ComponentBond.Entry): string {
        const numNonmets = nonmets.size;
        if (numNonmets === 1) { // 1.7.1
            if (this.isOC(nonmets, bondMap)) return 'O.co2'; // 1.7.1.1, 4h2v/ligand?encoding=mol2&auth_seq_id=403 (ACT)
            if (this.isOP(nonmets, bondMap)) return 'O.co2'; // 1.7.1.2, 4mpo/ligand?encoding=mol2&auth_seq_id=203 (PO4)
        }
        if (numNonmets >= 2 && this.count(bonds, (_k, v) => v.order === 1) === bonds.size) return 'O.3'; // 1.7.2, 1acj/ligand?encoding=mol2&auth_seq_id=601 (HOH), 3rga/ligand?encoding=mol2&auth_seq_id=307 (MOH)
        return 'O.2'; // 1.7.3, 1acj/ligand?encoding=mol2&auth_seq_id=4 (SER)
    }

    /** SYBYL type of a nitrogen atom (rule 1.8); `nonmets` are its bonds to non-metals. */
    private nitrogenType(bonds: BondMap, nonmets: BondMap, bondMap: ComponentBond.Entry): string {
        const numBonds = bonds.size;
        const numNonmets = nonmets.size;
        if (numNonmets === 4 && this.count(nonmets, (_k, v) => v.order === 1) === 4) return 'N.4'; // 1.8.1, 4ikf/ligand?encoding=mol2&auth_seq_id=403 (NH4)
        if (numBonds >= 2 && this.count(bonds, (_k, v) => BondType.is(BondType.Flag.Aromatic, v.flags)) >= 2) return 'N.ar'; // 1.8.2, 1acj/ligand?encoding=mol2&auth_seq_id=84 (TRP), 1acj/ligand?encoding=mol2&auth_seq_id=999 (THA)
        if (numNonmets === 1 && this.count(nonmets, (_k, v) => v.order === 3) > 0) return 'N.1'; // 1.8.3, 3i04/ligand?encoding=mol2&auth_asym_id=C&auth_seq_id=900 (CYN)
        if (numNonmets === 2 && this.orderSum(nonmets) === 4) return 'N.1'; // 1.8.4, 3sbr/ligand?encoding=mol2&auth_seq_id=640&auth_asym_id=D (N2O)
        if (numNonmets === 3 && this.hasCOCS(nonmets, bondMap)) return 'N.am'; // 1.8.5, 3zfz/ligand?encoding=mol2&auth_seq_id=1669 (1W8)
        if (numNonmets === 3) { // 1.8.6
            if (this.count(nonmets, (_k, v) => v.order > 1) === 1) return 'N.pl3'; // 1.8.6.1, 4hon/ligand?encoding=mol2&auth_seq_id=407 (NO3)
            if (this.count(nonmets, (_k, v) => v.order === 1) === 3) {
                if (this.isNpl3(nonmets, bondMap)) return 'N.pl3'; // 1.8.6.2.1 & 1.8.6.2.2, 1acj/ligand?encoding=mol2&auth_seq_id=44 (ARG), 5vjb/ligand?encoding=mol2&auth_seq_id=101 (GAI)
            }
            return 'N.3';
        }
        return 'N.2'; // 1.8.7, 1acj/ligand?encoding=mol2&auth_seq_id=4 (SER)
    }

    /** SYBYL type of a sulfur atom (rule 1.9); `nonmets` are its bonds to non-metals. */
    private sulfurType(bonds: BondMap, nonmets: BondMap, bondMap: ComponentBond.Entry): string {
        const numNonmets = nonmets.size;
        const terminalOxygens = this.countOfOxygenWithSingleNonmet(nonmets, bondMap);
        if (numNonmets === 3 && terminalOxygens === 1) return 'S.o'; // 1.9.1, 4i03/ligand?encoding=mol2&auth_seq_id=312 (DMS)
        if (numNonmets === 4 && terminalOxygens === 2) return 'S.o2'; // 1.9.2, 1udt/ligand?encoding=mol2&auth_seq_id=1000 (VIA)
        if (numNonmets >= 2 && this.count(bonds, (_k, v) => v.order === 1) >= 2) return 'S.3'; // 1.9.3, 3zfz/ligand?encoding=mol2&auth_seq_id=1669 (1W8), 4gpc/ligand?encoding=mol2&auth_seq_id=902 (SO4)
        return 'S.2'; // 1.9.4
    }

    // 1.8.6.2.1: If one single bond is to an atom that forms a bond of type double, triple, aromatic or
    // delocalised .AND. one other single bond is to H then atom_type is N.pl3
    // 1.8.6.2.2: If one single bond is to an atom that forms a bond of type double, triple, aromatic or
    // delocalised .AND. neither of the other single bonds are to H .AND. sum_of_angles around N .ge. 350 deg then atom_type is N.pl3
    // TODO cannot check accurately for delocalized bonds
    /** True if any bonded atom itself forms a bond of order > 1 or an aromatic bond. */
    private isNpl3(nonmets: BondMap, bondMap: ComponentBond.Entry): boolean {
        const iter = nonmets.keys();
        for (let result = iter.next(); !result.done; result = iter.next()) {
            const adjacentBonds = bondMap.map.get(result.value);
            if (!adjacentBonds) continue;
            if (this.count(adjacentBonds, (_k, v) => v.order > 1 || BondType.is(BondType.Flag.Aromatic, v.flags)) > 0) {
                // TODO check accurately for 2nd criterion with coordinates
                return true;
            }
        }
        return false;
    }

    // If bond is to carbon .AND. carbon forms a total of 3 bonds, 2 of which are to an oxygen
    // forming only 1 non-metal bond then atom_type is O.co2
    /** Expects `nonmets` to hold the single non-metal bond of the oxygen; only its first entry is inspected. */
    private isOC(nonmets: BondMap, bondMap: ComponentBond.Entry): boolean {
        const first = nonmets.keys().next();
        if (first.done) return false;
        const carbonId = first.value;
        if (!carbonId.startsWith('C')) return false;
        const carbonBonds = bondMap.map.get(carbonId);
        if (!carbonBonds || carbonBonds.size !== 3) return false;
        return this.countOfOxygenWithSingleNonmet(carbonBonds, bondMap) === 2;
    }

    // If bond is to phosphorus .AND. phosphorus forms at least 2 bonds to an oxygen forming
    // only 1 non-metal bond then atom_type is O.co2
    /** Expects `nonmets` to hold the single non-metal bond of the oxygen; only its first entry is inspected. */
    private isOP(nonmets: BondMap, bondMap: ComponentBond.Entry): boolean {
        const first = nonmets.keys().next();
        if (first.done) return false;
        const phosphorusId = first.value;
        if (!phosphorusId.startsWith('P')) return false;
        const phosphorusBonds = bondMap.map.get(phosphorusId);
        if (!phosphorusBonds || phosphorusBonds.size < 2) return false;
        return this.countOfOxygenWithSingleNonmet(phosphorusBonds, bondMap) >= 2;
    }

    // If num_bond .eq. 3 .AND. all bonds are acyclic .AND. all bonds are to nitrogen .AND. each
    // nitrogen forms bonds to 2 other atoms both of which are not oxygen then atom_type is C.cat.
    /** The caller checks the bond count; cycles are not detected (see TODO below). */
    private isCat(currentBondMap: BondMap, bondMap: ComponentBond.Entry): boolean {
        const iter1 = currentBondMap.keys();
        for (let result1 = iter1.next(); !result1.done; result1 = iter1.next()) {
            const label_atom_id = result1.value;
            if (!label_atom_id.startsWith('N')) return false;

            const adjacentBonds = bondMap.map.get(label_atom_id);
            if (!adjacentBonds || adjacentBonds.size < 2) return false;

            const iter2 = adjacentBonds.keys();
            for (let result2 = iter2.next(); !result2.done; result2 = iter2.next()) {
                if (result2.value.startsWith('O')) return false;
            }
        }
        // TODO ensure no cycles
        return true;
    }

    /**
     * Number of bond partners in `nonmets` whose identifier starts with 'O' and that form
     * exactly one bond to a non-metal themselves (i.e. terminal oxygens).
     */
    private countOfOxygenWithSingleNonmet(nonmets: BondMap, bondMap: ComponentBond.Entry): number {
        let count = 0;
        nonmets.forEach((_bond, label_atom_id) => {
            if (!label_atom_id.startsWith('O')) return;
            const adjacentBonds = bondMap.map.get(label_atom_id);
            // must be exactly one: an oxygen bridging two non-metals is not terminal
            if (adjacentBonds && this.count(adjacentBonds, (k, _v) => this.isNonMetalBond(k)) === 1) count++;
        });
        return count;
    }

    // If num_nonmet .eq. 3 .AND. one bond is to C=O or C=S then atom_type is N.am
    /** True if a bonded carbon forms a double bond to an atom whose identifier starts with 'O' or 'S'. */
    private hasCOCS(nonmets: BondMap, bondMap: ComponentBond.Entry): boolean {
        const iter = nonmets.keys();
        for (let result = iter.next(); !result.done; result = iter.next()) {
            const label_atom_id = result.value;
            if (!label_atom_id.startsWith('C')) continue;
            const adjacentBonds = bondMap.map.get(label_atom_id);
            if (!adjacentBonds) continue;
            // parentheses are essential: the order check applies to oxygen as well as sulfur
            if (this.count(adjacentBonds, (k, v) => (k.startsWith('O') || k.startsWith('S')) && v.order === 2) > 0) return true;
        }
        return false;
    }

    /**
     * Writes every field of the category as a '# <category>.<field>: <value>' line,
     * followed by an empty line. Only the first source and its first key are used.
     */
    protected writeFullCategory<Ctx>(sb: StringBuilder, category: Category<Ctx>, context?: Ctx) {
        const { instance, source } = getCategoryInstanceData(category, context);
        const fields = instance.fields;
        const src = source[0];
        const data = src.data;

        const it = src.keys();
        const key = it.move();

        for (let _f = 0; _f < fields.length; _f++) {
            const f = fields[_f]!;

            StringBuilder.writeSafe(sb, `# ${category.name}.${f.name}: `);
            const val = f.value(key, data, 0);
            StringBuilder.writeSafe(sb, val as string);
            StringBuilder.newline(sb);
        }
        StringBuilder.newline(sb);
    }

    /**
     * Finalizes the output: appends the meta information (if an error is set or meta
     * information was requested) and then the buffered MOL2 sections to the main builder.
     */
    encode(): void {
        // write meta-information, do so before the MOL2 sections
        if (this.error || this.metaInformation) {
            StringBuilder.writeSafe(this.builder, StringBuilder.getString(this.meta));
        }

        StringBuilder.writeSafe(this.builder, StringBuilder.getString(this.out));

        this.encoded = true;
    }

    /** All arguments are forwarded unchanged to the `LigandEncoder` constructor. */
    constructor(encoder: string, metaInformation: boolean, hydrogens: boolean) {
        super(encoder, metaInformation, hydrogens);
        this.out = StringBuilder.create();
    }
}
|