Browse Source

Merge pull request #314 from ptourlas/feature/formal-charge-labels

Feature/formal charge labels
David Sehnal 3 years ago
parent
commit
e94ecf2a0b

+ 109 - 1
src/mol-io/reader/_spec/mol.spec.ts

@@ -1,5 +1,12 @@
+/**
+ * Copyright (c) 2019-2022 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ * @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com>
+ */
 
-import { parseMol } from '../mol/parser';
+import { parseMol, formalChargeMapper } from '../mol/parser';
 
 const MolString = `2244
   -OEChem-04072009073D
@@ -49,6 +56,48 @@ const MolString = `2244
  13 20  1  0  0  0  0
 M  END`;
 
+/**
+ * V2000 fixture with four atoms (C, N, S, P) and three bonds, and no
+ * `M  CHG` property line. The only non-zero charge field is the raw code `1`
+ * in the atom line of the first atom (C).
+ */
+const MolStringWithAtomBlockCharge = `
+  Ketcher  1 72215442D 1   1.00000     0.00000     0
+
+  4  3  0  0  0  0            999 V2000
+    0.0000    0.0000    0.0000 C   0  1  0  0  0  0  0  0  0  0  0  0
+    0.8660    0.5000    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
+   -0.8660    0.5000    0.0000 S   0  0  0  0  0  0  0  0  0  0  0  0
+    0.0000   -1.0000    0.0000 P   0  0  0  0  0  0  0  0  0  0  0  0
+  1  4  2  0  0  0  0
+  3  1  1  0  0  0  0
+  2  1  1  0  0  0  0
+M  END`;
+
+/**
+ * V2000 fixture with four atoms (C, N, S, P) and three bonds. All atom-line
+ * charge fields are 0; a single `M  CHG` line declares three charges:
+ * atom 2 -> -1, atom 3 -> 1, atom 4 -> 1.
+ */
+const MolStringWithPropertyBlockCharge = `
+  Ketcher  1 72215442D 1   1.00000     0.00000     0
+
+  4  3  0  0  0  0            999 V2000
+    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    0.8660    0.5000    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
+   -0.8660    0.5000    0.0000 S   0  0  0  0  0  0  0  0  0  0  0  0
+    0.0000   -1.0000    0.0000 P   0  0  0  0  0  0  0  0  0  0  0  0
+  1  4  2  0  0  0  0
+  3  1  1  0  0  0  0
+  2  1  1  0  0  0  0
+M  CHG  3   2  -1   3   1   4   1
+M  END`;
+
+/**
+ * V2000 fixture with four atoms (C, N, S, P) and three bonds. The same three
+ * charges as in the single-line fixture (atom 2 -> -1, atom 3 -> 1,
+ * atom 4 -> 1) are split across two `M  CHG` lines.
+ */
+const MolStringWithMultipleChargeLines = `
+  Ketcher  1 72215442D 1   1.00000     0.00000     0
+
+  4  3  0  0  0  0            999 V2000
+    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    0.8660    0.5000    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
+   -0.8660    0.5000    0.0000 S   0  0  0  0  0  0  0  0  0  0  0  0
+    0.0000   -1.0000    0.0000 P   0  0  0  0  0  0  0  0  0  0  0  0
+  1  4  2  0  0  0  0
+  3  1  1  0  0  0  0
+  2  1  1  0  0  0  0
+M  CHG  1   2  -1
+M  CHG  2   3   1   4   1
+M  END`;
+
 describe('mol reader', () => {
     it('basic', async () => {
         const parsed = await parseMol(MolString).run();
@@ -70,4 +119,63 @@ describe('mol reader', () => {
         expect(bonds.atomIdxB.value(20)).toBe(20);
         expect(bonds.order.value(20)).toBe(1);
     });
+    // Charges declared on a single `M  CHG` line must show up in `formalCharges`.
+    it('property block charges', async () => {
+        const parsed = await parseMol(MolStringWithPropertyBlockCharge).run();
+        if (parsed.isError) {
+            throw new Error(parsed.message);
+        }
+        const { formalCharges } = parsed.result;
+
+        // The fixture declares three (atom index, charge) pairs.
+        expect(formalCharges.atomIdx.rowCount).toBe(3);
+        expect(formalCharges.charge.rowCount).toBe(3);
+
+        // Atom indices are reported as written in the file.
+        // Only the first two of the three rows are value-checked here.
+        expect(formalCharges.atomIdx.value(0)).toBe(2);
+        expect(formalCharges.atomIdx.value(1)).toBe(3);
+
+        expect(formalCharges.charge.value(0)).toBe(-1);
+        expect(formalCharges.charge.value(1)).toBe(1);
+    });
+    // Charges spread over two `M  CHG` lines must be accumulated into one result.
+    it('multiple charge lines', async () => {
+        const parsed = await parseMol(MolStringWithMultipleChargeLines).run();
+        if (parsed.isError) {
+            throw new Error(parsed.message);
+        }
+        const { formalCharges } = parsed.result;
+
+        // One entry from the first line plus two from the second.
+        expect(formalCharges.atomIdx.rowCount).toBe(3);
+        expect(formalCharges.charge.rowCount).toBe(3);
+
+        // Row 0 comes from the first `M  CHG` line, row 1 from the second.
+        // The third row is not value-checked here.
+        expect(formalCharges.atomIdx.value(0)).toBe(2);
+        expect(formalCharges.atomIdx.value(1)).toBe(3);
+
+        expect(formalCharges.charge.value(0)).toBe(-1);
+        expect(formalCharges.charge.value(1)).toBe(1);
+    });
+
+    // Pins the raw atom-block code -> formal charge table for all codes 0-7.
+    // Codes 0 and 4 are both expected to map to a charge of 0.
+    it('atom block charge mapping', async () => {
+        expect(formalChargeMapper(7)).toBe(-3);
+        expect(formalChargeMapper(6)).toBe(-2);
+        expect(formalChargeMapper(5)).toBe(-1);
+        expect(formalChargeMapper(0)).toBe(0);
+        expect(formalChargeMapper(3)).toBe(1);
+        expect(formalChargeMapper(2)).toBe(2);
+        expect(formalChargeMapper(1)).toBe(3);
+        expect(formalChargeMapper(4)).toBe(0);
+    });
+    // A file without `M  CHG` lines yields empty `formalCharges`; the charge
+    // field of each atom line is still exposed through `atoms.formal_charge`.
+    it('atom block charges', async () => {
+        const parsed = await parseMol(MolStringWithAtomBlockCharge).run();
+        if (parsed.isError) {
+            throw new Error(parsed.message);
+        }
+        const { atoms, formalCharges } = parsed.result;
+
+        /* No property block charges */
+        expect(formalCharges.atomIdx.rowCount).toBe(0);
+        expect(formalCharges.charge.rowCount).toBe(0);
+
+        // These are the raw codes from the atom lines, not values passed
+        // through formalChargeMapper (which maps code 1 to 3).
+        expect(atoms.formal_charge.value(0)).toBe(1);
+        expect(atoms.formal_charge.value(1)).toBe(0);
+        expect(atoms.formal_charge.value(2)).toBe(0);
+        expect(atoms.formal_charge.value(3)).toBe(0);
+    });
 });

+ 44 - 0
src/mol-io/reader/_spec/sdf.spec.ts

@@ -1,3 +1,10 @@
+/**
+ * Copyright (c) 2020-2022 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Sebastian Bittrich <sebastian.bittrich@rcsb.org>
+ * @author David Sehnal <david.sehnal@gmail.com>
+ * @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com>
+ */
 
 import { parseSdf } from '../sdf/parser';
 
@@ -458,6 +465,38 @@ describe('sdf reader', () => {
         expect(compound3.dataItems.data.value(21)).toBe('2\n5\n10');
     });
 
+    // Checks the number of property-block charges parsed for the first three
+    // compounds of `SdfString` (the fixture is defined outside this hunk).
+    it('charge parsing in V2000', async () => {
+        const parsed = await parseSdf(SdfString).run();
+        if (parsed.isError) {
+            throw new Error(parsed.message);
+        }
+        const compound1 = parsed.result.compounds[0];
+        const compound2 = parsed.result.compounds[1];
+        const compound3 = parsed.result.compounds[2];
+
+        // Pull out the two formal-charge columns of each compound.
+        const formalCharges1 = {
+            atomIdx: compound1.molFile.formalCharges.atomIdx,
+            charge: compound1.molFile.formalCharges.charge
+        };
+        const formalCharges2 = {
+            atomIdx: compound2.molFile.formalCharges.atomIdx,
+            charge: compound2.molFile.formalCharges.charge
+        };
+        const formalCharges3 = {
+            atomIdx: compound3.molFile.formalCharges.atomIdx,
+            charge: compound3.molFile.formalCharges.charge
+        };
+
+        // Compounds 1 and 2 are expected to carry three charges each, compound 3 none.
+        expect(formalCharges1.atomIdx.rowCount).toBe(3);
+        expect(formalCharges2.atomIdx.rowCount).toBe(3);
+        expect(formalCharges3.atomIdx.rowCount).toBe(0);
+
+        // The two columns must always have the same length.
+        expect(formalCharges1.charge.rowCount === formalCharges1.atomIdx.rowCount).toBe(true);
+        expect(formalCharges2.charge.rowCount === formalCharges2.atomIdx.rowCount).toBe(true);
+        expect(formalCharges3.charge.rowCount === formalCharges3.atomIdx.rowCount).toBe(true);
+    });
+
+
     it('v3000', async () => {
         const parsed = await parseSdf(V3000SdfString).run();
         if (parsed.isError) {
@@ -486,6 +525,11 @@ describe('sdf reader', () => {
         expect(compound1.molFile.bonds.atomIdxB.value(10)).toBe(9);
         expect(compound1.molFile.bonds.order.value(10)).toBe(2);
 
+        expect(compound1.molFile.formalCharges.atomIdx.rowCount).toBe(13);
+        for (let i = 0; i < compound1.molFile.atoms.count; i++) {
+            expect(compound1.molFile.formalCharges.charge.value(i)).toBe(0);
+        }
+
         expect(compound1.dataItems.dataHeader.rowCount).toBe(2);
         expect(compound1.dataItems.data.rowCount).toBe(2);
 

+ 135 - 4
src/mol-io/reader/mol/parser.ts

@@ -1,7 +1,8 @@
 /**
- * Copyright (c) 2020 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ * Copyright (c) 2020-2022 mol* contributors, licensed under MIT, See LICENSE file for more info.
  *
  * @author David Sehnal <david.sehnal@gmail.com>
+ * @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com>
  */
 
 import { Column } from '../../../mol-data/db';
@@ -10,6 +11,7 @@ import { TokenColumnProvider as TokenColumn } from '../common/text/column/token'
 import { TokenBuilder, Tokenizer } from '../common/text/tokenizer';
 import { ReaderResult as Result } from '../result';
 
+
 /** Subset of the MolFile V2000 format */
 export interface MolFile {
     readonly title: string,
@@ -20,7 +22,8 @@ export interface MolFile {
         readonly x: Column<number>,
         readonly y: Column<number>,
         readonly z: Column<number>,
-        readonly type_symbol: Column<string>
+        readonly type_symbol: Column<string>,
+        readonly formal_charge: Column<number>
     },
     readonly bonds: {
         readonly count: number
@@ -28,6 +31,57 @@ export interface MolFile {
         readonly atomIdxB: Column<number>,
         readonly order: Column<number>
     }
+    readonly formalCharges: {
+        readonly atomIdx: Column<number>;
+        readonly charge: Column<number>;
+    }
+}
+
+/*
+    The atom lines in a .mol file have the following structure:
+
+    xxxxx.xxxxyyyyy.yyyyzzzzz.zzzz aaaddcccssshhhbbbvvvHHHrrriiimmmnnneee
+    ---------------------------------------------------------------------
+
+    Below is a breakdown of each component and its start/end indices:
+
+    xxxxx.xxxx  (X COORDINATE, 0-10)
+    yyyyy.yyyy  (Y COORDINATE, 10-20)
+    zzzzz.zzzz  (Z COORDINATE, 20-30)
+    _           (30 IS EMPTY)
+    aaa         (ATOM SYMBOL, 31-34)
+    dd          (MASS DIFF, 34-36)
+    ccc         (FORMAL CHARGE, 36-39)
+    sss         (ATOM STEREO PARITY, 39-42)
+    hhh         (HYDROGEN COUNT+1, 42-45)
+    bbb         (STEREO CARE BOX, 45-48)
+    vvv         (VALENCE, 48-51)
+    HHH         (H0 DESIGNATOR, 51-54)
+    rrr         (UNUSED, 54-57)
+    iii         (UNUSED, 57-60)
+    mmm         (ATOM-ATOM MAPPING NUMBER, 60-63)
+    nnn         (INVERSION/RETENTION FLAG, 63-66)
+    eee         (EXACT CHANGE FLAG, 66-69)
+*/
+
+/**
+ * Translates the raw charge code found in the formal charge field of a
+ * V2000 atom line into the formal charge it stands for.
+ *
+ * Mapping implemented below: 1 -> 3, 2 -> 2, 3 -> 1, 5 -> -1, 6 -> -2,
+ * 7 -> -3. Codes 0 and 4 both yield 0. Any other value is reported through
+ * `console.error` and also yields 0.
+ *
+ * @param key - The value found at the atom block.
+ * @returns The actual formal charge based on the mapping.
+ */
+export function formalChargeMapper(key: number) {
+    switch (key) {
+        case 7: return -3;
+        case 6: return -2;
+        case 5: return -1;
+        case 0: return 0;
+        case 3: return 1;
+        case 2: return 2;
+        case 1: return 3;
+        case 4: return 0;
+        default:
+            console.error(`Value ${key} is outside the 0-7 range, defaulting to 0.`);
+            return 0;
+    }
 }
 
 export function handleAtoms(tokenizer: Tokenizer, count: number): MolFile['atoms'] {
@@ -35,6 +89,7 @@ export function handleAtoms(tokenizer: Tokenizer, count: number): MolFile['atoms
     const y = TokenBuilder.create(tokenizer.data, count * 2);
     const z = TokenBuilder.create(tokenizer.data, count * 2);
     const type_symbol = TokenBuilder.create(tokenizer.data, count * 2);
+    const formal_charge = TokenBuilder.create(tokenizer.data, count * 2);
 
     for (let i = 0; i < count; ++i) {
         Tokenizer.markLine(tokenizer);
@@ -47,6 +102,8 @@ export function handleAtoms(tokenizer: Tokenizer, count: number): MolFile['atoms
         TokenBuilder.addUnchecked(z, tokenizer.tokenStart, tokenizer.tokenEnd);
         Tokenizer.trim(tokenizer, s + 31, s + 34);
         TokenBuilder.addUnchecked(type_symbol, tokenizer.tokenStart, tokenizer.tokenEnd);
+        Tokenizer.trim(tokenizer, s + 36, s + 39);
+        TokenBuilder.addUnchecked(formal_charge, tokenizer.tokenStart, tokenizer.tokenEnd);
         tokenizer.position = position;
     }
 
@@ -55,7 +112,8 @@ export function handleAtoms(tokenizer: Tokenizer, count: number): MolFile['atoms
         x: TokenColumn(x)(Column.Schema.float),
         y: TokenColumn(y)(Column.Schema.float),
         z: TokenColumn(z)(Column.Schema.float),
-        type_symbol: TokenColumn(type_symbol)(Column.Schema.str)
+        type_symbol: TokenColumn(type_symbol)(Column.Schema.str),
+        formal_charge: TokenColumn(formal_charge)(Column.Schema.int)
     };
 }
 
@@ -84,6 +142,76 @@ export function handleBonds(tokenizer: Tokenizer, count: number): MolFile['bonds
     };
 }
 
+/**
+ * Mutable accumulator for the entries of `M  CHG` property lines.
+ * `atomIdx[i]` and `charge[i]` belong to the same entry.
+ */
+interface FormalChargesRawData {
+    /** Atom indices, stored as written in the file. */
+    atomIdx: Array<number>;
+    /** Charges, stored as written in the file. */
+    charge: Array<number>;
+}
+/**
+ * Parses one `M  CHG` property line and appends its entries to `formalCharges`.
+ *
+ * All offsets are relative to `lineStart`. The entry count is read from
+ * offsets 6 to 9; each entry then takes 8 characters starting at offset 9:
+ * 4 for the atom index followed by 4 for the charge. Both values are
+ * converted with unary `+` and pushed unchanged.
+ *
+ * If the count does not parse to a number, the loop body never runs and
+ * nothing is appended.
+ *
+ * NOTE(review): nothing here checks that the line is long enough to hold the
+ * declared number of entries.
+ *
+ * @param tokenizer - Tokenizer over the file contents.
+ * @param lineStart - Offset of the first character of the `M  CHG` line.
+ * @param formalCharges - Accumulator that is mutated in place.
+ */
+export function handleFormalCharges(tokenizer: Tokenizer, lineStart: number, formalCharges: FormalChargesRawData) {
+
+    // Number of (atom index, charge) entries on this line.
+    Tokenizer.trim(tokenizer, lineStart + 6, lineStart + 9);
+    const numOfCharges = parseInt(Tokenizer.getTokenString(tokenizer));
+    for (let i = 0; i < numOfCharges; ++i) {
+        /*
+        M  CHG  3   1  -1   2   0   2  -1
+                |   |   |   |   |
+                |   |   |   |   |__charge2 (etc.)
+                |   |   |   |
+                |   |   |   |__atomIdx2
+                |   |   |
+                |   |   |__charge1
+                |   |
+                |   |__atomIdx1 (cursor at position 12)
+                |
+                |___numOfCharges
+        */
+        // Start of the i-th entry: entries are 8 characters wide, the first one begins at offset 9.
+        const offset = 9 + (i * 8);
+
+        // First 4 characters of the entry: atom index.
+        Tokenizer.trim(tokenizer, lineStart + offset, lineStart + offset + 4);
+        const _atomIdx = Tokenizer.getTokenString(tokenizer);
+        formalCharges.atomIdx.push(+_atomIdx);
+        // Last 4 characters of the entry: charge.
+        Tokenizer.trim(tokenizer, lineStart + offset + 4, lineStart + offset + 8);
+        const _charge = Tokenizer.getTokenString(tokenizer);
+        formalCharges.charge.push(+_charge);
+    }
+    /* Once the line is read, move to the next one. */
+    // NOTE(review): handlePropertiesBlock already calls eatLine before invoking this
+    // function; this second call presumably relies on Tokenizer.trim having moved the
+    // cursor back into the CHG line. Confirm against the Tokenizer implementation.
+    Tokenizer.eatLine(tokenizer);
+}
+
+/**
+ * Walks the lines starting at the tokenizer's current position until an
+ * `END` property type is found or the data is exhausted, and collects the
+ * formal charges declared on the way.
+ *
+ * Calls an appropriate handler based on the property type, which is read
+ * from offsets 3 to 6 of each line.
+ * (For now it only calls the formal charge handler, additional handlers can
+ * be added for other properties.)
+ *
+ * @param tokenizer - Tokenizer over the file contents; the callers visible
+ * here invoke this right after the bond block has been read.
+ * @returns Integer columns built from the collected atom indices and
+ * charges; both are empty when no `CHG` line was encountered.
+ */
+export function handlePropertiesBlock(tokenizer: Tokenizer): MolFile['formalCharges'] {
+
+    const _atomIdx: Array<number> = [];
+    const _charge: Array<number> = [];
+    const _formalCharges: FormalChargesRawData = { atomIdx: _atomIdx, charge: _charge };
+
+    while (tokenizer.position < tokenizer.length) {
+        // Remember where this line starts; the handlers address fields relative to it.
+        const { position: s } = tokenizer;
+
+        Tokenizer.trim(tokenizer, s + 3, s + 6);
+        const propertyType = Tokenizer.getTokenString(tokenizer);
+
+        // Stop at `END` without calling eatLine for that line.
+        if (propertyType === 'END') break;
+        Tokenizer.eatLine(tokenizer);
+
+        switch (propertyType) {
+            case 'CHG':
+                handleFormalCharges(tokenizer, s, _formalCharges);
+                break;
+            default:
+                // Any other property type is skipped.
+                break;
+        }
+    }
+
+    const formalCharges: MolFile['formalCharges'] = {
+        atomIdx: Column.ofIntArray(_formalCharges.atomIdx),
+        charge: Column.ofIntArray(_formalCharges.charge)
+    };
+    return formalCharges;
+}
+
 function parseInternal(data: string): Result<MolFile> {
     const tokenizer = Tokenizer(data);
 
@@ -98,12 +226,15 @@ function parseInternal(data: string): Result<MolFile> {
     const atoms = handleAtoms(tokenizer, atomCount);
     const bonds = handleBonds(tokenizer, bondCount);
 
+    const formalCharges = handlePropertiesBlock(tokenizer);
+
     const result: MolFile = {
         title,
         program,
         comment,
         atoms,
-        bonds
+        bonds,
+        formalCharges,
     };
     return Result.success(result);
 }

+ 10 - 0
src/mol-io/reader/sdf/parser-v3-util.ts

@@ -1,3 +1,10 @@
+/**
+ * Copyright (c) 2021-2022 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Jason Pattle <jpattle@exscientia.co.uk>
+ * @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com>
+ */
+
 import { Column } from '../../../mol-data/db';
 import { MolFile } from '../mol/parser';
 import { Tokenizer, TokenBuilder, Tokens } from '../common/text/tokenizer';
@@ -61,6 +68,9 @@ export function handleAtomsV3(
         y: TokenColumn(y)(Column.Schema.float),
         z: TokenColumn(z)(Column.Schema.float),
         type_symbol: TokenColumn(type_symbol)(Column.Schema.str),
+        /* No support for formal charge parsing in V3000 molfiles at the moment,
+        so all charges default to 0.*/
+        formal_charge: Column.ofConst(0, atomCount, Column.Schema.int)
     };
 }
 

+ 14 - 3
src/mol-io/reader/sdf/parser.ts

@@ -1,12 +1,14 @@
 /**
- * Copyright (c) 2020-2021 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ * Copyright (c) 2020-2022 mol* contributors, licensed under MIT, See LICENSE file for more info.
  *
  * @author Sebastian Bittrich <sebastian.bittrich@rcsb.org>
  * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ * @author Jason Pattle <jpattle@exscientia.co.uk>
+ * @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com>
  */
 
 import { Column } from '../../../mol-data/db';
-import { MolFile, handleAtoms, handleBonds } from '../mol/parser';
+import { MolFile, handleAtoms, handleBonds, handlePropertiesBlock } from '../mol/parser';
 import { Task } from '../../../mol-task';
 import { ReaderResult as Result } from '../result';
 import { Tokenizer, TokenBuilder } from '../common/text/tokenizer';
@@ -29,6 +31,7 @@ export interface SdfFile {
 
 
 const delimiter = '$$$$';
+
 function handleDataItems(tokenizer: Tokenizer): { dataHeader: Column<string>, data: Column<string> } {
     const dataHeader = TokenBuilder.create(tokenizer.data, 32);
     const data = TokenBuilder.create(tokenizer.data, 32);
@@ -93,12 +96,20 @@ function handleMolFile(tokenizer: Tokenizer) {
         return;
     }
 
+    /* No support for formal charge parsing in V3000 molfiles at the moment,
+    so all charges default to 0.*/
+    const nullFormalCharges: MolFile['formalCharges'] = {
+        atomIdx: Column.ofConst(0, atomCount, Column.Schema.int),
+        charge: Column.ofConst(0, atomCount, Column.Schema.int)
+    };
+
     const atoms = molIsV3 ? handleAtomsV3(tokenizer, atomCount) : handleAtoms(tokenizer, atomCount);
     const bonds = molIsV3 ? handleBondsV3(tokenizer, bondCount) : handleBonds(tokenizer, bondCount);
+    const formalCharges = molIsV3 ? nullFormalCharges : handlePropertiesBlock(tokenizer);
     const dataItems = handleDataItems(tokenizer);
 
     return {
-        molFile: { title, program, comment, atoms, bonds },
+        molFile: { title, program, comment, atoms, bonds, formalCharges },
         dataItems
     };
 }

+ 16 - 3
src/mol-model-formats/structure/mol.ts

@@ -1,12 +1,13 @@
 /**
- * Copyright (c) 2019-2020 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ * Copyright (c) 2019-2022 mol* contributors, licensed under MIT, See LICENSE file for more info.
  *
  * @author David Sehnal <david.sehnal@gmail.com>
  * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ * @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com>
  */
 
 import { Column, Table } from '../../mol-data/db';
-import { MolFile } from '../../mol-io/reader/mol/parser';
+import { MolFile, formalChargeMapper } from '../../mol-io/reader/mol/parser';
 import { MoleculeType } from '../../mol-model/structure/model/types';
 import { RuntimeContext, Task } from '../../mol-task';
 import { createModels } from './basic/parser';
@@ -18,13 +19,24 @@ import { IndexPairBonds } from './property/bonds/index-pair';
 import { Trajectory } from '../../mol-model/structure';
 
 export async function getMolModels(mol: MolFile, format: ModelFormat<any> | undefined, ctx: RuntimeContext) {
-    const { atoms, bonds } = mol;
+    const { atoms, bonds, formalCharges } = mol;
 
     const MOL = Column.ofConst('MOL', mol.atoms.count, Column.Schema.str);
     const A = Column.ofConst('A', mol.atoms.count, Column.Schema.str);
     const type_symbol = Column.asArrayColumn(atoms.type_symbol);
     const seq_id = Column.ofConst(1, atoms.count, Column.Schema.int);
 
+    const computedFormalCharges = new Int32Array(mol.atoms.count);
+    if (formalCharges.atomIdx.rowCount > 0) {
+        for (let i = 0; i < formalCharges.atomIdx.rowCount; i++) {
+            computedFormalCharges[formalCharges.atomIdx.value(i) - 1] = formalCharges.charge.value(i);
+        }
+    } else {
+        for (let i = 0; i < mol.atoms.count; i++) {
+            computedFormalCharges[i] = formalChargeMapper(atoms.formal_charge.value(i));
+        }
+    }
+
     const atom_site = Table.ofPartialColumns(BasicSchema.atom_site, {
         auth_asym_id: A,
         auth_atom_id: type_symbol,
@@ -45,6 +57,7 @@ export async function getMolModels(mol: MolFile, format: ModelFormat<any> | unde
         type_symbol,
 
         pdbx_PDB_model_num: Column.ofConst(1, atoms.count, Column.Schema.int),
+        pdbx_formal_charge: Column.ofIntArray(computedFormalCharges)
     }, atoms.count);
 
     const entityBuilder = new EntityBuilder();