Browse Source

add prmtop format support

Alexander Rose 3 years ago
parent
commit
511c839237

+ 1 - 0
CHANGELOG.md

@@ -14,6 +14,7 @@ Note that since we don't clearly distinguish between a public and private interf
 - Fix wrong element assignment for atoms with Charmm ion names
 - Fix handling of empty symmetry cell data
 - Add support for ``trr`` coordinates files
+- Add support for ``prmtop`` topology files
 
 ## [v3.3.1] - 2022-02-27
 

+ 176 - 0
src/mol-io/reader/prmtop/parser.ts

@@ -0,0 +1,176 @@
+/**
+ * Copyright (c) 2022 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ */
+
+import { Task, RuntimeContext } from '../../../mol-task';
+import { Tokenizer, TokenBuilder, Tokens } from '../common/text/tokenizer';
+import { ReaderResult as Result } from '../result';
+import { TokenColumnProvider as TokenColumn } from '../common/text/column/token';
+import { Column } from '../../../mol-data/db';
+import { Mutable } from '../../../mol-util/type-helpers';
+
+// http://ambermd.org/prmtop.pdf
+// https://ambermd.org/FileFormats.php#topology
+
+const Pointers = {
+    'NATOM': '', 'NTYPES': '', 'NBONH': '', 'MBONA': '', 'NTHETH': '', 'MTHETA': '',
+    'NPHIH': '', 'MPHIA': '', 'NHPARM': '', 'NPARM': '', 'NNB': '', 'NRES': '',
+    'NBONA': '', 'NTHETA': '', 'NPHIA': '', 'NUMBND': '', 'NUMANG': '', 'NPTRA': '',
+    'NATYP': '', 'NPHB': '', 'IFPERT': '', 'NBPER': '', 'NGPER': '', 'NDPER': '',
+    'MBPER': '', 'MGPER': '', 'MDPER': '', 'IFBOX': '', 'NMXRS': '', 'IFCAP': '',
+    'NUMEXTRA': '', 'NCOPY': '',
+};
+type PointerName = keyof typeof Pointers;
+const PointersNames = Object.keys(Pointers) as PointerName[];
+
+export interface PrmtopFile {
+    readonly version: string
+    readonly title: ReadonlyArray<string>
+    readonly pointers: Readonly<Record<PointerName, number>>
+    readonly atomName: Column<string>
+    readonly charge: Column<number>
+    readonly mass: Column<number>
+    readonly residueLabel: Column<string>
+    readonly residuePointer: Column<number>
+    readonly bondsIncHydrogen: Column<number>
+    readonly bondsWithoutHydrogen: Column<number>
+    readonly radii: Column<number>
+}
+
+const { readLine, markLine, trim } = Tokenizer;
+
+function State(tokenizer: Tokenizer, runtimeCtx: RuntimeContext) {
+    return {
+        tokenizer,
+        runtimeCtx,
+    };
+}
+type State = ReturnType<typeof State>
+
+function handleTitle(state: State): string[] {
+    const { tokenizer } = state;
+    const title: string[] = [];
+
+    while (tokenizer.tokenEnd < tokenizer.length) {
+        if (tokenizer.data[tokenizer.position] === '%') break;
+        const line = readLine(tokenizer).trim();
+        if (line) title.push(line);
+    }
+
+    return title;
+}
+
+function handlePointers(state: State): Record<PointerName, number> {
+    const { tokenizer } = state;
+
+    const pointers: Record<PointerName, number> = Object.create(null);
+    PointersNames.forEach(name => { pointers[name] = 0; });
+
+    let curIdx = 0;
+    while (tokenizer.tokenEnd < tokenizer.length) {
+        if (tokenizer.data[tokenizer.position] === '%') break;
+        const line = readLine(tokenizer);
+
+        const n = Math.min(curIdx + 10, 32);
+        for (let i = 0; curIdx < n; ++i, ++curIdx) {
+            pointers[PointersNames[curIdx]] = parseInt(
+                line.substring(i * 8, i * 8 + 8).trim()
+            );
+        }
+    }
+
+    return pointers;
+}
+
+function handleTokens(state: State, count: number, countPerLine: number, itemSize: number): Tokens {
+    const { tokenizer } = state;
+
+    const tokens = TokenBuilder.create(tokenizer.data, count * 2);
+
+    let curIdx = 0;
+    while (tokenizer.tokenEnd < tokenizer.length) {
+        if (tokenizer.data[tokenizer.position] === '%') break;
+
+        tokenizer.tokenStart = tokenizer.position;
+        const n = Math.min(curIdx + countPerLine, count);
+        for (let i = 0; curIdx < n; ++i, ++curIdx) {
+            const p = tokenizer.position;
+            trim(tokenizer, tokenizer.position, tokenizer.position + itemSize);
+            TokenBuilder.addUnchecked(tokens, tokenizer.tokenStart, tokenizer.tokenEnd);
+            tokenizer.position = p + itemSize;
+        }
+
+        markLine(tokenizer);
+    }
+
+    return tokens;
+}
+
+async function parseInternal(data: string, ctx: RuntimeContext): Promise<Result<PrmtopFile>> {
+    const t = Tokenizer(data);
+    const state = State(t, ctx);
+
+    const result: Mutable<PrmtopFile> = Object.create(null);
+    let prevPosition = 0;
+
+    while (t.tokenEnd < t.length) {
+        if (t.position - prevPosition > 100000 && ctx.shouldUpdate) {
+            prevPosition = t.position;
+            await ctx.update({ current: t.position, max: t.length });
+        }
+
+        const line = readLine(state.tokenizer).trim();
+        if (line.startsWith('%VERSION')) {
+            result.version = line.substring(8).trim();
+        } else if (line.startsWith('%FLAG')) {
+            const flag = line.substring(5).trim();
+            const formatLine = readLine(state.tokenizer).trim();
+            if (!formatLine.startsWith('%FORMAT')) throw new Error('expected %FORMAT');
+
+            if (flag === 'TITLE') {
+                result.title = handleTitle(state);
+            } else if (flag === 'POINTERS') {
+                result.pointers = handlePointers(state);
+            } else if (flag === 'ATOM_NAME') {
+                const tokens = handleTokens(state, result.pointers['NATOM'], 20, 4);
+                result.atomName = TokenColumn(tokens)(Column.Schema.str);
+            } else if (flag === 'CHARGE') {
+                const tokens = handleTokens(state, result.pointers['NATOM'], 5, 16);
+                result.charge = TokenColumn(tokens)(Column.Schema.float);
+            } else if (flag === 'MASS') {
+                const tokens = handleTokens(state, result.pointers['NATOM'], 5, 16);
+                result.mass = TokenColumn(tokens)(Column.Schema.float);
+            } else if (flag === 'RESIDUE_LABEL') {
+                const tokens = handleTokens(state, result.pointers['NRES'], 20, 4);
+                result.residueLabel = TokenColumn(tokens)(Column.Schema.str);
+            } else if (flag === 'RESIDUE_POINTER') {
+                const tokens = handleTokens(state, result.pointers['NRES'], 10, 8);
+                result.residuePointer = TokenColumn(tokens)(Column.Schema.int);
+            } else if (flag === 'BONDS_INC_HYDROGEN') {
+                const tokens = handleTokens(state, result.pointers['NBONH'] * 3, 10, 8);
+                result.bondsIncHydrogen = TokenColumn(tokens)(Column.Schema.int);
+            } else if (flag === 'BONDS_WITHOUT_HYDROGEN') {
+                const tokens = handleTokens(state, result.pointers['NBONA'] * 3, 10, 8);
+                result.bondsWithoutHydrogen = TokenColumn(tokens)(Column.Schema.int);
+            } else if (flag === 'RADII') {
+                const tokens = handleTokens(state, result.pointers['NATOM'], 5, 16);
+                result.radii = TokenColumn(tokens)(Column.Schema.float);
+            } else {
+                while (t.tokenEnd < t.length) {
+                    if (t.data[t.position] === '%') break;
+                    markLine(t);
+                }
+            }
+        }
+    }
+
+    return Result.success(result);
+}
+
+export function parsePrmtop(data: string) {
+    return Task.create<Result<PrmtopFile>>('Parse PRMTOP', async ctx => {
+        return await parseInternal(data, ctx);
+    });
+}

+ 174 - 0
src/mol-model-formats/structure/prmtop.ts

@@ -0,0 +1,174 @@
+/**
+ * Copyright (c) 2022 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ */
+
+import { Column, Table } from '../../mol-data/db';
+import { PrmtopFile } from '../../mol-io/reader/prmtop/parser';
+import { getMoleculeType, MoleculeType } from '../../mol-model/structure/model/types';
+import { Topology } from '../../mol-model/structure/topology/topology';
+import { Task } from '../../mol-task';
+import { ModelFormat } from '../format';
+import { BasicSchema, createBasic } from './basic/schema';
+import { ComponentBuilder } from './common/component';
+import { EntityBuilder } from './common/entity';
+import { getChainId } from './common/util';
+import { guessElementSymbolString } from './util';
+
+function getBasic(prmtop: PrmtopFile) {
+    const { pointers, residuePointer, residueLabel, atomName } = prmtop;
+    const atomCount = pointers.NATOM;
+    const residueCount = pointers.NRES;
+
+    //
+
+    const residueIds = new Uint32Array(atomCount);
+    const residueNames: string[] = [];
+
+    const addResidue = (i: number, from: number, to: number) => {
+        const rn = residueLabel.value(i);
+        for (let j = from, jl = to; j < jl; ++j) {
+            residueIds[j] = i + 1;
+            residueNames[j] = rn;
+        }
+    };
+
+    for (let i = 0, il = residueCount - 1; i < il; ++i) {
+        addResidue(i, residuePointer.value(i) - 1, residuePointer.value(i + 1) - 1);
+
+    }
+    addResidue(residueCount - 1, residuePointer.value(residueCount - 1) - 1, atomCount);
+
+    const residueId = Column.ofIntArray(residueIds);
+    const residueName = Column.ofStringArray(residueNames);
+
+    //
+
+    const entityIds = new Array<string>(atomCount);
+    const asymIds = new Array<string>(atomCount);
+    const seqIds = new Uint32Array(atomCount);
+    const ids = new Uint32Array(atomCount);
+
+    const entityBuilder = new EntityBuilder();
+    const componentBuilder = new ComponentBuilder(residueId, atomName);
+
+    let currentEntityId = '';
+    let currentAsymIndex = 0;
+    let currentAsymId = '';
+    let currentSeqId = 0;
+    let prevMoleculeType = MoleculeType.Unknown;
+    let prevResidueNumber = -1;
+
+    for (let i = 0, il = atomCount; i < il; ++i) {
+        const residueNumber = residueId.value(i);
+        if (residueNumber !== prevResidueNumber) {
+            const compId = residueName.value(i);
+            const moleculeType = getMoleculeType(componentBuilder.add(compId, i).type, compId);
+
+            if (moleculeType !== prevMoleculeType) {
+                currentAsymId = getChainId(currentAsymIndex);
+                currentAsymIndex += 1;
+                currentSeqId = 0;
+            }
+
+            currentEntityId = entityBuilder.getEntityId(compId, moleculeType, currentAsymId);
+            currentSeqId += 1;
+
+            prevResidueNumber = residueNumber;
+            prevMoleculeType = moleculeType;
+        }
+
+        entityIds[i] = currentEntityId;
+        asymIds[i] = currentAsymId;
+        seqIds[i] = currentSeqId;
+        ids[i] = i;
+    }
+
+    const id = Column.ofIntArray(ids);
+    const asym_id = Column.ofStringArray(asymIds);
+
+    //
+
+    const type_symbol = new Array<string>(atomCount);
+    for (let i = 0; i < atomCount; ++i) {
+        type_symbol[i] = guessElementSymbolString(atomName.value(i), residueName.value(i));
+    }
+
+    const atom_site = Table.ofPartialColumns(BasicSchema.atom_site, {
+        auth_asym_id: asym_id,
+        auth_atom_id: Column.asArrayColumn(atomName),
+        auth_comp_id: residueName,
+        auth_seq_id: residueId,
+        id: Column.asArrayColumn(id),
+
+        label_asym_id: asym_id,
+        label_atom_id: Column.asArrayColumn(atomName),
+        label_comp_id: residueName,
+        label_seq_id: Column.ofIntArray(seqIds),
+        label_entity_id: Column.ofStringArray(entityIds),
+
+        occupancy: Column.ofConst(1, atomCount, Column.Schema.float),
+        type_symbol: Column.ofStringArray(type_symbol),
+
+        pdbx_PDB_model_num: Column.ofConst(1, atomCount, Column.Schema.int),
+    }, atomCount);
+
+    const basic = createBasic({
+        entity: entityBuilder.getEntityTable(),
+        chem_comp: componentBuilder.getChemCompTable(),
+        atom_site
+    });
+
+    return basic;
+}
+
+//
+
+export { PrmtopFormat };
+
+type PrmtopFormat = ModelFormat<PrmtopFile>
+
+namespace PrmtopFormat {
+    export function is(x?: ModelFormat): x is PrmtopFormat {
+        return x?.kind === 'prmtop';
+    }
+
+    export function fromPrmtop(prmtop: PrmtopFile): PrmtopFormat {
+        return { kind: 'prmtop', name: prmtop.title.join(' ') || 'PRMTOP', data: prmtop };
+    }
+}
+
+export function topologyFromPrmtop(prmtop: PrmtopFile): Task<Topology> {
+    return Task.create('Parse PRMTOP', async ctx => {
+        const format = PrmtopFormat.fromPrmtop(prmtop);
+        const basic = getBasic(prmtop);
+
+        const { pointers: { NBONH, NBONA }, bondsIncHydrogen, bondsWithoutHydrogen } = prmtop;
+        const bondCount = NBONH + NBONA;
+
+        const bonds = {
+            indexA: Column.ofLambda({
+                value: (row: number) => {
+                    return row < NBONH
+                        ? bondsIncHydrogen.value(row * 3) / 3
+                        : bondsWithoutHydrogen.value((row - NBONH) * 3) / 3;
+                },
+                rowCount: bondCount,
+                schema: Column.Schema.int,
+            }),
+            indexB: Column.ofLambda({
+                value: (row: number) => {
+                    return row < NBONH
+                        ? bondsIncHydrogen.value(row * 3 + 1) / 3
+                        : bondsWithoutHydrogen.value((row - NBONH) * 3 + 1) / 3;
+                },
+                rowCount: bondCount,
+                schema: Column.Schema.int,
+            }),
+            order: Column.ofConst(1, bondCount, Column.Schema.int)
+        };
+
+        return Topology.create(prmtop.title.join(' ') || 'PRMTOP', basic, bonds, format);
+    });
+}

+ 20 - 0
src/mol-plugin-state/formats/topology.ts

@@ -29,10 +29,30 @@ const PsfProvider = DataFormatProvider({
 });
 type PsfProvider = typeof PsfProvider;
 
+export { PrmtopProvider };
+const PrmtopProvider = DataFormatProvider({
+    label: 'PRMTOP',
+    description: 'PRMTOP',
+    category: TopologyFormatCategory,
+    stringExtensions: ['prmtop', 'parm7'],
+    parse: async (plugin, data) => {
+        const format = plugin.state.data.build()
+            .to(data)
+            .apply(StateTransforms.Data.ParsePrmtop, {}, { state: { isGhost: true } });
+        const topology = format.apply(StateTransforms.Model.TopologyFromPrmtop);
+
+        await format.commit();
+
+        return { format: format.selector, topology: topology.selector };
+    }
+});
+type PrmtopProvider = typeof PrmtopProvider;
+
 export type TopologyProvider = PsfProvider;
 
 export const BuiltInTopologyFormats = [
     ['psf', PsfProvider] as const,
+    ['prmtop', PrmtopProvider] as const,
 ] as const;
 
 export type BuiltInTopologyFormat = (typeof BuiltInTopologyFormats)[number][0]

+ 3 - 1
src/mol-plugin-state/objects.ts

@@ -1,5 +1,5 @@
 /**
- * Copyright (c) 2018-2020 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ * Copyright (c) 2018-2022 mol* contributors, licensed under MIT, See LICENSE file for more info.
  *
  * @author David Sehnal <david.sehnal@gmail.com>
  * @author Alexander Rose <alexander.rose@weirdbyte.de>
@@ -24,6 +24,7 @@ import { CubeFile } from '../mol-io/reader/cube/parser';
 import { DxFile } from '../mol-io/reader/dx/parser';
 import { Color } from '../mol-util/color/color';
 import { Asset } from '../mol-util/assets';
+import { PrmtopFile } from '../mol-io/reader/prmtop/parser';
 
 export type TypeClass = 'root' | 'data' | 'prop'
 
@@ -72,6 +73,7 @@ export namespace PluginStateObject {
         export class Cif extends Create<CifFile>({ name: 'CIF File', typeClass: 'Data' }) { }
         export class Cube extends Create<CubeFile>({ name: 'Cube File', typeClass: 'Data' }) { }
         export class Psf extends Create<PsfFile>({ name: 'PSF File', typeClass: 'Data' }) { }
+        export class Prmtop extends Create<PrmtopFile>({ name: 'PRMTOP File', typeClass: 'Data' }) { }
         export class Ply extends Create<PlyFile>({ name: 'PLY File', typeClass: 'Data' }) { }
         export class Ccp4 extends Create<Ccp4File>({ name: 'CCP4/MRC/MAP File', typeClass: 'Data' }) { }
         export class Dsn6 extends Create<Dsn6File>({ name: 'DSN6/BRIX File', typeClass: 'Data' }) { }

+ 18 - 0
src/mol-plugin-state/transforms/data.ts

@@ -21,6 +21,7 @@ import { parseCube } from '../../mol-io/reader/cube/parser';
 import { parseDx } from '../../mol-io/reader/dx/parser';
 import { ColorNames } from '../../mol-util/color/names';
 import { assertUnreachable } from '../../mol-util/type-helpers';
+import { parsePrmtop } from '../../mol-io/reader/prmtop/parser';
 
 export { Download };
 export { DownloadBlob };
@@ -30,6 +31,7 @@ export { ParseBlob };
 export { ParseCif };
 export { ParseCube };
 export { ParsePsf };
+export { ParsePrmtop };
 export { ParsePly };
 export { ParseCcp4 };
 export { ParseDsn6 };
@@ -317,6 +319,22 @@ const ParsePsf = PluginStateTransform.BuiltIn({
     }
 });
 
+type ParsePrmtop = typeof ParsePrmtop
+const ParsePrmtop = PluginStateTransform.BuiltIn({
+    name: 'parse-prmtop',
+    display: { name: 'Parse PRMTOP', description: 'Parse PRMTOP from String data' },
+    from: [SO.Data.String],
+    to: SO.Format.Prmtop
+})({
+    apply({ a }) {
+        return Task.create('Parse PRMTOP', async ctx => {
+            const parsed = await parsePrmtop(a.data).runInContext(ctx);
+            if (parsed.isError) throw new Error(parsed.message);
+            return new SO.Format.Prmtop(parsed.result);
+        });
+    }
+});
+
 type ParsePly = typeof ParsePly
 const ParsePly = PluginStateTransform.BuiltIn({
     name: 'parse-ply',