Bladeren bron

parse PDB assembly, various tweaks

David Sehnal 6 jaren geleden
bovenliggende
commit
09d8be90ff

+ 1 - 1
src/apps/basic-wrapper/index.ts

@@ -55,7 +55,7 @@ class BasicWrapper {
         const state = this.stateTemplate.build();
 
         state.to('url').update(StateTransforms.Data.Download, p => ({ ...p, url }));
-        state.to('asm').update(StateTransforms.Model.StructureAssemblyFromModel, p => ({ ...p, id: assemblyId }));
+        state.to('asm').update(StateTransforms.Model.StructureAssemblyFromModel, p => ({ ...p, id: assemblyId || 'deposited' }));
 
         await PluginCommands.State.Update.dispatch(this.plugin, { state: this.plugin.state.dataState, tree: state });
 

+ 2 - 2
src/mol-io/reader/cif/data-model.ts

@@ -61,10 +61,10 @@ export namespace CifCategory {
     export type SomeFields<S> = { [P in keyof S]?: CifField }
     export type Fields<S> = { [P in keyof S]: CifField }
 
-    export function ofFields(name: string, fields: { [name: string]: CifField }): CifCategory {
+    export function ofFields(name: string, fields: { [name: string]: CifField | undefined }): CifCategory {
         const fieldNames = Object.keys(fields);
         return {
-            rowCount: fieldNames.length > 0 ? fields[fieldNames[0]].rowCount : 0,
+            rowCount: fieldNames.length > 0 ? fields[fieldNames[0]]!.rowCount : 0,
             name,
             fieldNames,
             getField(name) { return fields[name]; }

+ 4 - 0
src/mol-math/linear-algebra/3d/mat4.ts

@@ -119,6 +119,10 @@ namespace Mat4 {
         a[4 * j + i] = value;
     }
 
+    /**
+     * Reads a single element of `a`.
+     *
+     * @param a matrix to read from
+     * @param i first index; contributes `i` to the flat offset
+     * @param j second index; contributes `4 * j` to the flat offset
+     * @returns the element stored at flat offset `4 * j + i`
+     */
+    export function getValue(a: Mat4, i: number, j: number) {
+        const offset = 4 * j + i;
+        return a[offset];
+    }
+
     export function toArray(a: Mat4, out: NumberArray, offset: number) {
         out[offset + 0] = a[0];
         out[offset + 1] = a[1];

+ 128 - 0
src/mol-model-formats/structure/pdb/assembly.ts

@@ -6,6 +6,8 @@
 
 import { CifCategory, CifField } from 'mol-io/reader/cif';
 import { mmCIF_Schema } from 'mol-io/reader/cif/schema/mmcif';
+import { Mat4 } from 'mol-math/linear-algebra';
+import { Tokens } from 'mol-io/reader/common/text/tokenizer';
 
 export function parseCryst1(id: string, record: string): CifCategory[] {
     // COLUMNS       DATA TYPE      CONTENTS
@@ -41,4 +43,130 @@ export function parseCryst1(id: string, record: string): CifCategory[] {
         space_group_name_Hall: CifField.ofString('?')
     }
     return [CifCategory.ofFields('cell', cell), CifCategory.ofFields('symmetry', symmetry)];
+}
+
+/** A numbered transformation matrix belonging to an assembly group. */
+interface PdbAssemblyOperator {
+    id: number,
+    matrix: Mat4
+}
+
+/** A list of chain identifiers together with the operators collected for them. */
+interface PdbAssemblyGroup {
+    chains: string[],
+    operators: PdbAssemblyOperator[]
+}
+
+/** An assembly: its id, a free-text details string and its chain/operator groups. */
+interface PdbAssembly {
+    id: string,
+    details: string,
+    groups: PdbAssemblyGroup[]
+}
+
+/**
+ * Creates an assembly with the given id and details and an empty group list.
+ *
+ * @param id assembly identifier
+ * @param details free-text description of the assembly
+ * @returns a new `PdbAssembly` whose `groups` array is empty
+ */
+function PdbAssembly(id: string, details: string): PdbAssembly {
+    const groups: PdbAssemblyGroup[] = [];
+    return { id, details, groups };
+}
+
+/**
+ * Reads the assemblies described by the lines `[lineStart, lineEnd)`.
+ *
+ * Three kinds of lines are recognised, by fixed character positions:
+ *  - `BIOMOLECULE:` starts a new assembly,
+ *  - `APPLY THE FOLLOWING TO CHAINS:` starts a new chain group in the current
+ *    assembly, and `AND CHAINS:` appends chains to the current group,
+ *  - `BIOMTn` fills row `n` of the current operator matrix; row 1 starts a new operator.
+ *
+ * Lines that arrive out of order (e.g. a `BIOMT` line before any chain list) are
+ * skipped instead of dereferencing a missing assembly/group/matrix.
+ */
+function readRemark350Assemblies(lines: Tokens, lineStart: number, lineEnd: number): PdbAssembly[] {
+    const assemblies: PdbAssembly[] = [];
+
+    const getLine = (n: number) => lines.data.substring(lines.indices[2 * n], lines.indices[2 * n + 1]);
+    const isBiomolecule = (l: string) => l.substr(11, 12) === 'BIOMOLECULE:';
+    const isBiomt = (l: string) => l.substr(13, 5) === 'BIOMT';
+    const isChainList = (l: string) => {
+        const tag = l.substr(11, 30);
+        return tag === 'APPLY THE FOLLOWING TO CHAINS:' || tag === '                   AND CHAINS:';
+    };
+
+    let current: PdbAssembly | undefined;
+    let group: PdbAssembly['groups'][0] | undefined;
+    let matrix: Mat4 | undefined;
+    // Operator ids are unique across all assemblies of the record.
+    let operId = 1;
+
+    for (let i = lineStart; i < lineEnd; i++) {
+        const line = getLine(i);
+        if (isBiomolecule(line)) {
+            const id = line.substr(23).trim();
+            let details = `Biomolecule ${id}`;
+            // The following line is treated as the details text, but only if it exists
+            // within the range and is not itself a record this parser needs to handle.
+            if (i + 1 < lineEnd) {
+                const next = getLine(i + 1);
+                if (!isBiomolecule(next) && !isChainList(next) && !isBiomt(next)) {
+                    details = next.substr(11).trim();
+                    i++;
+                }
+            }
+            current = PdbAssembly(id, details);
+            assemblies.push(current);
+            // Do not let groups/matrices of the previous assembly leak into this one.
+            group = void 0;
+            matrix = void 0;
+        } else if (isBiomt(line)) {
+            const row = parseInt(line[18], 10) - 1;
+            // Written so that a NaN row (missing/non-digit character) is rejected as well.
+            if (!group || !(row >= 0 && row < 3)) continue;
+
+            if (row === 0) {
+                matrix = Mat4.identity();
+                group.operators.push({ id: operId++, matrix });
+            }
+            if (!matrix) continue;
+
+            // NOTE(review): whitespace splitting assumes the numeric columns never touch;
+            // confirm against the fixed-column layout of the record.
+            const biomt = line.split(/\s+/);
+            for (let col = 0; col < 4; col++) {
+                Mat4.setValue(matrix, row, col, parseFloat(biomt[4 + col]));
+            }
+        } else if (isChainList(line)) {
+            if (!current) continue;
+
+            if (line.substr(11, 5) === 'APPLY') {
+                group = { chains: [], operators: [] };
+                current.groups.push(group);
+                // A new group must start its operators with a fresh matrix.
+                matrix = void 0;
+            }
+            if (!group) continue;
+
+            const chainList = line.substr(41, 30).split(',');
+            for (let j = 0, jl = chainList.length; j < jl; ++j) {
+                const c = chainList[j].trim();
+                if (c) group.chains.push(c);
+            }
+        }
+    }
+
+    return assemblies;
+}
+
+/**
+ * Converts the lines `[lineStart, lineEnd)` of `lines` into CIF categories
+ * describing the assemblies found in them.
+ *
+ * @param lines tokenized lines; line `n` spans `indices[2 * n]` to `indices[2 * n + 1]` of `data`
+ * @param lineStart index of the first line to read (inclusive)
+ * @param lineEnd index one past the last line to read (exclusive)
+ * @returns an empty array if no assembly was found, otherwise the categories
+ *          `pdbx_struct_assembly`, `pdbx_struct_assembly_gen` and `pdbx_struct_oper_list`
+ */
+export function parseRemark350(lines: Tokens, lineStart: number, lineEnd: number): CifCategory[] {
+    const assemblies = readRemark350Assemblies(lines, lineStart, lineEnd);
+    if (assemblies.length === 0) return [];
+
+    // pdbx_struct_assembly: one row per assembly
+    const pdbx_struct_assembly: CifCategory.SomeFields<mmCIF_Schema['pdbx_struct_assembly']> = {
+        id: CifField.ofStrings(assemblies.map(a => a.id)),
+        details: CifField.ofStrings(assemblies.map(a => a.details))
+    };
+
+    // pdbx_struct_assembly_gen: one row per (assembly, group)
+    // pdbx_struct_oper_list: one row per operator, in the same traversal order
+    const genAssemblyIds: string[] = [];
+    const genOperExpressions: string[] = [];
+    const genAsymIdLists: string[] = [];
+    const operRows: { [name: string]: string }[] = [];
+    for (const asm of assemblies) {
+        for (const group of asm.groups) {
+            genAssemblyIds.push(asm.id);
+            genOperExpressions.push(group.operators.map(o => o.id).join(','));
+            genAsymIdLists.push(group.chains.join(','));
+
+            for (const oper of group.operators) {
+                const row: { [name: string]: string } = {
+                    id: '' + oper.id,
+                    type: '?',
+                    name: '?',
+                    symmetry_operation: '?'
+                };
+                for (let i = 0; i < 3; i++) {
+                    for (let j = 0; j < 3; j++) {
+                        row[`matrix[${i + 1}][${j + 1}]`] = '' + Mat4.getValue(oper.matrix, i, j);
+                    }
+                    // The fourth column of each row is emitted as the vector component.
+                    row[`vector[${i + 1}]`] = '' + Mat4.getValue(oper.matrix, i, 3);
+                }
+                operRows.push(row);
+            }
+        }
+    }
+
+    const pdbx_struct_assembly_gen: CifCategory.Fields<mmCIF_Schema['pdbx_struct_assembly_gen']> = {
+        assembly_id: CifField.ofStrings(genAssemblyIds),
+        oper_expression: CifField.ofStrings(genOperExpressions),
+        asym_id_list: CifField.ofStrings(genAsymIdLists)
+    };
+
+    const operColumn = (name: string) => CifField.ofStrings(operRows.map(r => r[name]));
+    const pdbx_struct_oper_list: { [name: string]: CifField } = {
+        id: operColumn('id'),
+        type: operColumn('type'),
+        name: operColumn('name'),
+        symmetry_operation: operColumn('symmetry_operation')
+    };
+    for (let i = 0; i < 3; i++) {
+        for (let j = 0; j < 3; j++) {
+            const k = `matrix[${i + 1}][${j + 1}]`;
+            pdbx_struct_oper_list[k] = operColumn(k);
+        }
+        const k = `vector[${i + 1}]`;
+        pdbx_struct_oper_list[k] = operColumn(k);
+    }
+
+    return [
+        CifCategory.ofFields('pdbx_struct_assembly', pdbx_struct_assembly),
+        CifCategory.ofFields('pdbx_struct_assembly_gen', pdbx_struct_assembly_gen),
+        CifCategory.ofFields('pdbx_struct_oper_list', pdbx_struct_oper_list)
+    ];
 }

+ 29 - 15
src/mol-model-formats/structure/pdb/to-cif.ts

@@ -9,17 +9,7 @@ import { CifField, CifCategory, CifFrame } from 'mol-io/reader/cif';
 import { mmCIF_Schema } from 'mol-io/reader/cif/schema/mmcif';
 import { TokenBuilder, Tokenizer } from 'mol-io/reader/common/text/tokenizer';
 import { PdbFile } from 'mol-io/reader/pdb/schema';
-
-function toCategory(name: string, fields: { [name: string]: CifField | undefined }, rowCount: number): CifCategory {
-    return {
-        name,
-        fieldNames: Object.keys(fields),
-        rowCount,
-        getField(f: string) {
-            return fields[f];
-        }
-    }
-}
+import { parseCryst1, parseRemark350 } from './assembly';
 
 function _entity(): { [K in keyof mmCIF_Schema['entity']]?: CifField } {
     return {
@@ -88,7 +78,7 @@ function _atom_site(sites: AtomSiteTemplate): { [K in keyof mmCIF_Schema['atom_s
     };
 }
 
-const WaterNames = new Set([ 'SOL', 'WAT', 'HOH', 'H2O', 'W', 'DOD', 'D3O', 'TIP3', 'TIP4', 'SPC' ]);
+const WaterNames = new Set(['SOL', 'WAT', 'HOH', 'H2O', 'W', 'DOD', 'D3O', 'TIP3', 'TIP4', 'SPC']);
 
 function getEntityId(residueName: string, isHet: boolean) {
     if (isHet) {
@@ -205,16 +195,23 @@ export async function pdbToMmCif(pdb: PdbFile): Promise<CifFrame> {
 
     const atom_site = atom_site_template(data, atomCount);
 
+    const helperCategories: CifCategory[] = [];
+
     let modelNum = 0, modelStr = '';
 
     for (let i = 0, _i = lines.count; i < _i; i++) {
-        const s = indices[2 * i], e = indices[2 * i + 1];
+        let s = indices[2 * i], e = indices[2 * i + 1];
         switch (data[s]) {
             case 'A':
                 if (!substringStartsWith(data, s, e, 'ATOM  ')) continue;
                 if (!modelNum) { modelNum++; modelStr = '' + modelNum; }
                 addAtom(atom_site, modelStr, tokenizer, s, e, false);
                 break;
+            case 'C':
+                if (substringStartsWith(data, s, e, 'CRYST1')) {
+                    helperCategories.push(...parseCryst1(pdb.id || '?', data.substring(s, e)));
+                }
+                break;
             case 'H':
                 if (!substringStartsWith(data, s, e, 'HETATM')) continue;
                 if (!modelNum) { modelNum++; modelStr = '' + modelNum; }
@@ -226,13 +223,30 @@ export async function pdbToMmCif(pdb: PdbFile): Promise<CifFrame> {
                     modelStr = '' + modelNum;
                 }
                 break;
+            case 'R': {
+                if (substringStartsWith(data, s, e, 'REMARK 350')) {
+                    let j = i + 1;
+                    while (true) {
+                        s = indices[2 * j]; e = indices[2 * j + 1];
+                        if (!substringStartsWith(data, s, e, 'REMARK 350')) break;
+                        j++;
+                    }
+                    helperCategories.push(...parseRemark350(lines, i, j));
+                    i = j - 1;
+                }
+                break;
+            }
 
         }
     }
 
     const categories = {
-        entity: toCategory('entity', _entity(), 3),
-        atom_site: toCategory('atom_site', _atom_site(atom_site), atomCount)
+        entity: CifCategory.ofFields('entity', _entity()),
+        atom_site: CifCategory.ofFields('atom_site', _atom_site(atom_site))
+    } as any;
+
+    for (const c of helperCategories) {
+        categories[c.name] = c;
     }
 
     return {

+ 3 - 3
src/mol-plugin/state/transforms/model.ts

@@ -129,11 +129,11 @@ const StructureAssemblyFromModel = PluginStateTransform.BuiltIn({
     to: SO.Molecule.Structure,
     params(a) {
         if (!a) {
-            return { id: PD.makeOptional(PD.Text('', { label: 'Assembly Id', description: 'Assembly Id. If none specified (undefined or empty string), the asymmetric unit is used.' })) };
+            return { id: PD.makeOptional(PD.Text('', { label: 'Assembly Id', description: 'Assembly Id. Value \'deposited\' can be used to specify deposited asymmetric unit.' })) };
         }
         const model = a.data;
         const ids = model.symmetry.assemblies.map(a => [a.id, `${a.id}: ${stringToWords(a.details)}`] as [string, string]);
-        if (!ids.length) ids.push(['deposited', 'Deposited'])
+        ids.push(['deposited', 'Deposited']);
         return { id: PD.makeOptional(PD.Select(ids[0][0], ids, { label: 'Asm Id', description: 'Assembly Id' })) };
     }
 })({
@@ -146,7 +146,7 @@ const StructureAssemblyFromModel = PluginStateTransform.BuiltIn({
 
             const base = Structure.ofModel(model);
             if (!asm) {
-                plugin.log.warn(`Model '${a.label}' has no assembly, returning deposited structure.`);
+                if (!!id && id !== 'deposited') plugin.log.warn(`Model '${a.label}' has no assembly, returning deposited structure.`);
                 const label = { label: a.data.label, description: structureDesc(base) };
                 return new SO.Molecule.Structure(base, label);
             }

+ 3 - 2
src/mol-plugin/ui/controls/parameters.tsx

@@ -248,8 +248,9 @@ export class SelectControl extends SimpleParam<PD.Select<string | number>> {
         }
     }
     renderControl() {
-        return <select value={this.props.value || ''} onChange={this.onChange} disabled={this.props.isDisabled}>
-            {!this.props.param.options.some(e => e[0] === this.props.value) && <option key={this.props.value} value={this.props.value}>{`[Invalid] ${this.props.value}`}</option>}
+        const isInvalid = this.props.value !== void 0 && !this.props.param.options.some(e => e[0] === this.props.value);
+        return <select value={this.props.value || this.props.param.defaultValue} onChange={this.onChange} disabled={this.props.isDisabled}>
+            {isInvalid && <option key={this.props.value} value={this.props.value}>{`[Invalid] ${this.props.value}`}</option>}
             {this.props.param.options.map(([value, label]) => <option key={value} value={value}>{label}</option>)}
         </select>;
     }