Ver Fonte

Merge branch 'master' into usdz-export

Sukolsak Sakshuwong há 3 anos atrás
pai
commit
6550e53414

+ 5 - 0
CHANGELOG.md

@@ -7,6 +7,11 @@ Note that since we don't clearly distinguish between a public and private interf
 ## [Unreleased]
 
 - Add `tubularHelices` parameter to Cartoon representation
+- Add `SdfFormat` and update SDF parser to be able to parse data headers according to spec (hopefully :)) #230
+- Fix mononucleotides detected as polymer components (#229)
+- Set default outline scale back to 1
+- Improved DCD reader cell angle handling (interpret near-0 angles as 90 deg)
+- Handle more residue/atom names commonly used in force-fields
 - Add USDZ support to ``geo-export`` extension.
 
 ## [v2.1.0] - 2021-07-05

+ 10 - 7
src/mol-io/reader/_spec/sdf.spec.ts

@@ -22,8 +22,8 @@ M  END
 > <DATABASE_NAME>
 drugbank
 
-> <SMILES>
-[O-]P([O-])([O-])=O
+> 5225 <TEST_FIELD>
+whatever
 
 > <INCHI_IDENTIFIER>
 InChI=1S/H3O4P/c1-5(2,3)4/h(H3,1,2,3,4)/p-3
@@ -362,22 +362,25 @@ describe('sdf reader', () => {
         expect(bonds.atomIdxB.value(3)).toBe(5);
         expect(bonds.order.value(3)).toBe(1);
 
-        expect(dataItems.dataHeader.value(0)).toBe('DATABASE_ID');
+        expect(dataItems.dataHeader.value(0)).toBe('<DATABASE_ID>');
         expect(dataItems.data.value(0)).toBe('0');
 
-        expect(dataItems.dataHeader.value(1)).toBe('DATABASE_NAME');
+        expect(dataItems.dataHeader.value(1)).toBe('<DATABASE_NAME>');
         expect(dataItems.data.value(1)).toBe('drugbank');
 
-        expect(dataItems.dataHeader.value(31)).toBe('SYNONYMS');
+        expect(dataItems.dataHeader.value(2)).toBe('5225 <TEST_FIELD>');
+        expect(dataItems.data.value(2)).toBe('whatever');
+
+        expect(dataItems.dataHeader.value(31)).toBe('<SYNONYMS>');
         expect(dataItems.data.value(31)).toBe('Orthophosphate; Phosphate');
 
         expect(compound1.dataItems.data.value(0)).toBe('0');
         expect(compound2.dataItems.data.value(0)).toBe('1');
 
-        expect(compound3.dataItems.dataHeader.value(2)).toBe('PUBCHEM_CONFORMER_DIVERSEORDER');
+        expect(compound3.dataItems.dataHeader.value(2)).toBe('<PUBCHEM_CONFORMER_DIVERSEORDER>');
         expect(compound3.dataItems.data.value(2)).toBe('1\n11\n10\n3\n15\n17\n13\n5\n16\n7\n14\n9\n8\n4\n18\n6\n12\n2');
 
-        expect(compound3.dataItems.dataHeader.value(21)).toBe('PUBCHEM_COORDINATE_TYPE');
+        expect(compound3.dataItems.dataHeader.value(21)).toBe('<PUBCHEM_COORDINATE_TYPE>');
         expect(compound3.dataItems.data.value(21)).toBe('2\n5\n10');
     });
 });

+ 14 - 10
src/mol-io/reader/sdf/parser.ts

@@ -13,16 +13,20 @@ import { Tokenizer, TokenBuilder } from '../common/text/tokenizer';
 import { TokenColumnProvider as TokenColumn } from '../common/text/column/token';
 
 /** http://c4.cabrillo.edu/404/ctfile.pdf - page 41 */
+
+export interface SdfFileCompound {
+    readonly molFile: MolFile,
+    readonly dataItems: {
+        readonly dataHeader: Column<string>,
+        readonly data: Column<string>
+    }
+}
+
 export interface SdfFile {
-    readonly compounds: {
-        readonly molFile: MolFile,
-        readonly dataItems: {
-            readonly dataHeader: Column<string>,
-            readonly data: Column<string>
-        }
-    }[]
+    readonly compounds: SdfFileCompound[]
 }
 
+
 const delimiter = '$$$$';
 function handleDataItems(tokenizer: Tokenizer): { dataHeader: Column<string>, data: Column<string> } {
     const dataHeader = TokenBuilder.create(tokenizer.data, 32);
@@ -33,8 +37,8 @@ function handleDataItems(tokenizer: Tokenizer): { dataHeader: Column<string>, da
         if (line.startsWith(delimiter)) break;
         if (!line) continue;
 
-        if (line.startsWith('> <')) {
-            TokenBuilder.add(dataHeader, tokenizer.tokenStart + 3, tokenizer.tokenEnd - 1);
+        if (line.startsWith('> ')) {
+            TokenBuilder.add(dataHeader, tokenizer.tokenStart + 2, tokenizer.tokenEnd);
 
             Tokenizer.markLine(tokenizer);
             const start = tokenizer.tokenStart;
@@ -42,7 +46,7 @@ function handleDataItems(tokenizer: Tokenizer): { dataHeader: Column<string>, da
             let added = false;
             while (tokenizer.position < tokenizer.length) {
                 const line2 = Tokenizer.readLine(tokenizer);
-                if (!line2 || line2.startsWith(delimiter) || line2.startsWith('> <')) {
+                if (!line2 || line2.startsWith(delimiter) || line2.startsWith('> ')) {
                     TokenBuilder.add(data, start, end);
                     added = true;
                     break;

+ 17 - 10
src/mol-model-formats/structure/common/component.ts

@@ -13,21 +13,26 @@ import { mmCIF_chemComp_schema } from '../../../mol-io/reader/cif/schema/mmcif-e
 type Component = Table.Row<Pick<mmCIF_chemComp_schema, 'id' | 'name' | 'type'>>
 
 const ProteinAtomIdsList = [
-    new Set([ 'CA' ]),
-    new Set([ 'C' ]),
-    new Set([ 'N' ])
+    new Set(['CA']),
+    new Set(['C']),
+    new Set(['N'])
 ];
 const RnaAtomIdsList = [
-    new Set([ 'P', 'O3\'', 'O3*' ]),
-    new Set([ 'C4\'', 'C4*' ]),
-    new Set([ 'O2\'', 'O2*', 'F2\'', 'F2*' ])
+    new Set(['P', 'O3\'', 'O3*']),
+    new Set(['C4\'', 'C4*']),
+    new Set(['O2\'', 'O2*', 'F2\'', 'F2*'])
 ];
 const DnaAtomIdsList = [
-    new Set([ 'P', 'O3\'', 'O3*' ]),
-    new Set([ 'C3\'', 'C3*' ]),
-    new Set([ 'O2\'', 'O2*', 'F2\'', 'F2*' ])
+    new Set(['P', 'O3\'', 'O3*']),
+    new Set(['C3\'', 'C3*']),
+    new Set(['O2\'', 'O2*', 'F2\'', 'F2*'])
 ];
 
+/** Used to reduce false positives for atom name-based type guessing */
+const NonPolymerNames = new Set([
+    'FMN', 'NCN', 'FNS', 'FMA' // Mononucleotides
+]);
+
 const StandardComponents = (function() {
     const map = new Map<string, Component>();
     const components: Component[] = [
@@ -151,9 +156,11 @@ export class ComponentBuilder {
                 this.set(StandardComponents.get(compId)!);
             } else if (WaterNames.has(compId)) {
                 this.set({ id: compId, name: 'WATER', type: 'non-polymer' });
+            } else if (NonPolymerNames.has(compId)) {
+                this.set({ id: compId, name: this.namesMap.get(compId) || compId, type: 'non-polymer' });
             } else {
                 const atomIds = this.getAtomIds(index);
-                if (CharmmIonComponents.has(compId) && atomIds.size === 1) {
+                if (atomIds.size === 1 && CharmmIonComponents.has(compId)) {
                     this.set(CharmmIonComponents.get(compId)!);
                 } else {
                     const type = this.getType(atomIds);

+ 3 - 3
src/mol-model-formats/structure/mol.ts

@@ -17,7 +17,7 @@ import { ModelFormat } from '../format';
 import { IndexPairBonds } from './property/bonds/index-pair';
 import { Trajectory } from '../../mol-model/structure';
 
-async function getModels(mol: MolFile, ctx: RuntimeContext) {
+export async function getMolModels(mol: MolFile, format: ModelFormat<any> | undefined, ctx: RuntimeContext) {
     const { atoms, bonds } = mol;
 
     const MOL = Column.ofConst('MOL', mol.atoms.count, Column.Schema.str);
@@ -61,7 +61,7 @@ async function getModels(mol: MolFile, ctx: RuntimeContext) {
         atom_site
     });
 
-    const models = await createModels(basics, MolFormat.create(mol), ctx);
+    const models = await createModels(basics, format ?? MolFormat.create(mol), ctx);
 
     if (models.frameCount > 0) {
         const indexA = Column.ofIntArray(Column.mapToArray(bonds.atomIdxA, x => x - 1, Int32Array));
@@ -91,5 +91,5 @@ namespace MolFormat {
 }
 
 export function trajectoryFromMol(mol: MolFile): Task<Trajectory> {
-    return Task.create('Parse MOL', ctx => getModels(mol, ctx));
+    return Task.create('Parse MOL', ctx => getMolModels(mol, void 0, ctx));
 }

+ 29 - 0
src/mol-model-formats/structure/sdf.ts

@@ -0,0 +1,29 @@
+/**
+ * Copyright (c) 2021 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import { SdfFileCompound } from '../../mol-io/reader/sdf/parser';
+import { Trajectory } from '../../mol-model/structure';
+import { Task } from '../../mol-task';
+import { ModelFormat } from '../format';
+import { getMolModels } from './mol';
+
+export { SdfFormat };
+
+type SdfFormat = ModelFormat<SdfFileCompound>
+
+namespace SdfFormat {
+    export function is(x?: ModelFormat): x is SdfFormat {
+        return x?.kind === 'sdf';
+    }
+
+    export function create(mol: SdfFileCompound): SdfFormat {
+        return { kind: 'sdf', name: mol.molFile.title, data: mol };
+    }
+}
+
+export function trajectoryFromSdf(mol: SdfFileCompound): Task<Trajectory> {
+    return Task.create('Parse SDF', ctx => getMolModels(mol.molFile, SdfFormat.create(mol), ctx));
+}

+ 3 - 2
src/mol-plugin-state/transforms/model.ts

@@ -40,6 +40,7 @@ import { coordinatesFromXtc } from '../../mol-model-formats/structure/xtc';
 import { parseXyz } from '../../mol-io/reader/xyz/parser';
 import { trajectoryFromXyz } from '../../mol-model-formats/structure/xyz';
 import { parseSdf } from '../../mol-io/reader/sdf/parser';
+import { trajectoryFromSdf } from '../../mol-model-formats/structure/sdf';
 
 export { CoordinatesFromDcd };
 export { CoordinatesFromXtc };
@@ -308,8 +309,8 @@ const TrajectoryFromSDF = PluginStateTransform.BuiltIn({
 
             const models: Model[] = [];
 
-            for (const { molFile } of parsed.result.compounds) {
-                const traj = await trajectoryFromMol(molFile).runInContext(ctx);
+            for (const compound of parsed.result.compounds) {
+                const traj = await trajectoryFromSdf(compound).runInContext(ctx);
                 for (let i = 0; i < traj.frameCount; i++) {
                     models.push(await Task.resolveInContext(traj.getFrameAtIndex(i), ctx));
                 }