Browse Source

added entity.subtype based on _entity_poly.type and _pdbx_entity_branch.type

Alexander Rose 5 years ago
parent
commit
1ed420aeb7

+ 56 - 4
src/mol-model-formats/structure/mmcif/parser.ts

@@ -12,7 +12,7 @@ import { Tensor, Vec3 } from '../../../mol-math/linear-algebra';
 import { RuntimeContext } from '../../../mol-task';
 import UUID from '../../../mol-util/uuid';
 import { Model } from '../../../mol-model/structure/model/model';
-import { Entities, ChemicalComponent, MissingResidue } from '../../../mol-model/structure/model/properties/common';
+import { Entities, ChemicalComponent, MissingResidue, EntitySubtype } from '../../../mol-model/structure/model/properties/common';
 import { CustomProperties } from '../../../mol-model/structure';
 import { ModelSymmetry } from '../../../mol-model/structure/model/properties/symmetry';
 import { createAssemblies } from './assembly';
@@ -23,12 +23,12 @@ import { getSecondaryStructure } from './secondary-structure';
 import { getSequence } from './sequence';
 import { sortAtomSite } from './sort';
 import { StructConn } from './bonds/struct_conn';
-import { getMoleculeType, MoleculeType, getEntityType } from '../../../mol-model/structure/model/types';
+import { getMoleculeType, MoleculeType, getEntityType, getEntitySubtype } from '../../../mol-model/structure/model/types';
 import { ModelFormat } from '../format';
 import { SaccharideComponentMap, SaccharideComponent, SaccharidesSnfgMap, SaccharideCompIdMap, UnknownSaccharideComponent } from '../../../mol-model/structure/structure/carbohydrates/constants';
 import mmCIF_Format = ModelFormat.mmCIF
 import { memoize1 } from '../../../mol-util/memoize';
-import { ElementIndex } from '../../../mol-model/structure/model';
+import { ElementIndex, EntityIndex } from '../../../mol-model/structure/model';
 
 export async function _parse_mmCif(format: mmCIF_Format, ctx: RuntimeContext) {
     const formatData = getFormatData(format)
@@ -302,6 +302,7 @@ function getEntities(format: mmCIF_Format): Entities {
             if (!entityIds.has(entityId)) {
                 ids.push(entityId)
                 types.push(getEntityType(label_comp_id.value(i)))
+                entityIds.add(entityId)
             }
         }
 
@@ -311,6 +312,7 @@ function getEntities(format: mmCIF_Format): Entities {
             if (!entityIds.has(entityId)) {
                 ids.push(entityId)
                 types.push('polymer')
+                entityIds.add(entityId)
             }
         }
 
@@ -320,6 +322,7 @@ function getEntities(format: mmCIF_Format): Entities {
             if (!entityIds.has(entityId)) {
                 ids.push(entityId)
                 types.push('polymer')
+                entityIds.add(entityId)
             }
         }
 
@@ -331,7 +334,56 @@ function getEntities(format: mmCIF_Format): Entities {
     } else {
         entityData = format.data.entity;
     }
-    return { data: entityData, getEntityIndex: Column.createIndexer(entityData.id) };
+
+    const getEntityIndex = Column.createIndexer<string, EntityIndex>(entityData.id)
+
+    //
+
+    const subtypes: EntitySubtype[] = new Array(entityData._rowCount)
+    subtypes.fill('other')
+
+    const entityIds = new Set<string>()
+    let assignSubtype = false
+
+    if (format.data.entity_poly.entity_id.isDefined) {
+        const { entity_id, type, _rowCount } = format.data.entity_poly
+        for (let i = 0; i < _rowCount; ++i) {
+            const entityId = entity_id.value(i)
+            subtypes[getEntityIndex(entityId)] = type.value(i)
+            entityIds.add(entityId)
+        }
+    } else {
+        assignSubtype = true
+    }
+
+    if (format.data.pdbx_entity_branch.entity_id.isDefined) {
+        const { entity_id, type, _rowCount } = format.data.pdbx_entity_branch
+        for (let i = 0; i < _rowCount; ++i) {
+            const entityId = entity_id.value(i)
+            subtypes[getEntityIndex(entityId)] = type.value(i)
+            entityIds.add(entityId)
+        }
+    } else {
+        assignSubtype = true
+    }
+
+    if (assignSubtype) {
+        const { label_entity_id, label_comp_id } = format.data.atom_site;
+        for (let i = 0 as ElementIndex, il = format.data.atom_site._rowCount; i < il; i++) {
+            const entityId = label_entity_id.value(i);
+            if (!entityIds.has(entityId)) {
+                subtypes[getEntityIndex(entityId)] = getEntitySubtype(label_comp_id.value(i))
+                entityIds.add(entityId)
+            }
+        }
+        // TODO how to handle coarse?
+    }
+
+    const subtypeColumn = Column.ofArray({ array: subtypes, schema: EntitySubtype })
+
+    //
+
+    return { data: entityData, subtype: subtypeColumn, getEntityIndex };
 }
 
 async function readStandard(ctx: RuntimeContext, format: mmCIF_Format, formatData: FormatData) {

+ 10 - 3
src/mol-model/structure/model/properties/common.ts

@@ -6,11 +6,18 @@
  */
 
 import { mmCIF_Database, mmCIF_Schema } from '../../../../mol-io/reader/cif/schema/mmcif'
-import { Table } from '../../../../mol-data/db';
+import { Table, Column } from '../../../../mol-data/db';
 import { EntityIndex } from '../indexing';
 
+export type EntitySubtype = ( // union of the value types of the two mmCIF `type` columns listed below
+    mmCIF_Schema['entity_poly']['type']['T'] | // value type of the entity_poly.type column
+    mmCIF_Schema['pdbx_entity_branch']['type']['T'] // value type of the pdbx_entity_branch.type column
+)
+export const EntitySubtype = Column.Schema.Aliased<EntitySubtype>(Column.Schema.Str('')) // same-named column schema: Str('') aliased to the EntitySubtype type
+
 export interface Entities { // entity table plus derived per-entity data
     data: mmCIF_Database['entity'], // the mmCIF entity category
+    subtype: Column<EntitySubtype>, // one subtype per entity row; read with an entity index (see entities.subtype.value(eK(l)) in properties.ts)
     getEntityIndex(id: string): EntityIndex // maps an entity id to its index
 }
 
@@ -19,8 +26,8 @@ export type ChemicalComponentMap = ReadonlyMap<string, ChemicalComponent>
 
 export type MissingResidue = Table.Row<Pick<
     mmCIF_Schema['pdbx_unobs_or_zero_occ_residues'],
-    'polymer_flag' | 'occupancy_flag'>
->
+    'polymer_flag' | 'occupancy_flag'
+>>
 export interface MissingResidues {
     has(model_num: number, asym_id: string, seq_id: number): boolean
     get(model_num: number, asym_id: string, seq_id: number): MissingResidue | undefined

+ 24 - 2
src/mol-model/structure/model/types.ts

@@ -9,6 +9,7 @@ import BitFlags from '../../../mol-util/bit-flags'
 import { SaccharideCompIdMap } from '../structure/carbohydrates/constants';
 import { mmCIF_Schema } from '../../../mol-io/reader/cif/schema/mmcif';
 import { SetUtils } from '../../../mol-util/set';
+import { EntitySubtype } from './properties/common';
 
 const _esCache = (function () {
     const cache = Object.create(null);
@@ -148,10 +149,11 @@ export const WaterNames = new Set([
     'SOL', 'WAT', 'HOH', 'H2O', 'W', 'DOD', 'D3O', 'TIP3', 'TIP4', 'SPC'
 ])
 
-export const AminoAcidNames = new Set([
+export const AminoAcidNamesL = new Set([
     'HIS', 'ARG', 'LYS', 'ILE', 'PHE', 'LEU', 'TRP', 'ALA', 'MET', 'PRO', 'CYS',
     'ASN', 'VAL', 'GLY', 'SER', 'GLN', 'TYR', 'ASP', 'GLU', 'THR', 'SEC', 'PYL',
-
+])
+export const AminoAcidNamesD = new Set([
     'DAL', // D-ALANINE
     'DAR', // D-ARGININE
     'DSG', // D-ASPARAGINE
@@ -174,6 +176,7 @@ export const AminoAcidNames = new Set([
     'DNE' // D-NORLEUCINE
     // ???  // D-SELENOCYSTEINE
 ])
+export const AminoAcidNames = SetUtils.unionMany(AminoAcidNamesL, AminoAcidNamesD) // keeps the previous export name; now the union of the separate L and D name sets
 
 export const RnaBaseNames = new Set([ 'A', 'C', 'T', 'G', 'I', 'U' ])
 export const DnaBaseNames = new Set([ 'DA', 'DC', 'DT', 'DG', 'DI', 'DU' ])
@@ -240,6 +243,25 @@ export function getEntityType(compId: string): mmCIF_Schema['entity']['type']['T
     }
 }
 
+export function getEntitySubtype(compId: string): EntitySubtype { // derives an entity subtype from a component id; the first matching name set wins
+    compId = compId.toUpperCase() // upper-case the id before the set lookups (the name sets visible in this file are upper-case)
+    if (SaccharideCompIdMap.has(compId)) { // saccharide ids are tested first
+        return 'oligosaccharide'
+    } else if (PeptideBaseNames.has(compId)) { // tested before the amino-acid sets
+        return 'peptide nucleic acid'
+    } else if (AminoAcidNamesL.has(compId)) {
+        return 'polypeptide(L)'
+    } else if (AminoAcidNamesD.has(compId)) {
+        return 'polypeptide(D)'
+    } else if (RnaBaseNames.has(compId)) {
+        return 'polyribonucleotide'
+    } else if (DnaBaseNames.has(compId)) {
+        return 'polydeoxyribonucleotide'
+    } else { // no name set matched (e.g. the ids in WaterNames are not tested here)
+        return 'other'
+    }
+}
+
 export function isPolymer(moleculeType: MoleculeType) {
     return isNucleic(moleculeType) || isProtein(moleculeType)
 }

+ 1 - 0
src/mol-model/structure/structure/properties.ts

@@ -122,6 +122,7 @@ const entity = {
 
     id: p(l => l.unit.model.entities.data.id.value(eK(l))),
     type: p(l => l.unit.model.entities.data.type.value(eK(l))),
+    subtype: p(l => l.unit.model.entities.subtype.value(eK(l))),
     src_method: p(l => l.unit.model.entities.data.src_method.value(eK(l))),
     pdbx_description: p(l => l.unit.model.entities.data.pdbx_description.value(eK(l))),
     formula_weight: p(l => l.unit.model.entities.data.formula_weight.value(eK(l))),

+ 6 - 3
src/mol-script/language/symbol-table/structure-query.ts

@@ -1,7 +1,8 @@
 /**
- * Copyright (c) 2018 Mol* contributors, licensed under MIT, See LICENSE file for more info.
+ * Copyright (c) 2018-2019 Mol* contributors, licensed under MIT, See LICENSE file for more info.
  *
  * @author David Sehnal <david.sehnal@gmail.com>
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
  */
 
 import Type from '../type'
@@ -20,7 +21,8 @@ export namespace Types {
     export const SecondaryStructureFlags = Core.Types.Flags(SecondaryStructureFlag, 'SecondaryStructureFlag');
 
     export const RingFingerprint = Type.Value('Structure', 'RingFingerprint');
-    export const EntityType = Type.OneOf('Structure', 'EntityType', Type.Str, ['polymer', 'non-polymer', 'water', 'branched', 'unknown']);
+    export const EntityType = Type.OneOf('Structure', 'EntityType', Type.Str, ['polymer', 'non-polymer', 'water', 'branched']);
+    export const EntitySubtype = Type.OneOf('Structure', 'EntitySubtype', Type.Str, ['other', 'polypeptide(D)', 'polypeptide(L)', 'polydeoxyribonucleotide', 'polyribonucleotide', 'polydeoxyribonucleotide/polyribonucleotide hybrid', 'cyclic-pseudo-peptide', 'peptide nucleic acid', 'oligosaccharide']);
     export const ObjectPrimitive = Type.OneOf('Structure', 'ObjectPrimitive', Type.Str, ['atomistic', 'sphere', 'gaussian', 'other']);
     export const ResidueId = Type.Value('Structure', 'ResidueId');
 
@@ -295,7 +297,8 @@ const atomProperty = {
         occupancy: atomProp(Type.Num),
         B_iso_or_equiv: atomProp(Type.Num),
 
-        entityType: atomProp(Types.EntityType, 'Type of the entity as defined in mmCIF (polymer, non-polymer, branched, water, unknown)'),
+        entityType: atomProp(Types.EntityType, 'Type of the entity as defined in mmCIF (polymer, non-polymer, branched, water)'),
+        entitySubtype: atomProp(Types.EntitySubtype, 'Subtype of the entity as defined in mmCIF _entity_poly.type and _pdbx_entity_branch.type (other, polypeptide(D), polypeptide(L), polydeoxyribonucleotide, polyribonucleotide, polydeoxyribonucleotide/polyribonucleotide hybrid, cyclic-pseudo-peptide, peptide nucleic acid, oligosaccharide)'),
         objectPrimitive: atomProp(Types.ObjectPrimitive, 'Type of the primitive object used to model this segment as defined in mmCIF/IHM (atomistic, sphere, gaussian, other)'),
 
         secondaryStructureKey: atomProp(Type.AnyValue, 'Unique value for each secondary structure element.'),

+ 3 - 1
src/mol-script/runtime/query/table.ts

@@ -1,7 +1,8 @@
 /**
- * Copyright (c) 2018 Mol* contributors, licensed under MIT, See LICENSE file for more info.
+ * Copyright (c) 2018-2019 Mol* contributors, licensed under MIT, See LICENSE file for more info.
  *
  * @author David Sehnal <david.sehnal@gmail.com>
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
  */
 
 import { MolScriptSymbolTable as MolScript } from '../../language/symbol-table';
@@ -307,6 +308,7 @@ const symbols = [
     D(MolScript.structureQuery.atomProperty.macromolecular.B_iso_or_equiv, atomProp(StructureProperties.atom.B_iso_or_equiv)),
 
     D(MolScript.structureQuery.atomProperty.macromolecular.entityType, atomProp(StructureProperties.entity.type)),
+    D(MolScript.structureQuery.atomProperty.macromolecular.entitySubtype, atomProp(StructureProperties.entity.subtype)),
     D(MolScript.structureQuery.atomProperty.macromolecular.objectPrimitive, atomProp(StructureProperties.unit.object_primitive)),
 
     D(MolScript.structureQuery.atomProperty.macromolecular.isModified, atomProp(StructureProperties.residue.isModified)),