Browse Source

update coreCif dictionary

Alexander Rose 3 years ago
parent
commit
3400c8e94a

+ 16 - 23
data/cif-field-names/cif-core-field-names.csv

@@ -2,11 +2,11 @@ audit.block_doi
 
 database_code.depnum_ccdc_archive
 database_code.depnum_ccdc_fiz
-database_code.ICSD
-database_code.MDF
-database_code.NBS
-database_code.CSD
-database_code.COD
+database_code.icsd
+database_code.mdf
+database_code.nbs
+database_code.csd
+database_code.cod
 
 chemical.name_systematic
 chemical.name_common
@@ -24,8 +24,8 @@ atom_type_scat.dispersion_imag
 atom_type_scat.source
 
 space_group.crystal_system
-space_group.name_H-M_full
-space_group.IT_number
+space_group.name_h-m_full
+space_group.it_number
 space_group_symop.operation_xyz
 
 cell.length_a
@@ -35,14 +35,14 @@ cell.angle_alpha
 cell.angle_beta
 cell.angle_gamma
 cell.volume
-cell.formula_units_Z
+cell.formula_units_z
 
 atom_site.label
 atom_site.type_symbol
 atom_site.fract_x
 atom_site.fract_y
 atom_site.fract_z
-atom_site.U_iso_or_equiv
+atom_site.u_iso_or_equiv
 atom_site.adp_type
 atom_site.occupancy
 atom_site.calc_flag
@@ -52,20 +52,13 @@ atom_site.disorder_group
 atom_site.site_symmetry_multiplicity
 
 atom_site_aniso.label
-atom_site_aniso.U
-atom_site_aniso.U_11
-atom_site_aniso.U_22
-atom_site_aniso.U_33
-atom_site_aniso.U_23
-atom_site_aniso.U_13
-atom_site_aniso.U_12
-atom_site_aniso.U_su
-atom_site_aniso.U_11_su
-atom_site_aniso.U_22_su
-atom_site_aniso.U_33_su
-atom_site_aniso.U_23_su
-atom_site_aniso.U_13_su
-atom_site_aniso.U_12_su
+atom_site_aniso.u
+atom_site_aniso.u_11
+atom_site_aniso.u_22
+atom_site_aniso.u_33
+atom_site_aniso.u_23
+atom_site_aniso.u_13
+atom_site_aniso.u_12
 
 geom_bond.atom_site_label_1
 geom_bond.atom_site_label_2

+ 16 - 19
src/cli/cifschema/util/cif-dic.ts

@@ -81,7 +81,7 @@ export function getFieldType(type: string, description: string, values?: string[
         case 'List(Real,Real)':
         case 'List(Real,Real,Real,Real)':
         case 'Date':
-        case 'Datetime':
+        case 'DateTime':
         case 'Tag':
         case 'Implied':
             return wrapContainer('str', ',', description, container);
@@ -234,29 +234,26 @@ const FORCE_INT_FIELDS = [
     '_struct_sheet_range.end_auth_seq_id',
 ];
 
+/**
+ * Note that name and mapped name must share a prefix. This is not always the case in
+ * the cifCore dictionary, but for downstream code to work, a container field with the
+ * same prefix as the member fields must be given here and in the field names filter
+ * list.
+ */
 const FORCE_MATRIX_FIELDS_MAP: { [k: string]: string } = {
-    'atom_site_aniso.U_11': 'U',
-    'atom_site_aniso.U_22': 'U',
-    'atom_site_aniso.U_33': 'U',
-    'atom_site_aniso.U_23': 'U',
-    'atom_site_aniso.U_13': 'U',
-    'atom_site_aniso.U_12': 'U',
-    'atom_site_aniso.U_11_su': 'U_su',
-    'atom_site_aniso.U_22_su': 'U_su',
-    'atom_site_aniso.U_33_su': 'U_su',
-    'atom_site_aniso.U_23_su': 'U_su',
-    'atom_site_aniso.U_13_su': 'U_su',
-    'atom_site_aniso.U_12_su': 'U_su',
+    'atom_site_aniso.u_11': 'u', // is matrix_u in the dic
+    'atom_site_aniso.u_22': 'u',
+    'atom_site_aniso.u_33': 'u',
+    'atom_site_aniso.u_23': 'u',
+    'atom_site_aniso.u_13': 'u',
+    'atom_site_aniso.u_12': 'u',
 };
 const FORCE_MATRIX_FIELDS = Object.keys(FORCE_MATRIX_FIELDS_MAP);
 
 const EXTRA_ALIASES: Database['aliases'] = {
-    'atom_site_aniso.U': [
-        'atom_site_anisotrop_U'
-    ],
-    'atom_site_aniso.U_su': [
-        'atom_site_aniso_U_esd',
-        'atom_site_anisotrop_U_esd',
+    'atom_site_aniso.matrix_u': [
+        'atom_site_anisotrop_U',
+        'atom_site_aniso.U'
     ],
 };
 

+ 2 - 2
src/cli/cifschema/util/helper.ts

@@ -10,8 +10,8 @@ export function parseImportGet(s: string): Import[] {
     // [{'save':hi_ang_Fox_coeffs  'file':templ_attr.cif}   {'save':hi_ang_Fox_c0  'file':templ_enum.cif}]
     // [{"file":'templ_enum.cif' "save":'H_M_ref'}]
     return s.trim().substring(2, s.length - 2).split(/}[ \n\t]*{/g).map(s => {
-        const save = s.match(/('save'|"save"):([^ \t\n]+)/);
-        const file = s.match(/('file'|"file"):([^ \t\n]+)/);
+        const save = s.match(/('save'|"save"):([^ \t\n{}]+)/);
+        const file = s.match(/('file'|"file"):([^ \t\n{}]+)/);
         return {
             save: save ? save[0].substr(7).replace(/['"]/g, '') : undefined,
             file: file ? file[0].substr(7).replace(/['"]/g, '') : undefined

+ 68 - 120
src/mol-io/reader/cif/schema/cif-core.ts

@@ -1,7 +1,7 @@
 /**
  * Copyright (c) 2017-2020 mol* contributors, licensed under MIT, See LICENSE file for more info.
  *
- * Code-generated 'CifCore' schema file. Dictionary versions: CifCore 3.0.14.
+ * Code-generated 'CifCore' schema file. Dictionary versions: CifCore 3.1.0.
  *
  * @author molstar/ciftools package
  */
@@ -10,8 +10,8 @@ import { Database, Column } from '../../../../mol-data/db';
 
 import Schema = Column.Schema;
 
-const int = Schema.int;
 const float = Schema.float;
+const int = Schema.int;
 const str = Schema.str;
 const Matrix = Schema.Matrix;
 
@@ -21,16 +21,6 @@ export const CifCore_Schema = {
      * the crystal unit cell and their measurement.
      */
     cell: {
-        /**
-         * The number of the formula units in the unit cell as specified
-         * by _chemical_formula.structural, _chemical_formula.moiety or
-         * _chemical_formula.sum.
-         */
-        formula_units_Z: int,
-        /**
-         * Volume of the crystal unit cell.
-         */
-        volume: float,
         /**
          * The angle between the bounding cell axes.
          */
@@ -43,6 +33,12 @@ export const CifCore_Schema = {
          * The angle between the bounding cell axes.
          */
         angle_gamma: float,
+        /**
+         * The number of the formula units in the unit cell as specified
+         * by _chemical_formula.structural, _chemical_formula.moiety or
+         * _chemical_formula.sum.
+         */
+        formula_units_z: int,
         /**
          * The length of each cell axis.
          */
@@ -55,6 +51,10 @@ export const CifCore_Schema = {
          * The length of each cell axis.
          */
         length_c: float,
+        /**
+         * Volume of the crystal unit cell.
+         */
+        volume: float,
     },
     /**
      * The CATEGORY of data items which describe the composition and
@@ -184,12 +184,12 @@ export const CifCore_Schema = {
         crystal_system: str,
         /**
          * The number as assigned in International Tables for Crystallography
-         * Vol A, specifying the proper affine class (i.e. the orientation
+         * Vol. A, specifying the proper affine class (i.e. the orientation
          * preserving affine class) of space groups (crystallographic space
          * group type) to which the space group belongs. This number defines
          * the space group type but not the coordinate system expressed.
          */
-        IT_number: int,
+        it_number: int,
         /**
          * The full international Hermann-Mauguin space-group symbol as
          * defined in Section 2.2.3 and given as the second item of the
@@ -220,7 +220,7 @@ export const CifCore_Schema = {
          * Space-group symmetry, edited by Th. Hahn, 5th ed.
          * Dordrecht: Kluwer Academic Publishers.
          */
-        'name_H-M_full': str,
+        'name_h-m_full': str,
     },
     /**
      * The CATEGORY of data items used to describe symmetry equivalent sites
@@ -340,8 +340,8 @@ export const CifCore_Schema = {
         /**
          * The digital object identifier (DOI) registered to identify
          * the data set publication represented by the current
-         * datablock. This can be used as a unique identifier for
-         * the datablock so long as the code used is a valid DOI
+         * data block. This can be used as a unique identifier for
+         * the data block so long as the code used is a valid DOI
          * (i.e. begins with a valid publisher prefix assigned by a
          * Registration Agency and a suffix guaranteed to be unique
          * by the publisher) and has had its metadata deposited
@@ -354,8 +354,8 @@ export const CifCore_Schema = {
          * structured extensible way. A DOI is an implementation
          * of the Internet concepts of Uniform Resource Name and
          * Universal Resource Locator managed according to the
-         * specifications of the International DOI Foundation (see
-         * http://www.doi.org).
+         * specifications of the International DOI Foundation
+         * (see http://www.doi.org).
          */
         block_doi: str,
     },
@@ -366,13 +366,13 @@ export const CifCore_Schema = {
      */
     database_code: {
         /**
-         * Code assigned by Crystallography Open Database (COD).
+         * Code assigned by the Crystallography Open Database (COD).
          */
-        COD: str,
+        cod: str,
         /**
          * Code assigned by the Cambridge Structural Database.
          */
-        CSD: str,
+        csd: str,
         /**
          * Deposition numbers assigned by the Cambridge Crystallographic
          * Data Centre (CCDC) to files containing structural information
@@ -388,15 +388,15 @@ export const CifCore_Schema = {
         /**
          * Code assigned by the Inorganic Crystal Structure Database.
          */
-        ICSD: str,
+        icsd: str,
         /**
          * Code assigned in the Metals Data File.
          */
-        MDF: str,
+        mdf: str,
         /**
          * Code assigned by the NBS (NIST) Crystal Data Database.
          */
-        NBS: str,
+        nbs: str,
     },
     /**
      * The CATEGORY of data items used to describe atom site information
@@ -511,7 +511,7 @@ export const CifCore_Schema = {
          * a* = the reciprocal-space cell lengths
          * Ref: Fischer, R. X. & Tillmanns, E. (1988). Acta Cryst. C44, 775-776.
          */
-        U_iso_or_equiv: float,
+        u_iso_or_equiv: float,
     },
     /**
      * The CATEGORY of data items used to describe the anisotropic
@@ -537,7 +537,7 @@ export const CifCore_Schema = {
          *
          * The unique elements of the real symmetric matrix are entered by row.
          */
-        U_11: float,
+        u_11: float,
         /**
          * These are the standard anisotropic atomic displacement
          * components in angstroms squared which appear in the
@@ -550,21 +550,7 @@ export const CifCore_Schema = {
          *
          * The unique elements of the real symmetric matrix are entered by row.
          */
-        U: Matrix(3, 3),
-        /**
-         * These are the standard uncertainty values (SU) for the standard
-         * form of the Uij anisotropic atomic displacement components (see
-         * _aniso_UIJ. Because these values are TYPE measurand, the su values
-         * may in practice be auto generated as part of the Uij calculation.
-         */
-        U_11_su: float,
-        /**
-         * These are the standard uncertainty values (SU) for the standard
-         * form of the Uij anisotropic atomic displacement components (see
-         * _aniso_UIJ. Because these values are TYPE measurand, the su values
-         * may in practice be auto generated as part of the Uij calculation.
-         */
-        U_su: Matrix(3, 3),
+        u: Matrix(3, 3),
         /**
          * These are the standard anisotropic atomic displacement
          * components in angstroms squared which appear in the
@@ -577,14 +563,7 @@ export const CifCore_Schema = {
          *
          * The unique elements of the real symmetric matrix are entered by row.
          */
-        U_12: float,
-        /**
-         * These are the standard uncertainty values (SU) for the standard
-         * form of the Uij anisotropic atomic displacement components (see
-         * _aniso_UIJ. Because these values are TYPE measurand, the su values
-         * may in practice be auto generated as part of the Uij calculation.
-         */
-        U_12_su: float,
+        u_12: float,
         /**
          * These are the standard anisotropic atomic displacement
          * components in angstroms squared which appear in the
@@ -597,14 +576,7 @@ export const CifCore_Schema = {
          *
          * The unique elements of the real symmetric matrix are entered by row.
          */
-        U_13: float,
-        /**
-         * These are the standard uncertainty values (SU) for the standard
-         * form of the Uij anisotropic atomic displacement components (see
-         * _aniso_UIJ. Because these values are TYPE measurand, the su values
-         * may in practice be auto generated as part of the Uij calculation.
-         */
-        U_13_su: float,
+        u_13: float,
         /**
          * These are the standard anisotropic atomic displacement
          * components in angstroms squared which appear in the
@@ -617,14 +589,7 @@ export const CifCore_Schema = {
          *
          * The unique elements of the real symmetric matrix are entered by row.
          */
-        U_22: float,
-        /**
-         * These are the standard uncertainty values (SU) for the standard
-         * form of the Uij anisotropic atomic displacement components (see
-         * _aniso_UIJ. Because these values are TYPE measurand, the su values
-         * may in practice be auto generated as part of the Uij calculation.
-         */
-        U_22_su: float,
+        u_22: float,
         /**
          * These are the standard anisotropic atomic displacement
          * components in angstroms squared which appear in the
@@ -637,14 +602,7 @@ export const CifCore_Schema = {
          *
          * The unique elements of the real symmetric matrix are entered by row.
          */
-        U_23: float,
-        /**
-         * These are the standard uncertainty values (SU) for the standard
-         * form of the Uij anisotropic atomic displacement components (see
-         * _aniso_UIJ. Because these values are TYPE measurand, the su values
-         * may in practice be auto generated as part of the Uij calculation.
-         */
-        U_23_su: float,
+        u_23: float,
         /**
          * These are the standard anisotropic atomic displacement
          * components in angstroms squared which appear in the
@@ -657,14 +615,7 @@ export const CifCore_Schema = {
          *
          * The unique elements of the real symmetric matrix are entered by row.
          */
-        U_33: float,
-        /**
-         * These are the standard uncertainty values (SU) for the standard
-         * form of the Uij anisotropic atomic displacement components (see
-         * _aniso_UIJ. Because these values are TYPE measurand, the su values
-         * may in practice be auto generated as part of the Uij calculation.
-         */
-        U_33_su: float,
+        u_33: float,
     },
     /**
      * The CATEGORY of data items used to describe atomic type information
@@ -710,17 +661,14 @@ export const CifCore_Schema = {
 };
 
 export const CifCore_Aliases = {
-    'atom_site_aniso.U': [
-        'atom_site_anisotrop_U',
-    ],
-    'atom_site_aniso.U_su': [
-        'atom_site_aniso_U_esd',
-        'atom_site_anisotrop_U_esd',
+    'cell.formula_units_z': [
+        'cell_formula_units_Z',
     ],
-    'space_group.IT_number': [
+    'space_group.it_number': [
+        'space_group_IT_number',
         'symmetry_Int_Tables_number',
     ],
-    'space_group.name_H-M_full': [
+    'space_group.name_h-m_full': [
         'symmetry_space_group_name_H-M',
     ],
     'space_group_symop.operation_xyz': [
@@ -735,6 +683,21 @@ export const CifCore_Aliases = {
     'geom_bond.distance': [
         'geom_bond_dist',
     ],
+    'database_code.cod': [
+        'database_code_COD',
+    ],
+    'database_code.csd': [
+        'database_code_CSD',
+    ],
+    'database_code.icsd': [
+        'database_code_ICSD',
+    ],
+    'database_code.mdf': [
+        'database_code_MDF',
+    ],
+    'database_code.nbs': [
+        'database_code_NBS',
+    ],
     'atom_site.adp_type': [
         'atom_site_thermal_displace_type',
     ],
@@ -744,51 +707,36 @@ export const CifCore_Aliases = {
     'atom_site.site_symmetry_multiplicity': [
         'atom_site_symmetry_multiplicity',
     ],
+    'atom_site.u_iso_or_equiv': [
+        'atom_site_U_iso_or_equiv',
+    ],
     'atom_site_aniso.label': [
         'atom_site_anisotrop_id',
     ],
-    'atom_site_aniso.U_11': [
+    'atom_site_aniso.u_11': [
+        'atom_site_aniso_U_11',
         'atom_site_anisotrop_U_11',
     ],
-    'atom_site_aniso.U_11_su': [
-        'atom_site_aniso_U_11_esd',
-        'atom_site_anisotrop_U_11_esd',
-    ],
-    'atom_site_aniso.U_12': [
+    'atom_site_aniso.u_12': [
+        'atom_site_aniso_U_12',
         'atom_site_anisotrop_U_12',
     ],
-    'atom_site_aniso.U_12_su': [
-        'atom_site_aniso_U_12_esd',
-        'atom_site_anisotrop_U_12_esd',
-    ],
-    'atom_site_aniso.U_13': [
+    'atom_site_aniso.u_13': [
+        'atom_site_aniso_U_13',
         'atom_site_anisotrop_U_13',
     ],
-    'atom_site_aniso.U_13_su': [
-        'atom_site_aniso_U_13_esd',
-        'atom_site_anisotrop_U_13_esd',
-    ],
-    'atom_site_aniso.U_22': [
+    'atom_site_aniso.u_22': [
+        'atom_site_aniso_U_22',
         'atom_site_anisotrop_U_22',
     ],
-    'atom_site_aniso.U_22_su': [
-        'atom_site_aniso_U_22_esd',
-        'atom_site_anisotrop_U_22_esd',
-    ],
-    'atom_site_aniso.U_23': [
+    'atom_site_aniso.u_23': [
+        'atom_site_aniso_U_23',
         'atom_site_anisotrop_U_23',
     ],
-    'atom_site_aniso.U_23_su': [
-        'atom_site_aniso_U_23_esd',
-        'atom_site_anisotrop_U_23_esd',
-    ],
-    'atom_site_aniso.U_33': [
+    'atom_site_aniso.u_33': [
+        'atom_site_aniso_U_33',
         'atom_site_anisotrop_U_33',
     ],
-    'atom_site_aniso.U_33_su': [
-        'atom_site_aniso_U_33_esd',
-        'atom_site_anisotrop_U_33_esd',
-    ],
 };
 
 export type CifCore_Schema = typeof CifCore_Schema;

+ 9 - 4
src/mol-model-formats/structure/_spec/cif-core.spec.ts

@@ -1,5 +1,5 @@
 /**
- * Copyright (c) 2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ * Copyright (c) 2019-2021 mol* contributors, licensed under MIT, See LICENSE file for more info.
  *
  * @author Alexander Rose <alexander.rose@weirdbyte.de>
  */
@@ -32,6 +32,9 @@ N N 0.0311 0.0180 'International Tables Vol C Tables 4.2.6.8 and 6.1.1.4'
 O O 0.0492 0.0322 'International Tables Vol C Tables 4.2.6.8 and 6.1.1.4'
 F F 0.0727 0.0534 'International Tables Vol C Tables 4.2.6.8 and 6.1.1.4'
 
+_symmetry_cell_setting           Triclinic
+_symmetry_space_group_name_H-M   P-1
+
 _cell_length_a                   11.0829(8)
 _cell_length_b                   14.6829(10)
 _cell_length_c                   16.8532(17)
@@ -82,7 +85,8 @@ describe('cif-core read', () => {
         const block = cifFile.blocks[0];
 
         expect(block.getField('cell_length_a')!.float(0)).toBe(11.0829);
-        expect.assertions(1);
+        expect(block.getField('symmetry_space_group_name_H-M')!.str(0)).toBe('P-1');
+        expect.assertions(2);
     });
 
     it('schema', async () => {
@@ -93,7 +97,8 @@ describe('cif-core read', () => {
         const cifCore = CIF.schema.cifCore(block);
 
         expect(cifCore.cell.length_a.value(0)).toBe(11.0829);
-        expect(cifCore.atom_site_aniso.U.value(0)).toEqual(new Float64Array([ 0.0425, 0, 0, 0.00089, 0.0423, 0, 0.01515, 0.00066, 0.0375 ]));
-        expect.assertions(2);
+        expect(cifCore.space_group['name_h-m_full'].value(0)).toBe('P-1');
+        expect(cifCore.atom_site_aniso.u.value(0)).toEqual(new Float64Array([ 0.0425, 0, 0, 0.00089, 0.0423, 0, 0.01515, 0.00066, 0.0375 ]));
+        expect.assertions(3);
     });
 });

+ 12 - 13
src/mol-model-formats/structure/cif-core.ts

@@ -25,10 +25,10 @@ import { Trajectory } from '../../mol-model/structure';
 import { cantorPairing } from '../../mol-data/util';
 
 function getSpacegroupNameOrNumber(space_group: CifCore_Database['space_group']) {
-    const groupNumber = space_group.IT_number.value(0);
-    const groupName = space_group['name_H-M_full'].value(0);
-    if (!space_group.IT_number.isDefined) return groupName;
-    if (!space_group['name_H-M_full'].isDefined) return groupNumber;
+    const groupNumber = space_group.it_number.value(0);
+    const groupName = space_group['name_h-m_full'].value(0).replace('-', ' ');
+    if (!space_group.it_number.isDefined) return groupName;
+    if (!space_group['name_h-m_full'].isDefined) return groupNumber;
     return groupNumber;
 }
 
@@ -129,7 +129,7 @@ async function getModels(db: CifCore_Database, format: CifCoreFormat, ctx: Runti
         pdbx_formal_charge: formalCharge,
 
         pdbx_PDB_model_num: Column.ofConst(1, atomCount, Column.Schema.int),
-        B_iso_or_equiv: db.atom_site.U_iso_or_equiv,
+        B_iso_or_equiv: db.atom_site.u_iso_or_equiv,
     }, atomCount);
 
     const name = (
@@ -231,15 +231,14 @@ function atomSiteAnisotropFromCifCore(model: Model) {
     if (!CifCoreFormat.is(model.sourceData)) return;
     const { atom_site, atom_site_aniso } = model.sourceData.data.db;
     const data = Table.ofPartialColumns(AtomSiteAnisotrop.Schema, {
-        U: atom_site_aniso.U,
-        U_esd: atom_site_aniso.U_su
+        U: atom_site_aniso.u,
     }, atom_site_aniso._rowCount);
     const elementToAnsiotrop = AtomSiteAnisotrop.getElementToAnsiotropFromLabel(atom_site.label, atom_site_aniso.label);
     return { data, elementToAnsiotrop };
 }
 function atomSiteAnisotropApplicableCifCore(model: Model) {
     if (!CifCoreFormat.is(model.sourceData)) return false;
-    return model.sourceData.data.db.atom_site_aniso.U.isDefined;
+    return model.sourceData.data.db.atom_site_aniso.u.isDefined;
 }
 AtomSiteAnisotrop.Provider.formatRegistry.add('cifCore', atomSiteAnisotropFromCifCore, atomSiteAnisotropApplicableCifCore);
 
@@ -261,11 +260,11 @@ namespace CifCoreFormat {
         const name = (
             db.database_code.depnum_ccdc_archive.value(0) ||
             db.database_code.depnum_ccdc_fiz.value(0) ||
-            db.database_code.ICSD.value(0) ||
-            db.database_code.MDF.value(0) ||
-            db.database_code.NBS.value(0) ||
-            db.database_code.CSD.value(0) ||
-            db.database_code.COD.value(0) ||
+            db.database_code.icsd.value(0) ||
+            db.database_code.mdf.value(0) ||
+            db.database_code.nbs.value(0) ||
+            db.database_code.csd.value(0) ||
+            db.database_code.cod.value(0) ||
             db._name
         );