Przeglądaj źródła

support matrix type for cif-core schema

Alexander Rose 5 lat temu
rodzic
commit
0f22eab8b9

+ 8 - 1
data/cif-field-names/cif-core-field-names.csv

@@ -45,16 +45,23 @@ atom_site.calc_flag
 atom_site.refinement_flags
 atom_site.disorder_assembly
 atom_site.disorder_group
-
 atom_site.site_symmetry_multiplicity
 
 atom_site_aniso.label
+atom_site_aniso.U
 atom_site_aniso.U_11
 atom_site_aniso.U_22
 atom_site_aniso.U_33
 atom_site_aniso.U_23
 atom_site_aniso.U_13
 atom_site_aniso.U_12
+atom_site_aniso.U_su
+atom_site_aniso.U_11_su
+atom_site_aniso.U_22_su
+atom_site_aniso.U_33_su
+atom_site_aniso.U_23_su
+atom_site_aniso.U_13_su
+atom_site_aniso.U_12_su
 
 geom_bond.atom_site_label_1
 geom_bond.atom_site_label_2

+ 32 - 3
src/apps/cifschema/util/cif-dic.ts

@@ -232,6 +232,32 @@ const FORCE_INT_FIELDS = [
     '_struct_sheet_range.end_auth_seq_id',
 ];
 
+const FORCE_MATRIX_FIELDS_MAP: { [k: string]: string } = { // per-component field header -> name of the 3x3 matrix field that generateSchema emits in addition to the float column
+    'atom_site_aniso.U_11': 'U',
+    'atom_site_aniso.U_22': 'U',
+    'atom_site_aniso.U_33': 'U',
+    'atom_site_aniso.U_23': 'U',
+    'atom_site_aniso.U_13': 'U',
+    'atom_site_aniso.U_12': 'U',
+    'atom_site_aniso.U_11_su': 'U_su', // the `_su` components map to their own matrix field, separate from `U`
+    'atom_site_aniso.U_22_su': 'U_su',
+    'atom_site_aniso.U_33_su': 'U_su',
+    'atom_site_aniso.U_23_su': 'U_su',
+    'atom_site_aniso.U_13_su': 'U_su',
+    'atom_site_aniso.U_12_su': 'U_su',
+}
+const FORCE_MATRIX_FIELDS = Object.keys(FORCE_MATRIX_FIELDS_MAP) // headers that generateSchema tests with `.includes(d.header)` to decide whether to force a matrix column
+
+const EXTRA_ALIASES: Database['aliases'] = { // entries that generateSchema spreads into its initial `aliases` object
+    'atom_site_aniso.U': [ // alias names listed for the `U` matrix field
+        'atom_site_anisotrop_U'
+    ],
+    'atom_site_aniso.U_su': [ // alias names listed for the `U_su` matrix field
+        'atom_site_aniso_U_esd',
+        'atom_site_anisotrop_U_esd',
+    ],
+}
+
 const COMMA_SEPARATED_LIST_FIELDS = [
     '_atom_site.pdbx_struct_group_id',
     '_chem_comp.mon_nstd_parent_comp_id',
@@ -280,9 +306,8 @@ const EXTRA_ENUM_VALUES: { [k: string]: string[] } = {
 }
 
 export function generateSchema(frames: CifFrame[], imports: Imports = new Map()): Database {
-
     const tables: Database['tables'] = {}
-    const aliases: Database['aliases'] = {}
+    const aliases: Database['aliases'] = { ...EXTRA_ALIASES }
 
     const categories: FrameCategories = {}
     const links: FrameLinks = {}
@@ -291,7 +316,7 @@ export function generateSchema(frames: CifFrame[], imports: Imports = new Map())
     // get category metadata
     frames.forEach(d => {
         // category definitions in mmCIF start with '_' and don't include a '.'
-        // category definitions in cif don't include a '.'
+        // category definitions in cifCore don't include a '.'
         if (d.header[0] === '_'  || d.header.includes('.')) return
         const categoryName = d.header.toLowerCase()
         // console.log(d.header, d.categoryNames, d.categories)
@@ -399,6 +424,10 @@ export function generateSchema(frames: CifFrame[], imports: Imports = new Map())
         } else if (FORCE_INT_FIELDS.includes(d.header)) {
             fields[itemName] = IntCol(description)
             console.log(`forcing int: ${d.header}`)
+        } else if (FORCE_MATRIX_FIELDS.includes(d.header)) {
+            fields[itemName] = FloatCol(description)
+            fields[FORCE_MATRIX_FIELDS_MAP[d.header]] = MatrixCol(3, 3, description)
+            console.log(`forcing matrix: ${d.header}`)
         } else if (subCategory === 'matrix') {
             fields[itemName.replace(reMatrixField, '')] = MatrixCol(3, 3, description)
         } else if (subCategory === 'vector') {

+ 27 - 6
src/mol-io/reader/cif/data-model.ts

@@ -267,21 +267,40 @@ export namespace CifField {
     }
 }
 
-export function getTensor(category: CifCategory, field: string, space: Tensor.Space, row: number, zeroIndexed: boolean): Tensor.Data {
-    const ret = space.create();
+export function tensorFieldNameGetter(field: string, rank: number, zeroIndexed: boolean, namingVariant: 'brackets' | 'underscore') { // returns a function that turns `rank` tensor indices into a field name derived from `field`; throws for ranks other than 1, 2 or 3
     const offset = zeroIndexed ? 0 : 1; // added to every index inside the returned function, so callers pass zero-based indices
+    switch (rank) {
+        case 1: // one index: `field[i]` (brackets) or `field_i` (underscore)
+            return namingVariant === 'brackets'
+                ? (i: number) => `${field}[${i + offset}]`
+                : (i: number) => `${field}_${i + offset}`
+        case 2: // two indices: `field[i][j]` (brackets) or `field_ij` (underscore, indices concatenated without a separator)
+            return namingVariant === 'brackets'
+                ? (i: number, j: number) => `${field}[${i + offset}][${j + offset}]`
+                : (i: number, j: number) => `${field}_${i + offset}${j + offset}`
+        case 3: // three indices: `field[i][j][k]` (brackets) or `field_ijk` (underscore)
+            return namingVariant === 'brackets'
+                ? (i: number, j: number, k: number) => `${field}[${i + offset}][${j + offset}][${k + offset}]`
+                : (i: number, j: number, k: number) => `${field}_${i + offset}${j + offset}${k + offset}`
+        default: // any rank other than 1, 2 or 3 is rejected
+            throw new Error('Tensors with rank > 3 or rank 0 are currently not supported.');
+    }
+}
+
+export function getTensor(category: CifCategory, space: Tensor.Space, row: number, getName: (...args: number[]) => string): Tensor.Data {
+    const ret = space.create();
 
     if (space.rank === 1) {
         const rows = space.dimensions[0];
         for (let i = 0; i < rows; i++) {
-            const f = category.getField(`${field}[${i + offset}]`);
+            const f = category.getField(getName(i));
             space.set(ret, i, !!f ? f.float(row) : 0.0);
         }
     } else if (space.rank === 2) {
         const rows = space.dimensions[0], cols = space.dimensions[1];
         for (let i = 0; i < rows; i++) {
             for (let j = 0; j < cols; j++) {
-                const f = category.getField(`${field}[${i + offset}][${j + offset}]`);
+                const f = category.getField(getName(i, j));
                 space.set(ret, i, j, !!f ? f.float(row) : 0.0);
             }
         }
@@ -290,12 +309,14 @@ export function getTensor(category: CifCategory, field: string, space: Tensor.Sp
         for (let i = 0; i < d0; i++) {
             for (let j = 0; j < d1; j++) {
                 for (let k = 0; k < d2; k++) {
-                    const f = category.getField(`${field}[${i + offset}][${j + offset}][${k + offset}]`);
+                    const f = category.getField(getName(i, j, k));
                     space.set(ret, i, j, k, !!f ? f.float(row) : 0.0);
                 }
             }
         }
-    } else throw new Error('Tensors with rank > 3 or rank 0 are currently not supported.');
+    } else {
+        throw new Error('Tensors with rank > 3 or rank 0 are currently not supported.');
+    }
     return ret;
 }
 

+ 14 - 11
src/mol-io/reader/cif/schema.ts

@@ -88,18 +88,21 @@ function createListColumn<T extends number | string>(schema: Column.Schema.List<
 
 function createTensorColumn(schema: Column.Schema.Tensor, category: Data.CifCategory, key: string): Column<Tensor.Data> {
     const space = schema.space;
-    const zeroOffset = category.fieldNames.indexOf(`${key}[0]`) >= 0;
+    const zeroOffset = (
+        category.fieldNames.includes(`${key}[0]`) ||
+        category.fieldNames.includes(`${key}[0][0]`) ||
+        category.fieldNames.includes(`${key}[0][0][0]`)
+    );
     const fst = zeroOffset ? 0 : 1;
-
-    let firstFieldName: string;
-    switch (space.rank) {
-        case 1: firstFieldName = `${key}[${fst}]`; break;
-        case 2: firstFieldName = `${key}[${fst}][${fst}]`; break;
-        case 3: firstFieldName = `${key}[${fst}][${fst}][${fst}]`; break;
-        default: throw new Error('Tensors with rank > 3 or rank 0 are currently not supported.');
-    }
-    const first = category.getField(firstFieldName) || Column.Undefined(category.rowCount, schema);
-    const value = (row: number) => Data.getTensor(category, key, space, row, zeroOffset);
+    const namingVariant = (
+        category.fieldNames.includes(`${key}_1`) ||
+        category.fieldNames.includes(`${key}_11`) ||
+        category.fieldNames.includes(`${key}_111`)
+    ) ? 'underscore' : 'brackets'
+
+    const getName = Data.tensorFieldNameGetter(key, space.rank, zeroOffset, namingVariant)
+    const first = category.getField(getName(fst, fst, fst)) || Column.Undefined(category.rowCount, schema);
+    const value = (row: number) => Data.getTensor(category, space, row, getName);
     const toArray: Column<Tensor.Data>['toArray'] = params => ColumnHelpers.createAndFillArray(category.rowCount, value, params)
 
     return {

+ 94 - 0
src/mol-io/reader/cif/schema/cif-core.ts

@@ -13,6 +13,7 @@ import Schema = Column.Schema
 const int = Schema.int;
 const float = Schema.float;
 const str = Schema.str;
+const Matrix = Schema.Matrix;
 
 export const CifCore_Schema = {
     /**
@@ -519,6 +520,33 @@ export const CifCore_Schema = {
          * The unique elements of the real symmetric matrix are entered by row.
          */
         U_11: float,
+        /**
+         * These are the standard anisotropic atomic displacement
+         * components in angstroms squared which appear in the
+         * structure factor term:
+         *
+         * T = exp{-2pi^2^ sum~i~ [sum~j~ (U^ij^ h~i~ h~j~ a*~i~ a*~j~) ] }
+         *
+         * h = the Miller indices
+         * a* = the reciprocal-space cell lengths
+         *
+         * The unique elements of the real symmetric matrix are entered by row.
+         */
+        U: Matrix(3, 3),
+        /**
+         * These are the standard uncertainty values (SU) for the standard
+         * form of the Uij anisotropic atomic displacement components (see
+         * _aniso_UIJ). Because these values are TYPE measurand, the su values
+         * may in practice be auto generated as part of the Uij calculation.
+         */
+        U_11_su: float,
+        /**
+         * These are the standard uncertainty values (SU) for the standard
+         * form of the Uij anisotropic atomic displacement components (see
+         * _aniso_UIJ). Because these values are TYPE measurand, the su values
+         * may in practice be auto generated as part of the Uij calculation.
+         */
+        U_su: Matrix(3, 3),
         /**
          * These are the standard anisotropic atomic displacement
          * components in angstroms squared which appear in the
@@ -532,6 +560,13 @@ export const CifCore_Schema = {
          * The unique elements of the real symmetric matrix are entered by row.
          */
         U_12: float,
+        /**
+         * These are the standard uncertainty values (SU) for the standard
+         * form of the Uij anisotropic atomic displacement components (see
+         * _aniso_UIJ). Because these values are TYPE measurand, the su values
+         * may in practice be auto generated as part of the Uij calculation.
+         */
+        U_12_su: float,
         /**
          * These are the standard anisotropic atomic displacement
          * components in angstroms squared which appear in the
@@ -545,6 +580,13 @@ export const CifCore_Schema = {
          * The unique elements of the real symmetric matrix are entered by row.
          */
         U_13: float,
+        /**
+         * These are the standard uncertainty values (SU) for the standard
+         * form of the Uij anisotropic atomic displacement components (see
+         * _aniso_UIJ). Because these values are TYPE measurand, the su values
+         * may in practice be auto generated as part of the Uij calculation.
+         */
+        U_13_su: float,
         /**
          * These are the standard anisotropic atomic displacement
          * components in angstroms squared which appear in the
@@ -558,6 +600,13 @@ export const CifCore_Schema = {
          * The unique elements of the real symmetric matrix are entered by row.
          */
         U_22: float,
+        /**
+         * These are the standard uncertainty values (SU) for the standard
+         * form of the Uij anisotropic atomic displacement components (see
+         * _aniso_UIJ). Because these values are TYPE measurand, the su values
+         * may in practice be auto generated as part of the Uij calculation.
+         */
+        U_22_su: float,
         /**
          * These are the standard anisotropic atomic displacement
          * components in angstroms squared which appear in the
@@ -571,6 +620,13 @@ export const CifCore_Schema = {
          * The unique elements of the real symmetric matrix are entered by row.
          */
         U_23: float,
+        /**
+         * These are the standard uncertainty values (SU) for the standard
+         * form of the Uij anisotropic atomic displacement components (see
+         * _aniso_UIJ). Because these values are TYPE measurand, the su values
+         * may in practice be auto generated as part of the Uij calculation.
+         */
+        U_23_su: float,
         /**
          * These are the standard anisotropic atomic displacement
          * components in angstroms squared which appear in the
@@ -584,6 +640,13 @@ export const CifCore_Schema = {
          * The unique elements of the real symmetric matrix are entered by row.
          */
         U_33: float,
+        /**
+         * These are the standard uncertainty values (SU) for the standard
+         * form of the Uij anisotropic atomic displacement components (see
+         * _aniso_UIJ). Because these values are TYPE measurand, the su values
+         * may in practice be auto generated as part of the Uij calculation.
+         */
+        U_33_su: float,
     },
     /**
      * The CATEGORY of data items used to describe atomic type information
@@ -629,6 +692,13 @@ export const CifCore_Schema = {
 }
 
 export const CifCore_Aliases = {
+    'atom_site_aniso.U': [
+        'atom_site_anisotrop_U',
+    ],
+    'atom_site_aniso.U_su': [
+        'atom_site_aniso_U_esd',
+        'atom_site_anisotrop_U_esd',
+    ],
     'space_group.IT_number': [
         'symmetry_Int_Tables_number',
     ],
@@ -662,21 +732,45 @@ export const CifCore_Aliases = {
     'atom_site_aniso.U_11': [
         'atom_site_anisotrop_U_11',
     ],
+    'atom_site_aniso.U_11_su': [
+        'atom_site_aniso_U_11_esd',
+        'atom_site_anisotrop_U_11_esd',
+    ],
     'atom_site_aniso.U_12': [
         'atom_site_anisotrop_U_12',
     ],
+    'atom_site_aniso.U_12_su': [
+        'atom_site_aniso_U_12_esd',
+        'atom_site_anisotrop_U_12_esd',
+    ],
     'atom_site_aniso.U_13': [
         'atom_site_anisotrop_U_13',
     ],
+    'atom_site_aniso.U_13_su': [
+        'atom_site_aniso_U_13_esd',
+        'atom_site_anisotrop_U_13_esd',
+    ],
     'atom_site_aniso.U_22': [
         'atom_site_anisotrop_U_22',
     ],
+    'atom_site_aniso.U_22_su': [
+        'atom_site_aniso_U_22_esd',
+        'atom_site_anisotrop_U_22_esd',
+    ],
     'atom_site_aniso.U_23': [
         'atom_site_anisotrop_U_23',
     ],
+    'atom_site_aniso.U_23_su': [
+        'atom_site_aniso_U_23_esd',
+        'atom_site_anisotrop_U_23_esd',
+    ],
     'atom_site_aniso.U_33': [
         'atom_site_anisotrop_U_33',
     ],
+    'atom_site_aniso.U_33_su': [
+        'atom_site_aniso_U_33_esd',
+        'atom_site_anisotrop_U_33_esd',
+    ],
 }
 
 export type CifCore_Schema = typeof CifCore_Schema;

+ 30 - 1
src/mol-model-formats/structure/_spec/cif-core.spec.ts

@@ -44,6 +44,34 @@ _cell_measurement_temperature    100(2)
 _cell_measurement_reflns_used    5934
 _cell_measurement_theta_min      2.86
 _cell_measurement_theta_max      64.30
+
+loop_
+_atom_site_aniso_label
+_atom_site_aniso_U_11
+_atom_site_aniso_U_22
+_atom_site_aniso_U_33
+_atom_site_aniso_U_23
+_atom_site_aniso_U_13
+_atom_site_aniso_U_12
+Pt1 0.0425(2) 0.0423(2) 0.0375(2) 0.00066(13) 0.01515(13) 0.00089(12)
+K1 0.0605(15) 0.0687(17) 0.0559(17) 0.000 0.0203(13) 0.000
+Cl2 0.0511(11) 0.0554(11) 0.0533(13) 0.0078(10) 0.0225(9) 0.0027(9)
+Cl3 0.0708(13) 0.0484(11) 0.0605(13) -0.0053(10) 0.0276(10) 0.0026(10)
+Cl1 0.0950(16) 0.0442(11) 0.0942(18) -0.0051(12) 0.0526(14) 0.0035(12)
+N9 0.045(3) 0.047(4) 0.035(4) 0.004(3) 0.014(3) -0.003(3)
+N7 0.040(3) 0.048(4) 0.036(3) 0.008(3) 0.004(3) -0.004(3)
+O2 0.052(3) 0.098(4) 0.046(4) -0.012(4) 0.006(3) -0.016(3)
+N3 0.041(3) 0.044(3) 0.044(4) 0.001(3) 0.008(3) -0.002(3)
+O6 0.053(3) 0.093(4) 0.052(3) 0.008(3) 0.021(3) -0.019(3)
+C4 0.044(4) 0.032(4) 0.050(5) 0.004(4) 0.011(4) 0.003(3)
+N1 0.049(4) 0.049(4) 0.040(4) 0.004(3) 0.014(3) -0.005(3)
+C8 0.050(4) 0.045(4) 0.033(4) -0.007(4) 0.000(3) -0.004(4)
+C5 0.036(4) 0.039(4) 0.045(5) 0.003(4) 0.013(3) -0.001(3)
+C2 0.047(4) 0.045(4) 0.039(5) -0.007(4) 0.011(4) -0.004(4)
+C7 0.041(4) 0.072(5) 0.055(5) 0.013(5) 0.006(4) -0.015(4)
+C1 0.061(5) 0.067(5) 0.043(5) -0.002(4) 0.017(4) -0.005(4)
+C3 0.038(4) 0.090(6) 0.054(5) 0.003(5) 0.013(4) -0.018(4)
+C6 0.045(4) 0.043(4) 0.038(4) 0.004(4) 0.008(3) -0.002(4)
 `
 
 describe('cif-core read', () => {
@@ -65,6 +93,7 @@ describe('cif-core read', () => {
         const cifCore = CIF.schema.cifCore(block)
 
         expect(cifCore.cell.length_a.value(0)).toBe(11.0829)
-        expect.assertions(1)
+        expect(cifCore.atom_site_aniso.U.value(0)).toEqual(new Float64Array([ 0.0425, 0, 0, 0.00089, 0.0423, 0, 0.01515, 0.00066, 0.0375 ]))
+        expect.assertions(2)
     });
 });