|
@@ -4,22 +4,25 @@
|
|
|
* @author Alexander Rose <alexander.rose@weirdbyte.de>
|
|
|
*/
|
|
|
|
|
|
-import { Database, Column } from './json-schema'
|
|
|
+import { Database, ValueColumn, ListColumn } from './json-schema'
|
|
|
import * as Data from 'mol-io/reader/cif/data-model'
|
|
|
|
|
|
-export function getFieldType (type: string, values?: string[]): Column {
|
|
|
+export function getFieldType (type: string, values?: string[]): ValueColumn|ListColumn {
|
|
|
switch (type) {
|
|
|
case 'code':
|
|
|
case 'ucode':
|
|
|
- if (values && values.length) {
|
|
|
- return { 'enum': values }
|
|
|
- } else {
|
|
|
- return 'str'
|
|
|
- }
|
|
|
case 'line':
|
|
|
case 'uline':
|
|
|
case 'text':
|
|
|
case 'char':
|
|
|
+ case 'uchar3':
|
|
|
+ case 'uchar1':
|
|
|
+ case 'boolean':
|
|
|
+ if (values && values.length) {
|
|
|
+ return { enum: [ 'str', values ] }
|
|
|
+ } else {
|
|
|
+ return 'str'
|
|
|
+ }
|
|
|
case 'aliasname':
|
|
|
case 'name':
|
|
|
case 'idname':
|
|
@@ -29,7 +32,6 @@ export function getFieldType (type: string, values?: string[]): Column {
|
|
|
case 'phone':
|
|
|
case 'email':
|
|
|
case 'code30':
|
|
|
- case 'ec-type':
|
|
|
case 'seq-one-letter-code':
|
|
|
case 'author':
|
|
|
case 'orcid_id':
|
|
@@ -44,27 +46,30 @@ export function getFieldType (type: string, values?: string[]): Column {
|
|
|
case 'float-range':
|
|
|
case 'binary':
|
|
|
case 'operation_expression':
|
|
|
- case 'ucode-alphanum-csv':
|
|
|
case 'point_symmetry':
|
|
|
- case 'id_list':
|
|
|
case '4x3_matrix':
|
|
|
case '3x4_matrices':
|
|
|
case 'point_group':
|
|
|
case 'point_group_helical':
|
|
|
- case 'boolean':
|
|
|
case 'symmetry_operation':
|
|
|
case 'date_dep':
|
|
|
- case 'uchar3':
|
|
|
- case 'uchar1':
|
|
|
case 'url':
|
|
|
case 'symop':
|
|
|
return 'str'
|
|
|
case 'int':
|
|
|
case 'non_negative_int':
|
|
|
case 'positive_int':
|
|
|
- return 'int'
|
|
|
+ if (values && values.length) {
|
|
|
+ return { enum: [ 'int', values ] }
|
|
|
+ } else {
|
|
|
+ return 'int'
|
|
|
+ }
|
|
|
case 'float':
|
|
|
return 'float'
|
|
|
+ case 'ec-type':
|
|
|
+ case 'ucode-alphanum-csv':
|
|
|
+ case 'id_list':
|
|
|
+ return { list: [ 'str', ',' ] }
|
|
|
}
|
|
|
console.log(`unknown type '${type}'`)
|
|
|
return 'str'
|
|
@@ -94,10 +99,10 @@ function getField ( category: string, field: string, d: Data.Frame, ctx: FrameDa
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-function getEnums (d: Data.Frame, ctx: FrameData): string[]|undefined {
|
|
|
+function getEnums (d: Data.Frame, ctx: FrameData) {
|
|
|
const value = getField('item_enumeration', 'value', d, ctx)
|
|
|
+ const enums: string[] = []
|
|
|
if (value) {
|
|
|
- const enums: string[] = []
|
|
|
for (let i = 0; i < value.rowCount; ++i) {
|
|
|
enums.push(value.str(i))
|
|
|
// console.log(value.str(i))
|
|
@@ -108,16 +113,10 @@ function getEnums (d: Data.Frame, ctx: FrameData): string[]|undefined {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-function getCode (d: Data.Frame, ctx: FrameData): [string, string[]]|undefined {
|
|
|
+function getCode (d: Data.Frame, ctx: FrameData): [string, string[]|undefined]|undefined {
|
|
|
const code = getField('item_type', 'code', d, ctx)
|
|
|
if (code) {
|
|
|
- let c = code.str(0)
|
|
|
- let e = []
|
|
|
- if (c === 'ucode') {
|
|
|
- const enums = getEnums(d, ctx)
|
|
|
- if (enums) e.push(...enums)
|
|
|
- }
|
|
|
- return [c, e]
|
|
|
+ return [ code.str(0), getEnums(d, ctx) ]
|
|
|
} else {
|
|
|
console.log(`item_type.code not found for '${d.header}'`)
|
|
|
}
|
|
@@ -131,15 +130,46 @@ function getSubCategory (d: Data.Frame, ctx: FrameData): string|undefined {
|
|
|
}
|
|
|
|
|
|
const FORCE_INT_FIELDS = [
|
|
|
+ '_atom_site.id',
|
|
|
+ '_atom_site.auth_seq_id',
|
|
|
+ '_pdbx_struct_mod_residue.auth_seq_id',
|
|
|
'_struct_conf.beg_auth_seq_id',
|
|
|
'_struct_conf.end_auth_seq_id',
|
|
|
- '_struct_sheet_range.beg_auth_seq_id',
|
|
|
- '_struct_sheet_range.end_auth_seq_id',
|
|
|
'_struct_conn.ptnr1_auth_seq_id',
|
|
|
'_struct_conn.ptnr2_auth_seq_id',
|
|
|
- '_pdbx_struct_mod_residue.auth_seq_id',
|
|
|
- '_atom_site.id',
|
|
|
- '_atom_site.auth_seq_id'
|
|
|
+ '_struct_sheet_range.beg_auth_seq_id',
|
|
|
+ '_struct_sheet_range.end_auth_seq_id',
|
|
|
+];
|
|
|
+
|
|
|
+const COMMA_SEPARATED_LIST_FIELDS = [
|
|
|
+ '_atom_site.pdbx_struct_group_id',
|
|
|
+ '_chem_comp.mon_nstd_parent_comp_id',
|
|
|
+ '_diffrn_radiation.pdbx_wavelength_list',
|
|
|
+ '_diffrn_source.pdbx_wavelength_list',
|
|
|
+ '_em_diffraction.tilt_angle_list', // 20,40,50,55
|
|
|
+ '_em_entity_assembly.entity_id_list',
|
|
|
+ '_entity.pdbx_ec',
|
|
|
+ '_pdbx_depui_entry_details.experimental_methods',
|
|
|
+ '_pdbx_depui_entry_details.requested_accession_types',
|
|
|
+ '_pdbx_soln_scatter_model.software_list', // INSIGHT II, HOMOLOGY, DISCOVERY, BIOPOLYMER, DELPHI
|
|
|
+ '_pdbx_soln_scatter_model.software_author_list', // MSI
|
|
|
+ '_pdbx_soln_scatter_model.entry_fitting_list', // Odd example: 'PDB CODE 1HFI, 1HCC, 1HFH, 1VCC'
|
|
|
+ '_pdbx_struct_assembly_gen.entity_inst_id',
|
|
|
+ '_pdbx_struct_assembly_gen.asym_id_list',
|
|
|
+ '_pdbx_struct_assembly_gen.auth_asym_id_list',
|
|
|
+ '_pdbx_struct_assembly_gen_depositor_info.asym_id_list',
|
|
|
+ '_pdbx_struct_assembly_gen_depositor_info.chain_id_list',
|
|
|
+ '_pdbx_struct_group_list.group_enumeration_type',
|
|
|
+ '_reflns.pdbx_diffrn_id',
|
|
|
+ '_refine.pdbx_diffrn_id',
|
|
|
+ '_reflns_shell.pdbx_diffrn_id',
|
|
|
+ '_struct_keywords.text',
|
|
|
+];
|
|
|
+
|
|
|
+const SPACE_SEPARATED_LIST_FIELDS = [
|
|
|
+ '_chem_comp.pdbx_subcomponent_list', // TSM DPH HIS CHF EMR
|
|
|
+ '_pdbx_soln_scatter.data_reduction_software_list', // OTOKO
|
|
|
+ '_pdbx_soln_scatter.data_analysis_software_list', // SCTPL5 GNOM
|
|
|
];
|
|
|
|
|
|
export function generateSchema (dic: Data.Block) {
|
|
@@ -195,14 +225,24 @@ export function generateSchema (dic: Data.Block) {
|
|
|
} else {
|
|
|
if (itemName.match(/\[[1-3]\]\[[1-3]\]/)) {
|
|
|
fields[itemName.replace(/\[[1-3]\]\[[1-3]\]/, '')] = { 'matrix': [ 3, 3 ] }
|
|
|
- // console.log(`${d.header} should have 'matrix' _item_sub_category.id`)
|
|
|
+ console.log(`${d.header} should have 'matrix' _item_sub_category.id`)
|
|
|
} else if (itemName.match(/\[[1-3]\]/)) {
|
|
|
fields[itemName.replace(/\[[1-3]\]/, '')] = { 'vector': [ 3 ] }
|
|
|
- // console.log(`${d.header} should have 'vector' _item_sub_category.id`)
|
|
|
+ console.log(`${d.header} should have 'vector' _item_sub_category.id`)
|
|
|
} else {
|
|
|
const code = getCode(d, ctx)
|
|
|
if (code) {
|
|
|
- fields[itemName] = getFieldType(code[0], code[1])
|
|
|
+ let fieldType = getFieldType(code[0], code[1]);
|
|
|
+ if (typeof fieldType === 'string') {
|
|
|
+ if (COMMA_SEPARATED_LIST_FIELDS.includes(d.header)) {
|
|
|
+ fieldType = { 'list': [ 'str', ',' ] };
|
|
|
+ console.log(`comma separated: ${d.header}`)
|
|
|
+ } else if (SPACE_SEPARATED_LIST_FIELDS.includes(d.header)) {
|
|
|
+ fieldType = { 'list': [ 'str', ' ' ] };
|
|
|
+ console.log(`space separated: ${d.header}`)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ fields[itemName] = fieldType
|
|
|
} else {
|
|
|
console.log(`could not determine code for '${d.header}'`)
|
|
|
}
|