index.ts 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275
  1. #!/usr/bin/env node
  2. /**
  3. * Copyright (c) 2017-2020 mol* contributors, licensed under MIT, See LICENSE file for more info.
  4. *
  5. * @author Alexander Rose <alexander.rose@weirdbyte.de>
  6. */
  7. import * as argparse from 'argparse';
  8. import * as fs from 'fs';
  9. import * as path from 'path';
  10. import fetch from 'node-fetch';
  11. import { parseCsv } from '../../mol-io/reader/csv/parser';
  12. import { CifFrame, CifBlock } from '../../mol-io/reader/cif';
  13. import { parseCifText } from '../../mol-io/reader/cif/text/parser';
  14. import { generateSchema } from './util/cif-dic';
  15. import { generate } from './util/generate';
  16. import { Filter, Database } from './util/schema';
  17. import { parseImportGet } from './util/helper';
  /**
   * Read the version string of a parsed CIF dictionary.
   *
   * Looks up the `version` field of the block's `dictionary` category and
   * returns its first row as a string.
   *
   * @param block First data block of a parsed dictionary file.
   * @returns The value of `dictionary.version` in row 0.
   * @throws Error when the block, its `dictionary` category or the `version`
   * field is missing, instead of failing with an opaque TypeError.
   */
  function getDicVersion(block: CifBlock) {
      const dictionary = block ? block.categories.dictionary : undefined;
      const versionField = dictionary ? dictionary.getField('version') : undefined;
      if (!versionField) {
          throw new Error(`dictionary is missing the 'dictionary.version' field`);
      }
      return versionField.str(0);
  }
  /**
   * Read the namespace of a parsed CIF dictionary.
   *
   * Looks up the `namespace` field of the block's `dictionary` category and
   * returns its first row as a string.
   *
   * @param block First data block of a parsed dictionary file.
   * @returns The value of `dictionary.namespace` in row 0.
   * @throws Error when the block, its `dictionary` category or the `namespace`
   * field is missing, instead of failing with an opaque TypeError.
   */
  function getDicNamespace(block: CifBlock) {
      const dictionary = block ? block.categories.dictionary : undefined;
      const namespaceField = dictionary ? dictionary.getField('namespace') : undefined;
      if (!namespaceField) {
          throw new Error(`dictionary is missing the 'dictionary.namespace' field`);
      }
      return namespaceField.str(0);
  }
  /**
   * Generate a schema from the mmCIF dictionary combined with the IHM and the
   * two carbohydrate extension dictionaries.
   *
   * Each dictionary is made available locally and parsed in turn (mmCIF, IHM,
   * carbohydrate branch, carbohydrate comp); the save frames of their first
   * blocks are concatenated in that order and handed to `generateSchema`.
   * The result is written through `runGenerateSchema`.
   *
   * @param name Schema name.
   * @param fieldNamesPath Path to a field-names file; empty for no filter.
   * @param typescript Emit TypeScript when true, JSON otherwise.
   * @param out Output path; when empty the schema is printed.
   * @param moldbImportPath Import path forwarded to the generator.
   * @param addAliases Forwarded to the generator.
   * @throws The parse result of any dictionary that fails to parse.
   */
  async function runGenerateSchemaMmcif(name: string, fieldNamesPath: string, typescript = false, out: string, moldbImportPath: string, addAliases: boolean) {
      // Make one dictionary available, parse it and hand back its first block.
      const loadFirstBlock = async (ensureAvailable: () => Promise<void>, dicPath: string) => {
          await ensureAvailable();
          const parsed = await parseCifText(fs.readFileSync(dicPath, 'utf8')).run();
          if (parsed.isError) throw parsed;
          return parsed.result.blocks[0];
      };

      const mmcifBlock = await loadFirstBlock(ensureMmcifDicAvailable, MMCIF_DIC_PATH);
      const ihmBlock = await loadFirstBlock(ensureIhmDicAvailable, IHM_DIC_PATH);
      const carbBranchBlock = await loadFirstBlock(ensureCarbBranchDicAvailable, CARB_BRANCH_DIC_PATH);
      const carbCompBlock = await loadFirstBlock(ensureCarbCompDicAvailable, CARB_COMP_DIC_PATH);

      const mmcifDicVersion = getDicVersion(mmcifBlock);
      const ihmDicVersion = getDicVersion(ihmBlock);
      // The carbohydrate extensions are labelled with a fixed version string.
      const carbDicVersion = 'draft';
      const version = `Dictionary versions: mmCIF ${mmcifDicVersion}, IHM ${ihmDicVersion}, CARB ${carbDicVersion}.`;

      const frames: CifFrame[] = [];
      for (const block of [mmcifBlock, ihmBlock, carbBranchBlock, carbCompBlock]) {
          frames.push(...block.saveFrames);
      }

      await runGenerateSchema(name, version, generateSchema(frames), fieldNamesPath, typescript, out, moldbImportPath, addAliases);
  }
  /**
   * Generate a schema from the CIF core dictionary.
   *
   * Makes the core dictionary files available, parses the core dictionary,
   * resolves the files its save frames import (relative to `DIC_DIR`) and
   * writes the generated schema through `runGenerateSchema`.
   *
   * @param name Schema name.
   * @param fieldNamesPath Path to a field-names file; empty for no filter.
   * @param typescript Emit TypeScript when true, JSON otherwise.
   * @param out Output path; when empty the schema is printed.
   * @param moldbImportPath Import path forwarded to the generator.
   * @param addAliases Forwarded to the generator.
   * @throws The parse result when the dictionary fails to parse.
   */
  async function runGenerateSchemaCifCore(name: string, fieldNamesPath: string, typescript = false, out: string, moldbImportPath: string, addAliases: boolean) {
      await ensureCifCoreDicAvailable();

      const dicText = fs.readFileSync(CIF_CORE_DIC_PATH, 'utf8');
      const parsed = await parseCifText(dicText).run();
      if (parsed.isError) throw parsed;
      const coreBlock = parsed.result.blocks[0];

      const coreVersion = getDicVersion(coreBlock);
      const version = `Dictionary versions: CifCore ${coreVersion}.`;

      const frames: CifFrame[] = [...coreBlock.saveFrames];
      const imports = await resolveImports(frames, DIC_DIR);
      await runGenerateSchema(name, version, generateSchema(frames, imports), fieldNamesPath, typescript, out, moldbImportPath, addAliases);
  }
  /**
   * Load the dictionary files referenced by `import.get` entries of the frames.
   *
   * For every frame that has an `import` category, the first row of its `get`
   * field is parsed with `parseImportGet`; each referenced file is read from
   * `baseDir`, parsed, and the save frames of its first block are stored under
   * the file name. A file is loaded at most once.
   *
   * @param frames Save frames to scan for imports.
   * @param baseDir Directory the imported file names are resolved against.
   * @returns Map from imported file name to that file's save frames.
   * @throws The parse result of any imported file that fails to parse.
   */
  async function resolveImports(frames: CifFrame[], baseDir: string): Promise<Map<string, CifFrame[]>> {
      const resolved = new Map<string, CifFrame[]>();

      for (const frame of frames) {
          if (!('import' in frame.categories)) continue;

          const getValue = frame.categories['import'].getField('get')!.str(0);
          for (const { file } of parseImportGet(getValue)) {
              // Skip entries without a file and files that were already loaded.
              if (!file || resolved.has(file)) continue;

              const parsed = await parseCifText(fs.readFileSync(path.join(baseDir, file), 'utf8')).run();
              if (parsed.isError) throw parsed;
              resolved.set(file, [...parsed.result.blocks[0].saveFrames]);
          }
      }

      return resolved;
  }
  /**
   * Generate a schema from a dictionary file given by path.
   *
   * Parses the file, reads version and namespace from its first block,
   * resolves imported files relative to the dictionary's own directory and
   * writes the generated schema through `runGenerateSchema`.
   *
   * @param name Schema name.
   * @param dicPath Path of the dictionary file to read.
   * @param fieldNamesPath Path to a field-names file; empty for no filter.
   * @param typescript Emit TypeScript when true, JSON otherwise.
   * @param out Output path; when empty the schema is printed.
   * @param moldbImportPath Import path forwarded to the generator.
   * @param addAliases Forwarded to the generator.
   * @throws The parse result when the dictionary fails to parse.
   */
  async function runGenerateSchemaDic(name: string, dicPath: string, fieldNamesPath: string, typescript = false, out: string, moldbImportPath: string, addAliases: boolean) {
      const parsed = await parseCifText(fs.readFileSync(dicPath, 'utf8')).run();
      if (parsed.isError) throw parsed;
      const block = parsed.result.blocks[0];

      const dicVersion = getDicVersion(block);
      const dicName = getDicNamespace(block);
      const version = `Dictionary versions: ${dicName} ${dicVersion}.`;

      const frames: CifFrame[] = [...block.saveFrames];
      const importBaseDir = path.dirname(dicPath);
      const imports = await resolveImports(frames, importBaseDir);

      await runGenerateSchema(name, version, generateSchema(frames, imports), fieldNamesPath, typescript, out, moldbImportPath, addAliases);
  }
  /**
   * Serialize a schema and emit it to a file or to standard output.
   *
   * When `fieldNamesPath` is non-empty a filter is loaded from it. The schema
   * is rendered with `generate` when `typescript` is true and as 4-space
   * indented JSON otherwise (the filter is only passed to `generate`).
   *
   * @param name Schema name passed to the generator.
   * @param version Version text passed to the generator.
   * @param schema Schema to serialize.
   * @param fieldNamesPath Path to a field-names file; empty for no filter.
   * @param typescript Emit TypeScript when true, JSON otherwise.
   * @param out Output path; when empty the result goes to `console.log`.
   * @param moldbImportPath Import path passed to the generator.
   * @param addAliases Passed to the generator.
   */
  async function runGenerateSchema(name: string, version: string, schema: Database, fieldNamesPath: string, typescript = false, out: string, moldbImportPath: string, addAliases: boolean) {
      let filter: Filter | undefined = undefined;
      if (fieldNamesPath) {
          filter = await getFieldNamesFilter(fieldNamesPath);
      }

      const output = typescript
          ? generate(name, version, schema, filter, moldbImportPath, addAliases)
          : JSON.stringify(schema, undefined, 4);

      if (!out) {
          console.log(output);
          return;
      }
      fs.writeFileSync(out, output);
  }
  /**
   * Build a category/field filter from a field-names file.
   *
   * The file is parsed as CSV without a header row; every entry of the first
   * column is split on `.` into a category and a field name, and
   * `filter[category][field]` is set to `true`.
   *
   * @param fieldNamesPath Path of the field-names file to read.
   * @returns Filter keyed by category, then by field.
   * @throws The CSV parse result when parsing fails, or an Error when the
   * parsed table has no first column.
   */
  async function getFieldNamesFilter(fieldNamesPath: string): Promise<Filter> {
      const fieldNamesStr = fs.readFileSync(fieldNamesPath, 'utf8');
      const parsed = await parseCsv(fieldNamesStr, { noColumnNames: true }).run();
      // Throw the failed parse result itself, as the CIF readers in this file
      // do; the command-line `parser` object has nothing to do with this error.
      if (parsed.isError) throw parsed;

      const fieldNamesCol = parsed.result.table.getColumn('0');
      if (!fieldNamesCol) throw new Error('error getting fields columns');

      const filter: Filter = {};
      for (const name of fieldNamesCol.toStringArray()) {
          const [category, field] = name.split('.');
          if (!filter[category]) filter[category] = {};
          filter[category][field] = true;
      }
      return filter;
  }
  /** Make sure the mmCIF dictionary file exists at `MMCIF_DIC_PATH`. */
  async function ensureMmcifDicAvailable(): Promise<void> {
      await ensureDicAvailable(MMCIF_DIC_PATH, MMCIF_DIC_URL);
  }
  /** Make sure the IHM extension dictionary file exists at `IHM_DIC_PATH`. */
  async function ensureIhmDicAvailable(): Promise<void> {
      await ensureDicAvailable(IHM_DIC_PATH, IHM_DIC_URL);
  }
  /** Make sure the entity-branch extension dictionary file exists at `CARB_BRANCH_DIC_PATH`. */
  async function ensureCarbBranchDicAvailable(): Promise<void> {
      await ensureDicAvailable(CARB_BRANCH_DIC_PATH, CARB_BRANCH_DIC_URL);
  }
  /** Make sure the chem-comp extension dictionary file exists at `CARB_COMP_DIC_PATH`. */
  async function ensureCarbCompDicAvailable(): Promise<void> {
      await ensureDicAvailable(CARB_COMP_DIC_PATH, CARB_COMP_DIC_URL);
  }
  /**
   * Make sure the CIF core dictionary and its two template files
   * (`templ_enum.cif`, `templ_attr.cif`) exist locally.
   *
   * The files are handled one after another, in that order.
   */
  async function ensureCifCoreDicAvailable(): Promise<void> {
      const files: [string, string][] = [
          [CIF_CORE_DIC_PATH, CIF_CORE_DIC_URL],
          [CIF_CORE_ENUM_PATH, CIF_CORE_ENUM_URL],
          [CIF_CORE_ATTR_PATH, CIF_CORE_ATTR_URL]
      ];
      for (const [dicPath, dicUrl] of files) {
          await ensureDicAvailable(dicPath, dicUrl);
      }
  }
  /**
   * Download a dictionary file unless a local copy already exists.
   *
   * Nothing happens when `dicPath` exists and `FORCE_DIC_DOWNLOAD` is false.
   * Otherwise the file is fetched from `dicUrl` and written to `dicPath`,
   * creating the target directory (and missing parents) first.
   *
   * @param dicPath Local path the dictionary is stored at.
   * @param dicUrl URL the dictionary is downloaded from.
   * @throws Error when the server answers with a non-success status, so that
   * an error page is never stored (and later reused) as the dictionary.
   */
  async function ensureDicAvailable(dicPath: string, dicUrl: string) {
      if (!FORCE_DIC_DOWNLOAD && fs.existsSync(dicPath)) return;

      const name = dicUrl.substring(dicUrl.lastIndexOf('/') + 1);
      console.log(`downloading ${name}...`);

      const response = await fetch(dicUrl);
      if (!response.ok) {
          throw new Error(`failed to download ${name} from ${dicUrl}: ${response.status} ${response.statusText}`);
      }
      const text = await response.text();

      // `recursive` also creates missing parents and is a no-op when the
      // directory already exists.
      fs.mkdirSync(path.dirname(dicPath), { recursive: true });
      fs.writeFileSync(dicPath, text);
      console.log(`done downloading ${name}`);
  }
  /** Directory in which dictionary files are stored locally. */
  const DIC_DIR = path.resolve(__dirname, '../../../../build/dics/');

  // mmCIF dictionary
  const MMCIF_DIC_URL = 'http://mmcif.wwpdb.org/dictionaries/ascii/mmcif_pdbx_v50.dic';
  const MMCIF_DIC_PATH = DIC_DIR + '/mmcif_pdbx_v50.dic';

  // IHM extension dictionary
  const IHM_DIC_URL = 'https://raw.githubusercontent.com/ihmwg/IHM-dictionary/master/ihm-extension.dic';
  const IHM_DIC_PATH = DIC_DIR + '/ihm-extension.dic';

  // Carbohydrate extension dictionaries (entity_branch and chem_comp)
  const CARB_BRANCH_DIC_URL = 'https://raw.githubusercontent.com/pdbxmmcifwg/carbohydrate-extension/master/dict/entity_branch-extension.dic';
  const CARB_BRANCH_DIC_PATH = DIC_DIR + '/entity_branch-extension.dic';
  const CARB_COMP_DIC_URL = 'https://raw.githubusercontent.com/pdbxmmcifwg/carbohydrate-extension/master/dict/chem_comp-extension.dic';
  const CARB_COMP_DIC_PATH = DIC_DIR + '/chem_comp-extension.dic';

  // CIF core dictionary and the two template files it is fetched with
  const CIF_CORE_DIC_URL = 'https://raw.githubusercontent.com/COMCIFS/cif_core/master/cif_core.dic';
  const CIF_CORE_DIC_PATH = DIC_DIR + '/cif_core.dic';
  const CIF_CORE_ENUM_URL = 'https://raw.githubusercontent.com/COMCIFS/cif_core/master/templ_enum.cif';
  const CIF_CORE_ENUM_PATH = DIC_DIR + '/templ_enum.cif';
  const CIF_CORE_ATTR_URL = 'https://raw.githubusercontent.com/COMCIFS/cif_core/master/templ_attr.cif';
  const CIF_CORE_ATTR_PATH = DIC_DIR + '/templ_attr.cif';
  /** Command-line argument parser of the schema generator. */
  const parser = new argparse.ArgumentParser({
      description: 'Create schema from mmcif dictionary (v50 plus IHM and entity_branch extensions, downloaded from wwPDB)',
      add_help: true
  });

  // What to generate: a named preset or an explicitly named schema.
  parser.add_argument('--preset', '-p', {
      help: 'Preset name',
      choices: ['', 'mmCIF', 'CCD', 'BIRD', 'CifCore'],
      default: ''
  });
  parser.add_argument('--name', '-n', { help: 'Schema name', default: '' });

  // Where and in which format the result is emitted.
  parser.add_argument('--out', '-o', { help: 'Generated schema output path, if not given printed to stdout' });
  parser.add_argument('--targetFormat', '-tf', {
      help: 'Target format',
      choices: ['typescript-molstar', 'json-internal'],
      default: 'typescript-molstar'
  });

  // Inputs: dictionary file and optional field-name list.
  parser.add_argument('--dicPath', '-d', { help: 'Path to dictionary', default: '' });
  parser.add_argument('--fieldNamesPath', '-fn', { help: 'Field names to include', default: '' });
  parser.add_argument('--forceDicDownload', '-f', { help: 'Force download of dictionaries', action: 'store_true' });

  // Options forwarded to the generator.
  parser.add_argument('--moldataImportPath', '-mip', {
      help: 'mol-data import path (for typescript target only)',
      default: 'molstar/lib/mol-data'
  });
  parser.add_argument('--addAliases', '-aa', { help: 'Add field name/path aliases', action: 'store_true' });
  /** Shape of the parsed command-line arguments, plus values derived from the preset. */
  interface Args {
      /** Schema name (`--name`), overwritten when a preset is selected. */
      name: string;
      /** Selected preset (`--preset`); empty when none was given. */
      preset: '' | 'mmCIF' | 'CCD' | 'BIRD' | 'CifCore';
      /** Whether dictionaries are downloaded even if already present (`--forceDicDownload`). */
      forceDicDownload: boolean;
      /**
       * Built-in dictionary family to use. There is no command-line argument of
       * this name; the value is only assigned when a preset is selected, hence
       * it is optional.
       */
      dic?: '' | 'mmCIF' | 'CifCore';
      /** Path to a dictionary file (`--dicPath`); empty when not given. */
      dicPath: string;
      /** Path to the field-names file (`--fieldNamesPath`), overwritten by presets. */
      fieldNamesPath: string;
      /** Output format (`--targetFormat`). */
      targetFormat: 'typescript-molstar' | 'json-internal';
      /** Output path (`--out`). */
      out: string;
      /** mol-data import path for the generated code (`--moldataImportPath`). */
      moldataImportPath: string;
      /** Whether field name/path aliases are added (`--addAliases`). */
      addAliases: boolean;
  }
  // Script entry point: parse the command line, apply the preset and run the
  // matching generator.
  const args: Args = parser.parse_args();
  const FORCE_DIC_DOWNLOAD = args.forceDicDownload;

  // A preset fixes the schema name, the dictionary family and the field-names file.
  switch (args.preset) {
      case 'mmCIF':
          args.name = 'mmCIF';
          args.dic = 'mmCIF';
          args.fieldNamesPath = path.resolve(__dirname, '../../../../data/cif-field-names/mmcif-field-names.csv');
          break;
      case 'CCD':
          args.name = 'CCD';
          args.dic = 'mmCIF';
          args.fieldNamesPath = path.resolve(__dirname, '../../../../data/cif-field-names/ccd-field-names.csv');
          break;
      case 'BIRD':
          args.name = 'BIRD';
          args.dic = 'mmCIF';
          args.fieldNamesPath = path.resolve(__dirname, '../../../../data/cif-field-names/bird-field-names.csv');
          break;
      case 'CifCore':
          args.name = 'CifCore';
          args.dic = 'CifCore';
          args.fieldNamesPath = path.resolve(__dirname, '../../../../data/cif-field-names/cif-core-field-names.csv');
          break;
  }

  if (!args.name) {
      // Without a schema name nothing can be generated; say so instead of
      // exiting silently with a success status.
      console.error('nothing to generate: pass --name or --preset (see --help)');
      process.exitCode = 1;
  } else {
      const typescript = args.targetFormat === 'typescript-molstar';

      // An explicit dictionary path takes precedence over the preset's dictionary.
      let run: Promise<void> | undefined;
      if (args.dicPath) {
          run = runGenerateSchemaDic(args.name, args.dicPath, args.fieldNamesPath, typescript, args.out, args.moldataImportPath, args.addAliases);
      } else if (args.dic === 'mmCIF') {
          run = runGenerateSchemaMmcif(args.name, args.fieldNamesPath, typescript, args.out, args.moldataImportPath, args.addAliases);
      } else if (args.dic === 'CifCore') {
          run = runGenerateSchemaCifCore(args.name, args.fieldNamesPath, typescript, args.out, args.moldataImportPath, args.addAliases);
      }

      if (run) {
          run.catch(e => {
              console.error(e);
              // Report the failure to the calling shell or build script as well.
              process.exitCode = 1;
          });
      } else {
          console.error('no dictionary selected: pass --dicPath or --preset (see --help)');
          process.exitCode = 1;
      }
  }