123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269 |
- /**
- * Copyright (c) 2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
- *
- * @author David Sehnal <david.sehnal@gmail.com>
- */
- import Format from '../format';
- import { Model } from '../model';
- import { Task } from 'mol-task';
- import { PdbFile } from 'mol-io/reader/pdb/schema';
- import from_mmCIF from './mmcif';
- import { mmCIF_Schema } from 'mol-io/reader/cif/schema/mmcif';
- import { substringStartsWith } from 'mol-util/string';
- import { TokenBuilder, Tokenizer } from 'mol-io/reader/common/text/tokenizer';
- import { CifField, CifCategory } from 'mol-io/reader/cif';
- import CifTextField, { CifTextValueField } from 'mol-io/reader/cif/text/field';
- function toCategory(name: string, fields: { [name: string]: CifField | undefined }, rowCount: number): CifCategory {
- return {
- name,
- fieldNames: Object.keys(fields),
- rowCount,
- getField(f: string) {
- return fields[f];
- }
- }
- }
- function _entity(): { [K in keyof mmCIF_Schema['entity']]?: CifField } {
- return {
- id: CifTextValueField(['1', '2', '3']),
- type: CifTextValueField(['polymer', 'non-polymer', 'water'])
- }
- }
- function atom_site_template(data: string, count: number) {
- const str = () => new Array(count) as string[];
- const ts = () => TokenBuilder.create(data, 2 * count);
- return {
- count,
- group_PDB: ts(),
- id: str(),
- auth_atom_id: ts(),
- label_alt_id: ts(),
- auth_comp_id: ts(),
- auth_asym_id: ts(),
- auth_seq_id: ts(),
- pdbx_PDB_ins_code: ts(),
- Cartn_x: ts(),
- Cartn_y: ts(),
- Cartn_z: ts(),
- occupancy: ts(),
- B_iso_or_equiv: ts(),
- type_symbol: ts(),
- pdbx_PDB_model_num: str(),
- label_entity_id: str()
- };
- }
- function _atom_site(sites: AtomSiteTemplate): { [K in keyof mmCIF_Schema['atom_site']]?: CifField } {
- const auth_asym_id = CifTextField(sites.auth_asym_id, sites.count);
- const auth_atom_id = CifTextField(sites.auth_atom_id, sites.count);
- const auth_comp_id = CifTextField(sites.auth_comp_id, sites.count);
- const auth_seq_id = CifTextField(sites.auth_seq_id, sites.count);
- return {
- auth_asym_id,
- auth_atom_id,
- auth_comp_id,
- auth_seq_id,
- B_iso_or_equiv: CifTextField(sites.B_iso_or_equiv, sites.count),
- Cartn_x: CifTextField(sites.Cartn_x, sites.count),
- Cartn_y: CifTextField(sites.Cartn_y, sites.count),
- Cartn_z: CifTextField(sites.Cartn_z, sites.count),
- group_PDB: CifTextField(sites.group_PDB, sites.count),
- id: CifTextValueField(sites.id),
- label_alt_id: CifTextField(sites.label_alt_id, sites.count),
- label_asym_id: auth_asym_id,
- label_atom_id: auth_atom_id,
- label_comp_id: auth_comp_id,
- label_seq_id: auth_seq_id,
- label_entity_id: CifTextValueField(sites.label_entity_id),
- occupancy: CifTextField(sites.occupancy, sites.count),
- type_symbol: CifTextField(sites.type_symbol, sites.count),
- pdbx_PDB_ins_code: CifTextField(sites.pdbx_PDB_ins_code, sites.count),
- pdbx_PDB_model_num: CifTextValueField(sites.pdbx_PDB_model_num)
- };
- }
- function addAtom(sites: AtomSiteTemplate, model: string, data: Tokenizer, s: number, e: number) {
- const { data: str } = data;
- let startPos = s;
- let start = s;
- const end = e;
- const length = end - start;
- // TODO: filter invalid atoms
- // COLUMNS DATA TYPE CONTENTS
- // --------------------------------------------------------------------------------
- // 1 - 6 Record name "ATOM "
- Tokenizer.trim(data, start, start + 6);
- TokenBuilder.add(sites.group_PDB, data.tokenStart, data.tokenEnd);
- // 7 - 11 Integer Atom serial number.
- // TODO: support HEX
- start = startPos + 6;
- Tokenizer.trim(data, start, start + 5);
- sites.id[sites.id.length] = data.data.substring(data.tokenStart, data.tokenEnd);
- // 13 - 16 Atom Atom name.
- start = startPos + 12;
- Tokenizer.trim(data, start, start + 4);
- TokenBuilder.add(sites.auth_atom_id, data.tokenStart, data.tokenEnd);
- // 17 Character Alternate location indicator.
- if (str.charCodeAt(startPos + 16) === 32) { // ' '
- TokenBuilder.add(sites.label_alt_id, 0, 0);
- } else {
- TokenBuilder.add(sites.label_alt_id, startPos + 16, startPos + 17);
- }
- // 18 - 20 Residue name Residue name.
- start = startPos + 17;
- Tokenizer.trim(data, start, start + 3);
- TokenBuilder.add(sites.auth_comp_id, data.tokenStart, data.tokenEnd);
- // 22 Character Chain identifier.
- TokenBuilder.add(sites.auth_asym_id, startPos + 21, startPos + 22);
- // 23 - 26 Integer Residue sequence number.
- // TODO: support HEX
- start = startPos + 22;
- Tokenizer.trim(data, start, start + 4);
- TokenBuilder.add(sites.auth_seq_id, data.tokenStart, data.tokenEnd);
- // 27 AChar Code for insertion of residues.
- if (str.charCodeAt(startPos + 26) === 32) { // ' '
- TokenBuilder.add(sites.label_alt_id, 0, 0);
- } else {
- TokenBuilder.add(sites.label_alt_id, startPos + 26, startPos + 27);
- }
- // 31 - 38 Real(8.3) Orthogonal coordinates for X in Angstroms.
- start = startPos + 30;
- Tokenizer.trim(data, start, start + 8);
- TokenBuilder.add(sites.Cartn_x, data.tokenStart, data.tokenEnd);
- // 39 - 46 Real(8.3) Orthogonal coordinates for Y in Angstroms.
- start = startPos + 38;
- Tokenizer.trim(data, start, start + 8);
- TokenBuilder.add(sites.Cartn_y, data.tokenStart, data.tokenEnd);
- // 47 - 54 Real(8.3) Orthogonal coordinates for Z in Angstroms.
- start = startPos + 46;
- Tokenizer.trim(data, start, start + 8);
- TokenBuilder.add(sites.Cartn_z, data.tokenStart, data.tokenEnd);
- // 55 - 60 Real(6.2) Occupancy.
- start = startPos + 54;
- Tokenizer.trim(data, start, start + 6);
- TokenBuilder.add(sites.occupancy, data.tokenStart, data.tokenEnd);
- // 61 - 66 Real(6.2) Temperature factor (Default = 0.0).
- if (length >= 66) {
- start = startPos + 60;
- Tokenizer.trim(data, start, start + 6);
- TokenBuilder.add(sites.B_iso_or_equiv, data.tokenStart, data.tokenEnd);
- } else {
- TokenBuilder.add(sites.label_alt_id, 0, 0);
- }
- // 73 - 76 LString(4) Segment identifier, left-justified.
- // ignored
- // 77 - 78 LString(2) Element symbol, right-justified.
- if (length >= 78) {
- start = startPos + 76;
- Tokenizer.trim(data, start, start + 2);
- if (data.tokenStart < data.tokenEnd) {
- TokenBuilder.add(sites.type_symbol, data.tokenStart, data.tokenEnd);
- } else {
- // "guess" the symbol
- TokenBuilder.add(sites.type_symbol, startPos + 12, startPos + 13);
- }
- } else {
- TokenBuilder.add(sites.type_symbol, startPos + 12, startPos + 13);
- }
- // TODO
- sites.label_entity_id.push('1');
- sites.pdbx_PDB_model_num.push(model);
- }
- type AtomSiteTemplate = typeof atom_site_template extends (...args: any) => infer T ? T : never
- async function pdbToMmCIF(pdb: PdbFile): Promise<Format.mmCIF> {
- const { lines } = pdb;
- const { data, indices } = lines;
- const tokenizer = Tokenizer(data);
- // Count the atoms
- let atomCount = 0;
- for (let i = 0, _i = lines.count; i < _i; i++) {
- const s = indices[2 * i], e = indices[2 * i + 1];
- switch (data[s]) {
- case 'A':
- if (substringStartsWith(data, s, e, 'ATOM ')) atomCount++;
- break;
- case 'H':
- if (!substringStartsWith(data, s, e, 'HETATM')) atomCount++;
- break;
- }
- }
- const atom_site = atom_site_template(data, atomCount);
- let modelNum = 0, modelStr = '';
- for (let i = 0, _i = lines.count; i < _i; i++) {
- const s = indices[2 * i], e = indices[2 * i + 1];
- switch (data[s]) {
- case 'A':
- if (!substringStartsWith(data, s, e, 'ATOM ')) continue;
- if (!modelNum) { modelNum++; modelStr = '' + modelNum; }
- addAtom(atom_site, modelStr, tokenizer, s, e);
- break;
- case 'H':
- if (!substringStartsWith(data, s, e, 'HETATM')) continue;
- if (!modelNum) { modelNum++; modelStr = '' + modelNum; }
- addAtom(atom_site, modelStr, tokenizer, s, e);
- break;
- case 'M':
- if (substringStartsWith(data, s, e, 'MODEL ')) {
- modelNum++;
- modelStr = '' + modelNum;
- }
- break;
- }
- }
- const categories = {
- entity: toCategory('entity', _entity(), 3),
- atom_site: toCategory('atom_site', _atom_site(atom_site), atomCount)
- }
- return Format.mmCIF({
- header: pdb.id || 'PDB',
- categoryNames: Object.keys(categories),
- categories
- });
- }
- function buildModels(format: Format.PDB): Task<ReadonlyArray<Model>> {
- return Task.create('Create PDB Model', async ctx => {
- await ctx.update('Converting to mmCIF...');
- const cif = await pdbToMmCIF(format.data);
- return from_mmCIF(cif).runInContext(ctx);
- });
- }
- export default buildModels;
|