|
@@ -6,161 +6,17 @@
|
|
|
*/
|
|
|
|
|
|
import { substringStartsWith } from '../../../mol-util/string';
|
|
|
-import { CifField, CifCategory, CifFrame } from '../../../mol-io/reader/cif';
|
|
|
-import { mmCIF_Schema } from '../../../mol-io/reader/cif/schema/mmcif';
|
|
|
-import { TokenBuilder, Tokenizer } from '../../../mol-io/reader/common/text/tokenizer';
|
|
|
+import { CifCategory, CifFrame } from '../../../mol-io/reader/cif';
|
|
|
+import { Tokenizer } from '../../../mol-io/reader/common/text/tokenizer';
|
|
|
import { PdbFile } from '../../../mol-io/reader/pdb/schema';
|
|
|
import { parseCryst1, parseRemark350, parseMtrix } from './assembly';
|
|
|
import { parseHelix, parseSheet } from './secondary-structure';
|
|
|
-import { guessElementSymbolTokens } from '../util';
|
|
|
import { parseCmpnd, parseHetnam } from './entity';
|
|
|
import { ComponentBuilder } from '../common/component';
|
|
|
import { EntityBuilder } from '../common/entity';
|
|
|
import { Column } from '../../../mol-data/db';
|
|
|
import { getMoleculeType } from '../../../mol-model/structure/model/types';
|
|
|
-
|
|
|
-type AtomSiteTemplate = typeof getAtomSiteTemplate extends (...args: any) => infer T ? T : never
|
|
|
-function getAtomSiteTemplate(data: string, count: number) {
|
|
|
- const str = () => [] as string[];
|
|
|
- const ts = () => TokenBuilder.create(data, 2 * count);
|
|
|
- return {
|
|
|
- index: 0,
|
|
|
- count,
|
|
|
- group_PDB: ts(),
|
|
|
- id: str(),
|
|
|
- auth_atom_id: ts(),
|
|
|
- label_alt_id: ts(),
|
|
|
- auth_comp_id: ts(),
|
|
|
- auth_asym_id: ts(),
|
|
|
- auth_seq_id: ts(),
|
|
|
- pdbx_PDB_ins_code: ts(),
|
|
|
- Cartn_x: ts(),
|
|
|
- Cartn_y: ts(),
|
|
|
- Cartn_z: ts(),
|
|
|
- occupancy: ts(),
|
|
|
- B_iso_or_equiv: ts(),
|
|
|
- type_symbol: ts(),
|
|
|
- pdbx_PDB_model_num: str(),
|
|
|
- label_entity_id: str()
|
|
|
- };
|
|
|
-}
|
|
|
-
|
|
|
-function getAomSite(sites: AtomSiteTemplate): { [K in keyof mmCIF_Schema['atom_site']]?: CifField } {
|
|
|
- const auth_asym_id = CifField.ofTokens(sites.auth_asym_id);
|
|
|
- const auth_atom_id = CifField.ofTokens(sites.auth_atom_id);
|
|
|
- const auth_comp_id = CifField.ofTokens(sites.auth_comp_id);
|
|
|
- const auth_seq_id = CifField.ofTokens(sites.auth_seq_id);
|
|
|
-
|
|
|
- return {
|
|
|
- auth_asym_id,
|
|
|
- auth_atom_id,
|
|
|
- auth_comp_id,
|
|
|
- auth_seq_id,
|
|
|
- B_iso_or_equiv: CifField.ofTokens(sites.B_iso_or_equiv),
|
|
|
- Cartn_x: CifField.ofTokens(sites.Cartn_x),
|
|
|
- Cartn_y: CifField.ofTokens(sites.Cartn_y),
|
|
|
- Cartn_z: CifField.ofTokens(sites.Cartn_z),
|
|
|
- group_PDB: CifField.ofTokens(sites.group_PDB),
|
|
|
- id: CifField.ofStrings(sites.id),
|
|
|
-
|
|
|
- label_alt_id: CifField.ofTokens(sites.label_alt_id),
|
|
|
-
|
|
|
- label_asym_id: auth_asym_id,
|
|
|
- label_atom_id: auth_atom_id,
|
|
|
- label_comp_id: auth_comp_id,
|
|
|
- label_seq_id: auth_seq_id,
|
|
|
- label_entity_id: CifField.ofStrings(sites.label_entity_id),
|
|
|
-
|
|
|
- occupancy: CifField.ofTokens(sites.occupancy),
|
|
|
- type_symbol: CifField.ofTokens(sites.type_symbol),
|
|
|
-
|
|
|
- pdbx_PDB_ins_code: CifField.ofTokens(sites.pdbx_PDB_ins_code),
|
|
|
- pdbx_PDB_model_num: CifField.ofStrings(sites.pdbx_PDB_model_num)
|
|
|
- };
|
|
|
-}
|
|
|
-
|
|
|
-function addAtom(sites: AtomSiteTemplate, model: string, data: Tokenizer, s: number, e: number) {
|
|
|
- const { data: str } = data;
|
|
|
- const length = e - s;
|
|
|
-
|
|
|
- // TODO: filter invalid atoms
|
|
|
-
|
|
|
- // COLUMNS DATA TYPE CONTENTS
|
|
|
- // --------------------------------------------------------------------------------
|
|
|
- // 1 - 6 Record name "ATOM "
|
|
|
- TokenBuilder.addToken(sites.group_PDB, Tokenizer.trim(data, s, s + 6));
|
|
|
-
|
|
|
- // 7 - 11 Integer Atom serial number.
|
|
|
- // TODO: support HEX
|
|
|
- Tokenizer.trim(data, s + 6, s + 11);
|
|
|
- sites.id[sites.index] = data.data.substring(data.tokenStart, data.tokenEnd);
|
|
|
-
|
|
|
- // 13 - 16 Atom Atom name.
|
|
|
- TokenBuilder.addToken(sites.auth_atom_id, Tokenizer.trim(data, s + 12, s + 16));
|
|
|
-
|
|
|
- // 17 Character Alternate location indicator.
|
|
|
- if (str.charCodeAt(s + 16) === 32) { // ' '
|
|
|
- TokenBuilder.add(sites.label_alt_id, 0, 0);
|
|
|
- } else {
|
|
|
- TokenBuilder.add(sites.label_alt_id, s + 16, s + 17);
|
|
|
- }
|
|
|
-
|
|
|
- // 18 - 20 Residue name Residue name.
|
|
|
- TokenBuilder.addToken(sites.auth_comp_id, Tokenizer.trim(data, s + 17, s + 20));
|
|
|
-
|
|
|
- // 22 Character Chain identifier.
|
|
|
- TokenBuilder.add(sites.auth_asym_id, s + 21, s + 22);
|
|
|
-
|
|
|
- // 23 - 26 Integer Residue sequence number.
|
|
|
- // TODO: support HEX
|
|
|
- TokenBuilder.addToken(sites.auth_seq_id, Tokenizer.trim(data, s + 22, s + 26));
|
|
|
-
|
|
|
- // 27 AChar Code for insertion of residues.
|
|
|
- if (str.charCodeAt(s + 26) === 32) { // ' '
|
|
|
- TokenBuilder.add(sites.label_alt_id, 0, 0);
|
|
|
- } else {
|
|
|
- TokenBuilder.add(sites.label_alt_id, s + 26, s + 27);
|
|
|
- }
|
|
|
-
|
|
|
- // 31 - 38 Real(8.3) Orthogonal coordinates for X in Angstroms.
|
|
|
- TokenBuilder.addToken(sites.Cartn_x, Tokenizer.trim(data, s + 30, s + 38));
|
|
|
-
|
|
|
- // 39 - 46 Real(8.3) Orthogonal coordinates for Y in Angstroms.
|
|
|
- TokenBuilder.addToken(sites.Cartn_y, Tokenizer.trim(data, s + 38, s + 46));
|
|
|
-
|
|
|
- // 47 - 54 Real(8.3) Orthogonal coordinates for Z in Angstroms.
|
|
|
- TokenBuilder.addToken(sites.Cartn_z, Tokenizer.trim(data, s + 46, s + 54));
|
|
|
-
|
|
|
- // 55 - 60 Real(6.2) Occupancy.
|
|
|
- TokenBuilder.addToken(sites.occupancy, Tokenizer.trim(data, s + 54, s + 60));
|
|
|
-
|
|
|
- // 61 - 66 Real(6.2) Temperature factor (Default = 0.0).
|
|
|
- if (length >= 66) {
|
|
|
- TokenBuilder.addToken(sites.B_iso_or_equiv, Tokenizer.trim(data, s + 60, s + 66));
|
|
|
- } else {
|
|
|
- TokenBuilder.add(sites.label_alt_id, 0, 0);
|
|
|
- }
|
|
|
-
|
|
|
- // 73 - 76 LString(4) Segment identifier, left-justified.
|
|
|
- // ignored
|
|
|
-
|
|
|
- // 77 - 78 LString(2) Element symbol, right-justified.
|
|
|
- if (length >= 78) {
|
|
|
- Tokenizer.trim(data, s + 76, s + 78);
|
|
|
-
|
|
|
- if (data.tokenStart < data.tokenEnd) {
|
|
|
- TokenBuilder.addToken(sites.type_symbol, data);
|
|
|
- } else {
|
|
|
- guessElementSymbolTokens(sites.type_symbol, str, s + 12, s + 16)
|
|
|
- }
|
|
|
- } else {
|
|
|
- guessElementSymbolTokens(sites.type_symbol, str, s + 12, s + 16)
|
|
|
- }
|
|
|
-
|
|
|
- sites.pdbx_PDB_model_num[sites.index] = model;
|
|
|
-
|
|
|
- sites.index++;
|
|
|
-}
|
|
|
+import { getAtomSiteTemplate, addAtom, getAtomSite } from './atom-site';
|
|
|
|
|
|
export async function pdbToMmCif(pdb: PdbFile): Promise<CifFrame> {
|
|
|
const { lines } = pdb;
|
|
@@ -301,7 +157,7 @@ export async function pdbToMmCif(pdb: PdbFile): Promise<CifFrame> {
|
|
|
const categories = {
|
|
|
entity: entityBuilder.getEntityCategory(),
|
|
|
chem_comp: componentBuilder.getChemCompCategory(),
|
|
|
- atom_site: CifCategory.ofFields('atom_site', getAomSite(atomSite))
|
|
|
+ atom_site: CifCategory.ofFields('atom_site', getAtomSite(atomSite))
|
|
|
} as any;
|
|
|
|
|
|
for (const c of helperCategories) {
|