/**
 * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
 *
 * @author David Sehnal <david.sehnal@gmail.com>
 */

import { mmCIF_Database as mmCIF } from 'mol-io/reader/cif/schema/mmcif'
import StructureSequence from '../../properties/sequence'
import { Column } from 'mol-data/db';
import { AtomicHierarchy } from '../../properties/atomic';
import { Entities } from '../../properties/common';
import { Sequence } from '../../../../sequence';

// TODO how to handle microheterogeneity
//    see http://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Categories/entity_poly_seq.html
//
// Data items in the ENTITY_POLY_SEQ category specify the sequence
// of monomers in a polymer. Allowance is made for the possibility
// of microheterogeneity in a sample by allowing a given sequence
// number to be correlated with more than one monomer ID. The
// corresponding ATOM_SITE entries should reflect this
// heterogeneity.

export function getSequence(cif: mmCIF, entities: Entities, hierarchy: AtomicHierarchy): StructureSequence {
    if (!cif.entity_poly_seq._rowCount) return StructureSequence.fromAtomicHierarchy(entities, hierarchy);

    const { entity_id, num, mon_id } = cif.entity_poly_seq;

    const byEntityKey: StructureSequence['byEntityKey'] = {};
    const count = entity_id.rowCount;

    let i = 0;
    while (i < count) {
        const start = i;
        while (i < count - 1 && entity_id.areValuesEqual(i, i + 1)) i++;
        i++;

        const id = entity_id.value(start);
        const _compId = Column.window(mon_id, start, i);
        const _num = Column.window(num, start, i);

        byEntityKey[entities.getEntityIndex(id)] = {
            entityId: id,
            compId: _compId,
            num: _num,
            sequence: Sequence.ofResidueNames(_compId, _num)
        };
    }

    return { byEntityKey };
}