sequence.ts 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113
  1. /**
  2. * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * @author David Sehnal <david.sehnal@gmail.com>
  5. */
  6. import { AminoAlphabet, NuclecicAlphabet, getProteinOneLetterCode, getRnaOneLetterCode, getDnaOneLetterCode } from './constants';
  7. import { Column } from 'mol-data/db'
  8. // TODO add mapping support to other sequence spaces, e.g. UniProt
  9. // TODO sequence alignment (take the NGL code as a starting point)
  /**
   * Union of every sequence variant declared in the `Sequence` namespace.
   * Each variant carries a distinct `kind` tag, so the union can be narrowed
   * by inspecting that field.
   */
  type Sequence =
    | Sequence.Protein
    | Sequence.DNA
    | Sequence.RNA
    | Sequence.Generic
  11. namespace Sequence {
  /** String tags used as the `kind` discriminator of a sequence. */
  export const enum Kind {
    Protein = 'protein',
    RNA = 'RNA',
    DNA = 'DNA',
    Generic = 'generic',
  }
  /**
   * Shape shared by every sequence variant.
   *
   * K is the `Kind` tag of the variant; Alphabet is the string type of the
   * individual symbols stored in `sequence`.
   */
  export interface Base<K extends Kind, Alphabet extends string> {
    /** Tag identifying which variant this value is. */
    readonly kind: K;
    /** Numeric offset that accompanies the sequence (`create` defaults it to 0). */
    readonly offset: number;
    /** The symbols of the sequence, indexable by position. */
    readonly sequence: ArrayLike<Alphabet>;
  }

  /** Sequence tagged `Kind.Protein`, with symbols from `AminoAlphabet`. */
  export interface Protein extends Base<Kind.Protein, AminoAlphabet> {}

  /** Sequence tagged `Kind.RNA`, with symbols from `NuclecicAlphabet`. */
  export interface RNA extends Base<Kind.RNA, NuclecicAlphabet> {}

  /** Sequence tagged `Kind.DNA`, with symbols from `NuclecicAlphabet`. */
  export interface DNA extends Base<Kind.DNA, NuclecicAlphabet> {}

  /** Sequence tagged `Kind.Generic`, whose symbols are only 'X' or '-'. */
  export interface Generic extends Base<Kind.Generic, 'X' | '-'> {}
  /**
   * Wraps an already-built sequence string in a `Sequence` value.
   *
   * @param kind - tag stored on the result as `kind`
   * @param sequence - the sequence string, stored as-is
   * @param offset - stored on the result as `offset`; defaults to 0
   * @returns an object holding exactly the three given values
   */
  export function create(kind: Kind, sequence: string, offset: number = 0): Sequence {
    // The string is not validated against the alphabet implied by `kind`,
    // so the literal is handed back through an untyped binding.
    const wrapped: any = { kind, sequence, offset };
    return wrapped;
  }
  /**
   * Returns the `sequence` field of the given value typed as a string.
   * No conversion is performed; the field is only re-typed.
   */
  export function getSequenceString(seq: Sequence) {
    const { sequence } = seq;
    return sequence as string;
  }
  /**
   * Guesses the sequence kind from residue names.
   *
   * Looks at no more than the first 10 rows. For each name (a falsy value is
   * treated as ''), the protein, RNA and DNA one-letter-code functions are
   * tried in that order; the first one that yields something other than 'X'
   * decides the result.
   *
   * @param names - column of residue names
   * @returns the detected kind together with the matching one-letter-code
   *   function; `Kind.Generic` with a function that always returns 'X' when
   *   nothing matched
   */
  function determineKind(names: Column<string>) {
    const limit = Math.min(names.rowCount, 10);
    let row = 0;
    while (row < limit) {
      const residue = names.value(row) || '';
      if (getProteinOneLetterCode(residue) !== 'X') {
        return { kind: Kind.Protein, code: getProteinOneLetterCode };
      }
      if (getRnaOneLetterCode(residue) !== 'X') {
        return { kind: Kind.RNA, code: getRnaOneLetterCode };
      }
      if (getDnaOneLetterCode(residue) !== 'X') {
        return { kind: Kind.DNA, code: getDnaOneLetterCode };
      }
      row++;
    }
    return { kind: Kind.Generic, code: (v: string) => 'X' };
  }
  /**
   * Wraps a one-letter-code function with a fallback lookup.
   *
   * @param code - maps a residue name to a one-letter code ('X' = unknown)
   * @param map - alternative name to try for names that `code` maps to 'X'
   * @returns a function that returns `code(name)` unless that is 'X' and
   *   `map` has an entry for `name`, in which case it returns `code` applied
   *   to the mapped name
   */
  function modCode(code: (name: string) => string, map: Map<string, string>): (name: string) => string {
    return function lookup(name: string): string {
      const direct = code(name);
      if (direct === 'X' && map.has(name)) {
        return code(map.get(name)!);
      }
      return direct;
    };
  }
  /**
   * Builds a sequence from parallel residue-name and sequence-id columns.
   *
   * The kind and the one-letter-code function are chosen by `determineKind`.
   * When a non-empty `modifiedMap` is given, the code function is wrapped by
   * `modCode` so that names it cannot resolve are retried via the map.
   *
   * @param residueName - residue name per row
   * @param seqId - sequence id per row
   * @param modifiedMap - optional mapping from a residue name to a substitute name
   * @returns the resulting sequence
   * @throws Error if `seqId` has no rows
   */
  export function ofResidueNames(residueName: Column<string>, seqId: Column<number>, modifiedMap?: Map<string, string>): Sequence {
    if (seqId.rowCount === 0) throw new Error('cannot be empty');

    const { kind, code } = determineKind(residueName);
    const toLetter = modifiedMap && modifiedMap.size !== 0
      ? modCode(code, modifiedMap)
      : code;
    return new Impl(kind, residueName, seqId, toLetter) as Sequence;
  }
  /**
   * Sequence whose string form is built lazily from a residue-name column and
   * a sequence-id column, the first time `offset` or `sequence` is read.
   *
   * The built string covers every id from the smallest to the largest value
   * in `seqId`. Position `i` corresponds to sequence id `offset + 1 + i`;
   * ids in that range without a row are rendered as '-'.
   */
  class Impl implements Base<any, any> {
    private _offset = 0;
    // `undefined` means the string has not been built yet.
    private _seq: string | undefined = void 0;

    /**
     * @param kind - tag exposed as `kind`
     * @param residueName - residue name per row
     * @param seqId - sequence id per row (parallel to `residueName`)
     * @param code - maps a residue name to its one-letter code
     */
    constructor(public kind: Kind, private residueName: Column<string>, private seqId: Column<number>, private code: (name: string) => string) {
    }

    /** Smallest sequence id minus one (0 when there are no rows). */
    get offset() {
      if (this._seq === void 0) this.create();
      return this._offset;
    }

    /** The one-letter-code string, built on first access and then cached. */
    get sequence(): any {
      if (this._seq === void 0) this.create();
      return this._seq;
    }

    /** Builds and caches the sequence string and its offset. */
    private create() {
      const rowCount = this.seqId.rowCount;
      if (rowCount === 0) {
        // Nothing to lay out; avoid deriving a range from no data.
        this._seq = '';
        this._offset = 0;
        return;
      }

      // Seed both bounds from the first row so that columns containing only
      // negative ids get the correct upper bound (a seed of 0 would stretch
      // the range up to 0 and append spurious gap characters).
      let minSeqId = this.seqId.value(0);
      let maxSeqId = minSeqId;
      for (let i = 1; i < rowCount; i++) {
        const id = this.seqId.value(i);
        if (maxSeqId < id) maxSeqId = id;
        if (id < minSeqId) minSeqId = id;
      }

      // One slot per id in [minSeqId, maxSeqId], pre-filled with the gap symbol.
      const count = maxSeqId - minSeqId + 1;
      const sequenceArray = new Array<string>(count).fill('-');
      for (let i = 0; i < rowCount; i++) {
        // A falsy residue name is passed to `code` as the empty string.
        sequenceArray[this.seqId.value(i) - minSeqId] = this.code(this.residueName.value(i) || '');
      }

      this._seq = sequenceArray.join('');
      this._offset = minSeqId - 1;
    }
  }
  89. }
  90. export { Sequence }