sequence.ts 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156
  1. /**
  2. * Copyright (c) 2018-2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * @author David Sehnal <david.sehnal@gmail.com>
  5. * @author Alexander Rose <alexander.rose@weirdbyte.de>
  6. */
  7. import { AminoAlphabet, NuclecicAlphabet, getProteinOneLetterCode, getRnaOneLetterCode, getDnaOneLetterCode } from './constants';
  8. import { Column } from '../../mol-data/db'
  9. // TODO add mapping support to other sequence spaces, e.g. uniprot
  10. // TODO sequence alignment (take NGL code as starting point)
  /**
   * Any sequence handled by this module: one of the protein, DNA, RNA, or
   * generic variants declared in the `Sequence` namespace.
   */
  type Sequence =
      | Sequence.Protein
      | Sequence.DNA
      | Sequence.RNA
      | Sequence.Generic
  12. namespace Sequence {
  /** Discriminator stored in the `kind` field of a sequence. */
  export const enum Kind {
      /** Amino-acid sequence. */
      Protein = 'protein',
      /** Ribonucleic-acid sequence. */
      RNA = 'RNA',
      /** Deoxyribonucleic-acid sequence. */
      DNA = 'DNA',
      /** Sequence whose residues are not classified as protein, RNA, or DNA. */
      Generic = 'generic',
  }
  /**
   * Fields shared by every sequence variant.
   *
   * `K` is the kind discriminator and `Alphabet` the set of one-letter codes
   * that may appear in `sequence`.
   */
  export interface Base<K extends Kind, Alphabet extends string> {
      /** Which variant of sequence this is. */
      readonly kind: K;
      /**
       * Numeric offset of the sequence.
       * NOTE(review): presumably `offset + 1` is the seqId of the first entry
       * of `sequence` - confirm against callers.
       */
      readonly offset: number;
      /** One-letter codes, one entry per position. */
      readonly sequence: ArrayLike<Alphabet>;
      /** Display label for each position. */
      readonly labels: ArrayLike<string>;
      /** maps seqId to list of compIds */
      readonly microHet: ReadonlyMap<number, string[]>;
  }
  /** Protein sequence over the amino-acid alphabet. */
  export interface Protein extends Base<Kind.Protein, AminoAlphabet> {}

  /** RNA sequence over the nucleic-acid alphabet. */
  export interface RNA extends Base<Kind.RNA, NuclecicAlphabet> {}

  /** DNA sequence over the nucleic-acid alphabet. */
  export interface DNA extends Base<Kind.DNA, NuclecicAlphabet> {}

  /** Sequence of unclassified residues; every position is either 'X' or '-'. */
  export interface Generic extends Base<Kind.Generic, 'X' | '-'> {}
  /**
   * Bundles already-computed sequence data into a sequence object.
   *
   * The arguments are stored as given (no copies are made).
   *
   * @param kind     Kind discriminator for the result.
   * @param sequence One-letter codes, one per position.
   * @param labels   Display labels, one per position.
   * @param microHet Map from seqId to the compIds observed at that seqId.
   * @param offset   Value for the `offset` field; defaults to 0.
   * @returns An object holding exactly the five values passed in.
   */
  export function create<K extends Kind, Alphabet extends string>(
      kind: K,
      sequence: Alphabet[],
      labels: string[],
      microHet: Map<number, string[]>,
      offset: number = 0
  ): Base<K, Alphabet> {
      const result: Base<K, Alphabet> = { kind, sequence, labels, microHet, offset };
      return result;
  }
  /**
   * Returns the one-letter codes of `seq` as a single string.
   *
   * `seq.sequence` is only guaranteed to be array-like: sequences produced by
   * `create()` hold an array of codes, while the column-backed implementation
   * stores a string. A string is returned unchanged; any other array-like is
   * concatenated, so the result is always a real string.
   *
   * @param seq Sequence to render.
   * @returns The concatenated one-letter codes ('' for an empty sequence).
   */
  export function getSequenceString(seq: Sequence): string {
      const codes: ArrayLike<string> = seq.sequence;
      if (typeof codes === 'string') return codes;
      return Array.from(codes).join('');
  }
  /**
   * Guesses the sequence kind from residue names.
   *
   * At most the first 10 rows are inspected. The first name for which one of
   * the one-letter-code lookups returns something other than 'X' decides the
   * kind; lookups are tried in the order protein, RNA, DNA. If no inspected
   * name is recognized, the kind is generic and every name maps to 'X'.
   *
   * @param names Column of residue names.
   * @returns The detected kind together with the matching name-to-code function.
   */
  function determineKind(names: Column<string>) {
      const sampleSize = Math.min(names.rowCount, 10);
      for (let row = 0; row < sampleSize; row++) {
          // A missing/empty value is looked up as the empty string.
          const residue = names.value(row) || '';
          if (getProteinOneLetterCode(residue) !== 'X') {
              return { kind: Kind.Protein, code: getProteinOneLetterCode };
          }
          if (getRnaOneLetterCode(residue) !== 'X') {
              return { kind: Kind.RNA, code: getRnaOneLetterCode };
          }
          if (getDnaOneLetterCode(residue) !== 'X') {
              return { kind: Kind.DNA, code: getDnaOneLetterCode };
          }
      }
      return { kind: Kind.Generic, code: (v: string) => 'X' };
  }
  /**
   * Wraps a name-to-code lookup so that names it does not recognize are retried
   * through a replacement map.
   *
   * The returned function first calls `code` with the name itself. Only when
   * that yields 'X' and `map` has an entry for the name is `code` called a
   * second time, with the mapped name.
   *
   * @param code Base lookup from residue name to one-letter code.
   * @param map  Replacement names, keyed by the name to be replaced.
   * @returns The wrapped lookup.
   */
  function modCode(code: (name: string) => string, map: ReadonlyMap<string, string>): (name: string) => string {
      return function lookup(name: string): string {
          const direct = code(name);
          if (direct === 'X' && map.has(name)) {
              return code(map.get(name)!);
          }
          return direct;
      };
  }
  /**
   * Builds a sequence from parallel columns of residue names and sequence ids.
   *
   * The kind is detected from the residue names. When a non-empty
   * `modifiedMap` is supplied, names that the detected lookup cannot resolve
   * are retried through that map.
   *
   * @param residueName Column of residue names.
   * @param seqId       Column of sequence ids.
   * @param modifiedMap Optional replacement names, keyed by the name to replace.
   * @returns The sequence object.
   * @throws Error with message 'cannot be empty' when `seqId` has no rows.
   */
  export function ofResidueNames(residueName: Column<string>, seqId: Column<number>, modifiedMap?: ReadonlyMap<string, string>): Sequence {
      if (seqId.rowCount === 0) throw new Error('cannot be empty');

      const detected = determineKind(residueName);
      let toCode: (name: string) => string = detected.code;
      if (modifiedMap && modifiedMap.size !== 0) {
          toCode = modCode(detected.code, modifiedMap);
      }
      return new Impl(detected.kind, residueName, seqId, toCode) as Sequence;
  }
  /**
   * Sequence backed by a residue-name column and a seqId column.
   *
   * Nothing is computed at construction time. The first read of `offset`,
   * `sequence`, `labels`, or `microHet` scans the columns once and caches all
   * four values.
   *
   * Positions are laid out from the smallest to the largest seqId found in
   * the column; seqIds with no row in between appear as '-' with an empty
   * label. `sequence` and `labels` always have the same length.
   */
  class Impl<K extends Kind, Alphabet extends string> implements Base<K, Alphabet> {
      private _offset = 0;
      private _seq: ArrayLike<Alphabet> | undefined = void 0;
      private _labels: ArrayLike<string> | undefined = void 0;
      private _microHet: ReadonlyMap<number, string[]> | undefined = void 0;

      /**
       * @param kind        Kind discriminator exposed as `kind`.
       * @param residueName Column of residue names, parallel to `seqId`.
       * @param seqId       Column of sequence ids; its row count drives the scan.
       * @param code        Maps a residue name to its one-letter code ('X' when unknown).
       */
      constructor(public kind: K, private residueName: Column<string>, private seqId: Column<number>, private code: (name: string) => string) {
      }

      /** Smallest seqId minus one (0 when there are no rows). */
      get offset(): number {
          this.ensureCreated();
          return this._offset;
      }

      /** One-letter codes; stored as a string with one character per position. */
      get sequence(): ArrayLike<Alphabet> {
          this.ensureCreated();
          return this._seq!;
      }

      /** One label per position; see `create` for how labels are formed. */
      get labels(): ArrayLike<string> {
          this.ensureCreated();
          return this._labels!;
      }

      /** Maps each seqId that has more than one row to the residue names of those rows. */
      get microHet(): ReadonlyMap<number, string[]> {
          this.ensureCreated();
          return this._microHet!;
      }

      /** Runs the column scan on first use; all cached fields are set together. */
      private ensureCreated() {
          if (this._seq === void 0) this.create();
      }

      /** Scans the columns and fills the cached fields. */
      private create() {
          const rowCount = this.seqId.rowCount;

          if (rowCount === 0) {
              // No rows: expose an empty sequence instead of deriving bounds from sentinels.
              this._seq = '' as unknown as ArrayLike<Alphabet>;
              this._labels = [];
              this._microHet = new Map<number, string[]>();
              this._offset = 0;
              return;
          }

          // Seed both bounds from the first row so that ranges lying entirely
          // below zero are measured correctly.
          let minSeqId = this.seqId.value(0);
          let maxSeqId = minSeqId;
          for (let i = 1; i < rowCount; i++) {
              const id = this.seqId.value(i);
              if (maxSeqId < id) maxSeqId = id;
              if (id < minSeqId) minSeqId = id;
          }

          // Every per-position array has exactly `count` slots and is indexed
          // by `seqId - minSeqId`, so no array carries unfilled trailing holes.
          const count = maxSeqId - minSeqId + 1;
          const sequenceArray = new Array<string>(count);
          const labels = new Array<string[]>(count);
          const compIds = new Array<string[]>(count);
          for (let i = 0; i < count; i++) {
              sequenceArray[i] = '-';
              labels[i] = [];
              compIds[i] = [];
          }

          for (let i = 0; i < rowCount; i++) {
              const idx = this.seqId.value(i) - minSeqId;
              const name = this.residueName.value(i);
              const code = this.code(name);
              // in case of MICROHETEROGENEITY `sequenceArray[idx]` may already be set;
              // the first row seen for a position decides its one-letter code
              if (!sequenceArray[idx] || sequenceArray[idx] === '-') {
                  sequenceArray[idx] = code;
              }
              // Unknown residues are labelled with their full name rather than 'X'.
              labels[idx].push(code === 'X' ? name : code);
              compIds[idx].push(name);
          }

          const microHet = new Map<number, string[]>();
          for (let idx = 0; idx < count; idx++) {
              if (compIds[idx].length > 1) microHet.set(idx + minSeqId, compIds[idx]);
          }

          // The joined string is what `sequence` exposes; a string is array-like,
          // but the compiler cannot verify its characters against `Alphabet`.
          this._seq = sequenceArray.join('') as unknown as ArrayLike<Alphabet>;
          // Positions with several rows are rendered as `(a|b)`; gaps get ''.
          this._labels = labels.map(l => l.length > 1 ? `(${l.join('|')})` : l.join(''));
          this._microHet = microHet;
          this._offset = minSeqId - 1;
      }
  }
  128. }
  129. export { Sequence }