sequence.ts 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200
  1. /**
  2. * Copyright (c) 2018-2020 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * @author David Sehnal <david.sehnal@gmail.com>
  5. * @author Alexander Rose <alexander.rose@weirdbyte.de>
  6. */
  7. import { AminoAlphabet, NuclecicAlphabet, getProteinOneLetterCode, getRnaOneLetterCode, getDnaOneLetterCode } from './constants';
  8. import { Column } from '../../mol-data/db';
  9. import { assertUnreachable } from '../../mol-util/type-helpers';
  10. // TODO add mapping support to other sequence spaces, e.g. uniprot
  /**
   * A sequence of any supported kind. Every member carries a distinct
   * `Sequence.Kind` value in its `kind` field, which tells the members apart.
   */
  type Sequence =
    | Sequence.Protein
    | Sequence.DNA
    | Sequence.RNA
    | Sequence.Generic;
  12. namespace Sequence {
  /** Tag identifying what a sequence is made of; the string values are the runtime representation. */
  export const enum Kind {
    /** Sequence whose residue names resolve to protein one-letter codes */
    Protein = 'protein',
    /** Sequence whose residue names resolve to DNA one-letter codes */
    DNA = 'DNA',
    /** Sequence whose residue names resolve to RNA one-letter codes */
    RNA = 'RNA',
    /** Sequence for which no more specific kind applies */
    Generic = 'generic',
  }
  /**
   * Members shared by all sequence kinds.
   *
   * `K` is the kind tag of the sequence, `Alphabet` the set of one-letter
   * codes that may appear in `code`.
   */
  export interface Base<K extends Kind, Alphabet extends string> {
    /** Kind tag of the sequence */
    readonly kind: K;
    /** Number of positions in the sequence */
    readonly length: number;
    /** One-letter code per position */
    readonly code: Column<Alphabet>;
    /** Label per position */
    readonly label: Column<string>;
    /** Sequence id per position */
    readonly seqId: Column<number>;
    /** Component id per position */
    readonly compId: Column<string>;
    /** Returns the index (position) for the given seqId */
    readonly index: (seqId: number) => number;
    /** Maps a seqId to its list of compIds (micro-heterogeneity) */
    readonly microHet: ReadonlyMap<number, string[]>;
  }
  /** Protein sequence; one-letter codes come from `AminoAlphabet`. */
  export interface Protein extends Base<Kind.Protein, AminoAlphabet> {}

  /** RNA sequence; one-letter codes come from `NuclecicAlphabet`. */
  export interface RNA extends Base<Kind.RNA, NuclecicAlphabet> {}

  /** DNA sequence; one-letter codes come from `NuclecicAlphabet`. */
  export interface DNA extends Base<Kind.DNA, NuclecicAlphabet> {}

  /** Sequence of no specific kind; its codes are restricted to 'X' and '-'. */
  export interface Generic extends Base<Kind.Generic, 'X' | '-'> {}
  /**
   * Joins the one-letter codes of a sequence into a single string.
   *
   * @param seq sequence whose `code` column is read
   * @returns all codes concatenated without a separator
   */
  export function getSequenceString(seq: Sequence) {
    const codes = seq.code.toArray();
    if (codes instanceof Array) {
      return codes.join('');
    }
    // Not a plain Array: copy into one before joining
    return Array.from(codes).join('');
  }
  /**
   * Guesses the sequence kind from residue names. At most the first ten rows
   * are probed; the first name that has a one-letter code other than 'X'
   * decides, trying the protein lookup first, then RNA, then DNA.
   *
   * @param names residue names to probe
   * @returns the detected kind, or `Kind.Generic` when no probed name is recognized
   */
  function determineKind(names: Column<string>) {
    const probeLimit = Math.min(names.rowCount, 10);
    for (let row = 0; row < probeLimit; row += 1) {
      // Missing names are probed as the empty string
      const residue = names.value(row) || '';
      if (getProteinOneLetterCode(residue) !== 'X') {
        return Kind.Protein;
      } else if (getRnaOneLetterCode(residue) !== 'X') {
        return Kind.RNA;
      } else if (getDnaOneLetterCode(residue) !== 'X') {
        return Kind.DNA;
      }
    }
    return Kind.Generic;
  }
  /**
   * Builds the function that turns a residue name into a one-letter code.
   *
   * @param kind selects the lookup; `Kind.Generic` yields 'X' for every name
   * @param map optional name substitutions, consulted only when the direct
   *   lookup of a name yields 'X'
   * @returns the name-to-code function; without a non-empty `map` this is the
   *   plain lookup for `kind`
   */
  function codeProvider(kind: Kind, map?: ReadonlyMap<string, string>) {
    const lookup = ((): ((name: string) => string) => {
      switch (kind) {
        case Kind.Protein: return getProteinOneLetterCode;
        case Kind.DNA: return getDnaOneLetterCode;
        case Kind.RNA: return getRnaOneLetterCode;
        case Kind.Generic: return () => 'X';
        default: return assertUnreachable(kind);
      }
    })();

    if (!(map && map.size > 0)) return lookup;

    const substitutions = map;
    return (name: string) => {
      const direct = lookup(name);
      const needsSubstitute = direct === 'X' && substitutions.has(name);
      // Retry with the substituted name only when the direct lookup failed
      return needsSubstitute ? lookup(substitutions.get(name)!) : direct;
    };
  }
  /**
   * Creates a sequence from residue rows; the kind is detected from the
   * component ids.
   *
   * @param compId component id of each row
   * @param seqId sequence id of each row
   * @returns the sequence built from the rows
   * @throws Error when `seqId` has no rows
   */
  export function ofResidueNames(compId: Column<string>, seqId: Column<number>): Sequence {
    const isEmpty = seqId.rowCount === 0;
    if (isEmpty) {
      throw new Error('cannot be empty');
    }
    return new ResidueNamesImpl(determineKind(compId), compId, seqId) as Sequence;
  }
  /**
   * Sequence built from explicit residue rows.
   *
   * Each distinct seqId takes one position, in order of first appearance, and
   * keeps the compId of its first row. Further rows with an already seen seqId
   * add no position; their compIds are collected in `microHet`.
   */
  class ResidueNamesImpl<K extends Kind, Alphabet extends string> implements Base<K, Alphabet> {
    public length: number;
    public code: Column<Alphabet>;
    public label: Column<string>;
    public seqId: Column<number>;
    public compId: Column<string>;
    public microHet: ReadonlyMap<number, string[]>;

    private indexMap: Map<number, number>;

    /** Position of the given seqId; `undefined` at runtime for a seqId that was not in the input */
    index(seqId: number) {
      return this.indexMap.get(seqId)!;
    }

    /**
     * @param kind kind tag, also selects the one-letter-code lookup
     * @param compId component id of each row
     * @param seqId sequence id of each row
     */
    constructor(public kind: K, compId: Column<string>, seqId: Column<number>) {
      const toCode = codeProvider(kind);

      const positionOf = new Map<number, number>();
      const variants = new Map<number, string[]>();
      const uniqueSeqIds: number[] = [];
      const firstCompIds: string[] = [];
      const oneLetter: string[] = [];

      const rowCount = seqId.rowCount;
      for (let row = 0; row < rowCount; row++) {
        const id = seqId.value(row);
        const name = compId.value(row);
        const known = positionOf.get(id);

        if (known === undefined) {
          // First row for this seqId: it defines a new position
          positionOf.set(id, uniqueSeqIds.length);
          uniqueSeqIds.push(id);
          firstCompIds.push(name);
          oneLetter.push(toCode(name));
          continue;
        }

        // Repeated seqId: micro-heterogeneity. The list starts with the
        // compId of the first row, followed by every later one.
        const seen = variants.get(id);
        if (seen) {
          seen.push(name);
        } else {
          variants.set(id, [firstCompIds[known], name]);
        }
      }

      // A label is the one-letter code, or the compId when the code is 'X';
      // positions with micro-heterogeneity list all alternatives as "(a|b)".
      const labels = uniqueSeqIds.map((id, pos) => {
        const alternatives = variants.get(id);
        if (!alternatives) {
          return oneLetter[pos] === 'X' ? firstCompIds[pos] : oneLetter[pos];
        }
        const parts = alternatives.map(alt => {
          const altCode = toCode(alt);
          return altCode === 'X' ? alt : altCode;
        });
        return `(${parts.join('|')})`;
      });

      this.indexMap = positionOf;
      this.microHet = variants;
      this.length = uniqueSeqIds.length;
      this.seqId = Column.ofIntArray(uniqueSeqIds);
      this.compId = Column.ofStringArray(firstCompIds);
      this.code = Column.ofStringArray(oneLetter) as Column<Alphabet>;
      this.label = Column.ofStringArray(labels);
    }
  }
  /**
   * Creates a `Kind.Generic` sequence from seqId ranges.
   *
   * @param seqIdBegin begin seqId of each range
   * @param seqIdEnd end seqId of each range
   * @returns a range-based sequence of kind `Kind.Generic`
   */
  export function ofSequenceRanges(seqIdBegin: Column<number>, seqIdEnd: Column<number>): Sequence {
    return new SequenceRangesImpl(Kind.Generic, seqIdBegin, seqIdEnd) as Sequence;
  }
  /**
   * Placeholder sequence covering one contiguous span of seqIds, from the
   * smallest range start to the largest range end (both inclusive). Gaps
   * between the given ranges are not distinguished: every position has the
   * code 'X' and an empty label and compId.
   */
  class SequenceRangesImpl<K extends Kind, Alphabet extends string> implements Base<K, Alphabet> {
    public length: number;
    public code: Column<Alphabet>;
    public label: Column<string>;
    public seqId: Column<number>;
    public compId: Column<string>;
    /** Always empty: ranges carry no component ids */
    public microHet: ReadonlyMap<number, string[]> = new Map();

    private minSeqId: number;

    /**
     * Returns the index for the given seqId, i.e. its offset from the smallest
     * seqId. This is the inverse of the `seqId` column: `index(seqId.value(row)) === row`.
     */
    index(seqId: number) {
      return seqId - this.minSeqId;
    }

    /**
     * @param kind kind tag of the sequence
     * @param seqIdStart start seqId of each range
     * @param seqIdEnd end seqId of each range, read at the same rows as `seqIdStart`
     */
    constructor(public kind: K, private seqIdStart: Column<number>, private seqIdEnd: Column<number>) {
      const rangeCount = this.seqIdStart.rowCount;

      // Seed the maximum below any real id so that spans made only of
      // negative seqIds are measured correctly.
      let minSeqId = Number.MAX_SAFE_INTEGER;
      let maxSeqId = Number.MIN_SAFE_INTEGER;
      for (let i = 0; i < rangeCount; i++) {
        const idStart = this.seqIdStart.value(i);
        const idEnd = this.seqIdEnd.value(i);
        if (idStart < minSeqId) minSeqId = idStart;
        if (maxSeqId < idEnd) maxSeqId = idEnd;
      }

      if (rangeCount === 0) {
        // No ranges: describe an empty span instead of a negative length
        minSeqId = 0;
        maxSeqId = -1;
      }

      const firstSeqId = minSeqId;
      const count = Math.max(0, maxSeqId - firstSeqId + 1);

      this.code = Column.ofConst('X', count, Column.Schema.str) as Column<Alphabet>;
      this.label = Column.ofConst('', count, Column.Schema.str);
      this.seqId = Column.ofLambda({
        // Row 0 is the smallest seqId, keeping this column consistent with `index`
        value: row => row + firstSeqId,
        rowCount: count,
        schema: Column.Schema.int
      });
      this.compId = Column.ofConst('', count, Column.Schema.str);

      this.length = count;
      this.minSeqId = firstSeqId;
    }
  }
  168. }
  169. export { Sequence };