sequence.ts 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229
  1. /**
  2. * Copyright (c) 2018-2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
  3. *
  4. * @author David Sehnal <david.sehnal@gmail.com>
  5. * @author Alexander Rose <alexander.rose@weirdbyte.de>
  6. */
  7. import { AminoAlphabet, NuclecicAlphabet, getProteinOneLetterCode, getRnaOneLetterCode, getDnaOneLetterCode } from './constants';
  8. import { Column } from '../../mol-data/db'
  9. // TODO add mapping support to other sequence spaces, e.g. uniprot
  10. // TODO sequence alignment (take NGL code as starting point)
  /**
   * Any sequence representable by this module: a protein, DNA or RNA sequence,
   * or a generic placeholder sequence.
   */
  type Sequence =
    | Sequence.Protein
    | Sequence.DNA
    | Sequence.RNA
    | Sequence.Generic;
  12. namespace Sequence {
  /** Discriminator telling the concrete sequence variants apart. */
  export const enum Kind {
    /** Sequence whose residues map to amino-acid one-letter codes. */
    Protein = 'protein',
    /** Sequence whose residues map to RNA one-letter codes. */
    RNA = 'RNA',
    /** Sequence whose residues map to DNA one-letter codes. */
    DNA = 'DNA',
    /** Fallback kind; every residue is coded as 'X'. */
    Generic = 'generic',
  }
  /**
   * Shape shared by all sequence variants.
   *
   * `K` is the kind tag of the variant and `Alphabet` the set of one-letter
   * codes that may appear in `code`.
   */
  export interface Base<K extends Kind, Alphabet extends string> {
    /** Tag identifying the sequence variant. */
    readonly kind: K;
    /** Number of positions in the sequence. */
    readonly length: number;
    /** Offset of the sequence; the implementations in this file use `minSeqId - 1`. */
    readonly offset: number;
    /** One-letter code per position. */
    readonly code: Column<Alphabet>;
    /** Display label per position. */
    readonly label: Column<string>;
    /** Sequence ids. */
    readonly seqId: Column<number>;
    /** Component (residue) names. */
    readonly compId: Column<string>;
    /** maps seqId to list of compIds */
    readonly microHet: ReadonlyMap<number, string[]>;
  }
  /** Protein sequence; codes are drawn from `AminoAlphabet`. */
  export interface Protein extends Base<Kind.Protein, AminoAlphabet> {}

  /** RNA sequence; codes are drawn from `NuclecicAlphabet`. */
  export interface RNA extends Base<Kind.RNA, NuclecicAlphabet> {}

  /** DNA sequence; codes are drawn from `NuclecicAlphabet`. */
  export interface DNA extends Base<Kind.DNA, NuclecicAlphabet> {}

  /** Generic sequence; the only codes are 'X' and '-'. */
  export interface Generic extends Base<Kind.Generic, 'X' | '-'> {}
  /**
   * Bundles already-built columns into a plain sequence object.
   *
   * @param kind tag of the sequence variant
   * @param code one-letter code column; its `rowCount` becomes the sequence `length`
   * @param label label column
   * @param seqId sequence id column
   * @param compId component name column
   * @param microHet map from seqId to the list of compIds at that position
   * @param offset sequence offset, 0 when omitted
   * @returns an object holding the given values plus the derived `length`
   */
  export function create<K extends Kind, Alphabet extends string>(
    kind: K,
    code: Column<Alphabet>,
    label: Column<string>,
    seqId: Column<number>,
    compId: Column<string>,
    microHet: Map<number, string[]>,
    offset: number = 0
  ): Base<K, Alphabet> {
    const sequence: Base<K, Alphabet> = {
      kind,
      code,
      label,
      seqId,
      compId,
      microHet,
      offset,
      length: code.rowCount,
    };
    return sequence;
  }
  /**
   * Concatenates the one-letter codes of a sequence into a single string.
   *
   * @param seq sequence whose `code` column is read
   * @returns all codes joined without a separator
   */
  export function getSequenceString(seq: Sequence) {
    const codes = seq.code.toArray();
    // Real arrays can be joined directly; other array-likes are copied into an array first.
    if (codes instanceof Array) return codes.join('');
    return Array.from(codes).join('');
  }
  /**
   * Guesses the sequence kind from residue names.
   *
   * Only the first 10 names (or fewer, if the column is shorter) are inspected.
   * The first name that yields a code other than 'X' decides the kind, testing
   * protein, then RNA, then DNA. If none is recognized the kind is generic.
   */
  function determineKind(names: Column<string>) {
    const sampleSize = Math.min(names.rowCount, 10);
    let row = 0;
    while (row < sampleSize) {
      // Missing/empty names are looked up as the empty string.
      const residueName = names.value(row) || '';
      if (getProteinOneLetterCode(residueName) !== 'X') {
        return Kind.Protein;
      }
      if (getRnaOneLetterCode(residueName) !== 'X') {
        return Kind.RNA;
      }
      if (getDnaOneLetterCode(residueName) !== 'X') {
        return Kind.DNA;
      }
      row += 1;
    }
    return Kind.Generic;
  }
  /**
   * Returns the function that converts a residue name into a one-letter code
   * for the given sequence kind.
   *
   * @param kind sequence kind selecting the lookup function; generic sequences always yield 'X'
   * @param map optional name-to-name mapping. When it is non-empty and a name
   *   itself resolves to 'X', the mapped name (if any) is looked up instead.
   * @throws Error if `kind` is not one of the known kinds
   */
  function codeProvider(kind: Kind, map?: ReadonlyMap<string, string>) {
    let lookup: (name: string) => string;
    if (kind === Kind.Protein) {
      lookup = getProteinOneLetterCode;
    } else if (kind === Kind.DNA) {
      lookup = getDnaOneLetterCode;
    } else if (kind === Kind.RNA) {
      lookup = getRnaOneLetterCode;
    } else if (kind === Kind.Generic) {
      lookup = () => 'X';
    } else {
      throw new Error(`unknown kind '${kind}'`);
    }

    // Without a usable mapping the plain lookup is all that is needed.
    if (!map || map.size === 0) return lookup;

    return (name: string) => {
      const direct = lookup(name);
      const needsFallback = direct === 'X' && map.has(name);
      return needsFallback ? lookup(map.get(name)!) : direct;
    };
  }
  /**
   * Builds a sequence from per-residue component names and sequence ids.
   * The kind is determined from the component names.
   *
   * @param compId component (residue) names
   * @param seqId sequence ids, one per entry of `compId`
   * @throws Error when `seqId` has no rows
   */
  export function ofResidueNames(compId: Column<string>, seqId: Column<number>): Sequence {
    const hasNoRows = seqId.rowCount === 0;
    if (hasNoRows) {
      throw new Error('cannot be empty');
    }
    return new ResidueNamesImpl(determineKind(compId), compId, seqId) as Sequence;
  }
  /**
   * Sequence built from per-residue component names and sequence ids.
   *
   * The derived data (`code`, `label`, `offset`, `length`, `microHet`) is computed
   * lazily on first access and then cached. Positions between the smallest and
   * largest seqId that have no residue are coded as '-'.
   */
  class ResidueNamesImpl<K extends Kind, Alphabet extends string> implements Base<K, Alphabet> {
    private _offset = 0;
    private _length = 0;
    private _microHet: ReadonlyMap<number, string[]> | undefined = undefined;
    private _code: Column<Alphabet> | undefined = undefined;
    private _label: Column<string> | undefined = undefined;
    private codeFromName: (name: string) => string;

    constructor(public kind: K, public compId: Column<string>, public seqId: Column<number>) {
      this.codeFromName = codeProvider(kind);
    }

    /** One-letter code per position; '-' marks positions without a residue. */
    get code(): Column<Alphabet> {
      if (this._code === undefined) this.create();
      return this._code!;
    }

    /**
     * Label per position: the one-letter code, or the residue name when the code
     * is 'X'. Positions with several residues are rendered as `(a|b|...)`.
     */
    get label(): Column<string> {
      if (this._label === undefined) this.create();
      return this._label!;
    }

    /** Smallest seqId minus one. */
    get offset() {
      if (this._code === undefined) this.create();
      return this._offset;
    }

    /** Number of positions, i.e. rows of `code` and `label`. */
    get length() {
      if (this._code === undefined) this.create();
      return this._length;
    }

    /** Maps each seqId that carries more than one compId to those compIds. */
    get microHet(): ReadonlyMap<number, string[]> {
      if (this._microHet === undefined) this.create();
      return this._microHet!;
    }

    /** Computes and caches all derived data in one pass over the input columns. */
    private create() {
      const rowCount = this.seqId.rowCount;

      // NOTE: maxSeqId starts at 0, so the covered range always reaches up to at least 0.
      let maxSeqId = 0, minSeqId = Number.MAX_SAFE_INTEGER;
      for (let i = 0; i < rowCount; i++) {
        const id = this.seqId.value(i);
        if (maxSeqId < id) maxSeqId = id;
        if (id < minSeqId) minSeqId = id;
      }
      // Without any rows, produce an empty sequence (count 0, offset 0) instead of a negative count.
      if (rowCount === 0) minSeqId = 1;

      // All per-position arrays hold exactly `count` entries and are indexed by
      // `seqId - minSeqId`, so the resulting columns have `length` rows.
      const count = maxSeqId - minSeqId + 1;
      const sequenceArray = new Array<string>(count);
      const labels = new Array<string[]>(count);
      const compIds = new Array<string[]>(count);
      for (let i = 0; i < count; i++) {
        sequenceArray[i] = '-';
        labels[i] = [];
        compIds[i] = [];
      }

      for (let i = 0; i < rowCount; i++) {
        const idx = this.seqId.value(i) - minSeqId;
        const name = this.compId.value(i);
        const code = this.codeFromName(name);
        // in case of MICROHETEROGENEITY `sequenceArray[idx]` may already be set
        if (!sequenceArray[idx] || sequenceArray[idx] === '-') {
          sequenceArray[idx] = code;
        }
        labels[idx].push(code === 'X' ? name : code);
        compIds[idx].push(name);
      }

      // Only positions with more than one residue name are recorded, keyed by seqId.
      const microHet = new Map<number, string[]>();
      for (let i = 0; i < count; i++) {
        if (compIds[i].length > 1) microHet.set(i + minSeqId, compIds[i]);
      }

      this._code = Column.ofStringArray(sequenceArray) as Column<Alphabet>;
      this._label = Column.ofLambda({
        value: i => {
          const l = labels[i];
          return l.length > 1 ? `(${l.join('|')})` : l.join('');
        },
        rowCount: labels.length,
        schema: Column.Schema.str
      });
      this._microHet = microHet;
      this._offset = minSeqId - 1;
      this._length = count;
    }
  }
  /**
   * Builds a generic sequence that spans the given seqId ranges.
   *
   * @param seqIdBegin first seqId of each range
   * @param seqIdEnd last seqId of each range
   */
  export function ofSequenceRanges(seqIdBegin: Column<number>, seqIdEnd: Column<number>): Sequence {
    return new SequenceRangesImpl(Kind.Generic, seqIdBegin, seqIdEnd) as Sequence;
  }
  /**
   * Sequence that only knows which seqId ranges exist, not the residues in them.
   *
   * It covers every position from the smallest range start to the largest range
   * end. Each position has code 'X', an empty label and an empty compId.
   */
  class SequenceRangesImpl<K extends Kind, Alphabet extends string> implements Base<K, Alphabet> {
    public offset: number;
    public length: number;
    public code: Column<Alphabet>;
    public label: Column<string>;
    public seqId: Column<number>;
    public compId: Column<string>;
    // Residue names are unknown here, so there is never any microheterogeneity;
    // initialized so that the `Base` contract (a map, not `undefined`) holds.
    public microHet: ReadonlyMap<number, string[]> = new Map();

    constructor(public kind: K, private seqIdStart: Column<number>, private seqIdEnd: Column<number>) {
      const rangeCount = this.seqIdStart.rowCount;

      let maxSeqId = 0, minSeqId = Number.MAX_SAFE_INTEGER;
      for (let i = 0; i < rangeCount; i++) {
        const idStart = this.seqIdStart.value(i);
        const idEnd = this.seqIdEnd.value(i);
        if (idStart < minSeqId) minSeqId = idStart;
        if (maxSeqId < idEnd) maxSeqId = idEnd;
      }
      // Without any ranges, produce an empty sequence (count 0, offset 0).
      if (rangeCount === 0) minSeqId = 1;

      // Never let inconsistent input turn into a negative row count.
      const count = Math.max(0, maxSeqId - minSeqId + 1);

      this.code = Column.ofConst('X', count, Column.Schema.str) as Column<Alphabet>;
      this.label = Column.ofConst('', count, Column.Schema.str);
      this.seqId = Column.ofLambda({
        // Row 0 is the position of `minSeqId`, so the last row is `maxSeqId`.
        value: row => row + minSeqId,
        rowCount: count,
        schema: Column.Schema.int
      });
      this.compId = Column.ofConst('', count, Column.Schema.str);

      this.offset = minSeqId - 1;
      this.length = count;
    }
  }
  190. }
  191. export { Sequence }