SeqAn3
The Modern C++ library for sequence analysis.
format_bam.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2019, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2019, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <iterator>
16 #include <string>
17 #include <vector>
18 
19 #include <seqan3/alphabet/detail/convert.hpp>
43 #include <seqan3/std/concepts>
44 #include <seqan3/std/ranges>
45 
46 namespace seqan3
47 {
48 
59 class format_bam : private detail::format_sam_base
60 {
61 public:
65  format_bam() noexcept = default;
66  format_bam(format_bam const &) noexcept = default;
67  format_bam & operator=(format_bam const &) noexcept = default;
68  format_bam(format_bam &&) noexcept = default;
69  format_bam & operator=(format_bam &&) noexcept = default;
70  ~format_bam() noexcept = default;
71 
75  {
76  { "bam" }
77  };
78 
79 protected:
80  template <typename stream_type, // constraints checked by file
81  typename seq_legal_alph_type,
82  typename ref_seqs_type,
83  typename ref_ids_type,
84  typename seq_type,
85  typename id_type,
86  typename offset_type,
87  typename ref_seq_type,
88  typename ref_id_type,
89  typename ref_offset_type,
90  typename align_type,
91  typename cigar_type,
92  typename flag_type,
93  typename mapq_type,
94  typename qual_type,
95  typename mate_type,
96  typename tag_dict_type,
97  typename e_value_type,
98  typename bit_score_type>
99  void read_alignment_record(stream_type & stream,
100  alignment_file_input_options<seq_legal_alph_type> const & SEQAN3_DOXYGEN_ONLY(options),
101  ref_seqs_type & ref_seqs,
103  seq_type & seq,
104  qual_type & qual,
105  id_type & id,
106  offset_type & offset,
107  ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq),
108  ref_id_type & ref_id,
109  ref_offset_type & ref_offset,
110  align_type & align,
111  cigar_type & cigar_vector,
112  flag_type & flag,
113  mapq_type & mapq,
114  mate_type & mate,
115  tag_dict_type & tag_dict,
116  e_value_type & SEQAN3_DOXYGEN_ONLY(e_value),
117  bit_score_type & SEQAN3_DOXYGEN_ONLY(bit_score));
118 
119  template <typename stream_type,
120  typename header_type,
121  typename seq_type,
122  typename id_type,
123  typename ref_seq_type,
124  typename ref_id_type,
125  typename align_type,
126  typename cigar_type,
127  typename qual_type,
128  typename mate_type,
129  typename tag_dict_type>
130  void write_alignment_record([[maybe_unused]] stream_type & stream,
131  [[maybe_unused]] alignment_file_output_options const & options,
132  [[maybe_unused]] header_type && header,
133  [[maybe_unused]] seq_type && seq,
134  [[maybe_unused]] qual_type && qual,
135  [[maybe_unused]] id_type && id,
136  [[maybe_unused]] int32_t const offset,
137  [[maybe_unused]] ref_seq_type && SEQAN3_DOXYGEN_ONLY(ref_seq),
138  [[maybe_unused]] ref_id_type && ref_id,
139  [[maybe_unused]] std::optional<int32_t> ref_offset,
140  [[maybe_unused]] align_type && align,
141  [[maybe_unused]] cigar_type && cigar_vector,
142  [[maybe_unused]] sam_flag const flag,
143  [[maybe_unused]] uint8_t const mapq,
144  [[maybe_unused]] mate_type && mate,
145  [[maybe_unused]] tag_dict_type && tag_dict,
146  [[maybe_unused]] double SEQAN3_DOXYGEN_ONLY(e_value),
147  [[maybe_unused]] double SEQAN3_DOXYGEN_ONLY(bit_score));
148 
149 private:
151  bool header_was_read{false};
152 
154  std::string string_buffer{};
155 
157  struct alignment_record_core
158  { // naming corresponds to official SAM/BAM specifications
159  int32_t block_size;
160  int32_t refID;
161  int32_t pos;
162  uint32_t l_read_name:8;
163  uint32_t mapq:8;
164  uint32_t bin:16;
165  uint32_t n_cigar_op:16;
166  sam_flag flag;
167  int32_t l_seq;
168  int32_t next_refID;
169  int32_t next_pos;
170  int32_t tlen;
171  };
172 
174  static constexpr std::array<uint8_t, 256> char_to_sam_rank
175  {
176  [] () constexpr
177  {
179 
180  using index_t = std::make_unsigned_t<char>;
181 
182  // ret['M'] = 0; set anyway by initialization
183  ret[static_cast<index_t>('I')] = 1;
184  ret[static_cast<index_t>('D')] = 2;
185  ret[static_cast<index_t>('N')] = 3;
186  ret[static_cast<index_t>('S')] = 4;
187  ret[static_cast<index_t>('H')] = 5;
188  ret[static_cast<index_t>('P')] = 6;
189  ret[static_cast<index_t>('=')] = 7;
190  ret[static_cast<index_t>('X')] = 8;
191 
192  return ret;
193  }()
194  };
195 
197  static uint16_t reg2bin(int32_t beg, int32_t end) noexcept
198  {
199  --end;
200  if (beg >> 14 == end >> 14) return ((1 << 15) - 1) / 7 + (beg >> 14);
201  if (beg >> 17 == end >> 17) return ((1 << 12) - 1) / 7 + (beg >> 17);
202  if (beg >> 20 == end >> 20) return ((1 << 9) - 1) / 7 + (beg >> 20);
203  if (beg >> 23 == end >> 23) return ((1 << 6) - 1) / 7 + (beg >> 23);
204  if (beg >> 26 == end >> 26) return ((1 << 3) - 1) / 7 + (beg >> 26);
205  return 0;
206  }
207 
208  using format_sam_base::read_field; // inherit read_field functions from format_base explicitly
209 
216  template <typename stream_view_type, std::integral number_type>
217  void read_field(stream_view_type && stream_view, number_type & target)
218  {
219  std::ranges::copy_n(std::ranges::begin(stream_view), sizeof(target), reinterpret_cast<char *>(&target));
220  }
221 
227  template <typename stream_view_type>
228  void read_field(stream_view_type && stream_view, float & target)
229  {
230  std::ranges::copy_n(std::ranges::begin(stream_view), sizeof(int32_t), reinterpret_cast<char *>(&target));
231  }
232 
243  template <typename stream_view_type, typename optional_value_type>
244  void read_field(stream_view_type && stream_view, std::optional<optional_value_type> & target)
245  {
246  optional_value_type tmp;
247  read_field(std::forward<stream_view_type>(stream_view), tmp);
248  target = tmp;
249  }
250 
251  template <typename stream_view_type, typename value_type>
252  void read_sam_dict_vector(seqan3::detail::sam_tag_variant & variant,
253  stream_view_type && stream_view,
254  value_type const & SEQAN3_DOXYGEN_ONLY(value));
255 
256  template <typename stream_view_type>
257  void read_field(stream_view_type && stream_view, sam_tag_dictionary & target);
258 
259  template <typename cigar_input_type>
260  auto parse_binary_cigar(cigar_input_type && cigar_input, uint16_t n_cigar_op) const;
261 
262  static std::string get_tag_dict_str(sam_tag_dictionary const & tag_dict);
263 };
264 
266 template <typename stream_type, // constraints checked by file
267  typename seq_legal_alph_type,
268  typename ref_seqs_type,
269  typename ref_ids_type,
270  typename seq_type,
271  typename id_type,
272  typename offset_type,
273  typename ref_seq_type,
274  typename ref_id_type,
275  typename ref_offset_type,
276  typename align_type,
277  typename cigar_type,
278  typename flag_type,
279  typename mapq_type,
280  typename qual_type,
281  typename mate_type,
282  typename tag_dict_type,
283  typename e_value_type,
284  typename bit_score_type>
285 inline void format_bam::read_alignment_record(stream_type & stream,
286  alignment_file_input_options<seq_legal_alph_type> const & SEQAN3_DOXYGEN_ONLY(options),
287  ref_seqs_type & ref_seqs,
289  seq_type & seq,
290  qual_type & qual,
291  id_type & id,
292  offset_type & offset,
293  ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq),
294  ref_id_type & ref_id,
295  ref_offset_type & ref_offset,
296  align_type & align,
297  cigar_type & cigar_vector,
298  flag_type & flag,
299  mapq_type & mapq,
300  mate_type & mate,
301  tag_dict_type & tag_dict,
302  e_value_type & SEQAN3_DOXYGEN_ONLY(e_value),
303  bit_score_type & SEQAN3_DOXYGEN_ONLY(bit_score))
304 {
305  static_assert(detail::decays_to_ignore_v<ref_offset_type> ||
306  detail::is_type_specialisation_of_v<ref_offset_type, std::optional>,
307  "The ref_offset must be a specialisation of std::optional.");
308 
309  static_assert(detail::decays_to_ignore_v<mapq_type> || std::same_as<mapq_type, uint8_t>,
310  "The type of field::mapq must be uint8_t.");
311 
312  static_assert(detail::decays_to_ignore_v<flag_type> || std::same_as<flag_type, sam_flag>,
313  "The type of field::flag must be seqan3::sam_flag.");
314 
316  auto stream_view = std::ranges::subrange<decltype(stream_buf_t{stream}), decltype(stream_buf_t{})>
317  {stream_buf_t{stream}, stream_buf_t{}};
318 
319  // these variables need to be stored to compute the ALIGNMENT
320  [[maybe_unused]] int32_t offset_tmp{};
321  [[maybe_unused]] int32_t soft_clipping_end{};
322  [[maybe_unused]] std::vector<cigar> tmp_cigar_vector{};
323  [[maybe_unused]] int32_t ref_length{0}, seq_length{0}; // length of aligned part for ref and query
324 
325  // Header
326  // -------------------------------------------------------------------------------------------------------------
327  if (!header_was_read)
328  {
329  // magic BAM string
330  if (!std::ranges::equal(stream_view | views::take_exactly_or_throw(4), std::string_view{"BAM\1"}))
331  throw format_error{"File is not in BAM format."};
332 
333  int32_t tmp32{};
334  read_field(stream_view, tmp32);
335 
336  if (tmp32 > 0) // header text is present
337  read_header(stream_view | views::take_exactly_or_throw(tmp32)
338  | views::take_until_and_consume(is_char<'\0'>),
339  header,
340  ref_seqs);
341 
342  int32_t n_ref;
343  read_field(stream_view, n_ref);
344 
345  for (int32_t ref_idx = 0; ref_idx < n_ref; ++ref_idx)
346  {
347  read_field(stream_view, tmp32); // l_name (length of reference name including \0 character)
348 
349  string_buffer.resize(tmp32 - 1);
350  std::ranges::copy_n(std::ranges::begin(stream_view), tmp32 - 1, string_buffer.data()); // copy without \0 character
351  std::ranges::next(std::ranges::begin(stream_view)); // skip \0 character
352 
353  read_field(stream_view, tmp32); // l_ref (length of reference sequence)
354 
355  auto id_it = header.ref_dict.find(string_buffer);
356 
357  // sanity checks of reference information to existing header object:
358  if (id_it == header.ref_dict.end()) // [unlikely]
359  {
360  throw format_error{detail::to_string("Unknown reference name '" + string_buffer +
361  "' found in BAM file header (header.ref_ids():",
362  header.ref_ids(), ").")};
363  }
364  else if (id_it->second != ref_idx) // [unlikely]
365  {
366  throw format_error{detail::to_string("Reference id '", string_buffer, "' at position ", ref_idx,
367  " does not correspond to the position ", id_it->second,
368  " in the header (header.ref_ids():", header.ref_ids(), ").")};
369  }
370  else if (std::get<0>(header.ref_id_info[id_it->second]) != tmp32) // [unlikely]
371  {
372  throw format_error{"Provided reference has unequal length as specified in the header."};
373  }
374  }
375 
376  header_was_read = true;
377 
378  if (stream_buf_t{stream} == stream_buf_t{}) // no records follow
379  return;
380  }
381 
382  // read alignment record into buffer
383  // -------------------------------------------------------------------------------------------------------------
384  alignment_record_core core;
385  std::ranges::copy_n(stream_view.begin(), sizeof(core), reinterpret_cast<char *>(&core));
386 
387  if (core.refID >= static_cast<int32_t>(header.ref_ids().size()) || core.refID < -1) // [[unlikely]]
388  {
389  throw format_error{detail::to_string("Reference id index '", core.refID, "' is not in range of ",
390  "header.ref_ids(), which has size ", header.ref_ids().size(), ".")};
391  }
392  else if (core.refID > -1) // not unmapped
393  {
394  ref_id = core.refID; // field::ref_id
395  }
396 
397  flag = core.flag; // field::flag
398  mapq = core.mapq; // field::mapq
399 
400  if (core.pos > -1) // [[likely]]
401  ref_offset = core.pos; // field::ref_offset
402 
403  if constexpr (!detail::decays_to_ignore_v<mate_type>) // field::mate
404  {
405  if (core.next_refID > -1)
406  get<0>(mate) = core.next_refID;
407 
408  if (core.next_pos > -1) // [[likely]]
409  get<1>(mate) = core.next_pos;
410 
411  get<2>(mate) = core.tlen;
412  }
413 
414  // read id
415  // -------------------------------------------------------------------------------------------------------------
416  read_field(stream_view | views::take_exactly_or_throw(core.l_read_name - 1), id); // field::id
417  std::ranges::next(std::ranges::begin(stream_view)); // skip '\0'
418 
419  // read cigar string
420  // -------------------------------------------------------------------------------------------------------------
421  if constexpr (!detail::decays_to_ignore_v<align_type> || !detail::decays_to_ignore_v<cigar_type>)
422  {
423  std::tie(tmp_cigar_vector, ref_length, seq_length) = parse_binary_cigar(stream_view, core.n_cigar_op);
424  transfer_soft_clipping_to(tmp_cigar_vector, offset_tmp, soft_clipping_end);
425  // the actual cigar_vector is swapped with tmp_cigar_vector at the end to avoid copying
426  }
427  else
428  {
429  detail::consume(stream_view | views::take_exactly_or_throw(core.n_cigar_op * 4));
430  }
431 
432  offset = offset_tmp;
433 
434  // read sequence
435  // -------------------------------------------------------------------------------------------------------------
436  if (core.l_seq > 0) // sequence information is given
437  {
438  auto seq_stream = stream_view
439  | views::take_exactly_or_throw(core.l_seq / 2) // one too short if uneven
441  {
442  return {sam_dna16{}.assign_rank(std::min(15, static_cast<uint8_t>(c) >> 4)),
443  sam_dna16{}.assign_rank(std::min(15, static_cast<uint8_t>(c) & 0x0f))};
444  });
445 
446  if constexpr (detail::decays_to_ignore_v<seq_type>)
447  {
448  if constexpr (!detail::decays_to_ignore_v<align_type>)
449  {
450  static_assert(sequence_container<std::remove_reference_t<decltype(get<1>(align))>>,
451  "If you want to read ALIGNMENT but not SEQ, the alignment"
452  " object must store a sequence container at the second (query) position.");
453 
454  if (!tmp_cigar_vector.empty()) // only parse alignment if cigar information was given
455  {
456  assert(core.l_seq == (seq_length + offset_tmp + soft_clipping_end)); // sanity check
457  using alph_t = value_type_t<decltype(get<1>(align))>;
458  constexpr auto from_dna16 = detail::convert_through_char_representation<alph_t, sam_dna16>;
459 
460  get<1>(align).reserve(seq_length);
461 
462  auto tmp_iter = std::ranges::begin(seq_stream);
463  std::ranges::advance(tmp_iter, offset_tmp / 2); // skip soft clipped bases at the beginning
464 
465  if (offset_tmp & 1)
466  {
467  get<1>(align).push_back(from_dna16[to_rank(get<1>(*tmp_iter))]);
468  ++tmp_iter;
469  --seq_length;
470  }
471 
472  for (size_t i = (seq_length / 2); i > 0; --i)
473  {
474  get<1>(align).push_back(from_dna16[to_rank(get<0>(*tmp_iter))]);
475  get<1>(align).push_back(from_dna16[to_rank(get<1>(*tmp_iter))]);
476  ++tmp_iter;
477  }
478 
479  if (seq_length & 1)
480  {
481  get<1>(align).push_back(from_dna16[to_rank(get<0>(*tmp_iter))]);
482  ++tmp_iter;
483  --soft_clipping_end;
484  }
485 
486  std::ranges::advance(tmp_iter, (soft_clipping_end + !(seq_length & 1)) / 2);
487  }
488  else
489  {
490  get<1>(align) = std::remove_reference_t<decltype(get<1>(align))>{}; // assign empty container
491  }
492  }
493  else
494  {
495  detail::consume(seq_stream);
496  if (core.l_seq & 1)
497  std::ranges::next(std::ranges::begin(stream_view));
498  }
499  }
500  else
501  {
502  using alph_t = value_type_t<decltype(seq)>;
503  constexpr auto from_dna16 = detail::convert_through_char_representation<alph_t, sam_dna16>;
504 
505  for (auto [d1, d2] : seq_stream)
506  {
507  seq.push_back(from_dna16[to_rank(d1)]);
508  seq.push_back(from_dna16[to_rank(d2)]);
509  }
510 
511  if (core.l_seq & 1)
512  {
513  sam_dna16 d = sam_dna16{}.assign_rank(std::min(15, static_cast<uint8_t>(*std::ranges::begin(stream_view)) >> 4));
514  seq.push_back(from_dna16[to_rank(d)]);
515  std::ranges::next(std::ranges::begin(stream_view));
516  }
517 
518  if constexpr (!detail::decays_to_ignore_v<align_type>)
519  {
520  assign_unaligned(get<1>(align),
521  seq | views::slice(static_cast<decltype(std::ranges::distance(seq))>(offset_tmp),
522  std::ranges::distance(seq) - soft_clipping_end));
523  }
524  }
525  }
526 
527  // read qual string
528  // -------------------------------------------------------------------------------------------------------------
529  read_field(stream_view | views::take_exactly_or_throw(core.l_seq)
530  | std::views::transform([] (char chr) { return static_cast<char>(chr + 33); }), qual);
531 
532  // All remaining optional fields if any: SAM tags dictionary
533  // -------------------------------------------------------------------------------------------------------------
534  int32_t remaining_bytes = core.block_size - (sizeof(alignment_record_core) - 4/*block_size excluded*/) -
535  core.l_read_name - core.n_cigar_op * 4 - (core.l_seq + 1) / 2 - core.l_seq;
536  assert(remaining_bytes >= 0);
537  auto tags_view = stream_view | views::take_exactly_or_throw(remaining_bytes);
538 
539  while (tags_view.size() > 0)
540  read_field(tags_view, tag_dict);
541 
542  // DONE READING - wrap up
543  // -------------------------------------------------------------------------------------------------------------
544  if constexpr (!detail::decays_to_ignore_v<align_type>)
545  {
546  // Check cigar, if it matches ‘kSmN’, where ‘k’ equals lseq, ‘m’ is the reference sequence length in the
547  // alignment, and ‘S’ and ‘N’ are the soft-clipping and reference-clip, then the cigar string was larger
548  // than 65535 operations and is stored in the sam_tag_dictionary (tag GC).
549  if (core.l_seq != 0 && offset_tmp == core.l_seq)
550  {
551  if constexpr (detail::decays_to_ignore_v<tag_dict_type> | detail::decays_to_ignore_v<seq_type>)
552  { // maybe only throw in debug mode and otherwise return an empty alignment?
553  throw format_error{detail::to_string("The cigar string '", offset_tmp, "S", ref_length,
554  "N' suggests that the cigar string exceeded 65535 elements and was therefore ",
555  "stored in the optional field CG. You need to read in the field::tags and "
556  "field::seq in order to access this information.")};
557  }
558  else
559  {
560  auto it = tag_dict.find("CG"_tag);
561 
562  if (it == tag_dict.end())
563  throw format_error{detail::to_string("The cigar string '", offset_tmp, "S", ref_length,
564  "N' suggests that the cigar string exceeded 65535 elements and was therefore ",
565  "stored in the optional field CG but this tag is not present in the given ",
566  "record.")};
567 
568  auto cigar_view = std::views::all(std::get<std::string>(it->second));
569  std::tie(tmp_cigar_vector, ref_length, seq_length) = parse_cigar(cigar_view);
570  offset_tmp = soft_clipping_end = 0;
571  transfer_soft_clipping_to(tmp_cigar_vector, offset_tmp, soft_clipping_end);
572 
573  assign_unaligned(get<1>(align),
574  seq | views::slice(static_cast<decltype(std::ranges::distance(seq))>(offset_tmp),
575  std::ranges::distance(seq) - soft_clipping_end));
576  tag_dict.erase(it); // remove redundant information
577  }
578  }
579 
580  // Alignment object construction
581  construct_alignment(align, tmp_cigar_vector, core.refID, ref_seqs, core.pos, ref_length); // inherited from SAM format
582  }
583 
584  if constexpr (!detail::decays_to_ignore_v<cigar_type>)
585  std::swap(cigar_vector, tmp_cigar_vector);
586 }
587 
589 template <typename stream_type,
590  typename header_type,
591  typename seq_type,
592  typename id_type,
593  typename ref_seq_type,
594  typename ref_id_type,
595  typename align_type,
596  typename cigar_type,
597  typename qual_type,
598  typename mate_type,
599  typename tag_dict_type>
600 inline void format_bam::write_alignment_record([[maybe_unused]] stream_type & stream,
601  [[maybe_unused]] alignment_file_output_options const & options,
602  [[maybe_unused]] header_type && header,
603  [[maybe_unused]] seq_type && seq,
604  [[maybe_unused]] qual_type && qual,
605  [[maybe_unused]] id_type && id,
606  [[maybe_unused]] int32_t const offset,
607  [[maybe_unused]] ref_seq_type && SEQAN3_DOXYGEN_ONLY(ref_seq),
608  [[maybe_unused]] ref_id_type && ref_id,
609  [[maybe_unused]] std::optional<int32_t> ref_offset,
610  [[maybe_unused]] align_type && align,
611  [[maybe_unused]] cigar_type && cigar_vector,
612  [[maybe_unused]] sam_flag const flag,
613  [[maybe_unused]] uint8_t const mapq,
614  [[maybe_unused]] mate_type && mate,
615  [[maybe_unused]] tag_dict_type && tag_dict,
616  [[maybe_unused]] double SEQAN3_DOXYGEN_ONLY(e_value),
617  [[maybe_unused]] double SEQAN3_DOXYGEN_ONLY(bit_score))
618 {
619  // ---------------------------------------------------------------------
620  // Type Requirements (as static asserts for user friendliness)
621  // ---------------------------------------------------------------------
622  static_assert((std::ranges::forward_range<seq_type> &&
624  "The seq object must be a std::ranges::forward_range over "
625  "letters that model seqan3::alphabet.");
626 
627  static_assert((std::ranges::forward_range<id_type> &&
629  "The id object must be a std::ranges::forward_range over "
630  "letters that model seqan3::alphabet.");
631 
632  static_assert((std::ranges::forward_range<ref_seq_type> &&
634  "The ref_seq object must be a std::ranges::forward_range "
635  "over letters that model seqan3::alphabet.");
636 
637  if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
638  {
639  static_assert((std::ranges::forward_range<ref_id_type> ||
641  detail::is_type_specialisation_of_v<remove_cvref_t<ref_id_type>, std::optional>),
642  "The ref_id object must be a std::ranges::forward_range "
643  "over letters that model seqan3::alphabet or an integral or a std::optional<integral>.");
644  }
645 
646  static_assert(tuple_like<remove_cvref_t<align_type>>,
647  "The align object must be a std::pair of two ranges whose "
648  "value_type is comparable to seqan3::gap");
649 
650  static_assert((std::tuple_size_v<remove_cvref_t<align_type>> == 2 &&
651  std::equality_comparable_with<gap, reference_t<decltype(std::get<0>(align))>> &&
652  std::equality_comparable_with<gap, reference_t<decltype(std::get<1>(align))>>),
653  "The align object must be a std::pair of two ranges whose "
654  "value_type is comparable to seqan3::gap");
655 
656  static_assert((std::ranges::forward_range<qual_type> &&
658  "The qual object must be a std::ranges::forward_range "
659  "over letters that model seqan3::alphabet.");
660 
661  static_assert(tuple_like<remove_cvref_t<mate_type>>,
662  "The mate object must be a std::tuple of size 3 with "
663  "1) a std::ranges::forward_range with a value_type modelling seqan3::alphabet, "
664  "2) a std::integral or std::optional<std::integral>, and "
665  "3) a std::integral.");
666 
667  static_assert(((std::ranges::forward_range<decltype(std::get<0>(mate))> ||
668  std::integral<remove_cvref_t<decltype(std::get<0>(mate))>> ||
669  detail::is_type_specialisation_of_v<remove_cvref_t<decltype(std::get<0>(mate))>, std::optional>) &&
670  (std::integral<remove_cvref_t<decltype(std::get<1>(mate))>> ||
671  detail::is_type_specialisation_of_v<remove_cvref_t<decltype(std::get<1>(mate))>, std::optional>) &&
672  std::integral<remove_cvref_t<decltype(std::get<2>(mate))>>),
673  "The mate object must be a std::tuple of size 3 with "
674  "1) a std::ranges::forward_range with a value_type modelling seqan3::alphabet, "
675  "2) a std::integral or std::optional<std::integral>, and "
676  "3) a std::integral.");
677 
679  "The tag_dict object must be of type seqan3::sam_tag_dictionary.");
680 
681  if constexpr (detail::decays_to_ignore_v<header_type>)
682  {
683  throw format_error{"BAM can only be written with a header but you did not provide enough information! "
684  "You can either construct the output file with ref_ids and ref_seqs information and "
685  "the header will be created for you, or you can access the `header` member directly."};
686  }
687  else
688  {
689  // ---------------------------------------------------------------------
690  // logical Requirements
691  // ---------------------------------------------------------------------
692 
693  if (ref_offset.has_value() && (ref_offset.value() + 1) < 0)
694  throw format_error{detail::to_string("The ref_offset object must be >= -1 but is: ", ref_offset)};
695 
696  seqan3::ostreambuf_iterator stream_it{stream};
697 
698  // ---------------------------------------------------------------------
699  // Writing the BAM Header on first call
700  // ---------------------------------------------------------------------
701  if (!header_was_written)
702  {
703  stream << "BAM\1";
705  write_header(os, options, header); // write SAM header to temporary stream to query the size.
706  int32_t l_text{static_cast<int32_t>(os.str().size())};
707  std::ranges::copy_n(reinterpret_cast<char *>(&l_text), 4, stream_it); // write read id
708 
709  stream << os.str();
710 
711  int32_t n_ref{static_cast<int32_t>(header.ref_ids().size())};
712  std::ranges::copy_n(reinterpret_cast<char *>(&n_ref), 4, stream_it); // write read id
713 
714  for (int32_t ridx = 0; ridx < n_ref; ++ridx)
715  {
716  int32_t l_name{static_cast<int32_t>(header.ref_ids()[ridx].size()) + 1}; // plus null character
717  std::ranges::copy_n(reinterpret_cast<char *>(&l_name), 4, stream_it); // write l_name
718  // write reference name:
719  std::ranges::copy(header.ref_ids()[ridx].begin(), header.ref_ids()[ridx].end(), stream_it);
720  stream_it = '\0';
721  // write reference sequence length:
722  std::ranges::copy_n(reinterpret_cast<char *>(&get<0>(header.ref_id_info[ridx])), 4, stream_it);
723  }
724 
725  header_was_written = true;
726  }
727 
728  // ---------------------------------------------------------------------
729  // Writing the Record
730  // ---------------------------------------------------------------------
731  int32_t ref_length{};
732 
733  // if alignment is non-empty, replace cigar_vector.
734  // else, compute the ref_length from given cigar_vector which is needed to fill field `bin`.
735  if (!std::ranges::empty(get<0>(align)) && !std::ranges::empty(get<1>(align)))
736  {
737  ref_length = std::ranges::distance(get<1>(align));
738 
739  // compute possible distance from alignment end to sequence end
740  // which indicates soft clipping at the end.
741  // This should be replaced by a free count_gaps function for
742  // aligned sequences which is more efficient if possible.
743  int32_t off_end{static_cast<int32_t>(std::ranges::distance(seq)) - offset};
744 
745  for (auto chr : get<1>(align))
746  if (chr == gap{})
747  ++off_end;
748 
749  off_end -= ref_length;
750  cigar_vector = detail::get_cigar_vector(align, offset, off_end);
751  }
752  else
753  {
754  int32_t dummy_seq_length{};
755  for (auto & [count, operation] : cigar_vector)
756  update_alignment_lengths(ref_length, dummy_seq_length, operation.to_char(), count);
757  }
758 
759  if (cigar_vector.size() >= (1 << 16)) // must be written into the sam tag CG
760  {
761  tag_dict["CG"_tag] = detail::get_cigar_string(cigar_vector);
762  cigar_vector.resize(2);
763  cigar_vector[0] = cigar{static_cast<uint32_t>(std::ranges::distance(seq)), 'S'_cigar_op};
764  cigar_vector[1] = cigar{static_cast<uint32_t>(std::ranges::distance(get<1>(align))), 'N'_cigar_op};
765  }
766 
767  std::string tag_dict_binary_str = get_tag_dict_str(tag_dict);
768 
769  alignment_record_core core
770  {
771  /* block_size */ 0, // will be initialised right after
772  /* refID */ -1, // will be initialised right after
773  /* pos */ ref_offset.value_or(-1),
774  /* l_read_name */ std::max<uint8_t>(std::min<size_t>(std::ranges::distance(id) + 1, 255), 2),
775  /* mapq */ mapq,
776  /* bin */ reg2bin(ref_offset.value_or(-1), ref_length),
777  /* n_cigar_op */ static_cast<uint16_t>(cigar_vector.size()),
778  /* flag */ flag,
779  /* l_seq */ static_cast<int32_t>(std::ranges::distance(seq)),
780  /* next_refId */ -1, // will be initialised right after
781  /* next_pos */ get<1>(mate).value_or(-1),
782  /* tlen */ get<2>(mate)
783  };
784 
785  auto check_and_assign_id_to = [&header] ([[maybe_unused]] auto & id_source,
786  [[maybe_unused]] auto & id_target)
787  {
788  using id_t = std::remove_reference_t<decltype(id_source)>;
789 
790  if constexpr (!detail::decays_to_ignore_v<id_t>)
791  {
792  if constexpr (std::integral<id_t>)
793  {
794  id_target = id_source;
795  }
796  else if constexpr (detail::is_type_specialisation_of_v<id_t, std::optional>)
797  {
798  id_target = id_source.value_or(-1);
799  }
800  else
801  {
802  if (!std::ranges::empty(id_source)) // otherwise default will remain (-1)
803  {
804  auto id_it = header.ref_dict.end();
805 
806  if constexpr (std::ranges::contiguous_range<decltype(id_source)> &&
807  std::ranges::sized_range<decltype(id_source)> &&
808  forwarding_range<decltype(id_source)>)
809  {
810  id_it = header.ref_dict.find(std::span{std::ranges::data(id_source),
811  std::ranges::size(id_source)});
812  }
813  else
814  {
815  using header_ref_id_type = std::remove_reference_t<decltype(header.ref_ids()[0])>;
816 
817  static_assert(implicitly_convertible_to<decltype(id_source), header_ref_id_type>,
818  "The ref_id type is not convertible to the reference id information stored in the "
819  "reference dictionary of the header object.");
820 
821  id_it = header.ref_dict.find(id_source);
822  }
823 
824  if (id_it == header.ref_dict.end())
825  {
826  throw format_error{detail::to_string("Unknown reference name '", id_source, "' could "
827  "not be found in BAM header ref_dict: ",
828  header.ref_dict, ".")};
829  }
830 
831  id_target = id_it->second;
832  }
833  }
834  }
835  };
836 
837  // initialise core.refID
838  check_and_assign_id_to(ref_id, core.refID);
839 
840  // initialise core.next_refID
841  check_and_assign_id_to(get<0>(mate), core.next_refID);
842 
843  // initialise core.block_size
844  core.block_size = sizeof(core) - 4/*block_size excluded*/ +
845  core.l_read_name +
846  core.n_cigar_op * 4 + // each int32_t has 4 bytes
847  (core.l_seq + 1) / 2 + // bitcompressed seq
848  core.l_seq + // quality string
849  tag_dict_binary_str.size();
850 
851  std::ranges::copy_n(reinterpret_cast<char *>(&core), sizeof(core), stream_it); // write core
852 
853  if (std::ranges::distance(id) == 0) // empty id is represented as * for backward compatibility
854  stream_it = '*';
855  else
856  std::ranges::copy_n(std::ranges::begin(id), core.l_read_name - 1, stream_it); // write read id
857  stream_it = '\0';
858 
859  // write cigar
860  for (auto [cigar_count, op] : cigar_vector)
861  {
862  cigar_count = cigar_count << 4;
863  cigar_count |= static_cast<int32_t>(char_to_sam_rank[op.to_char()]);
864  std::ranges::copy_n(reinterpret_cast<char *>(&cigar_count), 4, stream_it);
865  }
866 
867  // write seq (bit-compressed: sam_dna16 characters go into one byte)
868  using alph_t = value_type_t<seq_type>;
869  constexpr auto to_dna16 = detail::convert_through_char_representation<sam_dna16, alph_t>;
870 
871  auto sit = std::ranges::begin(seq);
872  for (int32_t sidx = 0; sidx < ((core.l_seq & 1) ? core.l_seq - 1 : core.l_seq); ++sidx, ++sit)
873  {
874  uint8_t compressed_chr = to_rank(to_dna16[to_rank(*sit)]) << 4;
875  ++sidx, ++sit;
876  compressed_chr |= to_rank(to_dna16[to_rank(*sit)]);
877  stream_it = static_cast<char>(compressed_chr);
878  }
879 
880  if (core.l_seq & 1) // write one more
881  stream_it = static_cast<char>(to_rank(to_dna16[to_rank(*sit)]) << 4);
882 
883  // write qual
884  if (std::ranges::empty(qual))
885  {
886  auto v = views::repeat_n(static_cast<char>(255), core.l_seq);
887  std::ranges::copy_n(v.begin(), core.l_seq, stream_it);
888  }
889  else
890  {
891  assert(static_cast<int32_t>(std::ranges::distance(qual)) == core.l_seq);
892  auto v = qual | std::views::transform([] (auto chr) { return static_cast<char>(to_rank(chr)); });
893  std::ranges::copy_n(v.begin(), core.l_seq, stream_it);
894  }
895 
896  // write optional fields
897  stream << tag_dict_binary_str;
898  } // if constexpr (!detail::decays_to_ignore_v<header_type>)
899 }
900 
902 template <typename stream_view_type, typename value_type>
903 inline void format_bam::read_sam_dict_vector(seqan3::detail::sam_tag_variant & variant,
904  stream_view_type && stream_view,
905  value_type const & SEQAN3_DOXYGEN_ONLY(value))
906 {
907  int32_t count;
908  read_field(stream_view, count); // read length of vector
909  std::vector<value_type> tmp_vector;
910  tmp_vector.reserve(count);
911 
912  while (count > 0)
913  {
914  value_type tmp{};
915  read_field(stream_view, tmp);
916  tmp_vector.push_back(std::move(tmp));
917  --count;
918  }
919  variant = std::move(tmp_vector);
920 }
921 
939 template <typename stream_view_type>
940 inline void format_bam::read_field(stream_view_type && stream_view, sam_tag_dictionary & target)
941 {
942  /* Every BA< tag has the format "[TAG][TYPE_ID][VALUE]", where TAG is a two letter
943  name tag which is converted to a unique integer identifier and TYPE_ID is one character in [A,i,Z,H,B,f]
944  describing the type for the upcoming VALUES. If TYPE_ID=='B' it signals an array of
945  VALUE's and the inner value type is identified by the next character, one of [cCsSiIf], followed
946  by the length (int32_t) of the array, followed by the values.
947  */
948  uint16_t tag = static_cast<uint16_t>(*std::ranges::begin(stream_view)) << 8;
949  std::ranges::next(std::ranges::begin(stream_view)); // skip char read before
950  tag += static_cast<uint16_t>(*std::ranges::begin(stream_view));
951  std::ranges::next(std::ranges::begin(stream_view)); // skip char read before
952  char type_id = static_cast<char>(*std::ranges::begin(stream_view));
953  std::ranges::next(std::ranges::begin(stream_view)); // skip char read before
954 
955  switch (type_id)
956  {
957  case 'A' : // char
958  {
959  target[tag] = static_cast<char>(*std::ranges::begin(stream_view));
960  std::ranges::next(std::ranges::begin(stream_view)); // skip char that has been read
961  break;
962  }
963  // all integer sizes are possible
964  case 'c' : // int8_t
965  {
966  int8_t tmp;
967  read_field(stream_view, tmp);
968  target[tag] = static_cast<int32_t>(tmp); // readable sam format only allows int32_t
969  break;
970  }
971  case 'C' : // uint8_t
972  {
973  uint8_t tmp;
974  read_field(stream_view, tmp);
975  target[tag] = static_cast<int32_t>(tmp); // readable sam format only allows int32_t
976  break;
977  }
978  case 's' : // int16_t
979  {
980  int16_t tmp;
981  read_field(stream_view, tmp);
982  target[tag] = static_cast<int32_t>(tmp); // readable sam format only allows int32_t
983  break;
984  }
985  case 'S' : // uint16_t
986  {
987  uint16_t tmp;
988  read_field(stream_view, tmp);
989  target[tag] = static_cast<int32_t>(tmp); // readable sam format only allows int32_t
990  break;
991  }
992  case 'i' : // int32_t
993  {
994  int32_t tmp;
995  read_field(stream_view, tmp);
996  target[tag] = std::move(tmp); // readable sam format only allows int32_t
997  break;
998  }
999  case 'I' : // uint32_t
1000  {
1001  uint32_t tmp;
1002  read_field(stream_view, tmp);
1003  target[tag] = static_cast<int32_t>(tmp); // readable sam format only allows int32_t
1004  break;
1005  }
1006  case 'f' : // float
1007  {
1008  float tmp;
1009  read_field(stream_view, tmp);
1010  target[tag] = tmp;
1011  break;
1012  }
1013  case 'Z' : // string
1014  {
1015  string_buffer.clear();
1016  while (!is_char<'\0'>(*std::ranges::begin(stream_view)))
1017  {
1018  string_buffer.push_back(*std::ranges::begin(stream_view));
1019  std::ranges::next(std::ranges::begin(stream_view));
1020  }
1021  std::ranges::next(std::ranges::begin(stream_view)); // skip \0
1022  target[tag] = string_buffer;
1023  break;
1024  }
1025  case 'H' :
1026  {
1027  // TODO
1028  break;
1029  }
1030  case 'B' : // Array. Value type depends on second char [cCsSiIf]
1031  {
1032  char array_value_type_id = *std::ranges::begin(stream_view);
1033  std::ranges::next(std::ranges::begin(stream_view)); // skip char read before
1034 
1035  switch (array_value_type_id)
1036  {
1037  case 'c' : // int8_t
1038  read_sam_dict_vector(target[tag], stream_view, int8_t{});
1039  break;
1040  case 'C' : // uint8_t
1041  read_sam_dict_vector(target[tag], stream_view, uint8_t{});
1042  break;
1043  case 's' : // int16_t
1044  read_sam_dict_vector(target[tag], stream_view, int16_t{});
1045  break;
1046  case 'S' : // uint16_t
1047  read_sam_dict_vector(target[tag], stream_view, uint16_t{});
1048  break;
1049  case 'i' : // int32_t
1050  read_sam_dict_vector(target[tag], stream_view, int32_t{});
1051  break;
1052  case 'I' : // uint32_t
1053  read_sam_dict_vector(target[tag], stream_view, uint32_t{});
1054  break;
1055  case 'f' : // float
1056  read_sam_dict_vector(target[tag], stream_view, float{});
1057  break;
1058  default:
1059  throw format_error{detail::to_string("The first character in the numerical id of a SAM tag ",
1060  "must be one of [cCsSiIf] but '", array_value_type_id, "' was given.")};
1061  }
1062  break;
1063  }
1064  default:
1065  throw format_error{detail::to_string("The second character in the numerical id of a "
1066  "SAM tag must be one of [A,i,Z,H,B,f] but '", type_id, "' was given.")};
1067  }
1068 }
1069 
1084 template <typename cigar_input_type>
1085 inline auto format_bam::parse_binary_cigar(cigar_input_type && cigar_input, uint16_t n_cigar_op) const
1086 {
1087  std::vector<cigar> operations{};
1088  char operation{'\0'};
1089  uint32_t count{};
1090  int32_t ref_length{}, seq_length{};
1091  uint32_t operation_and_count{}; // in BAM operation and count values are stored within one 32 bit integer
1092  constexpr char const * cigar_mapping = "MIDNSHP=X*******";
1093  constexpr uint32_t cigar_mask = 0x0f; // 0000000000001111
1094 
1095  if (n_cigar_op == 0) // [[unlikely]]
1096  return std::tuple{operations, ref_length, seq_length};
1097 
1098  // parse the rest of the cigar
1099  // -------------------------------------------------------------------------------------------------------------
1100  while (n_cigar_op > 0) // until stream is not empty
1101  {
1102  std::ranges::copy_n(std::ranges::begin(cigar_input),
1103  sizeof(operation_and_count),
1104  reinterpret_cast<char*>(&operation_and_count));
1105  operation = cigar_mapping[operation_and_count & cigar_mask];
1106  count = operation_and_count >> 4;
1107 
1108  update_alignment_lengths(ref_length, seq_length, operation, count);
1109  operations.emplace_back(count, cigar_op{}.assign_char(operation));
1110  --n_cigar_op;
1111  }
1112 
1113  return std::tuple{operations, ref_length, seq_length};
1114 }
1115 
1119 inline std::string format_bam::get_tag_dict_str(sam_tag_dictionary const & tag_dict)
1120 {
1121  std::string result{};
1122 
1123  auto stream_variant_fn = [&result] (auto && arg) // helper to print an std::variant
1124  {
1125  // T is either char, int32_t, float, std::string, or a std::vector<some int>
1126  using T = remove_cvref_t<decltype(arg)>;
1127 
1128  if constexpr (std::same_as<T, int32_t>)
1129  {
1130  // always choose the smallest possible representation [cCsSiI]
1131  bool negative = arg < 0;
1132  auto n = __builtin_ctz(detail::next_power_of_two(((negative) ? arg * -1 : arg) + 1) >> 1) / 8;
1133  n = n * n + 2 * negative; // for switch case order
1134 
1135  switch (n)
1136  {
1137  case 0:
1138  {
1139  result[result.size() - 1] = 'C';
1140  result.append(reinterpret_cast<char const *>(&arg), 1);
1141  break;
1142  }
1143  case 1:
1144  {
1145  result[result.size() - 1] = 'S';
1146  result.append(reinterpret_cast<char const *>(&arg), 2);
1147  break;
1148  }
1149  case 2:
1150  {
1151  result[result.size() - 1] = 'c';
1152  int8_t tmp = static_cast<int8_t>(arg);
1153  result.append(reinterpret_cast<char const *>(&tmp), 1);
1154  break;
1155  }
1156  case 3:
1157  {
1158  result[result.size() - 1] = 's';
1159  int16_t tmp = static_cast<int16_t>(arg);
1160  result.append(reinterpret_cast<char const *>(&tmp), 2);
1161  break;
1162  }
1163  default:
1164  {
1165  result.append(reinterpret_cast<char const *>(&arg), 4); // always i
1166  break;
1167  }
1168  }
1169  }
1170  else if constexpr (std::same_as<T, std::string>)
1171  {
1172  result.append(reinterpret_cast<char const *>(arg.data()), arg.size() + 1/*+ null character*/);
1173  }
1174  else if constexpr (!std::ranges::range<T>) // char, float
1175  {
1176  result.append(reinterpret_cast<char const *>(&arg), sizeof(arg));
1177  }
1178  else // std::vector of some arithmetic_type type
1179  {
1180  int32_t sz{static_cast<int32_t>(arg.size())};
1181  result.append(reinterpret_cast<char *>(&sz), 4);
1182  result.append(reinterpret_cast<char const *>(arg.data()), arg.size() * sizeof(value_type_t<T>));
1183  }
1184  };
1185 
1186  for (auto & [tag, variant] : tag_dict)
1187  {
1188  result.push_back(static_cast<char>(tag / 256));
1189  result.push_back(static_cast<char>(tag % 256));
1190 
1191  result.push_back(detail::sam_tag_type_char[variant.index()]);
1192 
1193  if (!is_char<'\0'>(detail::sam_tag_type_char_extra[variant.index()]))
1194  result.push_back(detail::sam_tag_type_char_extra[variant.index()]);
1195 
1196  std::visit(stream_variant_fn, variant);
1197  }
1198 
1199  return result;
1200 }
1201 
1202 } // namespace seqan3
seqan3::format_bam::file_extensions
static std::vector< std::string > file_extensions
The valid file extensions for this format; note that you can modify this value.
Definition: format_bam.hpp:75
seqan3::views::take_exactly_or_throw
constexpr auto take_exactly_or_throw
A view adaptor that returns the first size elements from the underlying range and also exposes size i...
Definition: take_exactly.hpp:91
seqan3::gap
The alphabet of a gap character '-'.
Definition: gap.hpp:36
seqan3::alignment_file_output_options
The options type defines various option members that influence the behavior of all or some formats.
Definition: output_options.hpp:22
std::string::resize
T resize(T... args)
seqan3::remove_cvref_t
std::remove_cv_t< std::remove_reference_t< t > > remove_cvref_t
Return the input type with const, volatile and references removed (type trait).
Definition: basic.hpp:35
misc.hpp
Provides helper data structures for the seqan3::alignment_file_output.
seqan3::views::take_until_and_consume
constexpr auto take_until_and_consume
A view adaptor that returns elements from the underlying range until the functor evaluates to true (o...
Definition: take_until.hpp:613
seqan3::field::seq
The "sequence", usually a range of nucleotides or amino acids.
bit_manipulation.hpp
Provides utility functions for bit twiddling.
seqan3::format_bam::operator=
format_bam & operator=(format_bam const &) noexcept=default
Defaulted.
take_until.hpp
Provides seqan3::views::take_until and seqan3::views::take_until_or_throw.
std::string
sequence_container
A more refined container concept than seqan3::container.
std::string_view
tuple.hpp
Provides seqan3::tuple_like.
seqan3::field::offset
Sequence (SEQ) relative start position (0-based), unsigned value.
sam_tag_dictionary.hpp
Provides the seqan3::sam_tag_dictionary class and auxiliaries.
std::pair
std::vector::reserve
T reserve(T... args)
seqan3::format_error
Thrown if information given to output format didn't match expectations.
Definition: exception.hpp:84
seqan3::reference_t
typename reference< t >::type reference_t
Shortcut for seqan3::reference (transformation_trait shortcut).
Definition: pre.hpp:77
vector
std::string::size
T size(T... args)
seqan3::format_bam::read_alignment_record
void read_alignment_record(stream_type &stream, alignment_file_input_options< seq_legal_alph_type > const &options, ref_seqs_type &ref_seqs, alignment_file_header< ref_ids_type > &header, seq_type &seq, qual_type &qual, id_type &id, offset_type &offset, ref_seq_type &ref_seq, ref_id_type &ref_id, ref_offset_type &ref_offset, align_type &align, cigar_type &cigar_vector, flag_type &flag, mapq_type &mapq, mate_type &mate, tag_dict_type &tag_dict, e_value_type &e_value, bit_score_type &bit_score)
Read from the specified stream and back-insert into the given field buffers.
Definition: format_bam.hpp:285
seqan3::views::move
const auto move
A view that turns lvalue-references into rvalue-references.
Definition: move.hpp:68
template_inspection.hpp
Provides seqan3::type_list and auxiliary type traits.
seqan3::sam_flag
sam_flag
An enum flag that describes the properties of an aligned read (given as a SAM record).
Definition: misc.hpp:70
seqan3::to_rank
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition: concept.hpp:142
std::tuple
seqan3::alignment_file_input_options
The options type defines various option members that influence the behaviour of all or some formats.
Definition: input_options.hpp:23
seqan3::alignment_file_header::ref_ids
ref_ids_type & ref_ids()
The range of reference ids.
Definition: header.hpp:119
seqan3::field::bit_score
The bit score (statistical significance indicator), unsigned value.
seqan3::field::ref_seq
The (reference) "sequence" information, usually a range of nucleotides or amino acids.
seqan3::field::ref_offset
Sequence (REF_SEQ) relative start position (0-based), unsigned value.
seqan3::format_bam::format_bam
format_bam() noexcept=default
Defaulted.
std::string::clear
T clear(T... args)
concepts
The Concepts library.
input_format_concept.hpp
Provides seqan3::alignment_file_input_format and auxiliary classes.
std::tie
T tie(T... args)
std::vector::push_back
T push_back(T... args)
seqan3::seq
constexpr sequenced_policy seq
Global execution policy object for sequenced execution policy.
Definition: execution.hpp:54
same_as
The concept std::same_as<T, U> is satisfied if and only if T and U denote the same type.
seqan3::value_type
Exposes the value_type of another type.
Definition: pre.hpp:41
seqan3::format_bam::~format_bam
~format_bam() noexcept=default
slice.hpp
Provides seqan3::views::slice.
seqan3::sam_dna16
A 16 letter DNA alphabet, containing all IUPAC symbols minus the gap and plus an equality sign ('=').
Definition: sam_dna16.hpp:45
seqan3::alignment_file_header
Stores the header information of alignment files.
Definition: header.hpp:32
range.hpp
Provides various transformation traits used by the range module.
seqan3::field::mapq
The mapping quality of the SEQ alignment, usually a ohred-scaled score.
core_language.hpp
Provides concepts for core language types and relations that don't have concepts in C++20 (yet).
seqan3::value_type_t
typename value_type< t >::type value_type_t
Shortcut for seqan3::value_type (transformation_trait shortcut).
Definition: pre.hpp:48
seqan3::alignment_file_header::ref_dict
std::unordered_map< key_type, int32_t, std::hash< key_type >, detail::view_equality_fn > ref_dict
The mapping of reference id to position in the ref_ids() range and the ref_id_info range.
Definition: header.hpp:164
seqan3::cigar
The cigar semialphabet pairs a counter with a seqan3::cigar_op letter.
Definition: cigar.hpp:54
seqan3::ostreambuf_iterator
::ranges::ostreambuf_iterator ostreambuf_iterator
Alias for ranges::ostreambuf_iterator. Writes successive characters onto the output stream from which...
Definition: iterator.hpp:204
header.hpp
Provides the seqan3::alignment_file_header class.
std::array< uint8_t, 256 >
seqan3
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:36
detail.hpp
Auxiliary functions for the alignment IO.
std::istreambuf_iterator
output_options.hpp
Provides seqan3::alignment_file_output_options.
seqan3::search_cfg::all
constexpr detail::search_mode_all all
Configuration element to receive all hits within the error bounds.
Definition: mode.hpp:43
seqan3::pack_traits::size
constexpr size_t size
The size of a type pack.
Definition: traits.hpp:116
tuple_like
Whether a type behaves like a tuple.
std::swap
T swap(T... args)
std::min
T min(T... args)
std::ostringstream
std::variant::index
T index(T... args)
ranges
Adaptations of concepts from the Ranges TS.
seqan3::sam_tag_dictionary
The SAM tag dictionary class that stores all optional SAM fields.
Definition: sam_tag_dictionary.hpp:324
std::remove_reference_t
seqan3::format_bam
The BAM format.
Definition: format_bam.hpp:59
take_exactly.hpp
Provides seqan3::views::take_exactly and seqan3::views::take_exactly_or_throw.
seqan3::alignment_file_header::ref_id_info
std::vector< std::tuple< int32_t, std::string > > ref_id_info
The reference information. (used by the SAM/BAM format)
Definition: header.hpp:161
alphabet
The generic alphabet concept that covers most data types used in ranges.
predicate.hpp
Provides character predicates for tokenisation.
seqan3::field::ref_id
The identifier of the (reference) sequence that SEQ was aligned to.
seqan3::views::slice
constexpr auto slice
A view adaptor that returns a half-open interval on the underlying range.
Definition: slice.hpp:141
ignore_output_iterator.hpp
Provides seqan3::detail::ignore_output_iterator for writing to null stream.
std::visit
T visit(T... args)
output_format_concept.hpp
Provides seqan3::alignment_file_output_format and auxiliary classes.
equality_comparable_with
Requires seqan3::detail::weakly_equality_comparable_witht<t1,t2>, but also that t1 and t2,...
sam_dna16.hpp
Provides seqan3::sam_dna16.
std::remove_cv_t
implicitly_convertible_to
Resolves to std::ranges::implicitly_convertible_to<type1, type2>().
seqan3::field::qual
The qualities, usually in phred-score notation.
std::optional
to_string.hpp
Auxiliary for pretty printing of exception messages.
std::ostringstream::str
T str(T... args)
seqan3::alphabet_base::assign_rank
constexpr derived_type & assign_rank(rank_type const c) noexcept
Assign from a numeric value.
Definition: alphabet_base.hpp:165
seqan3::format_bam::write_alignment_record
void write_alignment_record([[maybe_unused]] stream_type &stream, [[maybe_unused]] alignment_file_output_options const &options, [[maybe_unused]] header_type &&header, [[maybe_unused]] seq_type &&seq, [[maybe_unused]] qual_type &&qual, [[maybe_unused]] id_type &&id, [[maybe_unused]] int32_t const offset, [[maybe_unused]] ref_seq_type &&ref_seq, [[maybe_unused]] ref_id_type &&ref_id, [[maybe_unused]] std::optional< int32_t > ref_offset, [[maybe_unused]] align_type &&align, [[maybe_unused]] cigar_type &&cigar_vector, [[maybe_unused]] sam_flag const flag, [[maybe_unused]] uint8_t const mapq, [[maybe_unused]] mate_type &&mate, [[maybe_unused]] tag_dict_type &&tag_dict, [[maybe_unused]] double e_value, [[maybe_unused]] double bit_score)
Write the given fields to the specified stream.
Definition: format_bam.hpp:600
seqan3::views::repeat_n
constexpr auto repeat_n
A view factory that repeats a given value n times.
Definition: repeat_n.hpp:94
std::end
T end(T... args)
seqan3::pack_traits::transform
seqan3::type_list< trait_t< pack_t >... > transform
Apply a transformation trait to every type in the pack and return a seqan3::type_list of the results.
Definition: traits.hpp:307
seqan3::field::flag
The alignment flag (bit information), uint16_t value.
integral
The concept integral is satisfied if and only if T is an integral type.
misc.hpp
Provides various utility functions.
input_options.hpp
Provides seqan3::alignment_file_input_options.
forwarding_range
Specifies a range whose iterators may outlive the range and remain valid.
format_sam_base.hpp
Provides the seqan3::format_sam_base that can be inherited from.
misc.hpp
Provides various utility functions.
std::string::data
T data(T... args)
seqan3::pack_traits::count
constexpr ptrdiff_t count
Count the occurrences of a type in a pack.
Definition: traits.hpp:134
std::make_unsigned_t
std::tuple_size_v
T tuple_size_v
seqan3::assign_unaligned
void assign_unaligned(aligned_seq_t &aligned_seq, unaligned_sequence_type &&unaligned_seq)
An implementation of seqan3::aligned_sequence::assign_unaligned_sequence for sequence containers.
Definition: aligned_sequence_concept.hpp:375
std::variant
seqan3::field::mate
The mate pair information given as a std::tuple of reference name, offset and template length.
string