SeqAn3
The Modern C++ library for sequence analysis.
search.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2019, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2019, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
20 
21 namespace seqan3::detail
22 {
23 
45 template <typename index_t, typename query_t, typename configuration_t>
46 inline auto search_single(index_t const & index, query_t & query, configuration_t const & cfg)
47 {
48  using cfg_t = remove_cvref_t<configuration_t>;
49 
50  // retrieve error numbers / rates
51  detail::search_param max_error{0, 0, 0, 0};
52  if constexpr (cfg.template exists<search_cfg::max_error>())
53  {
54  auto & [total, subs, ins, del] = max_error;
55  std::tie(total, subs, ins, del) = std::apply([](auto ...args){ return std::tuple{args...}; },
56  get<search_cfg::max_error>(cfg).value);
57  }
58  else if constexpr (cfg.template exists<search_cfg::max_error_rate>())
59  {
60  // NOTE: Casting doubles rounds towards zero (i.e. floor for positive numbers). Thus, given a rate of
61  // 10% and a read length of 101 the max number of errors is correctly casted from 10.1 errors to 10
62  auto & [total, subs, ins, del] = max_error;
63  std::tie(total, subs, ins, del) = std::apply([& query] (auto && ... args)
64  {
65  return std::tuple{(args * std::ranges::size(query))...};
66  }, get<search_cfg::max_error_rate>(cfg).value);
67  }
68 
69  // TODO: if total not set: max_error.total = max_error.deletion + max_error.substitution + max_error.insertion;
70  // TODO: throw exception when any error number or rate is higher than the total error number/rate
71  // throw std::invalid_argument("The total number of errors is set to zero while there is a positive number"
72  // " of errors for a specific error type.");
73 
74  // construct internal delegate for collecting hits for later filtering (if necessary)
76  auto internal_delegate = [&internal_hits, &max_error] (auto const & it)
77  {
78  internal_hits.push_back(it);
79  };
80 
81  // choose mode
82  if constexpr (cfg_t::template exists<search_cfg::mode<detail::search_mode_best>>())
83  {
84  detail::search_param max_error2{max_error};
85  max_error2.total = 0;
86  while (internal_hits.empty() && max_error2.total <= max_error.total)
87  {
88  detail::search_algo<true>(index, query, max_error2, internal_delegate);
89  max_error2.total++;
90  }
91  }
92  else if constexpr (cfg_t::template exists<search_cfg::mode<detail::search_mode_all_best>>())
93  {
94  detail::search_param max_error2{max_error};
95  max_error2.total = 0;
96  while (internal_hits.empty() && max_error2.total <= max_error.total)
97  {
98  detail::search_algo<false>(index, query, max_error2, internal_delegate);
99  max_error2.total++;
100  }
101  }
102  else if constexpr (cfg_t::template exists<search_cfg::mode<search_cfg::strata>>())
103  {
104  detail::search_param max_error2{max_error};
105  max_error2.total = 0;
106  while (internal_hits.empty() && max_error2.total <= max_error.total)
107  {
108  detail::search_algo<true>(index, query, max_error2, internal_delegate);
109  max_error2.total++;
110  }
111  if (!internal_hits.empty())
112  {
113  internal_hits.clear(); // TODO: don't clear when using Optimum Search Schemes with lower error bounds
114  uint8_t const s = get<search_cfg::mode>(cfg).value;
115  max_error2.total += s - 1;
116  detail::search_algo<false>(index, query, max_error2, internal_delegate);
117  }
118  }
119  else // detail::search_mode_all
120  {
121  detail::search_algo<false>(index, query, max_error, internal_delegate);
122  }
123 
124  // TODO: filter hits and only do it when necessary (depending on error types)
125 
126  // output cursors or text_positions
127  if constexpr (cfg_t::template exists<search_cfg::output<detail::search_output_index_cursor>>())
128  {
129  return internal_hits;
130  }
131  else
132  {
133  using hit_t = std::conditional_t<index_t::text_layout_mode == text_layout::collection,
135  typename index_t::size_type>;
136  std::vector<hit_t> hits;
137 
138  if constexpr (cfg_t::template exists<search_cfg::mode<detail::search_mode_best>>())
139  {
140  // only one cursor is reported but it might contain more than one text position
141  if (!internal_hits.empty())
142  {
143  auto text_pos = internal_hits[0].lazy_locate();
144  hits.push_back(text_pos[0]);
145  }
146  }
147  else
148  {
149  for (auto const & cur : internal_hits)
150  {
151  for (auto const & text_pos : cur.locate())
152  hits.push_back(text_pos);
153  std::sort(hits.begin(), hits.end());
154  hits.erase(std::unique(hits.begin(), hits.end()), hits.end());
155  }
156  }
157  return hits;
158  }
159 }
160 
179 template <typename index_t, typename queries_t, typename configuration_t>
180 inline auto search_all(index_t const & index, queries_t & queries, configuration_t const & cfg)
181 {
182  using cfg_t = remove_cvref_t<configuration_t>;
183  // return type: for each query: a vector of text_positions (or cursors)
184  // delegate params: text_position (or cursor). we will withhold all hits of one query anyway to filter
185  // duplicates. more efficient to call delegate once with one vector instead of calling
186  // delegate for each hit separately at once.
187  using text_pos_t = std::conditional_t<index_t::text_layout_mode == text_layout::collection,
189  typename index_t::size_type>;
191  typename index_t::cursor_type,
192  text_pos_t>;
193 
194  if constexpr (std::ranges::forward_range<queries_t> && std::ranges::random_access_range<value_type_t<queries_t>>)
195  {
196  // TODO: if constexpr (contains<search_cfg::id::on_hit>(cfg))
198  hits.reserve(std::distance(queries.begin(), queries.end()));
199  for (auto const query : queries)
200  {
201  hits.push_back(search_single(index, query, cfg));
202  }
203  return hits;
204  }
205  else // std::ranges::random_access_range<queries_t>
206  {
207  // TODO: if constexpr (contains<search_cfg::id::on_hit>(cfg))
208  return search_single(index, queries, cfg);
209  }
210 }
211 
213 
214 } // namespace seqan3::detail
std::apply
T apply(T... args)
search_scheme_algorithm.hpp
Provides the algorithm to search in an index using search schemes.
pre.hpp
Provides various transformation trait base templates and shortcuts.
std::pair
std::vector::reserve
T reserve(T... args)
std::vector
std::distance
T distance(T... args)
std::tuple
std::sort
T sort(T... args)
std::vector::clear
T clear(T... args)
std::tie
T tie(T... args)
std::vector::push_back
T push_back(T... args)
seqan3::collection
The text is a range of ranges.
Definition: concept.hpp:86
std::vector::erase
T erase(T... args)
search_trivial.hpp
Provides an approximate string matching algorithm based on simple backtracking. This should only be u...
seqan3::pack_traits::size
constexpr size_t size
The size of a type pack.
Definition: traits.hpp:116
all.hpp
Meta-header for the search configuration module .
std::vector::begin
T begin(T... args)
concept.hpp
Provides the concepts for seqan3::fm_index and seqan3::bi_fm_index and its traits and cursors.
std::vector::empty
T empty(T... args)
std::unique
T unique(T... args)
std::vector::end
T end(T... args)
std::conditional_t