SeqAn3
The Modern C++ library for sequence analysis.
output.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2019, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2019, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <cassert>
16 #include <fstream>
17 #include <string>
18 #include <string_view>
19 #include <variant>
20 #include <vector>
21 
33 #include <seqan3/io/detail/record.hpp>
34 #include <seqan3/io/exception.hpp>
35 #include <seqan3/std/filesystem>
36 #include <seqan3/io/record.hpp>
38 #include <seqan3/std/ranges>
39 
40 namespace seqan3
41 {
42 
43 // ----------------------------------------------------------------------------
44 // alignment_file_output
45 // ----------------------------------------------------------------------------
46 
169 template <detail::Fields selected_field_ids_ =
170  fields<field::SEQ,
171  field::ID,
177  field::MAPQ,
178  field::QUAL,
179  field::FLAG,
180  field::MATE,
181  field::TAGS,
185  detail::TypeListOfAlignmentFileOutputFormats valid_formats_ = type_list<format_sam, format_bam>,
186  Char stream_char_type_ = char,
187  typename ref_ids_type = ref_info_not_given>
189 {
190 public:
//!\brief A seqan3::fields list with the fields selected for the record.
195  using selected_field_ids = selected_field_ids_;
//!\brief A seqan3::type_list with the possible formats.
198  using valid_formats = valid_formats_;
//!\brief Character type of the stream(s), usually char.
200  using stream_char_type = stream_char_type_;
202 
205  field::SEQ,
206  field::ID,
212  field::MAPQ,
213  field::FLAG,
214  field::QUAL,
215  field::MATE,
216  field::TAGS,
218  field::BIT_SCORE>;
219 
220  static_assert([] () constexpr
221  {
222  for (field f : selected_field_ids::as_array)
223  if (!field_ids::contains(f))
224  return false;
225  return true;
226  }(),
227  "You selected a field that is not valid for alignment files, "
228  "please refer to the documentation of "
229  "seqan3::alignment_file_output::field_ids for the accepted values.");
230 
// Range-associated member types. Most are void; only `iterator` is a real type.
//!\brief The value type (void).
236  using value_type = void;
//!\brief The reference type (void).
239  using reference = void;
//!\brief The const reference type (void).
241  using const_reference = void;
//!\brief The size type (void).
243  using size_type = void;
//!\brief The iterator type of this view (an output iterator).
247  using iterator = detail::out_file_iterator<alignment_file_output>;
//!\brief The const iterator type is void, because files are not const-iterable.
249  using const_iterator = void;
253 
//!\brief Default constructor is explicitly deleted, you need to give a stream or file name.
257  alignment_file_output() = delete;
//!\brief Destructor is defaulted.
268  ~alignment_file_output() = default;
269 
296  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
297  primary_stream{new std::ofstream{filename, std::ios_base::out | std::ios::binary}, stream_deleter_default}
298  {
299  // open stream
300  if (!primary_stream->good())
301  throw file_open_error{"Could not open file " + filename.string() + " for writing."};
302 
303  // possibly add intermediate compression stream
304  secondary_stream = detail::make_secondary_ostream(*primary_stream, filename);
305 
306  // initialise format handler or throw if format is not found
307  detail::set_format(format, filename);
308  }
309 
326  template <OStream2 stream_type, AlignmentFileOutputFormat file_format>
327  alignment_file_output(stream_type & stream,
328  file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
329  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
330  primary_stream{&stream, stream_deleter_noop},
331  secondary_stream{&stream, stream_deleter_noop},
332  format{detail::alignment_file_output_format<file_format>{}}
333  {
334  static_assert(meta::in<valid_formats, file_format>::value,
335  "You selected a format that is not in the valid_formats of this file.");
336  }
337 
339  template <OStream2 stream_type, AlignmentFileOutputFormat file_format>
340  alignment_file_output(stream_type && stream,
341  file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
342  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
343  primary_stream{new stream_type{std::move(stream)}, stream_deleter_default},
344  secondary_stream{&*primary_stream, stream_deleter_noop},
345  format{detail::alignment_file_output_format<file_format>{}}
346  {
347  static_assert(meta::in<valid_formats, file_format>::value,
348  "You selected a format that is not in the valid_formats of this file.");
349  }
350 
381  template <typename ref_ids_type_, std::ranges::ForwardRange ref_lengths_type>
386  ref_ids_type_ && ref_ids,
387  ref_lengths_type && ref_lengths,
388  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
390 
391  {
392  assert(std::ranges::size(ref_ids) == std::ranges::size(ref_lengths));
393 
394  header_ptr = std::make_unique<alignment_file_header<ref_ids_type>>(std::forward<ref_ids_type_>(ref_ids));
395 
396  // fill ref_dict
397  for (size_t idx = 0; idx < std::ranges::size(ref_ids); ++idx)
398  {
399  header_ptr->ref_id_info.push_back({ref_lengths[idx], ""});
400  header_ptr->ref_dict[(header_ptr->ref_ids()[idx])] = idx;
401  }
402  }
403 
425  template <OStream2 stream_type,
426  AlignmentFileOutputFormat file_format,
427  typename ref_ids_type_, // generic type to capture lvalue references
428  std::ranges::ForwardRange ref_lengths_type>
430  requires std::Same<std::remove_reference_t<ref_ids_type_>, ref_ids_type>
432  alignment_file_output(stream_type && stream,
433  ref_ids_type_ && ref_ids,
434  ref_lengths_type && ref_lengths,
435  file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
436  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
437  alignment_file_output{std::forward<stream_type>(stream), file_format{}, selected_field_ids{}}
438  {
439  assert(std::ranges::size(ref_ids) == std::ranges::size(ref_lengths));
440 
441  header_ptr = std::make_unique<alignment_file_header<ref_ids_type>>(std::forward<ref_ids_type_>(ref_ids));
442 
443  // fill ref_dict
444  for (uint32_t idx = 0; idx < std::ranges::size(ref_ids); ++idx)
445  {
446  header_ptr->ref_id_info.emplace_back(ref_lengths[idx], "");
447  header_ptr->ref_dict[header_ptr->ref_ids()[idx]] = idx;
448  }
449  }
451 
473  iterator begin() noexcept
474  {
475  return {*this};
476  }
477 
492  sentinel end() noexcept
493  {
494  return {};
495  }
496 
//!\brief Write a seqan3::record to the file.
//! \tparam record_t Type of the record; must be TupleLike and a specialisation of seqan3::record.
//! \param[in] r The record to write.
//!
//! Every field is looked up in the record by its field id via detail::get_or, together with the
//! fallback value given as second argument (presumably used when the record has no such field;
//! confirm against detail::get_or). The results are handed to write_record in a fixed order:
//! the header pointer first, followed by exactly 14 further values (write_record asserts this count).
515  template <typename record_t>
516  void push_back(record_t && r)
518  requires TupleLike<record_t> &&
519  requires { requires detail::is_type_specialisation_of_v<remove_cvref_t<record_t>, record>; }
521  {
// Fallback types for the ALIGNMENT and MATE fields.
522  using default_align_t = std::pair<std::span<gapped<char>>, std::span<gapped<char>>>;
523  using default_mate_t = std::tuple<std::string_view, std::optional<int32_t>, int32_t>;
524 
// NOTE: the argument order is positional and must not be changed independently of write_record.
525  write_record(detail::get_or<field::HEADER_PTR>(r, nullptr),
526  detail::get_or<field::SEQ>(r, std::string_view{}),
527  detail::get_or<field::QUAL>(r, std::string_view{}),
528  detail::get_or<field::ID>(r, std::string_view{}),
529  detail::get_or<field::OFFSET>(r, 0u),
530  detail::get_or<field::REF_SEQ>(r, std::string_view{}),
531  detail::get_or<field::REF_ID>(r, std::ignore),
532  detail::get_or<field::REF_OFFSET>(r, std::optional<int32_t>{}),
533  detail::get_or<field::ALIGNMENT>(r, default_align_t{}),
534  detail::get_or<field::FLAG>(r, 0u),
535  detail::get_or<field::MAPQ>(r, 0u),
536  detail::get_or<field::MATE>(r, default_mate_t{}),
537  detail::get_or<field::TAGS>(r, sam_tag_dictionary{}),
538  detail::get_or<field::EVALUE>(r, 0u),
539  detail::get_or<field::BIT_SCORE>(r, 0u));
540  }
541 
//!\brief Write a record in form of a std::tuple to the file.
//! \tparam tuple_t Type of the tuple; must be TupleLike.
//! \param[in] t The tuple to write.
//!
//! The position of each field inside the tuple is taken from selected_field_ids::index_of, i.e. the
//! tuple elements are interpreted in the order of the selected fields. Each element is fetched via
//! detail::get_or together with a fallback value and passed to write_record in a fixed order:
//! the header pointer first, followed by exactly 14 further values (write_record asserts this count).
563  template <typename tuple_t>
564  void push_back(tuple_t && t)
566  requires TupleLike<tuple_t>
568  {
// Fallback types for the ALIGNMENT and MATE fields.
569  using default_align_t = std::pair<std::span<gapped<char>>, std::span<gapped<char>>>;
570  using default_mate_t = std::tuple<std::string_view, std::optional<int32_t>, int32_t>;
571 
572  // index_of might return npos for a field that was not selected; detail::get_or is expected to handle this. NOTE(review): this comment previously named `get_or_ignore` and said it returns ignore - confirm the actual fallback.
573  write_record(detail::get_or<selected_field_ids::index_of(field::HEADER_PTR)>(t, nullptr),
574  detail::get_or<selected_field_ids::index_of(field::SEQ)>(t, std::string_view{}),
575  detail::get_or<selected_field_ids::index_of(field::QUAL)>(t, std::string_view{}),
576  detail::get_or<selected_field_ids::index_of(field::ID)>(t, std::string_view{}),
577  detail::get_or<selected_field_ids::index_of(field::OFFSET)>(t, 0u),
578  detail::get_or<selected_field_ids::index_of(field::REF_SEQ)>(t, std::string_view{}),
579  detail::get_or<selected_field_ids::index_of(field::REF_ID)>(t, std::ignore),
580  detail::get_or<selected_field_ids::index_of(field::REF_OFFSET)>(t, std::optional<int32_t>{}),
581  detail::get_or<selected_field_ids::index_of(field::ALIGNMENT)>(t, default_align_t{}),
582  detail::get_or<selected_field_ids::index_of(field::FLAG)>(t, 0u),
583  detail::get_or<selected_field_ids::index_of(field::MAPQ)>(t, 0u),
584  detail::get_or<selected_field_ids::index_of(field::MATE)>(t, default_mate_t{}),
585  detail::get_or<selected_field_ids::index_of(field::TAGS)>(t, sam_tag_dictionary{}),
586  detail::get_or<selected_field_ids::index_of(field::EVALUE)>(t, 0u),
587  detail::get_or<selected_field_ids::index_of(field::BIT_SCORE)>(t, 0u));
588  }
589 
//!\brief Write a record to the file by passing individual fields.
//! \param[in] arg  The first field to write.
//! \param[in] args Any further fields, in the same order as the selected fields.
//!
//! At least one argument is required. The arguments are not copied: std::tie bundles them as a
//! tuple of lvalue references, and that temporary tuple is passed on to push_back.
613  template <typename arg_t, typename ... arg_types>
614  void emplace_back(arg_t && arg, arg_types && ... args)
615  {
616  push_back(std::tie(arg, args...));
617  }
618 
640  template <typename rng_t>
645  {
646  for (auto && record : range)
647  push_back(std::forward<decltype(record)>(record));
648  return *this;
649  }
650 
679  template <typename rng_t>
684  {
685  f = range;
686  return f;
687  }
688 
690  template <typename rng_t>
695  {
696  f = range;
697  return std::move(f);
698  }
700 
703 
708  {
709  return *secondary_stream;
710  }
712 
723  auto & header()
724  {
726  throw std::logic_error{"Please construct your file with reference id and length information in order "
727  "to properly initialise the header before accessing it."};
728 
729  return *header_ptr;
730  }
731 
732 protected:
734 
742  static void stream_deleter_noop(std::basic_ostream<stream_char_type> *) {}
744  static void stream_deleter_default(std::basic_ostream<stream_char_type> * ptr) { delete ptr; }
745 
//!\brief The primary stream: the opened file stream or the stream passed by the user. Starts as nullptr with the no-op deleter.
747  stream_ptr_t primary_stream{nullptr, stream_deleter_noop};
//!\brief The stream that records are actually written to: either the same object as the primary stream or an intermediate stream created on top of it. Starts as nullptr with the no-op deleter.
749  stream_ptr_t secondary_stream{nullptr, stream_deleter_noop};
750 
752  using format_type = typename detail::variant_from_tags<valid_formats, detail::alignment_file_output_format>::type;
753 
755  format_type format;
757 
761  ref_ids_type>>;
762 
764  std::unique_ptr<header_type> header_ptr;
765 
767  template <typename record_header_ptr_t, typename ...pack_type>
768  void write_record(record_header_ptr_t && record_header_ptr, pack_type && ...remainder)
769  {
770  static_assert((sizeof...(pack_type) == 14), "Wrong parameter list passed to write_record.");
771 
772  assert(!format.valueless_by_exception());
773 
774  std::visit([&] (auto & f)
775  {
776  // use header from record if explicitly given, e.g. file_output = file_input
778  f.write(*secondary_stream, options, *record_header_ptr, std::forward<pack_type>(remainder)...);
780  f.write(*secondary_stream, options, std::ignore, std::forward<pack_type>(remainder)...);
781  else
782  f.write(*secondary_stream, options, *header_ptr, std::forward<pack_type>(remainder)...);
783  }, format);
784  }
785 
787  friend iterator;
788 };
789 
798 template <detail::Fields selected_field_ids>
804 
808 template <OStream2 stream_type,
809  AlignmentFileOutputFormat file_format,
810  detail::Fields selected_field_ids>
811 alignment_file_output(stream_type &&, file_format const &, selected_field_ids const &)
815  ref_info_not_given>;
816 
820 template <OStream2 stream_type,
821  AlignmentFileOutputFormat file_format,
822  detail::Fields selected_field_ids>
823 alignment_file_output(stream_type &, file_format const &, selected_field_ids const &)
825  type_list<file_format>,
826  typename std::remove_reference_t<stream_type>::char_type,
827  ref_info_not_given>;
828 
832 template <OStream2 stream_type,
833  AlignmentFileOutputFormat file_format>
834 alignment_file_output(stream_type &&, file_format const &)
836  type_list<file_format>,
837  typename std::remove_reference_t<stream_type>::char_type,
838  ref_info_not_given>;
839 
843 template <OStream2 stream_type,
844  AlignmentFileOutputFormat file_format>
845 alignment_file_output(stream_type &, file_format const &)
847  type_list<file_format>,
848  typename std::remove_reference_t<stream_type>::char_type,
849  ref_info_not_given>;
850 
852 template <detail::Fields selected_field_ids,
853  std::ranges::ForwardRange ref_ids_type,
854  std::ranges::ForwardRange ref_lengths_type>
856  ref_ids_type &&,
857  ref_lengths_type &&,
858  selected_field_ids const &)
860  typename alignment_file_output<>::valid_formats,
861  typename alignment_file_output<>::stream_char_type,
863 
865 template <std::ranges::ForwardRange ref_ids_type,
866  std::ranges::ForwardRange ref_lengths_type>
868  ref_ids_type &&,
869  ref_lengths_type &&)
871  typename alignment_file_output<>::valid_formats,
872  typename alignment_file_output<>::stream_char_type,
873  std::remove_reference_t<ref_ids_type>>;
874 
876 template <OStream2 stream_type,
877  std::ranges::ForwardRange ref_ids_type,
878  std::ranges::ForwardRange ref_lengths_type,
879  AlignmentFileOutputFormat file_format,
880  detail::Fields selected_field_ids>
881 alignment_file_output(stream_type &&,
882  ref_ids_type &&,
883  ref_lengths_type &&,
884  file_format const &,
885  selected_field_ids const &)
887  type_list<file_format>,
888  typename std::remove_reference_t<stream_type>::char_type,
889  std::remove_reference_t<ref_ids_type>>;
890 
892 template <OStream2 stream_type,
893  std::ranges::ForwardRange ref_ids_type,
894  std::ranges::ForwardRange ref_lengths_type,
895  AlignmentFileOutputFormat file_format,
896  detail::Fields selected_field_ids>
897 alignment_file_output(stream_type &,
898  ref_ids_type &&,
899  ref_lengths_type &&,
900  file_format const &,
901  selected_field_ids const &)
903  type_list<file_format>,
904  typename std::remove_reference_t<stream_type>::char_type,
905  std::remove_reference_t<ref_ids_type>>;
906 
908 template <OStream2 stream_type,
909  std::ranges::ForwardRange ref_ids_type,
910  std::ranges::ForwardRange ref_lengths_type,
911  AlignmentFileOutputFormat file_format>
912 alignment_file_output(stream_type &&,
913  ref_ids_type &&,
914  ref_lengths_type &&,
915  file_format const &)
917  type_list<file_format>,
918  typename std::remove_reference_t<stream_type>::char_type,
919  std::remove_reference_t<ref_ids_type>>;
920 
922 template <OStream2 stream_type,
923  std::ranges::ForwardRange ref_ids_type,
924  std::ranges::ForwardRange ref_lengths_type,
925  AlignmentFileOutputFormat file_format>
926 alignment_file_output(stream_type &,
927  ref_ids_type &&,
928  ref_lengths_type &&,
929  file_format const &)
931  type_list<file_format>,
932  typename std::remove_reference_t<stream_type>::char_type,
933  std::remove_reference_t<ref_ids_type>>;
935 
936 } // namespace seqan3
detail::out_file_iterator< alignment_file_output > iterator
The iterator type of this view (an output iterator).
Definition: output.hpp:247
T visit(T... args)
The (reference) "sequence" information, usually a range of nucleotides or amino acids.
The "sequence", usually a range of nucleotides or amino acids.
void emplace_back(arg_t &&arg, arg_types &&... args)
Write a record to the file by passing individual fields.
Definition: output.hpp:614
~alignment_file_output()=default
Destructor is defaulted.
Provides exceptions used in the I/O module.
T tie(T... args)
alignment_file_output & operator=(alignment_file_output const &)=delete
Copy assignment is explicitly deleted, because you can't have multiple access to the same file...
valid_formats_ valid_formats
A seqan3::type_list with the possible formats.
Definition: output.hpp:198
alignment_file_output(stream_type &stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from an existing stream and with specified format.
Definition: output.hpp:327
The (pairwise) alignment stored in an seqan3::alignment object.
The alignment flag (bit information), uint16_t value.
auto & header()
Access the file's header.
Definition: output.hpp:723
Specifies requirements of a Range type for which begin returns a type that models std::InputIterator...
The class template that file records are based on; behaves like an std::tuple.
Definition: record.hpp:187
Provides various utility functions required only for output.
alignment_file_output(std::filesystem::path const &filename, ref_ids_type_ &&ref_ids, ref_lengths_type &&ref_lengths, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from filename.
Definition: output.hpp:385
Provides seqan3::type_list and auxiliary type traits.
void const_reference
The const reference type (void).
Definition: output.hpp:241
alignment_file_output(std::filesystem::path filename, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from filename.
Definition: output.hpp:295
The generic concept for alignment file out formats.
sentinel end() noexcept
Returns a sentinel for comparison with iterator.
Definition: output.hpp:492
alignment_file_output(stream_type &&stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
Definition: output.hpp:340
::ranges::size size
Alias for ranges::size. Obtains the size of a range whose size can be calculated in constant time...
Definition: ranges:189
The main SeqAn3 namespace.
The qualities, usually in phred-score notation.
The e-value (length normalized bit score), double value.
Provides seqan3::alignment_file_output_options.
Sequence (REF_SEQ) relative start position (0-based), unsigned value.
selected_field_ids_ selected_field_ids
A seqan3::fields list with the fields selected for the record.
Definition: output.hpp:196
Thrown if there is an unspecified filesystem or stream error while opening, e.g. permission problem...
Definition: exception.hpp:39
A class template that holds a choice of seqan3::field.
Definition: record.hpp:127
A class for writing alignment files, e.g. SAM, BAM, BLAST, ...
Definition: output.hpp:188
iterator begin() noexcept
Returns an iterator to current position in the file.
Definition: output.hpp:473
Stores the header information of alignment files.
Definition: header.hpp:29
Provides seqan3::TupleLike.
std::ranges::default_sentinel_t sentinel
The type returned by end().
Definition: output.hpp:251
Provides the seqan3::alignment_file_header class.
alignment_file_output & operator=(rng_t &&range)
Write a range of records (or tuples) to the file.
Definition: output.hpp:641
Provides the seqan3::record template and the seqan3::field enum.
Sequence (SEQ) relative start position (0-based), unsigned value.
void push_back(record_t &&r)
Write a seqan3::record to the file.
Definition: output.hpp:516
The identifier, usually a string.
void size_type
The size type (void).
Definition: output.hpp:243
void value_type
The value type (void).
Definition: output.hpp:237
Adaptations of concepts from the Ranges TS.
The mate pair information given as a std::tuple of reference name, offset and template length...
The identifier of the (reference) sequence that SEQ was aligned to.
The concept std::Same<T, U> is satisfied if and only if T and U denote the same type.
Specifies requirements of a Range type for which begin returns a type that models std::ForwardIterato...
The options type defines various option members that influence the behavior of all or some formats...
Definition: output_options.hpp:22
Whether a type behaves like a tuple.
A pointer to the seqan3::alignment_file_header object storing header information. ...
alignment_file_output(stream_type &&stream, ref_ids_type_ &&ref_ids, ref_lengths_type &&ref_lengths, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from an existing stream and with specified format.
Definition: output.hpp:432
Provides the seqan3::format_sam tag and the seqan3::alignment_file_input_format and seqan3::alignment...
void push_back(tuple_t &&t)
Write a record in form of a std::tuple to the file.
Definition: output.hpp:564
Stream concepts.
Provides helper data structures for the seqan3::alignment_file_output.
alignment_file_output_options options
The options are public and its members can be set directly.
Definition: output.hpp:702
Provides various type traits on generic types.
Provides the seqan3::detail::out_file_iterator class template.
Provides seqan3::AlignmentFileOutputFormat and auxiliary classes.
Provides the seqan3::alignment_file_format_bam class.
void reference
The reference type (void).
Definition: output.hpp:239
friend alignment_file_output operator|(rng_t &&range, alignment_file_output &&f)
Definition: output.hpp:691
void const_iterator
The const iterator type is void, because files are not const-iterable.
Definition: output.hpp:249
::ranges::default_sentinel_t default_sentinel_t
Alias for ranges::default_sentinel_t. Type of ranges::default_sentinel.
Definition: iterator:351
meta::list< types... > type_list
Type that contains multiple types, an alias for meta::list.
Definition: type_list.hpp:27
Type tag which indicates that no reference information has been passed to the alignment file on const...
Definition: misc.hpp:21
field
An enumerator for the fields used in file formats. Some of the fields are shared between formats...
Definition: record.hpp:63
The optional tags in the SAM format, stored in a dictionary.
alignment_file_output()=delete
Default constructor is explicitly deleted, you need to give a stream or file name.
T forward(T... args)
The bit score (statistical significance indicator), unsigned value.
stream_char_type_ stream_char_type
Character type of the stream(s), usually char.
Definition: output.hpp:200
The mapping quality of the SEQ alignment, usually a phred-scaled score.
friend alignment_file_output & operator|(rng_t &&range, alignment_file_output &f)
Write a range of records (or tuples) to the file.
Definition: output.hpp:680
The SAM tag dictionary class that stores all optional SAM fields.
Definition: sam_tag_dictionary.hpp:324
This header includes C++17 filesystem support and imports it into namespace seqan3::filesystem (indep...