129 template <
typename stream_type,
130 typename seq_legal_alph_type,
131 typename stream_pos_type,
137 stream_pos_type & position_buffer,
140 qual_type & qualities);
142 template <
typename stream_type,
150 qual_type && qualities);
152 template <
typename stream_type,
153 typename seq_legal_alph_type,
154 typename ref_seqs_type,
155 typename ref_ids_type,
156 typename stream_pos_type,
159 typename offset_type,
160 typename ref_seq_type,
161 typename ref_id_type,
162 typename ref_offset_type,
168 typename tag_dict_type,
169 typename e_value_type,
170 typename bit_score_type>
173 ref_seqs_type & ref_seqs,
175 stream_pos_type & position_buffer,
179 offset_type & offset,
180 ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq),
181 ref_id_type & ref_id,
182 ref_offset_type & ref_offset,
183 cigar_type & cigar_vector,
187 tag_dict_type & tag_dict,
188 e_value_type & SEQAN3_DOXYGEN_ONLY(e_value),
189 bit_score_type & SEQAN3_DOXYGEN_ONLY(bit_score));
191 template <
typename stream_type,
192 typename header_type,
195 typename ref_seq_type,
196 typename ref_id_type,
199 typename tag_dict_type,
200 typename e_value_type,
201 typename bit_score_type>
204 header_type && header,
208 int32_t
const offset,
209 ref_seq_type && SEQAN3_DOXYGEN_ONLY(ref_seq),
210 ref_id_type && ref_id,
216 tag_dict_type && tag_dict,
217 e_value_type && SEQAN3_DOXYGEN_ONLY(e_value),
218 bit_score_type && SEQAN3_DOXYGEN_ONLY(bit_score));
//!\brief A value-initialised seqan3::sam_file_header<> member.
//! NOTE(review): presumably the fallback header used when the caller supplies none - its uses are not visible here, confirm.
228 sam_file_header<> default_header{};
//!\brief Boolean flag, initialised to `false`.
//! NOTE(review): by its name it records whether reference information was found in the header - the code that sets
//! it is not visible here, confirm.
231 bool ref_info_present_in_header{
false};
240 template <
typename t>
241 decltype(
auto) default_or(t && v)
const noexcept
243 return std::forward<t>(v);
246 template <
typename stream_view_type, arithmetic value_type>
250 template <
typename stream_view_type>
//!\brief Parses a SAM tag field from `stream_view` and stores the parsed value in `target` under its tag.
//!\tparam stream_view_type The type of the character view to read from.
//!\param[in,out] stream_view The view over the characters of the tag field.
//!\param[out]    target      The dictionary that receives the parsed value (`target[tag]` is assigned).
//!\throws format_error If the value type id is not one of [A,i,Z,H,B,f], or if the element type id of an
//!                     array value is not one of [cCsSiIf].
253 template <
typename stream_view_type>
254 void read_sam_dict_field(stream_view_type && stream_view, sam_tag_dictionary & target);
//!\brief Writes the forward range `field_value` to `stream_it`; an empty range is handled by a separate branch.
//!\tparam stream_it_t The type of the output stream iterator.
//!\tparam field_type  The type of the field; must model std::ranges::forward_range.
//!\param[in,out] stream_it   The iterator to write to.
//!\param[in]     field_value The range to write. A range whose reference type is `char` is written via
//!                           `stream_it.write_range`.
//! NOTE(review): going by the name, the empty case emits a '*' placeholder - that branch is not visible in this
//! listing, confirm.
256 template <
typename stream_it_t, std::ranges::forward_range field_type>
257 void write_range_or_asterisk(stream_it_t & stream_it, field_type && field_value);
//!\brief Overload of write_range_or_asterisk that takes a C-string (`char const *`) instead of a range.
//!\tparam stream_it_t The type of the output stream iterator.
//!\param[in,out] stream_it   The iterator to write to.
//!\param[in]     field_value The character pointer to write.
//! NOTE(review): the body of this overload is not visible in this listing; presumably it behaves like the range
//! overload - confirm.
259 template <
typename stream_it_t>
260 void write_range_or_asterisk(stream_it_t & stream_it,
char const *
const field_value);
//!\brief Writes the entries of `tag_dict` to the stream, putting `separator` in front of every entry.
//!\tparam stream_it_t The type of the output stream iterator.
//!\param[in,out] stream    The iterator to write to (named `stream_it` in the definition).
//!\param[in]     tag_dict  The tag dictionary whose (tag, variant) entries are written.
//!\param[in]     separator The character written before each entry.
//!\details For each entry the definition derives the two tag characters as `tag / 256` and `tag % 256` and looks
//!         up the type character (plus an extra type character, if it is not '\0') by the variant's index.
//!         Values are emitted according to their type: `char` ranges via `write_range`, `std::byte` ranges
//!         element-wise as `uint8_t` numbers, other ranges element-wise via `write_number`.
262 template <
typename stream_it_t>
263 void write_tag_fields(stream_it_t & stream, sam_tag_dictionary
const & tag_dict,
char const separator);
267template <
typename stream_type,
268 typename seq_legal_alph_type,
269 typename stream_pos_type,
275 stream_pos_type & position_buffer,
278 qual_type & qualities)
304 if constexpr (!detail::decays_to_ignore_v<seq_type>)
305 if (std::ranges::distance(
sequence) == 0)
306 throw parse_error{
"The sequence information must not be empty."};
307 if constexpr (!detail::decays_to_ignore_v<id_type>)
308 if (std::ranges::distance(
id) == 0)
309 throw parse_error{
"The id information must not be empty."};
312 id =
id | detail::take_until_and_consume(
is_space) | ranges::to<id_type>();
316template <
typename stream_type,
324 qual_type && qualities)
334 default_or(qualities),
350template <
typename stream_type,
351 typename seq_legal_alph_type,
352 typename ref_seqs_type,
353 typename ref_ids_type,
354 typename stream_pos_type,
357 typename offset_type,
358 typename ref_seq_type,
359 typename ref_id_type,
360 typename ref_offset_type,
366 typename tag_dict_type,
367 typename e_value_type,
368 typename bit_score_type>
372 ref_seqs_type & ref_seqs,
374 stream_pos_type & position_buffer,
378 offset_type & offset,
379 ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq),
380 ref_id_type & ref_id,
381 ref_offset_type & ref_offset,
382 cigar_type & cigar_vector,
386 tag_dict_type & tag_dict,
387 e_value_type & SEQAN3_DOXYGEN_ONLY(e_value),
388 bit_score_type & SEQAN3_DOXYGEN_ONLY(bit_score))
390 static_assert(detail::decays_to_ignore_v<ref_offset_type>
391 || detail::is_type_specialisation_of_v<ref_offset_type, std::optional>,
392 "The ref_offset must be a specialisation of std::optional.");
394 auto stream_view = detail::istreambuf(stream);
395 auto field_view = stream_view | detail::take_until_or_throw_and_consume(is_char<'\t'>);
397 int32_t ref_offset_tmp{};
398 std::ranges::range_value_t<
decltype(header.
ref_ids())> ref_id_tmp{};
404 read_header(stream_view, header, ref_seqs);
411 position_buffer = stream.tellg();
415 if constexpr (!detail::decays_to_ignore_v<id_type>)
416 read_forward_range_field(field_view,
id);
418 detail::consume(field_view);
420 uint16_t flag_integral{};
421 read_arithmetic_field(field_view, flag_integral);
424 read_forward_range_field(field_view, ref_id_tmp);
425 check_and_assign_ref_id(
ref_id, ref_id_tmp, header, ref_seqs);
427 read_arithmetic_field(field_view, ref_offset_tmp);
430 if (ref_offset_tmp == -1)
432 else if (ref_offset_tmp > -1)
434 else if (ref_offset_tmp < -1)
435 throw format_error{
"No negative values are allowed for field::ref_offset."};
437 if constexpr (!detail::decays_to_ignore_v<mapq_type>)
438 read_arithmetic_field(field_view,
mapq);
440 detail::consume(field_view);
444 if constexpr (!detail::decays_to_ignore_v<cigar_type>)
448 int32_t ref_length{}, seq_length{};
449 std::tie(cigar_vector, ref_length, seq_length) = detail::parse_cigar(field_view);
450 int32_t soft_clipping_end{};
451 int32_t offset_tmp{};
452 transfer_soft_clipping_to(cigar_vector, offset_tmp, soft_clipping_end);
462 detail::consume(field_view);
467 if constexpr (!detail::decays_to_ignore_v<mate_type>)
469 std::ranges::range_value_t<
decltype(header.
ref_ids())> tmp_mate_ref_id{};
470 read_forward_range_field(field_view, tmp_mate_ref_id);
472 if (tmp_mate_ref_id ==
"=")
474 if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
477 check_and_assign_ref_id(get<0>(
mate), ref_id_tmp, header, ref_seqs);
481 check_and_assign_ref_id(get<0>(
mate), tmp_mate_ref_id, header, ref_seqs);
485 read_arithmetic_field(field_view, tmp_pnext);
488 get<1>(
mate) = --tmp_pnext;
489 else if (tmp_pnext < 0)
490 throw format_error{
"No negative values are allowed at the mate mapping position."};
493 read_arithmetic_field(field_view, get<2>(
mate));
497 for (
size_t i = 0; i < 3u; ++i)
499 detail::consume(field_view);
507 constexpr auto is_legal_alph = char_is_valid_for<seq_legal_alph_type>;
511 [is_legal_alph](
char const c)
513 if (!is_legal_alph(c))
515 + detail::type_name_as_string<seq_legal_alph_type>
516 +
"> evaluated to false on " + detail::make_printable(c)};
520 if constexpr (detail::decays_to_ignore_v<seq_type>)
522 detail::consume(seq_stream);
526 read_forward_range_field(seq_stream,
seq);
536 auto const tab_or_end = is_char<'\t'> || is_char<'\r'> || is_char<'\n'>;
537 auto qual_view = stream_view | detail::take_until_or_throw(tab_or_end);
538 if constexpr (!detail::decays_to_ignore_v<qual_type>)
539 read_forward_range_field(qual_view,
qual);
541 detail::consume(qual_view);
543 if constexpr (!detail::decays_to_ignore_v<seq_type> && !detail::decays_to_ignore_v<qual_type>)
545 if (std::ranges::distance(
seq) != 0 && std::ranges::distance(
qual) != 0
546 && std::ranges::distance(
seq) != std::ranges::distance(
qual))
548 throw format_error{detail::to_string(
"Sequence length (",
549 std::ranges::distance(
seq),
550 ") and quality length (",
551 std::ranges::distance(
qual),
552 ") must be the same.")};
561 auto stream_until_tab_or_end = stream_view | detail::take_until_or_throw(tab_or_end);
562 if constexpr (!detail::decays_to_ignore_v<tag_dict_type>)
563 read_sam_dict_field(stream_until_tab_or_end, tag_dict);
565 detail::consume(stream_until_tab_or_end);
568 detail::consume(stream_view | detail::take_until(!(is_char<'\r'> || is_char<'\n'>)));
572template <
typename stream_type,
573 typename header_type,
576 typename ref_seq_type,
577 typename ref_id_type,
580 typename tag_dict_type,
581 typename e_value_type,
582 typename bit_score_type>
585 header_type && header,
589 int32_t
const offset,
590 ref_seq_type && SEQAN3_DOXYGEN_ONLY(ref_seq),
591 ref_id_type && ref_id,
597 tag_dict_type && tag_dict,
598 e_value_type && SEQAN3_DOXYGEN_ONLY(e_value),
599 bit_score_type && SEQAN3_DOXYGEN_ONLY(bit_score))
618 "The seq object must be a std::ranges::forward_range over "
619 "letters that model seqan3::alphabet.");
622 "The id object must be a std::ranges::forward_range over "
623 "letters that model seqan3::alphabet.");
625 if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
627 static_assert((std::ranges::forward_range<ref_id_type> || std::integral<std::remove_reference_t<ref_id_type>>
628 || detail::is_type_specialisation_of_v<std::remove_cvref_t<ref_id_type>,
std::optional>),
629 "The ref_id object must be a std::ranges::forward_range "
630 "over letters that model seqan3::alphabet.");
632 if constexpr (std::integral<std::remove_cvref_t<ref_id_type>>
633 || detail::is_type_specialisation_of_v<std::remove_cvref_t<ref_id_type>,
std::optional>)
634 static_assert(!detail::decays_to_ignore_v<header_type>,
635 "If you give indices as reference id information the header must also be present.");
639 "The qual object must be a std::ranges::forward_range "
640 "over letters that model seqan3::alphabet.");
643 "The mate object must be a std::tuple of size 3 with "
644 "1) a std::ranges::forward_range with a value_type modelling seqan3::alphabet, "
645 "2) a std::integral or std::optional<std::integral>, and "
646 "3) a std::integral.");
649 ((std::ranges::forward_range<decltype(std::get<0>(
mate))>
651 || detail::is_type_specialisation_of_v<
653 std::optional>)&&(std::integral<std::remove_cvref_t<decltype(std::get<1>(
mate))>>
654 || detail::is_type_specialisation_of_v<
656 std::optional>)&&std::integral<std::remove_cvref_t<decltype(std::get<2>(
mate))>>),
657 "The mate object must be a std::tuple of size 3 with "
658 "1) a std::ranges::forward_range with a value_type modelling seqan3::alphabet, "
659 "2) a std::integral or std::optional<std::integral>, and "
660 "3) a std::integral.");
662 if constexpr (std::integral<std::remove_cvref_t<decltype(std::get<0>(
mate))>>
665 static_assert(!detail::decays_to_ignore_v<header_type>,
666 "If you give indices as mate reference id information the header must also be present.");
669 "The tag_dict object must be of type seqan3::sam_tag_dictionary.");
674 if constexpr (!detail::decays_to_ignore_v<header_type> && !detail::decays_to_ignore_v<ref_id_type>
675 && !std::integral<std::remove_reference_t<ref_id_type>>
676 && !detail::is_type_specialisation_of_v<std::remove_reference_t<ref_id_type>,
std::optional>)
683 if constexpr (std::ranges::contiguous_range<
decltype(
ref_id)> && std::ranges::sized_range<
decltype(
ref_id)>
684 && std::ranges::borrowed_range<
decltype(
ref_id)>)
693 "The ref_id type is not convertible to the reference id information stored in the "
694 "reference dictionary of the header object.");
702 "' was not in the list of references:",
708 throw format_error{
"The ref_offset object must be a std::integral >= 0."};
713 if constexpr (!detail::decays_to_ignore_v<header_type>)
717 write_header(stream, options, header);
718 header_was_written =
true;
726 detail::fast_ostreambuf_iterator stream_it{*stream.rdbuf()};
727 constexpr char separator{
'\t'};
729 write_range_or_asterisk(stream_it,
id);
730 *stream_it = separator;
732 stream_it.write_number(
static_cast<uint16_t
>(
flag));
733 *stream_it = separator;
735 if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
737 if constexpr (std::integral<std::remove_reference_t<ref_id_type>>)
739 write_range_or_asterisk(stream_it, (header.
ref_ids())[
ref_id]);
741 else if constexpr (detail::is_type_specialisation_of_v<std::remove_reference_t<ref_id_type>,
std::optional>)
744 write_range_or_asterisk(stream_it, (header.
ref_ids())[
ref_id.value()]);
750 write_range_or_asterisk(stream_it,
ref_id);
758 *stream_it = separator;
761 stream_it.write_number(
ref_offset.value_or(-1) + 1);
762 *stream_it = separator;
764 stream_it.write_number(
static_cast<unsigned>(
mapq));
765 *stream_it = separator;
767 if (!std::ranges::empty(cigar_vector))
769 for (
auto & c : cigar_vector)
770 stream_it.write_range(c.to_string());
777 *stream_it = separator;
779 if constexpr (std::integral<std::remove_reference_t<decltype(get<0>(
mate))>>)
781 write_range_or_asterisk(stream_it, (header.
ref_ids())[get<0>(
mate)]);
783 else if constexpr (detail::is_type_specialisation_of_v<std::remove_reference_t<decltype(get<0>(
mate))>,
786 if (get<0>(
mate).has_value())
787 write_range_or_asterisk(stream_it, header.
ref_ids()[get<0>(
mate).value()]);
793 write_range_or_asterisk(stream_it, get<0>(
mate));
796 *stream_it = separator;
798 if constexpr (detail::is_type_specialisation_of_v<std::remove_cvref_t<decltype(get<1>(
mate))>,
std::optional>)
801 stream_it.write_number(get<1>(
mate).value_or(-1) + 1);
802 *stream_it = separator;
806 stream_it.write_number(get<1>(
mate));
807 *stream_it = separator;
810 stream_it.write_number(get<2>(
mate));
811 *stream_it = separator;
813 write_range_or_asterisk(stream_it,
seq);
814 *stream_it = separator;
816 write_range_or_asterisk(stream_it,
qual);
818 write_tag_fields(stream_it, tag_dict, separator);
840template <
typename stream_view_type, arithmetic value_type>
842 stream_view_type && stream_view,
848 read_arithmetic_field(stream_view | detail::take_until(is_char<','>), value);
854 variant = std::move(tmp_vector);
870template <
typename stream_view_type>
880 read_byte_field(stream_view | detail::take_exactly_or_throw(2), value);
884 throw format_error{
"Hexadecimal tag has an uneven number of digits!"};
890 variant = std::move(tmp_vector);
910template <
typename stream_view_type>
911inline void format_sam::read_sam_dict_field(stream_view_type && stream_view, sam_tag_dictionary & target)
938 read_arithmetic_field(stream_view, tmp);
945 read_arithmetic_field(stream_view, tmp);
951 target[tag] = stream_view | ranges::to<std::string>();
956 read_sam_byte_vector(target[tag], stream_view);
965 switch (array_value_type_id)
968 read_sam_dict_vector(target[tag], stream_view, int8_t{});
971 read_sam_dict_vector(target[tag], stream_view, uint8_t{});
974 read_sam_dict_vector(target[tag], stream_view, int16_t{});
977 read_sam_dict_vector(target[tag], stream_view, uint16_t{});
980 read_sam_dict_vector(target[tag], stream_view, int32_t{});
983 read_sam_dict_vector(target[tag], stream_view, uint32_t{});
986 read_sam_dict_vector(target[tag], stream_view,
float{});
989 throw format_error{
std::string(
"The first character in the numerical ")
990 +
"id of a SAM tag must be one of [cCsSiIf] but '" + array_value_type_id
996 throw format_error{
std::string(
"The second character in the numerical id of a "
997 "SAM tag must be one of [A,i,Z,H,B,f] but '")
998 + type_id +
"' was given."};
1009template <
typename stream_it_t, std::ranges::forward_range field_type>
1010inline void format_sam::write_range_or_asterisk(stream_it_t & stream_it, field_type && field_value)
1012 if (std::ranges::empty(field_value))
1018 if constexpr (std::same_as<std::remove_cvref_t<std::ranges::range_reference_t<field_type>>,
char>)
1019 stream_it.write_range(field_value);
1031template <
typename stream_it_t>
1032inline void format_sam::write_range_or_asterisk(stream_it_t & stream_it,
char const *
const field_value)
1044template <
typename stream_it_t>
1046format_sam::write_tag_fields(stream_it_t & stream_it, sam_tag_dictionary
const & tag_dict,
char const separator)
1048 auto const stream_variant_fn = [&stream_it](
auto && arg)
1052 if constexpr (std::ranges::input_range<T>)
1054 if constexpr (std::same_as<std::remove_cvref_t<std::ranges::range_reference_t<T>>,
char>)
1056 stream_it.write_range(arg);
1058 else if constexpr (std::same_as<std::remove_cvref_t<std::ranges::range_reference_t<T>>,
std::byte>)
1060 if (!std::ranges::empty(arg))
1067 stream_it.write_number(std::to_integer<uint8_t>(elem));
1073 if (!std::ranges::empty(arg))
1080 stream_it.write_number(elem);
1085 else if constexpr (std::same_as<std::remove_cvref_t<T>,
char>)
1091 stream_it.write_number(arg);
1095 for (
auto & [tag, variant] : tag_dict)
1097 *stream_it = separator;
1099 char const char0 = tag / 256;
1100 char const char1 = tag % 256;
1105 *stream_it = detail::sam_tag_type_char[variant.
index()];
1108 if (detail::sam_tag_type_char_extra[variant.
index()] !=
'\0')
1110 *stream_it = detail::sam_tag_type_char_extra[variant.
index()];
Core alphabet concept and free function/type trait wrappers.
The SAM tag dictionary class that stores all optional SAM fields.
Definition: sam_tag_dictionary.hpp:343
Provides seqan3::detail::fast_ostreambuf_iterator.
auto const to_char
A view that calls seqan3::to_char() on each element in the input range.
Definition: to_char.hpp:63
sam_flag
An enum flag that describes the properties of an aligned read (given as a SAM record).
Definition: sam_flag.hpp:76
@ none
None of the flags below are set.
@ flag
The alignment flag (bit information), uint16_t value.
@ ref_offset
Sequence (seqan3::field::ref_seq) relative start position (0-based), unsigned value.
@ mapq
The mapping quality of the seqan3::field::seq alignment, usually a Phred-scaled score.
@ offset
Sequence (seqan3::field::seq) relative start position (0-based), unsigned value.
@ mate
The mate pair information given as a std::tuple of reference name, offset and template length.
@ ref_id
The identifier of the (reference) sequence that seqan3::field::seq was aligned to.
@ seq
The "sequence", usually a range of nucleotides or amino acids.
@ qual
The qualities, usually in Phred score notation.
constexpr auto is_space
Checks whether c is a space character.
Definition: predicate.hpp:125
typename decltype(detail::split_after< i >(list_t{}))::second_type drop
Return a seqan3::type_list of the types in the input type list, except the first n.
Definition: type_list/traits.hpp:395
decltype(detail::transform< trait_t >(list_t{})) transform
Apply a transformation trait to every type in the list and return a seqan3::type_list of the results.
Definition: type_list/traits.hpp:470
constexpr size_t size
The size of a type pack.
Definition: type_pack/traits.hpp:146
The generic alphabet concept that covers most data types used in ranges.
Checks whether `from` can be implicitly converted to `to`.
The generic concept for a (biological) sequence.
Whether a type behaves like a tuple.
Auxiliary functions for the SAM IO.
Provides seqan3::detail::istreambuf.
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
Provides seqan3::sam_file_output_options.
Provides helper data structures for the seqan3::sam_file_output.
Provides the seqan3::sam_tag_dictionary class and auxiliaries.
Provides seqan3::sequence_file_output_options.
Provides seqan3::views::slice.
Thrown if there is a parse error, such as reading an unexpected character from an input stream.
Definition: io/exception.hpp:48
The options type defines various option members that influence the behavior of all or some formats.
Definition: sam_file/output_options.hpp:26
bool add_carriage_return
The default plain text line-ending is "\n", but on Windows an additional carriage return is recommend...
Definition: sam_file/output_options.hpp:30
bool sam_require_header
Whether to require a header for SAM files.
Definition: sam_file/output_options.hpp:44
The options type defines various option members that influence the behaviour of all or some formats.
Definition: sequence_file/output_options.hpp:26
Provides seqan3::views::take_until and seqan3::views::take_until_or_throw.
Provides seqan3::ranges::to.
Provides seqan3::views::to_char.
Provides traits to inspect some information of a type, for example its name.
Provides seqan3::tuple_like.