btllib
seq_writer.hpp
1 #ifndef BTLLIB_SEQ_WRITER_HPP
2 #define BTLLIB_SEQ_WRITER_HPP
3 
4 #include "data_stream.hpp"
5 #include "seq.hpp"
6 
7 #include <cstdio>
8 #include <mutex>
9 #include <string>
10 
11 namespace btllib {
12 
14 class SeqWriter
15 {
16 
17 public:
18  enum Format
19  {
20  FASTA,
21  FASTQ
22  };
23 
24  SeqWriter(const std::string& sink_path,
25  Format format = FASTA,
26  bool append = false);
27 
28  void close();
29 
30  void write(const std::string& name,
31  const std::string& comment,
32  const std::string& seq,
33  const std::string& qual);
34 
35 private:
36  const std::string sink_path;
37  DataSink sink;
38  bool closed;
39  Format format;
40  char headerchar;
41  std::mutex mutex;
42 };
43 
44 inline SeqWriter::SeqWriter(const std::string& sink_path,
45  Format format,
46  bool append)
47  : sink_path(sink_path)
48  , sink(sink_path, append)
49  , closed(false)
50  , format(format)
51  , headerchar(format == FASTA ? '>' : '@')
52 {}
53 
54 inline void
55 SeqWriter::close()
56 {
57  if (!closed) {
58  sink.close();
59  closed = true;
60  }
61 }
62 
63 inline void
64 SeqWriter::write(const std::string& name,
65  const std::string& comment,
66  const std::string& seq,
67  const std::string& qual)
68 {
69  check_error(seq.empty(), "Attempted to write empty sequence.");
70  for (const auto& c : seq) {
71  if (!bool(COMPLEMENTS[unsigned(c)])) {
72  log_error(std::string("A sequence contains invalid IUPAC character: ") +
73  c);
74  std::exit(EXIT_FAILURE);
75  }
76  }
77 
78  std::string output;
79  output.reserve(1 + name.size() + 1 + comment.size() + 1 + seq.size() + 3 +
80  qual.size() + 1);
81  output += headerchar;
82  if (!name.empty()) {
83  output += name;
84  }
85  if (!comment.empty()) {
86  output += " ";
87  output += comment;
88  output += '\n';
89  }
90 
91  output += seq;
92  output += '\n';
93 
94  if (format == FASTQ) {
95  check_error(seq.size() != qual.size(),
96  "Quality must be the same length as sequence.");
97  output += "+\n";
98  output += qual;
99  output += '\n';
100  }
101 
102  {
103  std::unique_lock<std::mutex> lock(mutex);
104  fwrite(output.c_str(), 1, output.size(), sink);
105  }
106 }
107 
108 } // namespace btllib
109 
110 #endif
btllib::SeqWriter
Definition: seq_writer.hpp:15