libwordring
whatwg/encoding/api.hpp
1 #pragma once
2 
3 // ------------------------------------------------------------------------------------------------
4 // 7. API
5 //
6 // https://encoding.spec.whatwg.org/#api
7 // https://triple-underscore.github.io/Encoding-ja.html#api
8 // ------------------------------------------------------------------------------------------------
9 
10 #include <wordring/whatwg/encoding/encoding.hpp>
11 #include <wordring/whatwg/encoding/encoding_defs.hpp>
12 
13 #include <wordring/whatwg/infra/infra.hpp>
14 #include <wordring/whatwg/infra/unicode.hpp>
15 
16 #include <stdexcept>
17 #include <string>
18 
20 {
21  class text_decoder_common;
22 
23  std::u32string serialize_io_queue(text_decoder_common& c, io_queue<char32_t>& in);
24 
30  {
32 
33  public:
41  std::u32string encoding() const
42  {
43  return get_encoding_label(m_encoding);
44  }
45 
50  bool fatal() const { return m_error_mode == error_mode_name::Fatal; }
51 
56  bool ignore_bom() const { return m_ignore_bom; }
57 
58  protected:
59  encoding_name m_encoding = static_cast<encoding_name>(0);
60  coder m_decoder;
61  io_queue<char> m_io_queue;
62  bool m_ignore_bom = false;
63  bool m_bom_seen = false;
64  error_mode_name m_error_mode = error_mode_name::Replacement;
65  };
66 
72  {
73  // 1.
74  std::u32string output;
75  // 2.
76  while (true)
77  {
78  io_result<io_item<char32_t>> item = in.read();
79  assert(!item.m_wait);
80  if (item->m_eoq) return output;
81  if ( (c.m_encoding == encoding_name::UTF_8 || c.m_encoding == encoding_name::UTF_16BE || c.m_encoding == encoding_name::UTF_16LE)
82  && (c.m_ignore_bom == false)
83  && (c.m_bom_seen == false))
84  {
85  c.m_bom_seen = true;
86  if (item->m_value == U'\xFEFF') continue;
87  }
88  output.push_back(item->m_value);
89  }
90  assert(false);
91  return std::u32string{};
92  }
93 
99  {
100  public:
101  text_decoder(encoding_name name, bool fatal, bool ignore_bom)
102  {
103  // 1.
104  encoding_name encoding = name;
105  // 2.
106  if (encoding == static_cast<encoding_name>(0) || encoding == encoding_name::replacement) throw std::range_error("bad encoding label.");
107  // 3.
108  m_encoding = encoding;
109  // 4.
110  if (fatal == true) m_error_mode = error_mode_name::Fatal;
111  // 5.
112  m_ignore_bom = ignore_bom;
113  }
114 
119  explicit text_decoder(std::u32string_view label = U"utf-8", bool fatal = false, bool ignore_bom = false)
120  {
121  // 1.
122  encoding_name encoding = get_encoding_name(label);
123  // 2.
124  if (encoding == static_cast<encoding_name>(0) || encoding == encoding_name::replacement) throw std::range_error("bad encoding label.");
125  // 3.
126  m_encoding = encoding;
127  // 4.
128  if (fatal == true) m_error_mode = error_mode_name::Fatal;
129  // 5.
130  m_ignore_bom = ignore_bom;
131  }
132 
139  template <typename InputIterator = std::nullptr_t>
140  std::u32string decode(InputIterator first = nullptr, InputIterator last = nullptr, bool stream = false)
141  {
142  // 1.
143  if (m_do_not_flush == false)
144  {
145  m_decoder = get_encoding_decoder(m_encoding);
146  m_io_queue = io_queue<char>{};
147  io_item<char> eoq{ '\0', true };
148  m_io_queue.push(eoq);
149  m_bom_seen = false;
150  }
151  // 2.
152  m_do_not_flush = stream;
153  // 3.
154  if constexpr (std::negation_v<std::is_same<InputIterator, std::nullptr_t>>)
155  {
156  m_io_queue.push(first, last);
157  }
158  // 4.
159  io_queue<char32_t> output;
160  io_item<char32_t> eoq{ U'\0', true };
161  output.push(eoq);
162  // 5.
163  while (true)
164  {
165  // 5.1.
166  io_result<io_item<char>> item = m_io_queue.read();
167  // 5.2.
168  if (item->m_eoq && m_do_not_flush) return serialize_io_queue(*this, output);
169  // 5.3.
170  else
171  {
172  result_value rv = process_item(*item, m_decoder, m_io_queue, output, m_error_mode);
173  if (std::holds_alternative<result_finished>(rv)) return serialize_io_queue(*this, output);
174  if (std::holds_alternative<result_error>(rv)) throw std::range_error("decode error.");
175  }
176  }
177  return U"";
178  }
179 
180  protected:
181  bool m_do_not_flush = false;
182  };
183 }
wordring::whatwg::encoding::serialize_io_queue
std::u32string serialize_io_queue(text_decoder_common &c, io_queue< char32_t > &in)
IO キューを直列化する
Definition: whatwg/encoding/api.hpp:71
wordring::whatwg::encoding::process_item
result_value process_item(IoItem, Coder &, InQueue &, OutQueue &, error_mode_name)
文字アイテムを処理する
Definition: whatwg/encoding/encoding.hpp:62
wordring::whatwg::encoding
wordring::whatwg::encoding::io_item
文字にキュー終端表現を追加するラッパー
Definition: terminology.hpp:26
wordring::whatwg::encoding::text_decoder_common::fatal
bool fatal() const
Fatal フラグ
Definition: whatwg/encoding/api.hpp:50
wordring::whatwg::encoding::text_decoder::text_decoder
text_decoder(std::u32string_view label=U"utf-8", bool fatal=false, bool ignore_bom=false)
構築
Definition: whatwg/encoding/api.hpp:119
wordring::whatwg::encoding::text_decoder
TextDecoderCommon
Definition: whatwg/encoding/api.hpp:98
wordring::whatwg::encoding::text_decoder_common
TextDecoderCommon
Definition: whatwg/encoding/api.hpp:29
wordring::whatwg::encoding::io_queue::push
void push(value_type item)
Push
Definition: terminology.hpp:152
wordring::whatwg::encoding::text_decoder_common::encoding
std::u32string encoding() const
エンコーディング名
Definition: whatwg/encoding/api.hpp:41
wordring::whatwg::encoding::io_queue
入出力ストリーム
Definition: terminology.hpp:55
wordring::whatwg::encoding::text_decoder::decode
std::u32string decode(InputIterator first=nullptr, InputIterator last=nullptr, bool stream=false)
デコード
Definition: whatwg/encoding/api.hpp:140
wordring::whatwg::encoding::io_queue::read
io_result< value_type > read()
値を一つ読み取る
Definition: terminology.hpp:79
wordring::whatwg::encoding::text_decoder_common::serialize_io_queue
friend std::u32string serialize_io_queue(text_decoder_common &, io_queue< char32_t > &)
IO キューを直列化する
Definition: whatwg/encoding/api.hpp:71
wordring::whatwg::encoding::text_decoder_common::ignore_bom
bool ignore_bom() const
BOM 無視フラグ
Definition: whatwg/encoding/api.hpp:56
wordring::whatwg::encoding::io_result
待機状態を追加するラッパー
Definition: terminology.hpp:37
wordring::whatwg::encoding::get_encoding_label
std::u32string get_encoding_label(encoding_name name)
エンコーディングに対応するラベルを返す
Definition: whatwg/encoding/encoding.hpp:171
wordring::whatwg::encoding::get_encoding_decoder
coder get_encoding_decoder(encoding_name name)
Definition: whatwg/encoding/encoding.hpp:224