libwordring
coder.hpp
1 #pragma once
2 // whatwg/encoding/coder.hpp
3 
4 // https://encoding.spec.whatwg.org/
5 // https://triple-underscore.github.io/Encoding-ja.html
6 
7 #include <algorithm>
8 #include <optional>
9 #include <type_traits>
10 
11 #include <wordring/whatwg/encoding/encoding_defs.hpp>
12 #include <wordring/whatwg/encoding/indexes.hpp>
13 #include <wordring/whatwg/infra/infra.hpp>
14 
16 {
17 
18  // 5. Indexes
19 
// Index code point lookup.
//
// Reads the entry at position `pointer` of the table `index`.
// Returns an empty optional when `pointer` is outside the table or when the
// entry holds the value 0xFFFFFFFF; otherwise returns the stored entry.
template <auto const& index>
inline std::optional<std::uint32_t> get_index_code_point(std::uint16_t pointer)
{
	// Entries equal to this value are reported as "no code point".
	constexpr std::uint32_t unmapped = 4294967295u;

	if (pointer < index.size())
	{
		std::uint32_t const candidate = index[pointer];
		if (candidate != unmapped) return candidate;
	}
	return std::nullopt;
}
28 
// Index pointer lookup.
//
// Binary-searches `index_0` for `code_point` and, on an exact match, returns
// the element of `index_1` located at the same position. Returns an empty
// optional when `code_point` is not present in `index_0`.
// The search uses std::lower_bound, so `index_0` has to be ordered ascending.
template <auto const& index_0, auto const& index_1>
inline std::optional<std::uint16_t> get_index_pointer(std::uint32_t code_point)
{
	auto const keys_begin = index_0.cbegin();
	auto const keys_end = index_0.cend();

	auto const hit = std::lower_bound(keys_begin, keys_end, code_point);
	if (hit == keys_end || *hit != code_point) return std::nullopt;

	// Same offset, but in the parallel table of pointers.
	return *(index_1.cbegin() + (hit - keys_begin));
}
37 
38  inline std::optional<std::uint32_t> get_index_gb18030_ranges_code_point(std::uint32_t pointer)
39  {
40  // 1.
41  if ((39419u < pointer && pointer < 189000u) || 1237575u < pointer) return std::optional<std::uint32_t>{};
42  // 2.
43  if (pointer == 7457u) return std::make_optional<std::uint32_t>(0xE7C7u);
44  // 3.
45  std::multimap<std::uint32_t, std::uint32_t>::const_iterator it = index_code_point_gb18030_ranges.lower_bound(pointer);
46  // 表引きできなかった時のための追加コード
47  if (it == index_code_point_gb18030_ranges.cend())
48  {
49  assert(false);
50  return std::optional<std::uint32_t>{};
51  }
52 
53  if (it->first != pointer) --it;
54  std::uint32_t offset = it->first;
55  std::uint32_t code_point_offset = it->second;
56  // 4.
57  return std::make_optional<std::uint32_t>(code_point_offset + (pointer - offset));
58  }
59 
65  inline std::optional<std::uint32_t> get_index_gb18030_ranges_pointer(std::uint32_t code_point)
66  {
67  // 1.
68  if (code_point == 0xE7C7u) return std::make_optional<std::uint32_t>(7457u); // これは間違いではない
69 
70  // 2.
71  std::multimap<std::uint32_t, std::uint32_t>::const_iterator it = index_pointer_gb18030_ranges.lower_bound(code_point);
72  // 表引きできなかった時のための追加コード
73  if (it == index_pointer_gb18030_ranges.cend())
74  {
75  assert(false);
76  return std::optional<std::uint32_t>{};
77  }
78 
79  if (it->first != code_point) --it;
80  std::uint32_t offset = it->first;
81  std::uint32_t pointer_offset = it->second;
82 
83  // 3.
84  return std::make_optional<std::uint32_t>(pointer_offset + (code_point - offset));
85  }
86 
87  inline std::optional<std::uint16_t> get_index_shift_jis_pointer(std::uint32_t code_point)
88  {
89  return get_index_pointer<index_pointer_Shift_JIS_0, index_pointer_Shift_JIS_1>(code_point);
90  }
91 
92  inline std::optional<std::uint16_t> get_index_big5_pointer(std::uint32_t code_point)
93  {
94  return get_index_pointer<index_pointer_big5_0, index_pointer_big5_1>(code_point);
95  }
96 
97 
98  // --------------------------------------------------------------------------------------------
99  // 8. The encoding
100  //
101  // https://encoding.spec.whatwg.org/#the-encoding
102  // --------------------------------------------------------------------------------------------
103 
108  class UTF_8_decoder : public decoder
109  {
110  public:
111  UTF_8_decoder()
112  : UTF_8_code_point(0)
113  , UTF_8_bytes_seen(0)
114  , UTF_8_bytes_needed(0)
115  , UTF_8_lower_boundary(0x80u)
116  , UTF_8_upper_boundary(0xBFu)
117  {
118  }
119 
120  UTF_8_decoder(UTF_8_decoder const&) = default;
121 
122  template <typename InQueue, typename IoItem>
123  result_value run(InQueue& in, IoItem item)
124  {
125  // 1.
126  if (item.m_eoq && UTF_8_bytes_needed != 0)
127  {
128  UTF_8_bytes_needed = 0;
129  return result_error{};
130  }
131  // 2.
132  if (item.m_eoq) return result_finished{};
133 
134  std::uint8_t byte = static_cast<std::make_unsigned_t<decltype(item.m_value)>>(item.m_value);
135  // 3.
136  if (UTF_8_bytes_needed == 0)
137  {
138  if (0 <= byte && byte <= 0x7Fu) return static_cast<std::uint32_t>(byte);
139  if (0xC2u <= byte && byte <= 0xDFu)
140  {
141  UTF_8_bytes_needed = 1;
142  UTF_8_code_point = byte & 0x1Fu;
143  }
144  else if (0xE0u <= byte && byte <= 0xEFu)
145  {
146  if (byte == 0xE0u) UTF_8_lower_boundary = 0xA0u;
147  if (byte == 0xEDu) UTF_8_upper_boundary = 0x9Fu;
148  UTF_8_bytes_needed = 2;
149  UTF_8_code_point = byte & 0xFu;
150  }
151  else if (0xF0u <= byte && byte <= 0xF4u)
152  {
153  if (byte == 0xF0u) UTF_8_lower_boundary = 0x90u;
154  if (byte == 0xF4u) UTF_8_upper_boundary = 0x8Fu;
155  UTF_8_bytes_needed = 3;
156  UTF_8_code_point = byte & 0x7u;
157  }
158  else return result_error{};
159 
160  return result_continue{};
161  }
162  // 4.
163  if (!(UTF_8_lower_boundary <= byte && byte <= UTF_8_upper_boundary))
164  {
165  UTF_8_code_point = 0;
166  UTF_8_bytes_seen = 0;
167  UTF_8_bytes_needed = 0;
168  UTF_8_lower_boundary = 0x80u;
169  UTF_8_upper_boundary = 0xBFu;
170 
171  in.prepend(byte);
172 
173  return result_error{};
174  }
175  // 5.
176  UTF_8_lower_boundary = 0x80u;
177  UTF_8_upper_boundary = 0xBFu;
178  // 6.
179  UTF_8_code_point = (UTF_8_code_point << 6) | (byte & 0x3Fu);
180  // 7.
181  ++UTF_8_bytes_seen;
182  // 8.
183  if (UTF_8_bytes_seen != UTF_8_bytes_needed) return result_continue{};
184  // 9.
185  std::uint32_t cp = UTF_8_code_point;
186  // 10.
187  UTF_8_code_point = 0;
188  UTF_8_bytes_seen = 0;
189  UTF_8_bytes_needed = 0;
190  // 11.
191  return cp;
192  }
193 
194  private:
195  uint32_t UTF_8_code_point;
196  uint32_t UTF_8_bytes_seen;
197  uint32_t UTF_8_bytes_needed;
198  uint32_t UTF_8_lower_boundary;
199  uint32_t UTF_8_upper_boundary;
200  };
201 
206  class UTF_8_encoder : public encoder
207  {
208  private:
209  template <typename Array>
210  Array run(std::uint32_t cp)
211  {
212  Array result;
213  //3.
214  std::uint32_t count = result.size() - 1;
215  std::uint32_t offset = 0;
216  if (count == 1) offset = 0xC0u;
217  else if (count == 2) offset = 0xE0u;
218  else if (count == 3) offset = 0xF0u;
219  // 4.
220  result[0] = (cp >> (6 * count)) + offset;
221  // 5.
222  std::uint32_t i = 1;
223  while (0 < count)
224  {
225  std::uint32_t temp = cp >> (6 * (count - 1));
226  result[i] = 0x80u | (temp & 0x3Fu);
227  --count;
228  ++i;
229  }
230  // 6.
231  return result;
232  }
233 
234  public:
235  template <typename InQueue, typename IoItem>
236  result_value run(InQueue& in, IoItem item)
237  {
238  // 1.
239  if (item.m_eoq) return result_finished{};
240 
241  std::uint32_t cp = item.m_value;
242  // 2.
243  if (is_ascii_code_point(cp)) return static_cast<std::uint8_t>(cp);
244  // 3.
245  if (0x80u <= cp && cp <= 0x7FFu) return run<std::array<std::uint8_t, 2>>(cp);
246  else if (0x800u <= cp && cp <= 0xFFFFu) return run<std::array<std::uint8_t, 3>>(cp);
247  else if (0x10000u <= cp && cp <= 0x10FFFFu) return run<std::array<std::uint8_t, 4>>(cp);
248 
249  assert(false);
250  return result_error{};
251  }
252  };
253 
254  // --------------------------------------------------------------------------------------------
255  // 9. Legacy single-byte encodings
256  //
257  // https://encoding.spec.whatwg.org/#legacy-single-byte-encodings
258  // --------------------------------------------------------------------------------------------
259 
264  template <auto const& index>
266  {
267  public:
268  template <typename InQueue, typename IoItem>
269  result_value run(InQueue& in, IoItem item)
270  {
271  // 1.
272  if (item.m_eoq) return result_finished{};
273 
274  std::uint8_t byte = static_cast<std::uint8_t>(item.m_value);
275  // 2.
276  if (is_ascii_byte(byte)) return static_cast<std::uint32_t>(byte);
277  // 3.
278  std::optional<std::uint32_t> cp = get_index_code_point<index>(byte - 0x80u);
279  // 4.
280  if (!cp) return result_error{};
281  // 5.
282  return cp.value();
283  }
284  };
285 
290  template <auto const& index_0, auto const& index_1>
292  {
293  public:
294  template <typename InQueue, typename IoItem>
295  result_value run(InQueue& in, IoItem item)
296  {
297  // 1.
298  if (item.m_eoq) return result_finished{};
299 
300  std::uint32_t cp = item.m_value;
301  // 2.
302  if (is_ascii_code_point(cp)) return static_cast<std::uint8_t>(cp);
303  // 3.
304  std::optional<std::uint16_t> byte = get_index_pointer<index_0, index_1>(cp);
305  // 4.
306  if(!byte) return result_error{ cp };
307  // 5.
308  return static_cast<std::uint8_t>(byte.value() + 0x80u);
309  }
310  };
311 
312  // IBM866
315 
316  // ISO_8859_2
319 
320  // ISO_8859_3
323 
324  // ISO_8859_4
327 
328  // ISO_8859_5
331 
332  // ISO_8859_6
335 
336  // ISO_8859_7
339 
340  // ISO_8859_8
343 
344  // ISO_8859_8_I
347 
348  // ISO_8859_10
351 
352  // ISO_8859_13
355 
356  // ISO_8859_14
359 
360  // ISO_8859_15
363 
364  // ISO_8859_16
367 
368  // KOI8_R
371 
372  // KOI8_U
375 
376  // macintosh
379 
380  // windows_874
383 
384  // windows_1250
387 
388  // windows_1251
391 
392  // windows_1252
395 
396  // windows_1253
399 
400  // windows_1254
403 
404  // windows_1255
407 
408  // windows_1256
411 
412  // windows_1257
415 
416  // windows_1258
419 
420  // x_mac_cyrillic
423 
424  // --------------------------------------------------------------------------------------------
425  // 10. Legacy multi-byte Chinese (simplified) encodings
426  //
427  // https://encoding.spec.whatwg.org/#legacy-multi-byte-chinese-(simplified)-encodings
428  // --------------------------------------------------------------------------------------------
429 
434  class gb18030_decoder : public decoder
435  {
436  public:
438  : gb18030_first(0)
439  , gb18030_second(0)
440  , gb18030_third(0)
441  {
442  }
443 
444  template <typename InQueue, typename IoItem>
445  result_value run(InQueue& in, IoItem item)
446  {
447  // 1.
448  if (item.m_eoq)
449  {
450  if (gb18030_first == 0 && gb18030_second == 0 && gb18030_third == 0) return result_finished{};
451  // 2.
452  reset();
453  return result_error{};
454  }
455 
456  std::uint8_t byte = item.m_value;
457  // 3.
458  if (gb18030_third != 0)
459  {
460  if (!(0x30u <= byte && byte <= 0x39u))
461  {
462  std::array<std::uint8_t, 3> a = { static_cast<std::uint8_t>(gb18030_second), static_cast<std::uint8_t>(gb18030_third), byte };
463  in.prepend(a.begin(), a.end());
464  reset();
465  return result_error{};
466  }
467  std::optional<std::uint32_t> cp = get_index_gb18030_ranges_code_point(
468  ((gb18030_first - 0x81u) * 12600) + ((gb18030_second - 0x30u) * 1260) + ((gb18030_third - 0x81u) * 10) + byte - 0x30u);
469  reset();
470  if (!cp) return result_error{};
471  return cp.value();
472  }
473  // 4.
474  if (gb18030_second != 0)
475  {
476  if (0x81u <= byte && byte <= 0xFEu)
477  {
478  gb18030_third = byte;
479  return result_continue{};
480  }
481  std::array<std::uint8_t, 2> a = { static_cast<std::uint8_t>(gb18030_second), byte };
482  in.prepend(a.begin(), a.end());
483  gb18030_first = 0;
484  gb18030_second = 0;
485  return result_error{};
486  }
487  // 5.
488  if (gb18030_first != 0)
489  {
490  if (0x30u <= byte && byte <= 0x39u)
491  {
492  gb18030_second = byte;
493  return result_continue{};
494  }
495  std::uint32_t lead = gb18030_first;
496  std::optional<uint32_t> pointer{};
497  gb18030_first = 0;
498  std::uint32_t offset = (byte < 0x7Fu) ? 0x40u : 0x41u;
499  if ((0x40u <= byte && byte <= 0x7Eu) || (0x80u <= byte && byte <= 0xFEu)) pointer = (lead - 0x81) * 190 + (byte - offset);
500  std::optional<uint32_t> cp{};
501  if (pointer) cp = get_index_code_point<index_code_point_gb18030>(pointer.value());
502  if (cp) return cp.value();
503  if (is_ascii_byte(byte)) in.prepend(byte);
504  return result_error{};
505  }
506  // 6.
507  if (is_ascii_byte(byte)) return static_cast<std::uint32_t>(byte);
508  // 7.
509  if (byte == 0x80u) return static_cast<std::uint32_t>(0x20ACu);
510  // 8.
511  if (0x81u <= byte && byte <= 0xFEu)
512  {
513  gb18030_first = byte;
514  return result_continue{};
515  }
516  // 9.
517  return result_error{};
518  }
519 
520  private:
521  void reset() noexcept
522  {
523  gb18030_first = 0;
524  gb18030_second = 0;
525  gb18030_third = 0;
526  }
527 
528  private:
529  std::uint32_t gb18030_first;
530  std::uint32_t gb18030_second;
531  std::uint32_t gb18030_third;
532  };
533 
538  template <bool GBK_flag>
540  {
541  public:
542  template <typename InQueue, typename IoItem>
543  result_value run(InQueue& in, IoItem item)
544  {
545  // 1.
546  if (item.m_eoq) return result_finished{};
547 
548  std::uint32_t cp = item.m_value;
549  // 2.
550  if (is_ascii_code_point(cp)) return result_byte{ static_cast<std::uint8_t>(cp) };
551  // 3.
552  if (cp == 0xE5E5u) return result_error{ cp };
553  // 4.
554  if constexpr (GBK_flag) { if (cp == 0x20ACu) return result_byte{ 0x80u }; };
555  // 5.
556  std::optional<uint32_t> pointer{ get_index_pointer<index_pointer_gb18030_0, index_pointer_gb18030_1>(cp) };
557  // 6.
558  if (pointer.has_value())
559  {
560  std::uint32_t lead = pointer.value() / 190 + 0x81u;
561  std::uint32_t trail = pointer.value() % 190;
562  std::uint32_t offset = trail < 0x3Fu ? 0x40u : 0x41u;
563  return result_bytes_2{ static_cast<std::uint8_t>(lead), static_cast<std::uint8_t>(trail + offset) };
564  }
565  // 7.
566  if constexpr (GBK_flag) return result_error{ cp };
567  // 8.
568  pointer = get_index_gb18030_ranges_pointer(cp);
569  // 表引きできなかった時のための追加コード
570  if (!pointer.has_value())
571  {
572  assert(false);
573  return result_error{};
574  }
575  // 9.
576  std::uint8_t byte1 = static_cast<std::uint8_t>(pointer.value() / (10 * 126 * 10));
577  // 10.
578  pointer = pointer.value() % (10 * 126 * 10);
579  // 11.
580  std::uint8_t byte2 = static_cast<std::uint8_t>(pointer.value() / (10 * 126));
581  // 12.
582  pointer = pointer.value() % (10 * 126);
583  // 13.
584  std::uint8_t byte3 = static_cast<std::uint8_t>(pointer.value() / 10);
585  // 14.
586  std::uint8_t byte4 = static_cast<std::uint8_t>(pointer.value() % 10);
587  // 15.
588  return result_bytes_4{
589  static_cast<std::uint8_t>(byte1 + 0x81u),
590  static_cast<std::uint8_t>(byte2 + 0x30u),
591  static_cast<std::uint8_t>(byte3 + 0x81u),
592  static_cast<std::uint8_t>(byte4 + 0x30u) };
593  }
594  };
595 
597 
598  // GBK
601 
602  // --------------------------------------------------------------------------------------------
603  // 11. Legacy multi-byte Chinese (traditional) encodings
604  //
605  // https://encoding.spec.whatwg.org/#legacy-multi-byte-chinese-(traditional)-encodings
606  // --------------------------------------------------------------------------------------------
607 
612  class Big5_decoder : public decoder
613  {
614  public:
615  Big5_decoder() : big5_lead(0) {}
616 
617  template <typename InQueue, typename IoItem>
618  result_value run(InQueue& in, IoItem item)
619  {
620  // 1.
621  if (item.m_eoq && big5_lead != 0)
622  {
623  big5_lead = 0;
624  return result_error{};
625  }
626  // 2.
627  if (item.m_eoq) return result_finished{};
628 
629  std::uint8_t byte = item.m_value;
630  // 3.
631  if (big5_lead != 0)
632  {
633  std::uint32_t lead = big5_lead;
634  std::optional<std::uint16_t> pointer{};
635  big5_lead = 0;
636  // 3.1.
637  std::uint32_t offset = byte < 0x7Fu ? 0x40u : 0x62u;
638  // 3.2.
639  if (0x40u <= byte && byte <= 0x7Eu)
640  {
641  pointer = (lead - 0x81u) * 157 + (byte - offset);
642  // 3.3.
643  switch (pointer.value())
644  {
645  case 1133u: return result_code_points_2{ 0xCAu, 0x304u };
646  case 1135u: return result_code_points_2{ 0xCAu, 0x30Cu };
647  case 1164u: return result_code_points_2{ 0xEAu, 0x304u };
648  case 1166u: return result_code_points_2{ 0xEAu, 0x30Cu };
649  }
650  }
651  // 3.4.
652  std::optional<std::uint32_t> cp{};
653  if (pointer.has_value()) cp = get_index_code_point<index_code_point_big5>(pointer.value());
654  // 3.5.
655  if (cp.has_value()) return result_code_point{ cp.value() };
656  // 3.6.
657  if (is_ascii_byte(byte)) in.prepend(byte);
658  // 3.7.
659  return result_error{};
660  }
661  // 4.
662  if (is_ascii_byte(byte)) return result_code_point{ byte };
663  // 5.
664  if (0x81u <= byte && byte <= 0xFEu)
665  {
666  big5_lead = byte;
667  return result_continue{};
668  }
669  // 6.
670  return result_error{};
671  }
672 
673  private:
674  uint32_t big5_lead;
675  };
676 
681  class Big5_encoder : public encoder
682  {
683  public:
684  template <typename InQueue, typename IoItem>
685  result_value run(InQueue& in, IoItem item)
686  {
687  // 1.
688  if (item.m_eoq) return result_finished{};
689 
690  uint32_t cp = item.m_value;
691  // 2.
692  if (is_ascii_code_point(cp)) return result_byte{ static_cast<std::uint8_t>(cp) };
693  // 3.
694  std::optional<std::uint16_t> pointer = get_index_big5_pointer(cp);
695  // 4.
696  if (!pointer.has_value()) return result_error{ cp };
697  // 5.
698  std::uint8_t lead = static_cast<std::uint8_t>(pointer.value() / 157 + 0x81u);
699  // 6.
700  std::uint8_t trail = static_cast<std::uint8_t>(pointer.value() % 157);
701  // 7.
702  std::uint8_t offset = static_cast<std::uint8_t>(trail < 0x3Fu ? 0x40u : 0x62u);
703  // 8.
704  return result_bytes_2{ lead, static_cast<std::uint8_t>(trail + offset) };
705  }
706  };
707 
708  // --------------------------------------------------------------------------------------------
709  // 12. Legacy multi-byte Japanese encodings
710  //
711  // https://encoding.spec.whatwg.org/#legacy-multi-byte-japanese-encodings
712  // --------------------------------------------------------------------------------------------
713 
718  class EUC_JP_decoder : public decoder
719  {
720  public:
721  EUC_JP_decoder() : EUC_JP_jis0212_flag(false), EUC_JP_lead(0) {}
722 
723  template <typename InQueue, typename IoItem>
724  result_value run(InQueue& in, IoItem item)
725  {
726  // 1.
727  if (item.m_eoq && EUC_JP_lead != 0)
728  {
729  EUC_JP_lead = 0;
730  return result_error{};
731  }
732  // 2.
733  if (item.m_eoq && EUC_JP_lead == 0) return result_finished{};
734 
735  std::uint8_t byte = static_cast<uint8_t>(item.m_value);
736  // 3.
737  if (EUC_JP_lead == 0x8Eu && (0xA1u <= byte && byte <= 0xDFu))
738  {
739  EUC_JP_lead = 0;
740  return result_code_point{ 0xFF61u - 0xA1u + byte };
741  }
742  // 4.
743  if (EUC_JP_lead == 0x8Fu && (0xA1u <= byte && byte <= 0xFEu))
744  {
745  EUC_JP_jis0212_flag = true;
746  EUC_JP_lead = byte;
747  return result_continue{};
748  }
749  // 5.
750  if (EUC_JP_lead != 0)
751  {
752  std::uint8_t lead = EUC_JP_lead;
753  EUC_JP_lead = 0;
754  // 5.1.
755  std::optional<std::uint32_t> cp{};
756  // 5.2.
757  if ((0xA1u <= lead && lead <= 0xFEu) && (0xA1u <= byte && byte <= 0xFEu))
758  {
759  std::uint16_t pointer{ static_cast<std::uint16_t>((lead - 0xA1u) * 94 + byte - 0xA1u) };
760  if (EUC_JP_jis0212_flag) cp = get_index_code_point<index_code_point_jis0212>(pointer);
761  else cp = get_index_code_point<index_code_point_jis0208>(pointer);
762  }
763  // 5.3.
764  EUC_JP_jis0212_flag = false;
765  // 5.4.
766  if (cp.has_value()) return result_code_point{ cp.value() };
767  // 5.5.
768  if (is_ascii_byte(byte)) in.prepend(byte);
769  // 5.6.
770  return result_error{};
771  }
772  // 6.
773  if (is_ascii_byte(byte)) return result_code_point{ byte };
774  // 7.
775  if (byte == 0x8Eu || byte == 0x8Fu || (0xA1u <= byte && byte <= 0xFEu))
776  {
777  EUC_JP_lead = byte;
778  return result_continue{};
779  }
780  // 8.
781  return result_error{};
782  }
783 
784  private:
785  bool EUC_JP_jis0212_flag;
786  uint8_t EUC_JP_lead;
787  };
788 
793  class EUC_JP_encoder : public encoder
794  {
795  public:
796  template <typename InQueue, typename IoItem>
797  result_value run(InQueue& in, IoItem item)
798  {
799  // 1.
800  if (item.m_eoq) return result_finished{};
801 
802  std::uint32_t cp = item.m_value;
803  // 2.
804  if (is_ascii_code_point(cp)) return result_byte{ static_cast<std::uint8_t>(cp) };
805  // 3.
806  if (cp == 0xA5u) return result_byte{ 0x5Cu };
807  // 4.
808  if (cp == 0x203Eu) return result_byte{ 0x7Eu };
809  // 5.
810  if (0xFF61u <= cp && cp <= 0xFF9Fu) return result_bytes_2{ 0x8Eu, static_cast<uint8_t>(cp - 0xFF61u + 0xA1u) };
811  // 6.
812  if (cp == 0x2212u) cp = 0xFF0Du;
813  // 7.
814  std::optional<std::uint16_t> pointer = get_index_pointer<index_pointer_jis0208_0, index_pointer_jis0208_1>(cp);
815  // 8.
816  if (!pointer.has_value()) return result_error{ cp };
817  // 9.
818  std::uint8_t lead = static_cast<std::uint8_t>(pointer.value() / 94 + 0xA1u);
819  // 10.
820  std::uint8_t trail = static_cast<std::uint8_t>(pointer.value() % 94 + 0xA1u);
821  // 11.
822  return result_bytes_2{ lead, trail };
823  }
824  };
825 
826  // --------------------------------------------------------------------------------------------
827  // 12.2. ISO-2022-JP
828  //
829  // https://encoding.spec.whatwg.org/#iso-2022-jp
830  // --------------------------------------------------------------------------------------------
831 
837  {
838  private:
839  enum state : std::uint8_t
840  {
841  ASCII, Roman, katakana, Lead_byte, Trail_byte, Escape_start, Escape
842  };
843 
844  public:
846  : ISO_2022_JP_decoder_state(state::ASCII)
847  , ISO_2022_JP_decoder_output_state(state::ASCII)
848  , ISO_2022_JP_lead(0)
849  , ISO_2022_JP_output_flag(false)
850  {
851  }
852 
853  template <typename InQueue, typename IoItem>
854  result_value run(InQueue& in, IoItem item)
855  {
856  std::uint8_t byte = 0;
857 
858  switch (ISO_2022_JP_decoder_state)
859  {
860  case ASCII:
861  if (item.m_eoq) return result_finished{};
862  byte = static_cast<std::uint8_t>(item.m_value);
863  if (byte == 0x1Bu)
864  {
865  ISO_2022_JP_decoder_state = state::Escape_start;
866  return result_continue{};
867  }
868  else if (byte <= 0x7Fu && byte != 0x0Eu && byte != 0x0Fu && byte != 0x1Bu)
869  {
870  ISO_2022_JP_output_flag = false;
871  return result_code_point{ byte };
872  }
873  else
874  {
875  ISO_2022_JP_output_flag = false;
876  return result_error{};
877  }
878  break;
879  case state::Roman:
880  if (item.m_eoq) return result_finished{};
881  byte = static_cast<std::uint8_t>(item.m_value);
882  if (byte == 0x1Bu)
883  {
884  ISO_2022_JP_decoder_state = state::Escape_start;
885  return result_continue{};
886  }
887  else if (byte == 0x5Cu)
888  {
889  ISO_2022_JP_output_flag = false;
890  return result_code_point{ 0xA5u };
891  }
892  else if (byte == 0x7Eu)
893  {
894  ISO_2022_JP_output_flag = false;
895  return result_code_point{ 0x203Eu };
896  }
897  else if (byte <= 0x7Fu && byte != 0x0Eu && byte != 0x0Fu && byte != 0x1Bu && byte != 0x5Cu && byte != 0x7Eu)
898  {
899  ISO_2022_JP_output_flag = false;
900  return result_code_point{ byte };
901  }
902  else
903  {
904  ISO_2022_JP_output_flag = false;
905  return result_error{};
906  }
907  break;
908  case state::katakana:
909  if (item.m_eoq) return result_finished{};
910  byte = static_cast<std::uint8_t>(item.m_value);
911  if (byte == 0x1Bu)
912  {
913  ISO_2022_JP_decoder_state = state::Escape_start;
914  return result_continue{};
915  }
916  else if (0x21u <= byte && byte <= 0x5Fu)
917  {
918  ISO_2022_JP_output_flag = false;
919  return result_code_point{ 0xFF61u - 0x21u + byte };
920  }
921  else
922  {
923  ISO_2022_JP_output_flag = false;
924  return result_error{};
925  }
926  break;
927  case state::Lead_byte:
928  if (item.m_eoq) return result_finished{};
929  byte = static_cast<std::uint8_t>(item.m_value);
930  if (byte == 0x1Bu)
931  {
932  ISO_2022_JP_decoder_state = state::Escape_start;
933  return result_continue{};
934  }
935  else if (0x21u <= byte && byte <= 0x7Eu)
936  {
937  ISO_2022_JP_output_flag = false;
938  ISO_2022_JP_lead = byte;
939  ISO_2022_JP_decoder_state = state::Trail_byte;
940  return result_continue{};
941  }
942  else
943  {
944  ISO_2022_JP_output_flag = false;
945  return result_error{};
946  }
947  break;
948  case state::Trail_byte:
949  if (item.m_eoq)
950  {
951  ISO_2022_JP_decoder_state = state::Lead_byte;
952  IoItem tmp{};
953  tmp.m_eoq = true;
954  in.prepend(tmp); // 仕様でbyteはEOSのはず。
955  return result_error{};
956  }
957  byte = static_cast<std::uint8_t>(item.m_value);
958  if (byte == 0x1Bu)
959  {
960  ISO_2022_JP_decoder_state = state::Escape_start;
961  return result_error{};
962  }
963  else if (0x21u <= byte && byte <= 0x7Eu)
964  {
965  ISO_2022_JP_decoder_state = state::Lead_byte;
966  std::optional<std::uint16_t> pointer = static_cast<uint16_t>((ISO_2022_JP_lead - 0x21u) * 94 + byte - 0x21u);
967  std::optional<std::uint32_t> cp = get_index_code_point<index_code_point_jis0208>(pointer.value());
968  if (!cp.has_value()) return result_error{};
969  return result_code_point{ cp.value() };
970  }
971  else
972  {
973  ISO_2022_JP_decoder_state = state::Lead_byte;
974  return result_error{};
975  }
976  break;
977  case state::Escape_start:
978  if (!item.m_eoq)
979  {
980  byte = static_cast<std::uint8_t>(item.m_value);
981  if (byte == 0x24u || byte == 0x28u)
982  {
983  ISO_2022_JP_lead = byte;
984  ISO_2022_JP_decoder_state = state::Escape;
985  return result_continue{};
986  }
987  in.prepend(byte);
988  }
989  ISO_2022_JP_output_flag = false;
990  ISO_2022_JP_decoder_state = ISO_2022_JP_decoder_output_state;
991  return result_error{};
992  break;
993  case state::Escape:
994  {
995  // state::Escape になるときは必ずbyteがprependされている。
996  assert(!item.m_eoq);
997  byte = static_cast<std::uint8_t>(item.m_value);
998  std::uint8_t lead = ISO_2022_JP_lead;
999  ISO_2022_JP_lead = 0;
1000  std::optional<state> st{};
1001  if (lead == 0x28u && byte == 0x42u) st = state::ASCII;
1002  else if (lead == 0x28u && byte == 0x4Au) st = state::Roman;
1003  else if (lead == 0x28u && byte == 0x49u) st = state::katakana;
1004  else if (lead == 0x24u && (byte == 0x40u || byte == 0x42u)) st = state::Lead_byte;
1005  if (st.has_value())
1006  {
1007  ISO_2022_JP_decoder_state = st.value();
1008  ISO_2022_JP_decoder_output_state = st.value();
1009  bool output_flag = ISO_2022_JP_output_flag;
1010  ISO_2022_JP_output_flag = true;
1011  if (output_flag == false) return result_continue{};
1012  else return result_error{};
1013  }
1014  std::array<std::uint8_t, 2> a = { lead, byte };
1015  in.prepend(a.begin(), a.end());
1016  ISO_2022_JP_output_flag = false;
1017  ISO_2022_JP_decoder_state = ISO_2022_JP_decoder_output_state;
1018  return result_error{};
1019  }
1020  break;
1021  }
1022 
1023  assert(false);
1024  return result_error{};
1025  }
1026 
1027  private:
1028  state ISO_2022_JP_decoder_state;
1029  state ISO_2022_JP_decoder_output_state;
1030  uint8_t ISO_2022_JP_lead;
1031  bool ISO_2022_JP_output_flag;
1032  };
1033 
1039  {
1040  private:
1041  enum state : uint8_t
1042  {
1043  ASCII, Roman, jis0208
1044  };
1045 
1046  public:
1048  : ISO_2022_JP_encoder_state(state::ASCII)
1049  {
1050  }
1051 
1052  template <typename InQueue, typename IoItem>
1053  result_value run(InQueue& in, IoItem item)
1054  {
1055  // 1.
1056  if (item.m_eoq && ISO_2022_JP_encoder_state != state::ASCII)
1057  {
1058  IoItem c{};
1059  c.m_eoq = true;
1060  in.prepend(c);
1061  ISO_2022_JP_encoder_state = state::ASCII;
1062  return result_bytes_3{ static_cast<std::uint8_t>(0x1Bu), static_cast<std::uint8_t>(0x28u), static_cast<std::uint8_t>(0x42u) };
1063  }
1064  // 2.
1065  if (item.m_eoq && ISO_2022_JP_encoder_state == state::ASCII) return result_finished{};
1066 
1067  std::uint32_t cp = item.m_value;
1068  // 3.
1069  if ((ISO_2022_JP_encoder_state == state::ASCII || ISO_2022_JP_encoder_state == state::Roman)
1070  && (cp == 0xEu || cp == 0xFu || cp == 0x1Bu)) return result_error{ static_cast<std::uint32_t>(0xFFFDu) };
1071  // 4.
1072  if (ISO_2022_JP_encoder_state == state::ASCII && is_ascii_code_point(cp)) return result_byte{ static_cast<std::uint8_t>(cp) };
1073  // 5.
1074  if(ISO_2022_JP_encoder_state == state::Roman
1075  && ((is_ascii_code_point(cp) && cp != 0x5Cu && cp != 0x7Eu) || (cp == 0xA5u || cp == 0x203Eu)))
1076  {
1077  if (is_ascii_code_point(cp)) return result_byte{ static_cast<std::uint8_t>(cp) };
1078  if (cp == 0xA5u) return result_byte{ static_cast<std::uint8_t>(0x5Cu) };
1079  if (cp == 0x203Eu) return result_byte{ static_cast<std::uint8_t>(0x7Eu) };
1080  }
1081  // 6.
1082  if (is_ascii_code_point(cp) && ISO_2022_JP_encoder_state != state::ASCII)
1083  {
1084  in.prepend(cp);
1085  ISO_2022_JP_encoder_state = state::ASCII;
1086  return result_bytes_3{ static_cast<std::uint8_t>(0x1Bu), static_cast<std::uint8_t>(0x28u), static_cast<std::uint8_t>(0x42u) };
1087  }
1088  // 7.
1089  if ((cp == 0xA5u || cp == 0x203Eu) && ISO_2022_JP_encoder_state != state::Roman)
1090  {
1091  in.prepend(cp);
1092  ISO_2022_JP_encoder_state = state::Roman;
1093  return result_bytes_3{ static_cast<uint8_t>(0x1Bu), static_cast<uint8_t>(0x28u), static_cast<uint8_t>(0x4Au) };
1094  }
1095  // 8.
1096  if (cp == 0x2212u) cp = 0xFF0Du;
1097  // 9.
1098  if (0xFF61u <= cp && cp <= 0xFF9Fu) cp = get_index_code_point<index_code_point_iso_2022_jp_katakana>(cp - 0xFF61u).value();
1099  // 10.
1100  std::optional<std::uint16_t> pointer = get_index_pointer<index_pointer_jis0208_0, index_pointer_jis0208_1>(cp);
1101  // 11.
1102  if (!pointer.has_value())
1103  {
1104  if (ISO_2022_JP_encoder_state != state::jis0208)
1105  {
1106  in.prepend(cp);
1107  ISO_2022_JP_encoder_state = state::ASCII;
1108  return result_bytes_3{ static_cast<std::uint8_t>(0x1Bu), static_cast<std::uint8_t>(0x28u), static_cast<std::uint8_t>(0x42u) };
1109  }
1110  return result_error{ cp };
1111  }
1112  // 12.
1113  if (ISO_2022_JP_encoder_state != state::jis0208)
1114  {
1115  in.prepend(cp);
1116  ISO_2022_JP_encoder_state = state::jis0208;
1117  return result_bytes_3{ static_cast<std::uint8_t>(0x1Bu), static_cast<std::uint8_t>(0x24u), static_cast<std::uint8_t>(0x42u) };
1118  }
1119  // 13.
1120  std::uint8_t lead = static_cast<std::uint8_t>(pointer.value() / 94 + 0x21u);
1121  // 14.
1122  std::uint8_t trail = static_cast<std::uint8_t>(pointer.value() % 94 + 0x21u);
1123  // 15.
1124  return result_bytes_2{ lead, trail };
1125  }
1126 
1127  private:
1128  state ISO_2022_JP_encoder_state;
1129  };
1130 
1131 
1132  // --------------------------------------------------------------------------------------------
1133  // 12.3. Shift_JIS
1134  //
1135  // https://encoding.spec.whatwg.org/#shift_jis
1136  // --------------------------------------------------------------------------------------------
1137 
1143  {
1144  public:
1146  : Shift_JIS_lead(0)
1147  {
1148  }
1149 
1150  template <typename InQueue, typename IoItem>
1151  result_value run(InQueue& in, IoItem item)
1152  {
1153  // 1.
1154  if (item.m_eoq && Shift_JIS_lead != 0)
1155  {
1156  Shift_JIS_lead = 0;
1157  return result_error{};
1158  }
1159  // 2.
1160  if (item.m_eoq && Shift_JIS_lead == 0) return result_finished{};
1161 
1162  std::uint8_t byte = item.m_value;
1163  // 3.
1164  if (Shift_JIS_lead != 0)
1165  {
1166  std::uint8_t lead = Shift_JIS_lead;
1167  std::optional<std::uint16_t> pointer{};
1168  Shift_JIS_lead = 0;
1169  // 3.1.
1170  std::uint8_t offset = byte < 0x7Fu ? 0x40u : 0x41u;
1171  // 3.2.
1172  std::uint8_t lead_offset = lead < 0xA0u ? 0x81u : 0xC1u;
1173  // 3.3.
1174  if ((0x40u <= byte && byte <= 0x7Eu) || (0x80u <= byte && byte <= 0xFCu))
1175  pointer = (lead - lead_offset) * 188 + byte - offset;
1176  // 3.4.
1177  if (pointer.has_value() && (8836 <= pointer.value() && pointer.value() <= 10715))
1178  return result_code_point{ 0xE000u - 8836 + pointer.value() };
1179  // 3.5.
1180  std::optional<uint32_t> cp{};
1181  if (pointer.has_value()) cp = get_index_code_point<index_code_point_jis0208>(pointer.value());
1182  // 3.6.
1183  if (cp.has_value()) return result_code_point{ cp.value() };
1184  // 3.7.
1185  if (is_ascii_byte(byte)) in.prepend(byte);
1186  // 3.8.
1187  return result_error{};
1188  }
1189  // 4.
1190  if (is_ascii_byte(byte) || byte == 0x80u) return result_code_point{ byte };
1191  // 5.
1192  if (0xA1u <= byte && byte <= 0xDFu) return result_code_point{ 0xFF61u - 0xA1u + byte };
1193  // 6.
1194  if ((0x81u <= byte && byte <= 0x9Fu) || (0xE0u <= byte && byte <= 0xFCu))
1195  {
1196  Shift_JIS_lead = byte;
1197  return result_continue{};
1198  }
1199  // 7.
1200  return result_error{};
1201  }
1202 
1203  private:
1204  uint8_t Shift_JIS_lead;
1205  };
1206 
1212  {
1213  public:
1214  template <typename InQueue, typename IoItem>
1215  result_value run(InQueue& in, IoItem item)
1216  {
1217  // 1.
1218  if (item.m_eoq) return result_finished{};
1219 
1220  std::uint32_t cp = item.m_value;
1221  // 2.
1222  if (is_ascii_code_point(cp) || cp == 0x80u) return result_byte{ static_cast<std::uint8_t>(cp) };
1223  // 3.
1224  if (cp == 0xA5u) return result_byte{ static_cast<std::uint8_t>(0x5Cu) };
1225  // 4.
1226  if (cp == 0x203Eu) return result_byte{ static_cast<std::uint8_t>(0x7Eu) };
1227  // 5.
1228  if (0xFF61u <= cp && cp <= 0xFF9Fu) return result_byte{ static_cast<std::uint8_t>(cp - 0xFF61u + 0xA1u) };
1229  // 6.
1230  if (cp == 0x2212u) cp = 0xFF0Du;
1231  // 7.
1232  std::optional<std::uint16_t> pointer = get_index_shift_jis_pointer(cp);
1233  // 8.
1234  if (!pointer.has_value()) return result_error{ cp };
1235  // 9.
1236  std::uint8_t lead = pointer.value() / 188;
1237  // 10.
1238  std::uint8_t lead_offset = lead < 0x1Fu ? 0x81u : 0xC1u;
1239  // 11.
1240  std::uint8_t trail = pointer.value() % 188;
1241  // 12.
1242  std::uint8_t offset = trail < 0x3Fu ? 0x40u : 0x41u;
1243  // 13.
1244  return result_bytes_2{ static_cast<std::uint8_t>(lead + lead_offset), static_cast<std::uint8_t>(trail + offset) };
1245  }
1246  };
1247 
1248  // --------------------------------------------------------------------------------------------
1249  // 13. Legacy multi-byte Korean encodings
1250  //
1251  // https://encoding.spec.whatwg.org/#legacy-multi-byte-korean-encodings
1252  // --------------------------------------------------------------------------------------------
1253 
1258  class EUC_KR_decoder : public decoder
1259  {
1260  public:
1261  EUC_KR_decoder()
1262  : EUC_KR_lead(0)
1263  {
1264  }
1265 
1266  template <typename InQueue, typename IoItem>
1267  result_value run(InQueue& in, IoItem item)
1268  {
1269  // 1.
1270  if (item.m_eoq && EUC_KR_lead != 0)
1271  {
1272  EUC_KR_lead = 0;
1273  return result_error{};
1274  }
1275  // 2.
1276  if (item.m_eoq && EUC_KR_lead == 0) return result_finished{};
1277 
1278  std::uint8_t byte = item.m_value;
1279  // 3.
1280  if (EUC_KR_lead != 0)
1281  {
1282  std::uint8_t lead = EUC_KR_lead;
1283  std::optional<std::uint16_t> pointer{};
1284  EUC_KR_lead = 0;
1285  // 3.1.
1286  if (0x41u <= byte && byte <= 0xFEu) pointer = (lead - 0x81u) * 190 + (byte - 0x41u);
1287  // 3.2.
1288  std::optional<uint32_t> cp{};
1289  if (pointer.has_value()) cp = get_index_code_point<index_code_point_euc_kr>(pointer.value());
1290  // 3.3.
1291  if (cp.has_value()) return result_code_point{ cp.value() };
1292  // 3.4.
1293  if (is_ascii_byte(byte)) in.prepend(byte);
1294  // 3.5.
1295  return result_error{};
1296  }
1297  // 4.
1298  if (is_ascii_byte(byte)) return result_code_point{ byte };
1299  // 5.
1300  if (0x81u <= byte && byte <= 0xFEu)
1301  {
1302  EUC_KR_lead = byte;
1303  return result_continue{};
1304  }
1305  // 6.
1306  return result_error{};
1307  }
1308 
1309  private:
1310  std::uint8_t EUC_KR_lead;
1311  };
1312 
1317  class EUC_KR_encoder : public encoder
1318  {
1319  public:
1320  template <typename InQueue, typename IoItem>
1321  result_value run(InQueue& in, IoItem item)
1322  {
1323  // 1.
1324  if (item.m_eoq) return result_finished{};
1325 
1326  std::uint32_t cp = item.m_value;
1327  // 2.
1328  if (is_ascii_code_point(cp)) return result_byte{ static_cast<std::uint8_t>(cp) };
1329  // 3.
1330  std::optional<std::uint16_t> pointer = get_index_pointer<index_pointer_euc_kr_0, index_pointer_euc_kr_1>(cp);
1331  // 4.
1332  if (!pointer.has_value()) return result_error{ cp };
1333  // 5.
1334  std::uint8_t lead = pointer.value() / 190 + 0x81u;
1335  // 6.
1336  std::uint8_t trail = pointer.value() % 190 + 0x41u;
1337  // 7.
1338  return result_bytes_2{ lead, trail };
1339  }
1340  };
1341 
1342  // --------------------------------------------------------------------------------------------
1343  // 14. Legacy miscellaneous encodings
1344  //
1345  // https://encoding.spec.whatwg.org/#legacy-miscellaneous-encodings
1346  // --------------------------------------------------------------------------------------------
1347 
1353  {
1354  public:
1356  : replacement_error_returned_flag(false)
1357  {
1358  }
1359 
1360  template <typename InQueue, typename IoItem>
1361  result_value run(InQueue& in, IoItem item)
1362  {
1363  // 1.
1364  if (item.m_eoq) return result_finished{};
1365  // 2.
1366  if (replacement_error_returned_flag == false)
1367  {
1368  replacement_error_returned_flag = true;
1369  return result_error{};
1370  }
1371  // 3.
1372  return result_finished{};
1373  }
1374 
1375  private:
1376  bool replacement_error_returned_flag;
1377  };
1378 
1383  template<bool UTF_16BE_decoder_flag = false>
1385  {
1386  public:
1387  template <typename InQueue, typename IoItem>
1388  result_value run(InQueue& in, IoItem item)
1389  {
1390  // 1.
1391  if (item.m_eoq && (UTF_16_lead_byte.has_value() || UTF_16_lead_surrogate.has_value()))
1392  {
1393  UTF_16_lead_byte.reset();
1394  UTF_16_lead_surrogate.reset();
1395  return result_error{};
1396  }
1397  // 2.
1398  if (item.m_eoq && !UTF_16_lead_byte.has_value() && !UTF_16_lead_surrogate.has_value())
1399  {
1400  return result_finished{};
1401  }
1402 
1403  std::uint8_t byte = item.m_value;
1404  // 3.
1405  if (!UTF_16_lead_byte.has_value())
1406  {
1407  UTF_16_lead_byte = byte;
1408  return result_continue{};
1409  }
1410  // 4.
1411  std::uint16_t code_unit = 0;
1412  if constexpr (UTF_16BE_decoder_flag) code_unit = (UTF_16_lead_byte.value() << 8) + byte;
1413  else code_unit = (byte << 8) + UTF_16_lead_byte.value();
1414  UTF_16_lead_byte.reset();
1415  // 5.
1416  if (UTF_16_lead_surrogate.has_value())
1417  {
1418  std::uint16_t lead_surrogate = UTF_16_lead_surrogate.value();
1419  UTF_16_lead_surrogate.reset();
1420  // 5.1.
1421  if (0xDC00u <= code_unit && code_unit <= 0xDFFFu)
1422  return result_code_point{ 0x10000u + ((lead_surrogate - 0xD800u) << 10) + (code_unit - 0xDC00u) };
1423  // 5.2.
1424  std::uint8_t byte1 = code_unit > 8;
1425  // 5.3.
1426  std::uint8_t byte2 = code_unit & 0x00FF;
1427  // 5.4.
1428  std::array<std::uint8_t, 2> bytes{};
1429  if constexpr (UTF_16BE_decoder_flag)
1430  {
1431  bytes[0] = byte1;
1432  bytes[1] = byte2;
1433  }
1434  else
1435  {
1436  bytes[0] = byte2;
1437  bytes[1] = byte1;
1438  }
1439  // 5.5.
1440  in.prepend(bytes.begin(), bytes.end());
1441  return result_error{};
1442  }
1443  // 6.
1444  if (0xD800u <= code_unit && code_unit <= 0xDBFFu)
1445  {
1446  UTF_16_lead_surrogate = code_unit;
1447  return result_continue{};
1448  }
1449  // 7.
1450  if (0xDC00u <= code_unit && code_unit <= 0xDFFFu) return result_error{};
1451  // 8.
1452  return result_code_point{ code_unit };
1453  }
1454 
1455  private:
1456  std::optional<std::uint8_t> UTF_16_lead_byte;
1457  std::optional<std::uint16_t> UTF_16_lead_surrogate;
1458  };
1459 
1462 
1468  {
1469  public:
1470  template <typename InQueue, typename IoItem>
1471  result_value run(InQueue& in, IoItem item)
1472  {
1473  // 1.
1474  if (item.m_eoq) return result_finished{};
1475 
1476  std::uint8_t byte = item.m_value;
1477  // 2.
1478  if (is_ascii_byte(byte)) return result_code_point{ byte };
1479  // 3.
1480  return result_code_point{ 0xF780u + byte - 0x80u };
1481  }
1482  };
1483 
1489  {
1490  public:
1491  template <typename InQueue, typename IoItem>
1492  result_value run(InQueue& in, IoItem item)
1493  {
1494  // 1.
1495  if (item.m_eoq) return result_finished{};
1496 
1497  std::uint32_t cp = item.m_value;
1498  // 2.
1499  if (is_ascii_code_point(cp)) return result_byte{ static_cast<std::uint8_t>(cp) };
1500  // 3.
1501  if (0xF780u <= cp && cp <= 0xF7FFu) return result_byte{ static_cast<std::uint8_t>(cp - 0xF780u + 0x80u) };
1502  // 4.
1503  return result_error{ cp };
1504  }
1505  };
1506 
1507  class error_decoder : public decoder
1508  {
1509  public:
1510  template <typename InQueue, typename IoItem>
1511  result_value run(InQueue& in, IoItem item) { return result_error{}; }
1512  };
1513 
1514  class error_encoder : public encoder
1515  {
1516  public:
1517  template <typename InQueue, typename IoItem>
1518  result_value run(InQueue& in, IoItem item) { return result_error{}; }
1519  };
1520 
1521  using coder = std::variant<
1522  error_encoder,
1523  error_decoder,
1524  UTF_8_decoder,
1525  UTF_8_encoder,
1582  GBK_encoder,
1583  Big5_decoder,
1584  Big5_encoder,
1598 }
wordring::whatwg::encoding::error_decoder
Definition: coder.hpp:1507
wordring::whatwg::encoding::x_user_defined_decoder
Definition: coder.hpp:1467
wordring::whatwg::encoding::EUC_KR_decoder
Definition: coder.hpp:1258
wordring::whatwg::encoding::EUC_JP_decoder
Definition: coder.hpp:718
wordring::whatwg::encoding::basic_gb18030_encoder
Definition: coder.hpp:539
wordring::whatwg::encoding::single_byte_decoder
single-byte decoder
Definition: coder.hpp:265
wordring::whatwg::encoding
wordring::whatwg::encoding::EUC_JP_encoder
Definition: coder.hpp:793
wordring::whatwg::encoding::single_byte_encoder
single-byte encoder
Definition: coder.hpp:291
wordring::whatwg::encoding::UTF_8_decoder
UTF-8 decoder
Definition: coder.hpp:108
wordring::whatwg::encoding::UTF_8_encoder
UTF-8 encoder
Definition: coder.hpp:206
wordring::whatwg::encoding::Big5_decoder
Definition: coder.hpp:612
wordring::whatwg::encoding::shared_UTF_16_decoder
Definition: coder.hpp:1384
wordring::whatwg::encoding::encoder
Definition: whatwg/encoding/encoding_defs.hpp:14
wordring::whatwg::encoding::EUC_KR_encoder
Definition: coder.hpp:1317
wordring::whatwg::encoding::decoder
Definition: whatwg/encoding/encoding_defs.hpp:15
wordring::whatwg::encoding::result_finished
Definition: whatwg/encoding/encoding_defs.hpp:17
wordring::whatwg::encoding::x_user_defined_encoder
Definition: coder.hpp:1488
wordring::whatwg::encoding::Shift_JIS_decoder
Shift_JIS decoder
Definition: coder.hpp:1142
wordring::whatwg::encoding::get_index_gb18030_ranges_pointer
std::optional< std::uint32_t > get_index_gb18030_ranges_pointer(std::uint32_t code_point)
Definition: coder.hpp:65
wordring::whatwg::encoding::error_encoder
Definition: coder.hpp:1514
wordring::whatwg::encoding::Shift_JIS_encoder
Definition: coder.hpp:1211
wordring::whatwg::encoding::replacement_decoder
Definition: coder.hpp:1352
wordring::whatwg::encoding::gb18030_decoder
gb18030 decoder
Definition: coder.hpp:434
wordring::whatwg::encoding::result_error
Definition: whatwg/encoding/encoding_defs.hpp:28
wordring::whatwg::encoding::result_continue
Definition: whatwg/encoding/encoding_defs.hpp:18
wordring::whatwg::encoding::ISO_2022_JP_encoder
Definition: coder.hpp:1038
wordring::whatwg::encoding::Big5_encoder
Definition: coder.hpp:681
wordring::whatwg::encoding::ISO_2022_JP_decoder
Definition: coder.hpp:836