11 #include <wordring/whatwg/encoding/encoding_defs.hpp>
12 #include <wordring/whatwg/encoding/indexes.hpp>
13 #include <wordring/whatwg/infra/infra.hpp>
// Looks up the entry stored at position `pointer` of the table `index`.
// Returns an empty optional when `pointer` is past the end of the table or
// when the stored value is 4294967295 (0xFFFFFFFF), which this function
// treats as "no code point".
// NOTE(review): the statement returning the found value is not visible in
// this excerpt.
20 template <auto const& index>
21 inline std::optional<std::uint32_t> get_index_code_point(std::uint16_t pointer)
// Pointer beyond the table: nothing to return.
23 if (index.size() <= pointer)
return std::optional<std::uint32_t>{};
24 std::uint32_t cp = index[pointer];
// All-ones entry is reported as absent.
25 if (cp == 4294967295u)
return std::optional<std::uint32_t>{};
// Reverse lookup through two parallel tables: searches `index_0` for
// `code_point` and, on an exact match, returns the element of `index_1`
// located at the same position. Returns an empty optional otherwise.
// NOTE(review): std::lower_bound requires index_0 to be sorted ascending;
// presumably the generated tables guarantee that -- confirm.
29 template <auto const& index_0, auto const& index_1>
30 inline std::optional<std::uint16_t> get_index_pointer(std::uint32_t code_point)
// First element of index_0 that is not less than code_point.
32 auto it = std::lower_bound(index_0.cbegin(), index_0.cend(), code_point);
// Exact hit: translate the position into index_1.
34 if (it != index_0.end() && *it == code_point)
return *(index_1.cbegin() + (it - index_0.cbegin()));
// No exact match.
35 return std::optional<std::uint16_t>{};
// Converts a gb18030 ranges pointer into a code point using the
// index_code_point_gb18030_ranges map (key: pointer offset, value: code point
// offset). Returns an empty optional for pointers rejected below.
38 inline std::optional<std::uint32_t> get_index_gb18030_ranges_code_point(std::uint32_t pointer)
// Pointers strictly between 39419 and 189000, or above 1237575, have no mapping.
41 if ((39419u < pointer && pointer < 189000u) || 1237575u < pointer)
return std::optional<std::uint32_t>{};
// Single hard-coded exception: pointer 7457 maps to U+E7C7.
43 if (pointer == 7457u)
return std::make_optional<std::uint32_t>(0xE7C7u);
// First map entry whose key is not less than pointer.
45 std::multimap<std::uint32_t, std::uint32_t>::const_iterator it = index_code_point_gb18030_ranges.lower_bound(pointer);
// NOTE(review): lower_bound yields cend() for every pointer greater than the
// largest key, so such pointers return empty here unless the table carries a
// trailing sentinel entry -- confirm against the table definition.
47 if (it == index_code_point_gb18030_ranges.cend())
50 return std::optional<std::uint32_t>{};
// Not an exact key: step back to the last entry whose key is below pointer.
// NOTE(review): this assumes `it` is not begin() -- confirm the smallest key
// covers the smallest pointer.
53 if (it->first != pointer) --it;
54 std::uint32_t offset = it->first;
55 std::uint32_t code_point_offset = it->second;
// Code point = range's code point offset + distance of pointer into the range.
57 return std::make_optional<std::uint32_t>(code_point_offset + (pointer - offset));
// Fragment: the enclosing function's signature is not visible in this excerpt.
// The visible statements map `code_point` to a pointer through the
// index_pointer_gb18030_ranges map, mirroring the pointer-to-code-point lookup.
// Hard-coded exception: U+E7C7 maps to pointer 7457.
68 if (code_point == 0xE7C7u)
return std::make_optional<std::uint32_t>(7457u);
// First map entry whose key is not less than code_point.
71 std::multimap<std::uint32_t, std::uint32_t>::const_iterator it = index_pointer_gb18030_ranges.lower_bound(code_point);
// NOTE(review): cend() is returned for any code point above the largest key;
// such inputs yield empty here -- confirm the table has a trailing entry.
73 if (it == index_pointer_gb18030_ranges.cend())
76 return std::optional<std::uint32_t>{};
// Not an exact key: step back to the preceding entry.
79 if (it->first != code_point) --it;
80 std::uint32_t offset = it->first;
81 std::uint32_t pointer_offset = it->second;
// Pointer = range's pointer offset + distance of code_point into the range.
84 return std::make_optional<std::uint32_t>(pointer_offset + (code_point - offset));
// Returns the pointer for `code_point` by delegating to get_index_pointer with
// the index_pointer_Shift_JIS_0 / index_pointer_Shift_JIS_1 table pair; empty
// when the code point is not present.
87 inline std::optional<std::uint16_t> get_index_shift_jis_pointer(std::uint32_t code_point)
89 return get_index_pointer<index_pointer_Shift_JIS_0, index_pointer_Shift_JIS_1>(code_point);
// Returns the pointer for `code_point` by delegating to get_index_pointer with
// the index_pointer_big5_0 / index_pointer_big5_1 table pair; empty when the
// code point is not present.
92 inline std::optional<std::uint16_t> get_index_big5_pointer(std::uint32_t code_point)
94 return get_index_pointer<index_pointer_big5_0, index_pointer_big5_1>(code_point);
// Fragment of what appears to be the UTF-8 decoder (class header, braces and
// several statements are not visible in this excerpt).
// Constructor initializer list: no partial sequence, and the default
// continuation-byte window 0x80..0xBF.
112 : UTF_8_code_point(0)
113 , UTF_8_bytes_seen(0)
114 , UTF_8_bytes_needed(0)
115 , UTF_8_lower_boundary(0x80u)
116 , UTF_8_upper_boundary(0xBFu)
// Handles one input item. `item.m_eoq` flags end of queue and `item.m_value`
// carries the byte. `in` is not used by any line visible here.
122 template <
typename InQueue,
typename IoItem>
123 result_value run(InQueue& in, IoItem item)
// End of queue while a multi-byte sequence is still open: clear the pending
// count. NOTE(review): the value returned on this path is not visible here.
126 if (item.m_eoq && UTF_8_bytes_needed != 0)
128 UTF_8_bytes_needed = 0;
// Convert the item's value to its unsigned counterpart, then narrow to a byte.
134 std::uint8_t
byte =
static_cast<std::make_unsigned_t<decltype(item.m_value)
>>(item.m_value);
// No sequence in progress: `byte` is examined as a lead byte.
136 if (UTF_8_bytes_needed == 0)
// 0x00..0x7F is returned unchanged as a code point.
// NOTE(review): `0 <= byte` is always true for std::uint8_t.
138 if (0 <=
byte &&
byte <= 0x7Fu)
return static_cast<std::uint32_t
>(byte);
// 0xC2..0xDF: one more byte expected; keep the low 5 bits.
139 if (0xC2u <=
byte &&
byte <= 0xDFu)
141 UTF_8_bytes_needed = 1;
142 UTF_8_code_point =
byte & 0x1Fu;
// 0xE0..0xEF: two more bytes expected; keep the low 4 bits.
144 else if (0xE0u <=
byte &&
byte <= 0xEFu)
// 0xE0 raises the lower bound and 0xED lowers the upper bound for the next byte.
146 if (
byte == 0xE0u) UTF_8_lower_boundary = 0xA0u;
147 if (
byte == 0xEDu) UTF_8_upper_boundary = 0x9Fu;
148 UTF_8_bytes_needed = 2;
149 UTF_8_code_point =
byte & 0xFu;
// 0xF0..0xF4: three more bytes expected; keep the low 3 bits.
151 else if (0xF0u <=
byte &&
byte <= 0xF4u)
// 0xF0 raises the lower bound and 0xF4 lowers the upper bound for the next byte.
153 if (
byte == 0xF0u) UTF_8_lower_boundary = 0x90u;
154 if (
byte == 0xF4u) UTF_8_upper_boundary = 0x8Fu;
155 UTF_8_bytes_needed = 3;
156 UTF_8_code_point =
byte & 0x7u;
// Byte outside the current window: reset all decoder state to its initial
// values. NOTE(review): what is returned afterwards is not visible here.
163 if (!(UTF_8_lower_boundary <=
byte &&
byte <= UTF_8_upper_boundary))
165 UTF_8_code_point = 0;
166 UTF_8_bytes_seen = 0;
167 UTF_8_bytes_needed = 0;
168 UTF_8_lower_boundary = 0x80u;
169 UTF_8_upper_boundary = 0xBFu;
// Valid continuation byte: restore the default window for the next byte.
176 UTF_8_lower_boundary = 0x80u;
177 UTF_8_upper_boundary = 0xBFu;
// Append the byte's low six bits to the accumulated code point.
179 UTF_8_code_point = (UTF_8_code_point << 6) | (
byte & 0x3Fu);
// Copy out the accumulated code point and clear the per-sequence state.
// NOTE(review): the condition guarding this step and the return statement are
// not visible in this excerpt.
185 std::uint32_t cp = UTF_8_code_point;
187 UTF_8_code_point = 0;
188 UTF_8_bytes_seen = 0;
189 UTF_8_bytes_needed = 0;
// Decoder state.
// Code point accumulated so far.
195 uint32_t UTF_8_code_point;
// Initialized and reset to 0 in the visible lines; its increment is not visible.
196 uint32_t UTF_8_bytes_seen;
// Number of bytes expected for the current sequence; 0 means none in progress.
197 uint32_t UTF_8_bytes_needed;
// Inclusive window the next byte of a sequence must fall into.
198 uint32_t UTF_8_lower_boundary;
199 uint32_t UTF_8_upper_boundary;
// Fragment of what appears to be the UTF-8 encoder (class header, braces and
// some statements are not visible in this excerpt).
// Helper: encodes `cp` into an `Array` of bytes. `result` and the loop around
// the continuation-byte lines are declared on lines not visible here.
209 template <
typename Array>
210 Array run(std::uint32_t cp)
// `count` is the array size minus one.
214 std::uint32_t count = result.size() - 1;
// Lead-byte marker selected by count: 0xC0, 0xE0 or 0xF0.
215 std::uint32_t offset = 0;
216 if (count == 1) offset = 0xC0u;
217 else if (count == 2) offset = 0xE0u;
218 else if (count == 3) offset = 0xF0u;
// First byte: the bits above the low 6*count bits, plus the marker.
220 result[0] = (cp >> (6 * count)) + offset;
// Later byte: 0x80 combined with six bits of cp selected by the shift.
// NOTE(review): the declaration and update of `i` and `count` are not visible.
225 std::uint32_t temp = cp >> (6 * (count - 1));
226 result[i] = 0x80u | (temp & 0x3Fu);
// Entry point: encodes the code point in `item.m_value`. `in` is not used by
// any line visible here.
235 template <
typename InQueue,
typename IoItem>
236 result_value run(InQueue& in, IoItem item)
241 std::uint32_t cp = item.m_value;
// ASCII code points are emitted as a single byte.
243 if (is_ascii_code_point(cp))
return static_cast<std::uint8_t
>(cp);
// Select the output width from the code point range (2, 3 or 4 bytes).
// NOTE(review): the result for values above 0x10FFFF is not visible here.
245 if (0x80u <= cp && cp <= 0x7FFu)
return run<std::array<std::uint8_t, 2>>(cp);
246 else if (0x800u <= cp && cp <= 0xFFFFu)
return run<std::array<std::uint8_t, 3>>(cp);
247 else if (0x10000u <= cp && cp <= 0x10FFFFu)
return run<std::array<std::uint8_t, 4>>(cp);
// Fragment of a decoder parameterized on a code point table `index`
// (presumably the single-byte decoder; the class header is not visible).
264 template <auto const& index>
// Decodes one byte from `item.m_value`.
268 template <
typename InQueue,
typename IoItem>
269 result_value run(InQueue& in, IoItem item)
274 std::uint8_t
byte =
static_cast<std::uint8_t
>(item.m_value);
// ASCII bytes are returned unchanged as code points.
276 if (is_ascii_byte(
byte))
return static_cast<std::uint32_t
>(
byte);
// Other bytes are looked up in `index` at position (byte - 0x80).
// NOTE(review): handling of an empty lookup result is not visible here.
278 std::optional<std::uint32_t> cp = get_index_code_point<index>(
byte - 0x80u);
// Fragment of an encoder parameterized on a reverse-lookup table pair
// (presumably the single-byte encoder; the class header is not visible).
290 template <auto const& index_0, auto const& index_1>
// Encodes the code point in `item.m_value` as one byte.
294 template <
typename InQueue,
typename IoItem>
295 result_value run(InQueue& in, IoItem item)
300 std::uint32_t cp = item.m_value;
// ASCII code points are emitted unchanged.
302 if (is_ascii_code_point(cp))
return static_cast<std::uint8_t
>(cp);
// Despite its name, `byte` holds the optional pointer found for cp.
304 std::optional<std::uint16_t>
byte = get_index_pointer<index_0, index_1>(cp);
// Output byte is pointer + 0x80.
// NOTE(review): the check for an empty lookup result is not visible here;
// value() throws std::bad_optional_access if reached with an empty optional.
308 return static_cast<std::uint8_t
>(
byte.value() + 0x80u);
// Fragment of what appears to be the gb18030 decoder (class header, braces and
// several statements are not visible in this excerpt). Up to three pending
// bytes are kept in gb18030_first / gb18030_second / gb18030_third.
444 template <
typename InQueue,
typename IoItem>
445 result_value run(InQueue& in, IoItem item)
// Nothing pending: decoding is finished.
// NOTE(review): presumably guarded by an end-of-queue check on a line that is
// not visible -- confirm.
450 if (gb18030_first == 0 && gb18030_second == 0 && gb18030_third == 0)
return result_finished{};
456 std::uint8_t
byte = item.m_value;
// Three bytes pending: `byte` is the fourth byte of a four-byte sequence.
458 if (gb18030_third != 0)
// Fourth byte must be 0x30..0x39; otherwise the second, third and current
// bytes are pushed back onto the input.
460 if (!(0x30u <=
byte &&
byte <= 0x39u))
462 std::array<std::uint8_t, 3> a = {
static_cast<std::uint8_t
>(gb18030_second),
static_cast<std::uint8_t
>(gb18030_third),
byte };
463 in.prepend(a.begin(), a.end());
// Combine the four bytes into a ranges pointer and look up its code point.
467 std::optional<std::uint32_t> cp = get_index_gb18030_ranges_code_point(
468 ((gb18030_first - 0x81u) * 12600) + ((gb18030_second - 0x30u) * 1260) + ((gb18030_third - 0x81u) * 10) +
byte - 0x30u);
// Two bytes pending: `byte` is a candidate third byte.
474 if (gb18030_second != 0)
// 0x81..0xFE is stored as the third byte.
476 if (0x81u <=
byte &&
byte <= 0xFEu)
478 gb18030_third = byte;
// Otherwise the second and current bytes are pushed back onto the input.
481 std::array<std::uint8_t, 2> a = {
static_cast<std::uint8_t
>(gb18030_second),
byte };
482 in.prepend(a.begin(), a.end());
// One byte pending: `byte` is a candidate second byte.
488 if (gb18030_first != 0)
// 0x30..0x39 starts a four-byte sequence.
490 if (0x30u <=
byte &&
byte <= 0x39u)
492 gb18030_second = byte;
// Otherwise treat lead + byte as a two-byte sequence.
495 std::uint32_t lead = gb18030_first;
496 std::optional<uint32_t> pointer{};
// Trail offset is 0x40 below 0x7F and 0x41 otherwise.
498 std::uint32_t offset = (
byte < 0x7Fu) ? 0x40u : 0x41u;
// Valid trail bytes are 0x40..0x7E and 0x80..0xFE.
499 if ((0x40u <=
byte &&
byte <= 0x7Eu) || (0x80u <=
byte &&
byte <= 0xFEu)) pointer = (lead - 0x81) * 190 + (
byte - offset);
500 std::optional<uint32_t> cp{};
501 if (pointer) cp = get_index_code_point<index_code_point_gb18030>(pointer.value());
502 if (cp)
return cp.value();
// Lookup failed: an ASCII byte is pushed back so it is processed again.
503 if (is_ascii_byte(
byte)) in.prepend(
byte);
// Nothing pending: ASCII bytes are returned unchanged.
507 if (is_ascii_byte(
byte))
return static_cast<std::uint32_t
>(byte);
// 0x80 maps to U+20AC.
509 if (
byte == 0x80u)
return static_cast<std::uint32_t
>(0x20ACu);
// 0x81..0xFE is stored as the first byte of a multi-byte sequence.
511 if (0x81u <=
byte &&
byte <= 0xFEu)
513 gb18030_first = byte;
// NOTE(review): the body of reset() is not visible in this excerpt.
521 void reset() noexcept
// Pending bytes of the current sequence; 0 means "not set".
529 std::uint32_t gb18030_first;
530 std::uint32_t gb18030_second;
531 std::uint32_t gb18030_third;
// Fragment of what appears to be the gb18030 encoder; GBK_flag selects the GBK
// variant (class header, braces and several statements are not visible).
538 template <
bool GBK_flag>
// Encodes the code point in `item.m_value`.
542 template <
typename InQueue,
typename IoItem>
543 result_value run(InQueue& in, IoItem item)
548 std::uint32_t cp = item.m_value;
// ASCII code points are emitted as one byte.
550 if (is_ascii_code_point(cp))
return result_byte{
static_cast<std::uint8_t
>(cp) };
// GBK variant only: U+20AC is emitted as the single byte 0x80.
554 if constexpr (GBK_flag) {
if (cp == 0x20ACu)
return result_byte{ 0x80u }; };
// Two-byte form: look up the pointer in the gb18030 index.
556 std::optional<uint32_t> pointer{ get_index_pointer<index_pointer_gb18030_0, index_pointer_gb18030_1>(cp) };
558 if (pointer.has_value())
// lead = pointer / 190 + 0x81; trail = pointer % 190 plus 0x40 (below 0x3F) or 0x41.
560 std::uint32_t lead = pointer.value() / 190 + 0x81u;
561 std::uint32_t trail = pointer.value() % 190;
562 std::uint32_t offset = trail < 0x3Fu ? 0x40u : 0x41u;
563 return result_bytes_2{
static_cast<std::uint8_t
>(lead),
static_cast<std::uint8_t
>(trail + offset) };
// NOTE(review): the lines between the two-byte return and this check (where
// `pointer` is presumably reassigned from the ranges table) and the action
// taken when it is empty are not visible in this excerpt.
570 if (!pointer.has_value())
// Four-byte form: split the pointer into digits of base 12600 / 1260 / 10 / 1.
576 std::uint8_t byte1 =
static_cast<std::uint8_t
>(pointer.value() / (10 * 126 * 10));
578 pointer = pointer.value() % (10 * 126 * 10);
580 std::uint8_t byte2 =
static_cast<std::uint8_t
>(pointer.value() / (10 * 126));
582 pointer = pointer.value() % (10 * 126);
584 std::uint8_t byte3 =
static_cast<std::uint8_t
>(pointer.value() / 10);
586 std::uint8_t byte4 =
static_cast<std::uint8_t
>(pointer.value() % 10);
// Bias the digits: 0x81 for bytes 1 and 3, 0x30 for bytes 2 and 4.
588 return result_bytes_4{
589 static_cast<std::uint8_t
>(byte1 + 0x81u),
590 static_cast<std::uint8_t
>(byte2 + 0x30u),
591 static_cast<std::uint8_t
>(byte3 + 0x81u),
592 static_cast<std::uint8_t
>(byte4 + 0x30u) };
// Fragment of what appears to be the Big5 decoder (class header, braces and
// several statements are not visible in this excerpt). `big5_lead` holds the
// pending lead byte, 0 meaning none.
// Handles one input item: `item.m_eoq` flags end of queue, `item.m_value`
// carries the byte, and `in` receives bytes that must be processed again.
617 template <
typename InQueue,
typename IoItem>
618 result_value run(InQueue& in, IoItem item)
// End of queue with a lead byte still pending.
// NOTE(review): the statements executed on this path are not visible here.
621 if (item.m_eoq && big5_lead != 0)
629 std::uint8_t
byte = item.m_value;
633 std::uint32_t lead = big5_lead;
634 std::optional<std::uint16_t> pointer{};
// Trail offset is 0x40 for bytes below 0x7F and 0x62 otherwise.
637 std::uint32_t offset =
byte < 0x7Fu ? 0x40u : 0x62u;
// Valid trail bytes are 0x40..0x7E and 0xA1..0xFE. The upper range was missing
// from the original condition, so every trail byte that uses the 0x62 offset
// was rejected.
639 if ((0x40u <=
byte &&
byte <= 0x7Eu) || (0xA1u <=
byte &&
byte <= 0xFEu))
641 pointer = (lead - 0x81u) * 157 + (
byte - offset);
// Four pointers decode to a base letter followed by a combining mark.
643 switch (pointer.value())
645 case 1133u:
return result_code_points_2{ 0xCAu, 0x304u };
646 case 1135u:
return result_code_points_2{ 0xCAu, 0x30Cu };
647 case 1164u:
return result_code_points_2{ 0xEAu, 0x304u };
648 case 1166u:
return result_code_points_2{ 0xEAu, 0x30Cu };
// Regular case: look the pointer up in the Big5 index.
652 std::optional<std::uint32_t> cp{};
653 if (pointer.has_value()) cp = get_index_code_point<index_code_point_big5>(pointer.value());
655 if (cp.has_value())
return result_code_point{ cp.value() };
// Lookup failed: an ASCII byte is pushed back so it is processed again.
657 if (is_ascii_byte(
byte)) in.prepend(
byte);
// No lead pending: ASCII bytes are returned unchanged.
662 if (is_ascii_byte(
byte))
return result_code_point{
byte };
// 0x81..0xFE is a lead byte.
// NOTE(review): the statements storing it are not visible in this excerpt.
664 if (0x81u <=
byte &&
byte <= 0xFEu)
// Fragment of what appears to be the Big5 encoder (class header, braces and
// some statements are not visible in this excerpt).
// Encodes the code point in `item.m_value`.
684 template <
typename InQueue,
typename IoItem>
685 result_value run(InQueue& in, IoItem item)
690 uint32_t cp = item.m_value;
// ASCII code points are emitted as one byte.
692 if (is_ascii_code_point(cp))
return result_byte{
static_cast<std::uint8_t
>(cp) };
694 std::optional<std::uint16_t> pointer = get_index_big5_pointer(cp);
// NOTE(review): the check for an empty pointer is not visible here; value()
// throws std::bad_optional_access if reached with an empty optional.
// lead = pointer / 157 + 0x81.
698 std::uint8_t lead =
static_cast<std::uint8_t
>(pointer.value() / 157 + 0x81u);
// trail = pointer % 157, biased by 0x40 (below 0x3F) or 0x62.
700 std::uint8_t trail =
static_cast<std::uint8_t
>(pointer.value() % 157);
702 std::uint8_t offset =
static_cast<std::uint8_t
>(trail < 0x3Fu ? 0x40u : 0x62u);
704 return result_bytes_2{ lead,
static_cast<std::uint8_t
>(trail + offset) };
// Fragment of what appears to be the EUC-JP decoder (class header, braces and
// several statements are not visible in this excerpt). `EUC_JP_lead` holds the
// pending lead byte, 0 meaning none.
723 template <
typename InQueue,
typename IoItem>
724 result_value run(InQueue& in, IoItem item)
// End of queue with a lead byte still pending.
// NOTE(review): the statements executed on this path are not visible here.
727 if (item.m_eoq && EUC_JP_lead != 0)
735 std::uint8_t
byte =
static_cast<uint8_t
>(item.m_value);
// Lead 0x8E followed by 0xA1..0xDF maps to U+FF61 + (byte - 0xA1).
737 if (EUC_JP_lead == 0x8Eu && (0xA1u <=
byte &&
byte <= 0xDFu))
740 return result_code_point{ 0xFF61u - 0xA1u +
byte };
// Lead 0x8F followed by 0xA1..0xFE: use the jis0212 table for this sequence.
743 if (EUC_JP_lead == 0x8Fu && (0xA1u <=
byte &&
byte <= 0xFEu))
745 EUC_JP_jis0212_flag =
true;
// Lead pending: try to complete a two-byte sequence.
750 if (EUC_JP_lead != 0)
752 std::uint8_t lead = EUC_JP_lead;
755 std::optional<std::uint32_t> cp{};
// Both bytes must be in 0xA1..0xFE; pointer = (lead - 0xA1) * 94 + byte - 0xA1.
757 if ((0xA1u <= lead && lead <= 0xFEu) && (0xA1u <=
byte &&
byte <= 0xFEu))
759 std::uint16_t pointer{
static_cast<std::uint16_t
>((lead - 0xA1u) * 94 +
byte - 0xA1u) };
// The flag selects the jis0212 table; otherwise jis0208 is used.
760 if (EUC_JP_jis0212_flag) cp = get_index_code_point<index_code_point_jis0212>(pointer);
761 else cp = get_index_code_point<index_code_point_jis0208>(pointer);
// The flag only applies to a single sequence.
764 EUC_JP_jis0212_flag =
false;
766 if (cp.has_value())
return result_code_point{ cp.value() };
// Lookup failed: an ASCII byte is pushed back so it is processed again.
768 if (is_ascii_byte(
byte)) in.prepend(
byte);
// No lead pending: ASCII bytes are returned unchanged.
773 if (is_ascii_byte(
byte))
return result_code_point{
byte };
// 0x8E, 0x8F and 0xA1..0xFE are lead bytes.
// NOTE(review): the statements storing the lead are not visible here.
775 if (
byte == 0x8Eu ||
byte == 0x8Fu || (0xA1u <=
byte &&
byte <= 0xFEu))
// True while the current sequence must be decoded with the jis0212 table.
785 bool EUC_JP_jis0212_flag;
// Fragment of what appears to be the EUC-JP encoder (class header, braces and
// some statements are not visible in this excerpt).
// Encodes the code point in `item.m_value`.
796 template <
typename InQueue,
typename IoItem>
797 result_value run(InQueue& in, IoItem item)
802 std::uint32_t cp = item.m_value;
// ASCII code points are emitted as one byte.
804 if (is_ascii_code_point(cp))
return result_byte{
static_cast<std::uint8_t
>(cp) };
// U+00A5 is emitted as 0x5C and U+203E as 0x7E.
806 if (cp == 0xA5u)
return result_byte{ 0x5Cu };
808 if (cp == 0x203Eu)
return result_byte{ 0x7Eu };
// U+FF61..U+FF9F is emitted as 0x8E followed by (cp - 0xFF61 + 0xA1).
810 if (0xFF61u <= cp && cp <= 0xFF9Fu)
return result_bytes_2{ 0x8Eu,
static_cast<uint8_t
>(cp - 0xFF61u + 0xA1u) };
// U+2212 is replaced by U+FF0D before the table lookup.
812 if (cp == 0x2212u) cp = 0xFF0Du;
814 std::optional<std::uint16_t> pointer = get_index_pointer<index_pointer_jis0208_0, index_pointer_jis0208_1>(cp);
// NOTE(review): the check for an empty pointer is not visible here; value()
// throws std::bad_optional_access if reached with an empty optional.
// lead = pointer / 94 + 0xA1; trail = pointer % 94 + 0xA1.
818 std::uint8_t lead =
static_cast<std::uint8_t
>(pointer.value() / 94 + 0xA1u);
820 std::uint8_t trail =
static_cast<std::uint8_t
>(pointer.value() % 94 + 0xA1u);
822 return result_bytes_2{ lead, trail };
// Fragment of what appears to be the ISO-2022-JP decoder (class header, braces,
// some case labels and several statements are not visible in this excerpt).
// Decoder states.
839 enum state : std::uint8_t
841 ASCII, Roman, katakana, Lead_byte, Trail_byte, Escape_start, Escape
// Constructor initializer list: both state fields start at ASCII, no lead byte
// is stored and the output flag is cleared.
846 : ISO_2022_JP_decoder_state(state::ASCII)
847 , ISO_2022_JP_decoder_output_state(state::ASCII)
848 , ISO_2022_JP_lead(0)
849 , ISO_2022_JP_output_flag(
false)
// Handles one input item according to the current decoder state.
853 template <
typename InQueue,
typename IoItem>
854 result_value run(InQueue& in, IoItem item)
856 std::uint8_t
byte = 0;
858 switch (ISO_2022_JP_decoder_state)
// NOTE(review): the case label for this branch is not visible; presumably
// state::ASCII.
862 byte =
static_cast<std::uint8_t
>(item.m_value);
// NOTE(review): the condition guarding this transition is not visible.
865 ISO_2022_JP_decoder_state = state::Escape_start;
// Bytes up to 0x7F other than 0x0E, 0x0F and 0x1B are returned unchanged.
868 else if (
byte <= 0x7Fu &&
byte != 0x0Eu &&
byte != 0x0Fu &&
byte != 0x1Bu)
870 ISO_2022_JP_output_flag =
false;
871 return result_code_point{
byte };
875 ISO_2022_JP_output_flag =
false;
// NOTE(review): the case label for this branch is not visible; presumably
// state::Roman.
881 byte =
static_cast<std::uint8_t
>(item.m_value);
884 ISO_2022_JP_decoder_state = state::Escape_start;
// 0x5C is returned as U+00A5.
887 else if (
byte == 0x5Cu)
889 ISO_2022_JP_output_flag =
false;
890 return result_code_point{ 0xA5u };
// 0x7E is returned as U+203E.
892 else if (
byte == 0x7Eu)
894 ISO_2022_JP_output_flag =
false;
895 return result_code_point{ 0x203Eu };
// Remaining bytes up to 0x7F, minus the excluded values, are returned unchanged.
897 else if (
byte <= 0x7Fu &&
byte != 0x0Eu &&
byte != 0x0Fu &&
byte != 0x1Bu &&
byte != 0x5Cu &&
byte != 0x7Eu)
899 ISO_2022_JP_output_flag =
false;
900 return result_code_point{
byte };
904 ISO_2022_JP_output_flag =
false;
908 case state::katakana:
910 byte =
static_cast<std::uint8_t
>(item.m_value);
913 ISO_2022_JP_decoder_state = state::Escape_start;
// 0x21..0x5F maps to U+FF61 + (byte - 0x21).
916 else if (0x21u <=
byte &&
byte <= 0x5Fu)
918 ISO_2022_JP_output_flag =
false;
919 return result_code_point{ 0xFF61u - 0x21u +
byte };
923 ISO_2022_JP_output_flag =
false;
927 case state::Lead_byte:
929 byte =
static_cast<std::uint8_t
>(item.m_value);
932 ISO_2022_JP_decoder_state = state::Escape_start;
// 0x21..0x7E is stored as the lead byte; the next item is handled as its trail.
935 else if (0x21u <=
byte &&
byte <= 0x7Eu)
937 ISO_2022_JP_output_flag =
false;
938 ISO_2022_JP_lead = byte;
939 ISO_2022_JP_decoder_state = state::Trail_byte;
944 ISO_2022_JP_output_flag =
false;
948 case state::Trail_byte:
// NOTE(review): the condition guarding this transition is not visible.
951 ISO_2022_JP_decoder_state = state::Lead_byte;
957 byte =
static_cast<std::uint8_t
>(item.m_value);
960 ISO_2022_JP_decoder_state = state::Escape_start;
// 0x21..0x7E completes the pair: pointer = (lead - 0x21) * 94 + byte - 0x21,
// looked up in the jis0208 table.
963 else if (0x21u <=
byte &&
byte <= 0x7Eu)
965 ISO_2022_JP_decoder_state = state::Lead_byte;
966 std::optional<std::uint16_t> pointer =
static_cast<uint16_t
>((ISO_2022_JP_lead - 0x21u) * 94 +
byte - 0x21u);
967 std::optional<std::uint32_t> cp = get_index_code_point<index_code_point_jis0208>(pointer.value());
// NOTE(review): a check for an empty `cp` is not visible here; value() throws
// std::bad_optional_access on an empty optional.
969 return result_code_point{ cp.value() };
973 ISO_2022_JP_decoder_state = state::Lead_byte;
977 case state::Escape_start:
980 byte =
static_cast<std::uint8_t
>(item.m_value);
// 0x24 or 0x28 continues the escape sequence; the byte is kept in the lead slot.
981 if (
byte == 0x24u ||
byte == 0x28u)
983 ISO_2022_JP_lead = byte;
984 ISO_2022_JP_decoder_state = state::Escape;
// Otherwise the decoder falls back to the saved output state.
989 ISO_2022_JP_output_flag =
false;
990 ISO_2022_JP_decoder_state = ISO_2022_JP_decoder_output_state;
// NOTE(review): the case label for this branch is not visible; presumably
// state::Escape.
997 byte =
static_cast<std::uint8_t
>(item.m_value);
998 std::uint8_t lead = ISO_2022_JP_lead;
999 ISO_2022_JP_lead = 0;
// Map the (lead, byte) pair of the escape sequence to a target state.
1000 std::optional<state> st{};
1001 if (lead == 0x28u &&
byte == 0x42u) st = state::ASCII;
1002 else if (lead == 0x28u &&
byte == 0x4Au) st = state::Roman;
1003 else if (lead == 0x28u &&
byte == 0x49u) st = state::katakana;
1004 else if (lead == 0x24u && (
byte == 0x40u ||
byte == 0x42u)) st = state::Lead_byte;
// Recognized sequence: switch both state fields and set the output flag,
// keeping its previous value in `output_flag`.
// NOTE(review): the check that `st` holds a value and the use of `output_flag`
// are not visible in this excerpt.
1007 ISO_2022_JP_decoder_state = st.value();
1008 ISO_2022_JP_decoder_output_state = st.value();
1009 bool output_flag = ISO_2022_JP_output_flag;
1010 ISO_2022_JP_output_flag =
true;
// Unrecognized sequence: push both bytes back and return to the output state.
1014 std::array<std::uint8_t, 2> a = { lead,
byte };
1015 in.prepend(a.begin(), a.end());
1016 ISO_2022_JP_output_flag =
false;
1017 ISO_2022_JP_decoder_state = ISO_2022_JP_decoder_output_state;
// Decoder state.
// Current state of the state machine.
1028 state ISO_2022_JP_decoder_state;
// State restored when an escape sequence is abandoned.
1029 state ISO_2022_JP_decoder_output_state;
// Stored lead byte, or the first byte of an escape sequence.
1030 uint8_t ISO_2022_JP_lead;
// Set when an escape sequence is accepted, cleared on the other visible paths.
1031 bool ISO_2022_JP_output_flag;
// Fragment of what appears to be the ISO-2022-JP encoder (class header, braces
// and several statements are not visible in this excerpt).
// Encoder states.
1041 enum state : uint8_t
1043 ASCII, Roman, jis0208
// Constructor initializer list: the encoder starts in the ASCII state.
1048 : ISO_2022_JP_encoder_state(state::ASCII)
// Handles one input item; may emit an escape sequence instead of the encoded
// code point when the state has to change.
1052 template <
typename InQueue,
typename IoItem>
1053 result_value run(InQueue& in, IoItem item)
// End of queue outside the ASCII state: switch back and emit ESC ( B.
1056 if (item.m_eoq && ISO_2022_JP_encoder_state != state::ASCII)
1061 ISO_2022_JP_encoder_state = state::ASCII;
1062 return result_bytes_3{
static_cast<std::uint8_t
>(0x1Bu),
static_cast<std::uint8_t
>(0x28u),
static_cast<std::uint8_t
>(0x42u) };
// End of queue in the ASCII state: finished.
1065 if (item.m_eoq && ISO_2022_JP_encoder_state == state::ASCII)
return result_finished{};
1067 std::uint32_t cp = item.m_value;
// In the ASCII or Roman state, U+000E, U+000F and U+001B yield an error
// carrying U+FFFD.
1069 if ((ISO_2022_JP_encoder_state == state::ASCII || ISO_2022_JP_encoder_state == state::Roman)
1070 && (cp == 0xEu || cp == 0xFu || cp == 0x1Bu))
return result_error{
static_cast<std::uint32_t
>(0xFFFDu) };
// ASCII state: ASCII code points pass through unchanged.
1072 if (ISO_2022_JP_encoder_state == state::ASCII && is_ascii_code_point(cp))
return result_byte{
static_cast<std::uint8_t
>(cp) };
// Roman state: ASCII except 0x5C and 0x7E passes through; U+00A5 is emitted
// as 0x5C and U+203E as 0x7E.
1074 if(ISO_2022_JP_encoder_state == state::Roman
1075 && ((is_ascii_code_point(cp) && cp != 0x5Cu && cp != 0x7Eu) || (cp == 0xA5u || cp == 0x203Eu)))
1077 if (is_ascii_code_point(cp))
return result_byte{
static_cast<std::uint8_t
>(cp) };
1078 if (cp == 0xA5u)
return result_byte{
static_cast<std::uint8_t
>(0x5Cu) };
1079 if (cp == 0x203Eu)
return result_byte{
static_cast<std::uint8_t
>(0x7Eu) };
// ASCII code point outside the ASCII state: switch to ASCII and emit ESC ( B.
// NOTE(review): how the code point is re-queued is not visible here.
1082 if (is_ascii_code_point(cp) && ISO_2022_JP_encoder_state != state::ASCII)
1085 ISO_2022_JP_encoder_state = state::ASCII;
1086 return result_bytes_3{
static_cast<std::uint8_t
>(0x1Bu),
static_cast<std::uint8_t
>(0x28u),
static_cast<std::uint8_t
>(0x42u) };
// U+00A5 or U+203E outside the Roman state: switch to Roman and emit ESC ( J.
1089 if ((cp == 0xA5u || cp == 0x203Eu) && ISO_2022_JP_encoder_state != state::Roman)
1092 ISO_2022_JP_encoder_state = state::Roman;
1093 return result_bytes_3{
static_cast<uint8_t
>(0x1Bu),
static_cast<uint8_t
>(0x28u),
static_cast<uint8_t
>(0x4Au) };
// U+2212 is replaced by U+FF0D before the table lookup.
1096 if (cp == 0x2212u) cp = 0xFF0Du;
// U+FF61..U+FF9F is replaced through the iso_2022_jp_katakana table.
1098 if (0xFF61u <= cp && cp <= 0xFF9Fu) cp = get_index_code_point<index_code_point_iso_2022_jp_katakana>(cp - 0xFF61u).value();
1100 std::optional<std::uint16_t> pointer = get_index_pointer<index_pointer_jis0208_0, index_pointer_jis0208_1>(cp);
// No jis0208 mapping.
// NOTE(review): the inner test reads `!= state::jis0208`, yet the visible
// statements after it switch to ASCII and emit ESC ( B; lines between them are
// not visible, so confirm which branch those statements belong to.
1102 if (!pointer.has_value())
1104 if (ISO_2022_JP_encoder_state != state::jis0208)
1107 ISO_2022_JP_encoder_state = state::ASCII;
1108 return result_bytes_3{
static_cast<std::uint8_t
>(0x1Bu),
static_cast<std::uint8_t
>(0x28u),
static_cast<std::uint8_t
>(0x42u) };
// Mapping found but not yet in the jis0208 state: switch and emit ESC $ B.
1113 if (ISO_2022_JP_encoder_state != state::jis0208)
1116 ISO_2022_JP_encoder_state = state::jis0208;
1117 return result_bytes_3{
static_cast<std::uint8_t
>(0x1Bu),
static_cast<std::uint8_t
>(0x24u),
static_cast<std::uint8_t
>(0x42u) };
// lead = pointer / 94 + 0x21; trail = pointer % 94 + 0x21.
1120 std::uint8_t lead =
static_cast<std::uint8_t
>(pointer.value() / 94 + 0x21u);
1122 std::uint8_t trail =
static_cast<std::uint8_t
>(pointer.value() % 94 + 0x21u);
1124 return result_bytes_2{ lead, trail };
// Current encoder state.
1128 state ISO_2022_JP_encoder_state;
// Fragment of what appears to be the Shift_JIS decoder (class header, braces
// and several statements are not visible in this excerpt). `Shift_JIS_lead`
// holds the pending lead byte, 0 meaning none.
1150 template <
typename InQueue,
typename IoItem>
1151 result_value run(InQueue& in, IoItem item)
// End of queue with a lead byte still pending.
// NOTE(review): the statements executed on this path are not visible here.
1154 if (item.m_eoq && Shift_JIS_lead != 0)
1162 std::uint8_t
byte = item.m_value;
// Lead pending: try to complete a two-byte sequence.
1164 if (Shift_JIS_lead != 0)
1166 std::uint8_t lead = Shift_JIS_lead;
1167 std::optional<std::uint16_t> pointer{};
// Trail offset: 0x40 below 0x7F, otherwise 0x41.
1170 std::uint8_t offset =
byte < 0x7Fu ? 0x40u : 0x41u;
// Lead offset: 0x81 below 0xA0, otherwise 0xC1.
1172 std::uint8_t lead_offset = lead < 0xA0u ? 0x81u : 0xC1u;
// Valid trail bytes are 0x40..0x7E and 0x80..0xFC.
1174 if ((0x40u <=
byte &&
byte <= 0x7Eu) || (0x80u <=
byte &&
byte <= 0xFCu))
1175 pointer = (lead - lead_offset) * 188 +
byte - offset;
// Pointers 8836..10715 map straight to U+E000 + (pointer - 8836).
1177 if (pointer.has_value() && (8836 <= pointer.value() && pointer.value() <= 10715))
1178 return result_code_point{ 0xE000u - 8836 + pointer.value() };
// Otherwise look the pointer up in the jis0208 table.
1180 std::optional<uint32_t> cp{};
1181 if (pointer.has_value()) cp = get_index_code_point<index_code_point_jis0208>(pointer.value());
1183 if (cp.has_value())
return result_code_point{ cp.value() };
// Lookup failed: an ASCII byte is pushed back so it is processed again.
1185 if (is_ascii_byte(
byte)) in.prepend(
byte);
// No lead pending: ASCII bytes and 0x80 are returned unchanged.
1190 if (is_ascii_byte(
byte) ||
byte == 0x80u)
return result_code_point{
byte };
// 0xA1..0xDF maps to U+FF61 + (byte - 0xA1).
1192 if (0xA1u <=
byte &&
byte <= 0xDFu)
return result_code_point{ 0xFF61u - 0xA1u +
byte };
// 0x81..0x9F and 0xE0..0xFC are lead bytes.
1194 if ((0x81u <=
byte &&
byte <= 0x9Fu) || (0xE0u <=
byte &&
byte <= 0xFCu))
1196 Shift_JIS_lead = byte;
// Pending lead byte; 0 means none.
1204 uint8_t Shift_JIS_lead;
// Fragment of what appears to be the Shift_JIS encoder (class header, braces
// and some statements are not visible in this excerpt).
// Encodes the code point in `item.m_value`.
1214 template <
typename InQueue,
typename IoItem>
1215 result_value run(InQueue& in, IoItem item)
1220 std::uint32_t cp = item.m_value;
// ASCII code points and U+0080 are emitted as one byte.
1222 if (is_ascii_code_point(cp) || cp == 0x80u)
return result_byte{
static_cast<std::uint8_t
>(cp) };
// U+00A5 is emitted as 0x5C and U+203E as 0x7E.
1224 if (cp == 0xA5u)
return result_byte{
static_cast<std::uint8_t
>(0x5Cu) };
1226 if (cp == 0x203Eu)
return result_byte{
static_cast<std::uint8_t
>(0x7Eu) };
// U+FF61..U+FF9F is emitted as the single byte cp - 0xFF61 + 0xA1.
1228 if (0xFF61u <= cp && cp <= 0xFF9Fu)
return result_byte{
static_cast<std::uint8_t
>(cp - 0xFF61u + 0xA1u) };
// U+2212 is replaced by U+FF0D before the table lookup.
1230 if (cp == 0x2212u) cp = 0xFF0Du;
1232 std::optional<std::uint16_t> pointer = get_index_shift_jis_pointer(cp);
// NOTE(review): the check for an empty pointer is not visible here; value()
// throws std::bad_optional_access if reached with an empty optional.
// lead = pointer / 188, biased by 0x81 (below 0x1F) or 0xC1.
1236 std::uint8_t lead = pointer.value() / 188;
1238 std::uint8_t lead_offset = lead < 0x1Fu ? 0x81u : 0xC1u;
// trail = pointer % 188, biased by 0x40 (below 0x3F) or 0x41.
1240 std::uint8_t trail = pointer.value() % 188;
1242 std::uint8_t offset = trail < 0x3Fu ? 0x40u : 0x41u;
1244 return result_bytes_2{
static_cast<std::uint8_t
>(lead + lead_offset),
static_cast<std::uint8_t
>(trail + offset) };
// Fragment of what appears to be the EUC-KR decoder (class header, braces and
// several statements are not visible in this excerpt). `EUC_KR_lead` holds the
// pending lead byte, 0 meaning none.
1266 template <
typename InQueue,
typename IoItem>
1267 result_value run(InQueue& in, IoItem item)
// End of queue with a lead byte still pending.
// NOTE(review): the statements executed on this path are not visible here.
1270 if (item.m_eoq && EUC_KR_lead != 0)
1278 std::uint8_t
byte = item.m_value;
// Lead pending: try to complete a two-byte sequence.
1280 if (EUC_KR_lead != 0)
1282 std::uint8_t lead = EUC_KR_lead;
1283 std::optional<std::uint16_t> pointer{};
// Trail bytes 0x41..0xFE: pointer = (lead - 0x81) * 190 + (byte - 0x41).
1286 if (0x41u <=
byte &&
byte <= 0xFEu) pointer = (lead - 0x81u) * 190 + (
byte - 0x41u);
1288 std::optional<uint32_t> cp{};
1289 if (pointer.has_value()) cp = get_index_code_point<index_code_point_euc_kr>(pointer.value());
1291 if (cp.has_value())
return result_code_point{ cp.value() };
// Lookup failed: an ASCII byte is pushed back so it is processed again.
1293 if (is_ascii_byte(
byte)) in.prepend(
byte);
// No lead pending: ASCII bytes are returned unchanged.
1298 if (is_ascii_byte(
byte))
return result_code_point{
byte };
// 0x81..0xFE is a lead byte.
// NOTE(review): the statements storing it are not visible in this excerpt.
1300 if (0x81u <=
byte &&
byte <= 0xFEu)
// Pending lead byte; 0 means none.
1310 std::uint8_t EUC_KR_lead;
// Fragment of what appears to be the EUC-KR encoder (class header, braces and
// some statements are not visible in this excerpt).
// Encodes the code point in `item.m_value`.
1320 template <
typename InQueue,
typename IoItem>
1321 result_value run(InQueue& in, IoItem item)
1326 std::uint32_t cp = item.m_value;
// ASCII code points are emitted as one byte.
1328 if (is_ascii_code_point(cp))
return result_byte{
static_cast<std::uint8_t
>(cp) };
1330 std::optional<std::uint16_t> pointer = get_index_pointer<index_pointer_euc_kr_0, index_pointer_euc_kr_1>(cp);
// NOTE(review): the check for an empty pointer is not visible here; value()
// throws std::bad_optional_access if reached with an empty optional.
// lead = pointer / 190 + 0x81; trail = pointer % 190 + 0x41.
1334 std::uint8_t lead = pointer.value() / 190 + 0x81u;
1336 std::uint8_t trail = pointer.value() % 190 + 0x41u;
1338 return result_bytes_2{ lead, trail };
// Fragment of what appears to be the replacement decoder (class header, braces
// and return statements are not visible in this excerpt).
// Constructor initializer list: the flag starts cleared.
1356 : replacement_error_returned_flag(
false)
// Handles one input item.
1360 template <
typename InQueue,
typename IoItem>
1361 result_value run(InQueue& in, IoItem item)
// First time this branch is reached: set the flag so it is not taken again.
// NOTE(review): the values returned by run() are not visible in this excerpt.
1366 if (replacement_error_returned_flag ==
false)
1368 replacement_error_returned_flag =
true;
// True once the branch above has run.
1376 bool replacement_error_returned_flag;
// Fragment of what appears to be the shared UTF-16 decoder (class header,
// braces and several statements are not visible in this excerpt).
// UTF_16BE_decoder_flag selects big-endian byte order when true and
// little-endian when false.
1383 template<
bool UTF_16BE_decoder_flag = false>
// Handles one input item: `item.m_eoq` flags end of queue, `item.m_value`
// carries the byte, and `in` receives bytes that must be processed again.
1387 template <
typename InQueue,
typename IoItem>
1388 result_value run(InQueue& in, IoItem item)
// End of queue with a dangling lead byte or lead surrogate: discard both.
// NOTE(review): the value returned on this path is not visible here.
1391 if (item.m_eoq && (UTF_16_lead_byte.has_value() || UTF_16_lead_surrogate.has_value()))
1393 UTF_16_lead_byte.reset();
1394 UTF_16_lead_surrogate.reset();
// End of queue with nothing pending.
1398 if (item.m_eoq && !UTF_16_lead_byte.has_value() && !UTF_16_lead_surrogate.has_value())
1403 std::uint8_t
byte = item.m_value;
// First byte of a code unit: remember it and wait for the second one.
1405 if (!UTF_16_lead_byte.has_value())
1407 UTF_16_lead_byte = byte;
// Second byte: assemble the 16-bit code unit in the selected byte order.
1411 std::uint16_t code_unit = 0;
1412 if constexpr (UTF_16BE_decoder_flag) code_unit = (UTF_16_lead_byte.value() << 8) +
byte;
1413 else code_unit = (
byte << 8) + UTF_16_lead_byte.value();
1414 UTF_16_lead_byte.reset();
// A lead surrogate is pending: this code unit should be its trail surrogate.
1416 if (UTF_16_lead_surrogate.has_value())
1418 std::uint16_t lead_surrogate = UTF_16_lead_surrogate.value();
1419 UTF_16_lead_surrogate.reset();
// Trail surrogate 0xDC00..0xDFFF: combine the pair into one code point.
1421 if (0xDC00u <= code_unit && code_unit <= 0xDFFFu)
1422 return result_code_point{ 0x10000u + ((lead_surrogate - 0xD800u) << 10) + (code_unit - 0xDC00u) };
// Not a trail surrogate: split the code unit back into its two bytes so they
// can be pushed back onto the input. byte1 is the HIGH byte and needs a right
// shift; the original used `code_unit > 8`, a comparison that yields 0 or 1.
1424 std::uint8_t byte1 = static_cast<std::uint8_t>(code_unit >> 8);
1426 std::uint8_t byte2 = code_unit & 0x00FF;
1428 std::array<std::uint8_t, 2> bytes{};
// NOTE(review): the lines filling `bytes` in the selected byte order are not
// visible in this excerpt.
1429 if constexpr (UTF_16BE_decoder_flag)
1440 in.prepend(bytes.begin(), bytes.end());
// Lead surrogate 0xD800..0xDBFF: remember it and wait for the trail.
1444 if (0xD800u <= code_unit && code_unit <= 0xDBFFu)
1446 UTF_16_lead_surrogate = code_unit;
// A trail surrogate with no pending lead is an error.
1450 if (0xDC00u <= code_unit && code_unit <= 0xDFFFu)
return result_error{};
// Any other code unit is itself the code point.
1452 return result_code_point{ code_unit };
// First byte of the code unit being assembled, if any.
1456 std::optional<std::uint8_t> UTF_16_lead_byte;
// Pending lead surrogate awaiting its trail, if any.
1457 std::optional<std::uint16_t> UTF_16_lead_surrogate;
// Fragment of what appears to be the x-user-defined decoder (class header and
// braces are not visible in this excerpt).
// Decodes one byte from `item.m_value`.
1470 template <
typename InQueue,
typename IoItem>
1471 result_value run(InQueue& in, IoItem item)
1476 std::uint8_t
byte = item.m_value;
// ASCII bytes are returned unchanged.
1478 if (is_ascii_byte(
byte))
return result_code_point{
byte };
// Remaining bytes map to U+F780 + (byte - 0x80).
1480 return result_code_point{ 0xF780u +
byte - 0x80u };
// Fragment of what appears to be the x-user-defined encoder (class header and
// braces are not visible in this excerpt).
// Encodes the code point in `item.m_value` as one byte.
1491 template <
typename InQueue,
typename IoItem>
1492 result_value run(InQueue& in, IoItem item)
1497 std::uint32_t cp = item.m_value;
// ASCII code points are emitted unchanged.
1499 if (is_ascii_code_point(cp))
return result_byte{
static_cast<std::uint8_t
>(cp) };
// U+F780..U+F7FF is emitted as cp - 0xF780 + 0x80.
// NOTE(review): the result for any other code point is not visible here.
1501 if (0xF780u <= cp && cp <= 0xF7FFu)
return result_byte{
static_cast<std::uint8_t
>(cp - 0xF780u + 0x80u) };
// Stub run(): ignores both arguments and always returns result_error{}.
// NOTE(review): the enclosing class is not visible in this excerpt.
1510 template <
typename InQueue,
typename IoItem>
1511 result_value run(InQueue& in, IoItem item) {
return result_error{}; }
// Stub run(): ignores both arguments and always returns result_error{}.
// NOTE(review): the enclosing class is not visible in this excerpt.
1517 template <
typename InQueue,
typename IoItem>
1518 result_value run(InQueue& in, IoItem item) {
return result_error{}; }
1521 using coder = std::variant<