libwordring
host.hpp
1 #pragma once
2 
3 // ------------------------------------------------------------------------------------------------
4 // 3. Hosts (domains and IP addresses)
5 //
6 // https://url.spec.whatwg.org/#hosts-(domains-and-ip-addresses)
7 // https://triple-underscore.github.io/URL-ja.html#hosts-(domains-and-ip-addresses)
8 // ------------------------------------------------------------------------------------------------
9 
#include <wordring/whatwg/url/infra.hpp>
#include <wordring/whatwg/url/url_defs.hpp>

#include <wordring/whatwg/encoding/encoding.hpp>
#include <wordring/whatwg/infra/infra.hpp>
#include <wordring/whatwg/infra/unicode.hpp>

#include <algorithm>
#include <array>
#include <cassert>
#include <charconv>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <memory>
#include <optional>
#include <string>
#include <string_view>
#include <system_error>
#include <type_traits>
#include <utility>
#include <variant>
#include <vector>
24 
25 namespace wordring::whatwg
26 {
27 
28  // --------------------------------------------------------------------------------------------
29  // 3.1. Host representation
30  //
31  // https://url.spec.whatwg.org/#host-representation
32  // --------------------------------------------------------------------------------------------
33 
34  enum class host_type_name : std::uint32_t
35  {
36  Domain = 1,
37  Ipv4Address,
38  Ipv6Address,
39  OpaqueHost,
40  EmptyHost
41  };
42 
47  struct domain_base
48  {
49  domain_base(std::u32string const& val) : m_value(val) {}
50 
51  std::u32string m_value;
52  };
53 
58  struct ipv4_address_base { std::uint32_t m_value; };
59 
64  struct ipv6_address_base { std::array<std::uint16_t, 8> m_value; };
65 
71  {
72  std::u32string m_value;
73  };
74 
79  struct empty_host_base { std::nullptr_t m_value = nullptr; };
80 
85  class host_base
86  {
87  public:
88  using value_type = std::variant<
94 
95  public:
96  template <typename Value>
97  host_base(Value const& val) : m_value(val) {}
98 
99  template <typename Value>
100  host_base(Value&& val) : m_value(std::move(val)) {}
101 
102  host_type_name type()
103  {
104  switch (m_value.index())
105  {
106  case 0: return host_type_name::Domain;
107  case 1: return host_type_name::Ipv4Address;
108  case 2: return host_type_name::Ipv6Address;
109  case 3: return host_type_name::OpaqueHost;
110  case 4: return host_type_name::EmptyHost;
111  default:
112  break;
113  }
114  return static_cast<host_type_name>(0);
115  }
116 
117  protected:
118  value_type m_value;
119  };
120 
121  // --------------------------------------------------------------------------------------------
122  // 3.2. Host miscellaneous
123  //
124  // https://url.spec.whatwg.org/#host-miscellaneous
125  // --------------------------------------------------------------------------------------------
126 
131  inline bool is_forbidden_host_code_point(std::uint32_t cp)
132  {
133  switch (cp)
134  {
135  case U'\x00':
136  case U'\x09':
137  case U'\x0A':
138  case U'\x0D':
139  case U'\x20':
140  case U'\x23':
141  case U'\x25':
142  case U'\x2F':
143  case U'\x3A':
144  case U'\x3F':
145  case U'\x40':
146  case U'\x5B':
147  case U'\x5C':
148  case U'\x5D':
149  return true;
150  }
151  return false;
152  }
153 
154  // --------------------------------------------------------------------------------------------
155  // 3.3. IDNA
156  //
157  // https://url.spec.whatwg.org/#idna
158  // --------------------------------------------------------------------------------------------
159 
189  std::u32string domain_to_ascii(std::u32string const& s, bool beStrict, std::error_code& ec);
190 
191  inline std::u32string domain_to_ascii(std::u32string const& s, std::error_code& ec)
192  {
193  return domain_to_ascii(s, false, ec);
194  }
195 
205  std::u32string domain_to_unicode(std::u32string const& s, std::error_code& ec);
206 
207 
208  // --------------------------------------------------------------------------------------------
209  // 3.4. Host writing
210  //
211  // https://url.spec.whatwg.org/#host-writing
212  // --------------------------------------------------------------------------------------------
213 
214  bool is_valid_domain_string(std::u32string const& s);
215  bool is_valid_ipv4_address_string(std::u32string const& s);
216  bool is_valid_ipv6_address_string(std::u32string const& s);
217 
222  inline bool is_valid_host_string(std::u32string const& s)
223  {
224  if (is_valid_domain_string(s) || is_valid_ipv4_address_string(s)) return true;
225  if (s.size() < 3 || s.front() != U'[' || s.back() != U']') return false;
226  return is_valid_ipv6_address_string(std::u32string(++s.begin(), --s.end()));
227  }
228 
233  bool is_valid_domain_string(std::u32string const& s);
234 
239  bool is_valid_ipv4_address_string(std::u32string const& s);
240 
245  inline bool is_valid_ipv6_address_string(std::u32string const& s)
246  {
247  return true;
248  }
249 
254  inline bool is_valid_opaque_host_string(std::u32string const& s)
255  {
256  return true;
257  }
258 
259  // --------------------------------------------------------------------------------------------
260  // 3.5. Host parsing
261  //
262  // https://url.spec.whatwg.org/#host-parsing
263  // --------------------------------------------------------------------------------------------
264 
265  std::optional<host_base> parse_ipv4(std::u32string const& in, std::error_code& ec);
266  std::optional<host_base> parse_ipv6(std::u32string const& in, std::error_code& ec);
267  std::optional<host_base> parse_opaque_host(std::u32string const& in, std::error_code& ec);
268 
269  std::pair<std::optional<std::uint32_t>, bool> parse_ipv4_number(std::u32string s);
270 
278  inline std::optional<host_base> parse_host(
279  std::u32string const& in, bool isNotSpecial, std::error_code& ec)
280  {
281  // 1.
282  if (!in.empty() && in.front() == U'[')
283  {
284  if (in.back() != U']')
285  {
287  return std::optional<host_base>();
288  }
289  return parse_ipv6(std::u32string(++in.begin(), --in.end()), ec);
290  }
291  // 2.
292  if (isNotSpecial) return parse_opaque_host(in, ec);
293  // 3.
294  assert(!in.empty());
295  // 4.
296  std::string tmp;
297  string_percent_decode(in.begin(), in.end(), std::back_inserter(tmp));
298  encoding::io_queue<char> in_q = encoding::to_io_queue_convert(tmp.begin(), tmp.end());
301  std::u32string domain;
302  encoding::from_io_queue_convert(out_q, std::back_inserter(domain));
303  // 5.
304  std::u32string asciiDomain = domain_to_ascii(domain, ec);
305  // 6.
306  if (ec) return std::optional<host_base>();
307  // 7.
308  if (std::find_if(asciiDomain.begin(), asciiDomain.end(), is_forbidden_host_code_point) != asciiDomain.end())
309  {
311  return std::optional<host_base>();
312  }
313  // 8.
314  std::optional<host_base> ipv4 = parse_ipv4(asciiDomain, ec);
315  // 9.
316  if(ec) return std::optional<host_base>();
317  // 10.
318  return std::optional<host_base>();
319  }
320 
321  inline std::optional<host_base> parse_host(std::u32string const& in, std::error_code& ec)
322  {
323  return parse_host(in, false, ec);
324  }
325 
330  inline std::optional<host_base> parse_ipv4(std::u32string const& in, std::error_code& ec)
331  {
332  // 1.
333  bool validationError = false;
334  // 2.
335  std::vector<std::u32string> parts;
336  strictly_split_on_particular_delimiter(in.begin(), in.end(), std::back_inserter(parts), U'.');
337  // 3.
338  if (!parts.empty() && parts.back().empty())
339  {
340  validationError = true;
341  if (1 < parts.size()) parts.erase(--parts.end());
342  }
343  // 4.
344  if (4 < parts.size()) return opaque_host_base{ in };
345 
346  return std::optional<host_base>();
347  }
348 
349  inline std::pair<std::optional<std::uint32_t>, bool> parse_ipv4_number(std::u32string s)
350  {
351  // 1.
352  bool validationError = false;
353  // 2.
354  std::uint32_t R = 10;
355  // 3.
356  if (2 <= s.size() && s.front() == U'0' && (s.at(1) == U'x' || s.at(1) == U'X'))
357  {
358  validationError = true;
359  s.erase(0, 2);
360  R = 16;
361  }
362  // 4.
363  else if (2 <= s.size() && s.front() == U'0')
364  {
365  validationError = true;
366  s.erase(0, 1);
367  R = 8;
368  }
369  // 5.
370  if (s.empty()) return std::make_pair(std::optional<std::uint32_t>(), validationError);
371  // 6.
372  std::u32string_view sv = R == 8 ? U"012345678" : (R == 10 ? U"0123456789" : U"0123456789abcdefABCDEF");
373  for (char32_t cp : s) if (std::find(sv.begin(), sv.end(), cp) == sv.end())
374  {
375  return std::make_pair(std::optional<std::uint32_t>(), validationError);
376  }
377  // 7.
378  std::string tmp;
379  std::copy(s.begin(), s.end(), std::back_inserter(tmp));
380  std::uint32_t i;
381  std::from_chars(tmp.data(), tmp.data() + tmp.length(), i, R);
382  // 8.
383  return std::make_pair(std::optional<std::uint32_t>(i), validationError);
384  }
385 
390  inline std::optional<host_base> parse_ipv6(std::u32string const& in, std::error_code& ec)
391  {
392  return std::optional<host_base>();
393  }
394 
400  inline std::optional<host_base> parse_opaque_host(std::u32string const& in, std::error_code& ec)
401  {
402  return std::optional<host_base>();
403  }
404 
405 }
wordring::whatwg::parse_opaque_host
std::optional< host_base > parse_opaque_host(std::u32string const &in, std::error_code &ec)
opaque-host parser
Definition: host.hpp:400
wordring::whatwg::opaque_host_base
opaque host
Definition: host.hpp:70
wordring::whatwg::domain_base
domain
Definition: host.hpp:47
wordring::whatwg::parse_ipv6
std::optional< host_base > parse_ipv6(std::u32string const &in, std::error_code &ec)
IPv6 parser
Definition: host.hpp:390
wordring::whatwg::url_error_name::ParseFailed
@ ParseFailed
wordring::whatwg::is_forbidden_host_code_point
bool is_forbidden_host_code_point(std::uint32_t cp)
forbidden host code point
Definition: host.hpp:131
wordring::whatwg
wordring::whatwg::ipv4_address_base
IPv4 address
Definition: host.hpp:58
wordring::whatwg::encoding::utf8_decode_without_bom
result_value utf8_decode_without_bom(InQueue &in, OutQueue &out)
BOM に関知せず UTF8 をデコードする
Definition: whatwg/encoding/encoding.hpp:440
wordring::whatwg::ipv6_address_base
IPv6 address
Definition: host.hpp:64
wordring::whatwg::is_valid_opaque_host_string
bool is_valid_opaque_host_string(std::u32string const &s)
妥当な不透明ホスト文字列
Definition: host.hpp:254
wordring::whatwg::strictly_split_on_particular_delimiter
void strictly_split_on_particular_delimiter(InputIterator first, InputIterator last, OutputIterator output, char32_t delimiter)
特定の区切り文字で厳密に分割する
Definition: infra/infra.hpp:472
wordring::whatwg::is_valid_ipv4_address_string
bool is_valid_ipv4_address_string(std::u32string const &s)
妥当な IPv4 アドレス文字列
wordring::whatwg::is_valid_ipv6_address_string
bool is_valid_ipv6_address_string(std::u32string const &s)
妥当な IPv6 アドレス文字列
Definition: host.hpp:245
wordring::whatwg::encoding::from_io_queue_convert
void from_io_queue_convert(io_queue< T > q, OutputIterator out)
io_queue を文字列へ変換する
Definition: terminology.hpp:223
wordring::whatwg::string_percent_decode
void string_percent_decode(InputIterator first, InputIterator last, OutputIterator output)
Definition: url/infra.hpp:120
wordring::whatwg::encoding::to_io_queue_convert
auto to_io_queue_convert(InputIterator first, InputIterator last)
文字列を io_queue へ変換する
Definition: terminology.hpp:238
wordring::whatwg::domain_to_ascii
std::u32string domain_to_ascii(std::u32string const &s, bool beStrict, std::error_code &ec)
国際化ドメイン名を ASCII ドメイン名へ変換する
wordring::whatwg::host_base
host
Definition: host.hpp:85
wordring::whatwg::encoding::io_queue< char >
wordring::whatwg::parse_host
std::optional< host_base > parse_host(std::u32string const &in, bool isNotSpecial, std::error_code &ec)
host parser
Definition: host.hpp:278
wordring::whatwg::is_valid_host_string
bool is_valid_host_string(std::u32string const &s)
妥当なホスト文字列
Definition: host.hpp:222
wordring::whatwg::domain_to_unicode
std::u32string domain_to_unicode(std::u32string const &s, std::error_code &ec)
ASCII ドメイン名を国際化ドメイン名へ変換する
wordring::whatwg::empty_host_base
empty host
Definition: host.hpp:79
wordring::whatwg::is_valid_domain_string
bool is_valid_domain_string(std::u32string const &s)
妥当なドメイン文字列
wordring::whatwg::parse_ipv4
std::optional< host_base > parse_ipv4(std::u32string const &in, std::error_code &ec)
IPv4 parser
Definition: host.hpp:330