libwordring
whatwg/html/parsing/tokenization.hpp
1 #pragma once
2 
3 // https://html.spec.whatwg.org/multipage/parsing.html
4 // https://triple-underscore.github.io/HTML-parsing-ja.html
5 
6 #include <wordring/whatwg/html/parsing/atom_tbl.hpp>
7 #include <wordring/whatwg/html/parsing/input_stream.hpp>
8 #include <wordring/whatwg/html/parsing/parser_defs.hpp>
9 #include <wordring/whatwg/html/parsing/token.hpp>
10 
11 #include <wordring/whatwg/infra/infra.hpp>
12 
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <string>
#include <type_traits>
16 
18 {
19  /*
20  @par コールバック
21 
22  派生クラスは以下のメンバを持たなければならない。
23 
24  - template <typename Token> void on_emit_token(Token& token)
25  - stack_entry& adjusted_current_node()
26  - std::deque<stack_entry> m_stack
27  */
28  template <typename T, typename NodeTraits>
29  class tokenizer : public input_stream<T>
30  {
31  friend input_stream<T>;
32 
33  protected:
34  using base_type = input_stream<T>;
35  using this_type = T;
36 
37  using traits = NodeTraits;
38 
39  using state_type = void(tokenizer::*)();
40 
42  using base_type::fill;
45  using base_type::begin;
46  using base_type::end;
49  using base_type::consume;
50  using base_type::match;
51 
53  using base_type::eof;
54 
55  public:
56  state_type m_state;
57  state_type m_return_state;
58  std::u32string m_temporary_buffer;
59 
60  // トークン -----------------------------------------------------------
61 
62  DOCTYPE_token m_DOCTYPE_token; // 1
63  start_tag_token m_start_tag_token; // 2
64  end_tag_token m_end_tag_token; // 3
65  comment_token m_comment_token; // 4
66  character_token m_character_token; // 5
67  end_of_file_token m_end_of_file_token; // 6
68 
73  std::uint32_t m_current_tag_token_id;
74 
75  std::u32string m_last_start_tag_name;
76 
77  char32_t m_character_reference_code;
78 
79  //
80 
81  public:
82  tokenizer()
83  : m_state(data_state)
84  , m_return_state(nullptr)
86  , m_character_reference_code(0)
87  {
88  }
89 
92  void clear()
93  {
95 
96  m_state = data_state;
97  m_return_state = nullptr;
98  m_temporary_buffer.clear();
99 
100  m_DOCTYPE_token = DOCTYPE_token();
101  m_start_tag_token = start_tag_token();
102  m_end_tag_token = end_tag_token();
103  m_comment_token = comment_token();
104  m_character_token = character_token();
105  m_end_of_file_token = end_of_file_token();
106 
108 
109  m_last_start_tag_name.clear();
110 
111  m_character_reference_code = 0;
112  }
113 
114  // トークン -----------------------------------------------------------
115 
116  tag_token& create_start_tag_token()
117  {
119  m_start_tag_token.clear();
120  return m_start_tag_token;
121  }
122 
123  tag_token& create_end_tag_token()
124  {
126  m_end_tag_token.clear();
127  return m_start_tag_token;
128  }
129 
130  void create_comment_token(char32_t const* data = U"")
131  {
132  m_comment_token.m_data = data;
133  }
134 
135  DOCTYPE_token& create_DOCTYPE_token()
136  {
137  m_DOCTYPE_token.clear();
138  return m_DOCTYPE_token;
139  }
140 
144  {
145  if (m_current_tag_token_id == 2) return m_start_tag_token;
146  if (m_current_tag_token_id == 3) return m_end_tag_token;
147 
148  assert(false);
149  return m_start_tag_token;
150  }
151 
152  bool is_appropriate_end_tag_token(tag_token const& token)
153  {
154  assert(m_current_tag_token_id == 3);
155 
156  if (m_last_start_tag_name.empty() || (m_last_start_tag_name != token.m_tag_name)) return false;
157  return true;
158  }
159 
163  {
164  return current_tag_token().m_attributes.create();
165  }
166 
170  {
171  return current_tag_token().m_attributes.current();
172  }
173 
174  comment_token& current_comment_token()
175  {
176  return m_comment_token;
177  }
178 
179  DOCTYPE_token& current_DOCTYPE_token()
180  {
181  return m_DOCTYPE_token;
182  }
183 
184 
185  // ----------------------------------------------------------------------------------------
186  // スタック
187  //
188  // 12.2.4.2 The stack of open elements
189  // https://html.spec.whatwg.org/multipage/parsing.html#the-stack-of-open-elements
190  // ----------------------------------------------------------------------------------------
191 
194  bool in_html_namespace() const
195  {
196  this_type const* P = static_cast<this_type const*>(this);
197  return traits::get_namespace_name(P->adjusted_current_node().m_it) == ns_name::HTML;
198  }
199 
205  {
206  token_attribute_list& al = current_tag_token().m_attributes;
207  auto it1 = al.begin();
208  auto it2 = std::prev(al.end(), 1);
209  while (it1 != it2) if (it1++->m_name == al.current().m_name) al.current().m_omitted = true;
210  }
211 
212  // トークンの発送-------------------------------------------------------
213 
214  template <typename Token>
215  void emit_token(Token& token)
216  {
217  this_type* P = static_cast<this_type*>(this);
218 
219  if constexpr (std::is_base_of_v<tag_token, Token>)
220  {
221  assert(m_current_tag_token_id == 2 || m_current_tag_token_id == 3);
222 
223  tag_token& t = static_cast<tag_token&>(token);
224  auto tag_it = tag_atom_tbl.find(t.m_tag_name);
225  t.m_tag_name_id = (tag_it == tag_atom_tbl.end()) ? static_cast<tag_name>(0) : tag_it->second;
226 
227  if (m_current_tag_token_id == 2)
228  {
229  m_last_start_tag_name = m_start_tag_token.m_tag_name;
230  P->on_emit_token(m_start_tag_token);
231  }
232  else if (m_current_tag_token_id == 3) P->on_emit_token(m_end_tag_token);
233  }
234  else P->on_emit_token(token);
235 
236  // トークンが木構築段階で処理された後のチェック
237  if constexpr (std::is_same_v<Token, start_tag_token>)
238  {
239  if (token.m_self_closing_flag && !token.m_acknowledged_self_closing_flag)
240  {
241  report_error(error_name::non_void_html_element_start_tag_with_trailing_solidus);
242  }
243  }
244  }
245 
246  void emit_token(char32_t cp)
247  {
248  this_type* P = static_cast<this_type*>(this);
249 
250  m_character_token.m_data = cp;
251  P->on_emit_token(m_character_token);
252  }
253 
254  void emit_token(end_of_file_token)
255  {
256  this_type* P = static_cast<this_type*>(this);
257  P->on_emit_token(m_end_of_file_token);
258  }
259 
260  // 状態の変更 ----------------------------------------------------------
261 
262  void change_state(state_type st) { m_state = st; }
263 
264  void return_state(state_type st) { m_return_state = st; }
265 
266  state_type return_state() const { return m_return_state; }
267 
268  // 文字の再消費 --------------------------------------------------------
269 
270  void reconsume(state_type st)
271  {
272  change_state(st);
274  on_emit_code_point();
275  }
276 
277  //
278  bool consumed_as_part_of_attribute()
279  {
280  if (m_return_state == attribute_value_double_quoted_state
281  || m_return_state == attribute_value_single_quoted_state
282  || m_return_state == attribute_value_unquoted_state) return true;
283 
284  return false;
285  }
286 
287  void flush_code_points_consumed_as_character_reference()
288  {
289  if (consumed_as_part_of_attribute()) for (char32_t cp : m_temporary_buffer) current_attribute().m_value.push_back(cp);
290  else for (char32_t cp : m_temporary_buffer) emit_token(cp);
291  }
292 
293  // コールバック --------------------------------------------------------
294 
295  void on_emit_code_point()
296  {
297  (this->*m_state)();
298  }
299 
300  // 状態関数 -----------------------------------------------------------
301 
304  {
305  char32_t cp = consume();
306 
307  if (eof())
308  {
309  emit_token(end_of_file_token());
310  return;
311  }
312 
313  switch (cp)
314  {
315  case U'&':
316  return_state(data_state);
317  change_state(character_reference_state);
318  break;
319  case U'<':
320  change_state(tag_open_state);
321  break;
322  case U'\x0':
323  report_error(error_name::unexpected_null_character);
324  emit_token(cp);
325  break;
326  default:
327  emit_token(cp);
328  break;
329  }
330  }
331 
334  {
335  char32_t cp = consume();
336 
337  if (eof())
338  {
339  emit_token(end_of_file_token());
340  return;
341  }
342 
343  switch (cp)
344  {
345  case U'&':
346  return_state(RCDATA_state);
347  change_state(character_reference_state);
348  break;
349  case U'<':
350  change_state(RCDATA_less_than_sign_state);
351  break;
352  case U'\x0':
353  report_error(error_name::unexpected_null_character);
354  emit_token(U'\xFFFD');
355  break;
356  default:
357  emit_token(cp);
358  break;
359  }
360  }
361 
364  {
365  char32_t cp = consume();
366 
367  if (eof())
368  {
369  emit_token(end_of_file_token());
370  return;
371  }
372 
373  switch (cp)
374  {
375  case U'<':
376  change_state(RAWTEXT_less_than_sign_state);
377  break;
378  case U'\x0':
379  report_error(error_name::unexpected_null_character);
380  emit_token(U'\xFFFD');
381  break;
382  default:
383  emit_token(cp);
384  break;
385  }
386  }
387 
390  {
391  char32_t cp = consume();
392 
393  if (eof())
394  {
395  emit_token(end_of_file_token());
396  return;
397  }
398 
399  switch (cp)
400  {
401  case U'<':
402  change_state(script_data_less_than_sign_state);
403  break;
404  case U'\x0':
405  report_error(error_name::unexpected_null_character);
406  emit_token(U'\xFFFD');
407  break;
408  default:
409  emit_token(cp);
410  break;
411  }
412  }
413 
416  {
417  char32_t cp = consume();
418 
419  if (eof())
420  {
421  emit_token(end_of_file_token());
422  return;
423  }
424 
425  if (cp == U'\x0')
426  {
427  report_error(error_name::unexpected_null_character);
428  emit_token(U'\xFFFD');
429  return;
430  }
431 
432  emit_token(cp);
433  }
434 
437  {
438  char32_t cp = consume();
439 
440  if (eof())
441  {
442  report_error(error_name::eof_before_tag_name);
443  emit_token(U'<');
444  emit_token(end_of_file_token());
445  return;
446  }
447 
448  switch (cp)
449  {
450  case U'!':
451  change_state(markup_declaration_open_state);
452  return;
453  case U'/':
454  change_state(end_tag_open_state);
455  return;
456  case U'?':
457  report_error(error_name::unexpected_question_mark_instead_of_tag_name);
458  create_comment_token();
459  reconsume(bogus_comment_state);
460  return;
461  }
462 
463  if (is_ascii_alpha(cp))
464  {
465  create_start_tag_token();
466  reconsume(tag_name_state);
467  return;
468  }
469 
470  report_error(error_name::invalid_first_character_of_tag_name);
471  emit_token(U'<');
472  reconsume(data_state);
473  }
474 
477  {
478  char32_t cp = consume();
479 
480  if (eof())
481  {
482  report_error(error_name::eof_before_tag_name);
483  emit_token(U'<');
484  emit_token(U'/');
485  emit_token(end_of_file_token());
486  return;
487  }
488 
489  if (is_ascii_alpha(cp))
490  {
491  create_end_tag_token();
492  reconsume(tag_name_state);
493  return;
494  }
495 
496  if (cp == U'>')
497  {
498  report_error(error_name::missing_end_tag_name);
499  change_state(data_state);
500  return;
501  }
502 
503  report_error(error_name::invalid_first_character_of_tag_name);
504  create_comment_token();
505  reconsume(bogus_comment_state);
506  }
507 
510  {
511  char32_t cp = consume();
512 
513  if (eof())
514  {
515  report_error(error_name::eof_in_tag);
516  emit_token(end_of_file_token());
517  return;
518  }
519 
520  switch (cp)
521  {
522  case U'\x9': // TAB
523  case U'\xA': // LF
524  case U'\xC': // FF
525  case U'\x20': // SPACE
526  change_state(before_attribute_name_state);
527  return;
528  case U'/':
529  change_state(self_closing_start_tag_state);
530  return;
531  case U'>':
532  change_state(data_state);
533  emit_token(current_tag_token());
534  return;
535  case U'\x0':
536  report_error(error_name::unexpected_null_character);
537  current_tag_token().m_tag_name.push_back(U'\xFFFD');
538  return;
539  }
540 
541  if (is_ascii_upper_alpha(cp))
542  {
543  current_tag_token().m_tag_name.push_back(cp + 0x20);
544  return;
545  }
546 
547  current_tag_token().m_tag_name.push_back(cp);
548  }
549 
552  {
553  char32_t cp = consume();
554 
555  if (!eof() && cp == U'/')
556  {
557  m_temporary_buffer.clear();
558  change_state(RCDATA_end_tag_open_state);
559  return;
560  }
561 
562  emit_token(U'<');
563  reconsume(RCDATA_state);
564  }
565 
568  {
569  char32_t cp = consume();
570 
571  if (!eof() && is_ascii_alpha(cp))
572  {
573  create_end_tag_token();
574  reconsume(RCDATA_end_tag_name_state);
575  return;
576  }
577 
578  emit_token(U'<');
579  emit_token(U'/');
580  reconsume(RCDATA_state);
581  }
582 
585  {
586  char32_t cp = consume();
587 
588  if (!eof())
589  {
590  switch (cp)
591  {
592  case U'\x9': // TAB
593  case U'\xA': // LF
594  case U'\xC': // FF
595  case U'\x20': // SPACE
596  if (!is_appropriate_end_tag_token(m_end_tag_token)) goto AnythingElse;
597  change_state(before_attribute_name_state);
598  return;
599  case U'/':
600  if (!is_appropriate_end_tag_token(m_end_tag_token)) goto AnythingElse;
601  change_state(self_closing_start_tag_state);
602  return;
603  case U'>':
604  if (!is_appropriate_end_tag_token(m_end_tag_token)) goto AnythingElse;
605  change_state(data_state);
606  emit_token(current_tag_token());
607  return;
608  }
609 
610  if (is_ascii_upper_alpha(cp))
611  {
612  current_tag_token().m_tag_name.push_back(cp + 0x20);
613  m_temporary_buffer.push_back(cp);
614  return;
615  }
616 
617  if (is_ascii_lower_alpha(cp))
618  {
619  current_tag_token().m_tag_name.push_back(cp);
620  m_temporary_buffer.push_back(cp);
621  return;
622  }
623  }
624 
625  AnythingElse:
626  emit_token(U'<');
627  emit_token(U'/');
628  for (char32_t c : m_temporary_buffer) emit_token(c);
629  reconsume(RCDATA_state);
630  }
631 
634  {
635  char32_t cp = consume();
636 
637  if (!eof() && cp == U'/')
638  {
639  m_temporary_buffer.clear();
640  change_state(RAWTEXT_end_tag_open_state);
641  return;
642  }
643 
644  emit_token(U'<');
645  reconsume(RAWTEXT_state);
646  }
647 
650  {
651  char32_t cp = consume();
652 
653  if (!eof() && is_ascii_alpha(cp))
654  {
655  create_end_tag_token();
656  reconsume(RAWTEXT_end_tag_name_state);
657  return;
658  }
659 
660  emit_token(U'<');
661  emit_token(U'/');
662  reconsume(RAWTEXT_state);
663  }
664 
667  {
668  char32_t cp = consume();
669 
670  if (!eof())
671  {
672  switch (cp)
673  {
674  case U'\x9': // TAB
675  case U'\xA': // LF
676  case U'\xC': // FF
677  case U'\x20': // SPACE
678  if (!is_appropriate_end_tag_token(m_end_tag_token)) goto AnythingElse;
679  change_state(before_attribute_name_state);
680  return;
681  case U'/':
682  if (!is_appropriate_end_tag_token(m_end_tag_token)) goto AnythingElse;
683  change_state(self_closing_start_tag_state);
684  return;
685  case U'>':
686  if (!is_appropriate_end_tag_token(m_end_tag_token)) goto AnythingElse;
687  change_state(data_state);
688  emit_token(current_tag_token());
689  return;
690  }
691 
692  if (is_ascii_upper_alpha(cp))
693  {
694  current_tag_token().m_tag_name.push_back(cp + 0x20);
695  m_temporary_buffer.push_back(cp);
696  return;
697  }
698 
699  if (is_ascii_lower_alpha(cp))
700  {
701  current_tag_token().m_tag_name.push_back(cp);
702  m_temporary_buffer.push_back(cp);
703  return;
704  }
705  }
706 
707  AnythingElse:
708  emit_token(U'<');
709  emit_token(U'/');
710  for (char32_t c : m_temporary_buffer) emit_token(c);
711  reconsume(RAWTEXT_state);
712  }
713 
716  {
717  char32_t cp = consume();
718 
719  if (!eof())
720  {
721  switch (cp)
722  {
723  case U'/':
724  m_temporary_buffer.clear();
725  change_state(script_data_end_tag_open_state);
726  return;
727  case U'!':
728  change_state(script_data_escape_start_state);
729  emit_token(U'<');
730  emit_token(U'!');
731  return;
732  }
733  }
734 
735  emit_token(U'<');
736  reconsume(script_data_state);
737  }
738 
741  {
742  char32_t cp = consume();
743 
744  if (!eof())
745  {
746  if (is_ascii_alpha(cp))
747  {
748  create_end_tag_token();
749  reconsume(script_data_end_tag_name_state);
750  return;
751  }
752  }
753 
754  emit_token(U'<');
755  emit_token(U'/');
756  reconsume(script_data_state);
757  }
758 
761  {
762  char32_t cp = consume();
763 
764  if (!eof())
765  {
766  switch (cp)
767  {
768  case U'\x9': // TAB
769  case U'\xA': // LF
770  case U'\xC': // FF
771  case U'\x20': // SPACE
772  if (!is_appropriate_end_tag_token(m_end_tag_token)) goto AnythingElse;
773  change_state(before_attribute_name_state);
774  return;
775  case U'/':
776  if (!is_appropriate_end_tag_token(m_end_tag_token)) goto AnythingElse;
777  change_state(self_closing_start_tag_state);
778  return;
779  case U'>':
780  if (!is_appropriate_end_tag_token(m_end_tag_token)) goto AnythingElse;
781  change_state(data_state);
782  emit_token(current_tag_token());
783  return;
784  }
785 
786  if (is_ascii_upper_alpha(cp))
787  {
788  current_tag_token().m_tag_name.push_back(cp + 0x20);
789  m_temporary_buffer.push_back(cp);
790  return;
791  }
792 
793  if (is_ascii_lower_alpha(cp))
794  {
795  current_tag_token().m_tag_name.push_back(cp);
796  m_temporary_buffer.push_back(cp);
797  return;
798  }
799  }
800 
801  AnythingElse:
802  emit_token(U'<');
803  emit_token(U'/');
804  for (char32_t c : m_temporary_buffer) emit_token(c);
805  reconsume(script_data_state);
806  }
807 
810  {
811  char32_t cp = consume();
812 
813  if (!eof() && cp == U'-')
814  {
815  change_state(script_data_escape_start_dash_state);
816  emit_token(U'-');
817  return;
818  }
819 
820  reconsume(script_data_state);
821  }
822 
825  {
826  char32_t cp = consume();
827 
828  if (!eof() && cp == U'-')
829  {
830  change_state(script_data_escaped_dash_dash_state);
831  emit_token(U'-');
832  return;
833  }
834 
835  reconsume(script_data_state);
836  }
837 
840  {
841  char32_t cp = consume();
842 
843  if (eof())
844  {
845  report_error(error_name::eof_in_script_html_comment_like_text);
846  emit_token(end_of_file_token());
847  return;
848  }
849 
850  switch (cp)
851  {
852  case U'-':
853  change_state(script_data_escaped_dash_state);
854  emit_token(U'-');
855  break;
856  case U'<':
857  change_state(script_data_escaped_less_than_sign_state);
858  break;
859  case U'\x0':
860  report_error(error_name::unexpected_null_character);
861  emit_token(U'\xFFFD');
862  break;
863  default:
864  emit_token(cp);
865  break;
866  }
867  }
868 
871  {
872  char32_t cp = consume();
873 
874  if (eof())
875  {
876  report_error(error_name::eof_in_script_html_comment_like_text);
877  emit_token(end_of_file_token());
878  return;
879  }
880 
881  switch (cp)
882  {
883  case U'-':
884  change_state(script_data_escaped_dash_dash_state);
885  emit_token(U'-');
886  break;
887  case U'<':
888  change_state(script_data_escaped_less_than_sign_state);
889  break;
890  case U'\x0':
891  report_error(error_name::unexpected_null_character);
892  change_state(script_data_escaped_state);
893  emit_token(U'\xFFFD');
894  break;
895  default:
896  change_state(script_data_escaped_state);
897  emit_token(cp);
898  break;
899  }
900  }
901 
904  {
905  char32_t cp = consume();
906 
907  if (eof())
908  {
909  report_error(error_name::eof_in_script_html_comment_like_text);
910  emit_token(end_of_file_token());
911  return;
912  }
913 
914  switch (cp)
915  {
916  case U'-':
917  emit_token(U'-');
918  break;
919  case U'<':
920  change_state(script_data_escaped_less_than_sign_state);
921  break;
922  case U'>':
923  change_state(script_data_state);
924  emit_token(U'>');
925  break;
926  case U'\x0':
927  report_error(error_name::unexpected_null_character);
928  change_state(script_data_escaped_state);
929  emit_token(U'\xFFFD');
930  break;
931  default:
932  change_state(script_data_escaped_state);
933  emit_token(cp);
934  break;
935  }
936  }
937 
940  {
941  char32_t cp = consume();
942 
943  if (!eof())
944  {
945  if (cp == U'/')
946  {
947  m_temporary_buffer.clear();
948  change_state(script_data_escaped_end_tag_open_state);
949  return;
950  }
951 
952  if (is_ascii_alpha(cp))
953  {
954  m_temporary_buffer.clear();
955  emit_token(U'<');
956  reconsume(script_data_double_escape_start_state);
957  return;
958  }
959  }
960 
961  emit_token(U'<');
962  reconsume(script_data_escaped_state);
963  }
964 
967  {
968  char32_t cp = consume();
969 
970  if (!eof())
971  {
972  if (is_ascii_alpha(cp))
973  {
974  create_end_tag_token();
975  reconsume(script_data_escaped_end_tag_name_state);
976  return;
977  }
978  }
979 
980  emit_token(U'<');
981  emit_token(U'/');
982  reconsume(script_data_escaped_state);
983  }
984 
987  {
988  char32_t cp = consume();
989 
990  if (!eof())
991  {
992  switch (cp)
993  {
994  case U'\x9': // TAB
995  case U'\xA': // LF
996  case U'\xC': // FF
997  case U'\x20': // SPACE
998  if (!is_appropriate_end_tag_token(m_end_tag_token)) goto AnythingElse;
999  change_state(before_attribute_name_state);
1000  return;
1001  case U'/':
1002  if (!is_appropriate_end_tag_token(m_end_tag_token)) goto AnythingElse;
1003  change_state(self_closing_start_tag_state);
1004  return;
1005  case U'>':
1006  if (!is_appropriate_end_tag_token(m_end_tag_token)) goto AnythingElse;
1007  change_state(data_state);
1008  emit_token(current_tag_token());
1009  return;
1010  }
1011 
1012  if (is_ascii_upper_alpha(cp))
1013  {
1014  current_tag_token().m_tag_name.push_back(cp + 0x20);
1015  m_temporary_buffer.push_back(cp);
1016  return;
1017  }
1018 
1019  if (is_ascii_lower_alpha(cp))
1020  {
1021  current_tag_token().m_tag_name.push_back(cp);
1022  m_temporary_buffer.push_back(cp);
1023  return;
1024  }
1025  }
1026 
1027  AnythingElse:
1028  emit_token(U'<');
1029  emit_token(U'/');
1030  for (char32_t c : m_temporary_buffer) emit_token(c);
1031  reconsume(script_data_escaped_state);
1032  }
1033 
1036  {
1037  char32_t cp = consume();
1038 
1039  if (!eof())
1040  {
1041  switch (cp)
1042  {
1043  case U'\x9': // TAB
1044  case U'\xA': // LF
1045  case U'\xC': // FF
1046  case U'\x20': // SPACE
1047  case U'/':
1048  case U'>':
1049  if (m_temporary_buffer == U"script") change_state(script_data_double_escaped_state);
1050  else change_state(before_attribute_name_state);
1051  emit_token(cp);
1052  return;
1053  }
1054 
1055  if (is_ascii_upper_alpha(cp))
1056  {
1057  m_temporary_buffer.push_back(cp + 0x20);
1058  emit_token(cp);
1059  return;
1060  }
1061 
1062  if (is_ascii_lower_alpha(cp))
1063  {
1064  m_temporary_buffer.push_back(cp);
1065  emit_token(cp);
1066  return;
1067  }
1068  }
1069 
1070  reconsume(script_data_escaped_state);
1071  }
1072 
1075  {
1076  char32_t cp = consume();
1077 
1078  if (eof())
1079  {
1080  report_error(error_name::eof_in_script_html_comment_like_text);
1081  emit_token(end_of_file_token());
1082  return;
1083  }
1084 
1085  switch (cp)
1086  {
1087  case U'-':
1088  change_state(script_data_double_escaped_dash_state);
1089  emit_token(U'-');
1090  return;
1091  case U'<':
1092  change_state(script_data_double_escaped_less_than_sign_state);
1093  emit_token(U'<');
1094  return;
1095  case U'\x0':
1096  report_error(error_name::unexpected_null_character);
1097  emit_token(U'\xFFFD');
1098  return;
1099  }
1100 
1101  emit_token(cp);
1102  }
1103 
1106  {
1107  char32_t cp = consume();
1108 
1109  if (eof())
1110  {
1111  report_error(error_name::eof_in_script_html_comment_like_text);
1112  emit_token(end_of_file_token());
1113  return;
1114  }
1115 
1116  switch (cp)
1117  {
1118  case U'-':
1119  change_state(script_data_double_escaped_dash_dash_state);
1120  emit_token(U'-');
1121  return;
1122  case U'<':
1123  change_state(script_data_double_escaped_less_than_sign_state);
1124  emit_token(U'<');
1125  return;
1126  case U'\x0':
1127  report_error(error_name::unexpected_null_character);
1128  change_state(script_data_double_escaped_state);
1129  emit_token(U'\xFFFD');
1130  return;
1131  }
1132 
1133  change_state(script_data_double_escaped_state);
1134  emit_token(cp);
1135  }
1136 
1139  {
1140  char32_t cp = consume();
1141 
1142  if (eof())
1143  {
1144  report_error(error_name::eof_in_script_html_comment_like_text);
1145  emit_token(end_of_file_token());
1146  return;
1147  }
1148 
1149  switch (cp)
1150  {
1151  case U'-':
1152  emit_token(U'-');
1153  return;
1154  case U'<':
1155  change_state(script_data_double_escaped_less_than_sign_state);
1156  emit_token(U'<');
1157  return;
1158  case U'>':
1159  change_state(script_data_state);
1160  emit_token(U'>');
1161  return;
1162  case U'\x0':
1163  report_error(error_name::unexpected_null_character);
1164  change_state(script_data_double_escaped_state);
1165  emit_token(U'\xFFFD');
1166  return;
1167  }
1168 
1169  change_state(script_data_double_escaped_state);
1170  emit_token(cp);
1171  }
1172 
1175  {
1176  char32_t cp = consume();
1177 
1178  if (!eof() && cp == U'/')
1179  {
1180  m_temporary_buffer.clear();
1181  change_state(script_data_double_escape_end_state);
1182  emit_token(U'/');
1183  return;
1184  }
1185 
1186  reconsume(script_data_double_escaped_state);
1187  }
1188 
1191  {
1192  char32_t cp = consume();
1193 
1194  if (!eof())
1195  {
1196  switch (cp)
1197  {
1198  case U'\x9': // TAB
1199  case U'\xA': // LF
1200  case U'\xC': // FF
1201  case U'\x20': // SPACE
1202  case U'/':
1203  case U'>':
1204  if (m_temporary_buffer == U"script") change_state(script_data_escaped_state);
1205  else change_state(script_data_double_escaped_state);
1206  emit_token(cp);
1207  return;
1208  }
1209 
1210  if (is_ascii_upper_alpha(cp))
1211  {
1212  m_temporary_buffer.push_back(cp + 0x20);
1213  emit_token(cp);
1214  return;
1215  }
1216 
1217  if (is_ascii_lower_alpha(cp))
1218  {
1219  m_temporary_buffer.push_back(cp);
1220  emit_token(cp);
1221  return;
1222  }
1223  }
1224 
1225  reconsume(script_data_double_escaped_state);
1226  }
1227 
1230  {
1231  char32_t cp = consume();
1232 
1233  if (eof())
1234  {
1235  reconsume(after_attribute_name_state);
1236  return;
1237  }
1238 
1239  switch (cp)
1240  {
1241  case U'\x9': // TAB
1242  case U'\xA': // LF
1243  case U'\xC': // FF
1244  case U'\x20': // SPACE
1245  return;
1246  case U'/':
1247  case U'>':
1248  reconsume(after_attribute_name_state);
1249  return;
1250  case U'=':
1251  report_error(error_name::unexpected_equals_sign_before_attribute_name);
1253  attr.m_name = cp;
1254  change_state(attribute_name_state);
1255  return;
1256  }
1257 
1258  create_attribute();
1259  reconsume(attribute_name_state);
1260  }
1261 
1264  {
1265  char32_t cp = consume();
1266 
1267  if (eof())
1268  {
1269  reconsume(after_attribute_name_state);
1270  return;
1271  }
1272 
1273  switch (cp)
1274  {
1275  case U'\x9': // TAB
1276  case U'\xA': // LF
1277  case U'\xC': // FF
1278  case U'\x20': // SPACE
1279  case U'/':
1280  case U'>':
1281  unify_attribute();
1282  reconsume(after_attribute_name_state);
1283  return;
1284  case U'=':
1285  unify_attribute();
1286  change_state(before_attribute_value_state);
1287  return;
1288  case U'\x0':
1289  report_error(error_name::unexpected_null_character);
1290  current_attribute().m_name.push_back(U'\xFFFD');
1291  return;
1292  case U'"':
1293  case U'\'':
1294  case U'<':
1295  report_error(error_name::unexpected_character_in_attribute_name);
1296  goto AnythingElse;
1297  }
1298 
1299  if (is_ascii_upper_alpha(cp))
1300  {
1301  current_attribute().m_name.push_back(cp + 0x20);
1302  return;
1303  }
1304 
1305  AnythingElse:
1306  current_attribute().m_name.push_back(cp);
1307  }
1308 
1311  {
1312  char32_t cp = consume();
1313 
1314  if (eof())
1315  {
1316  report_error(error_name::eof_in_tag);
1317  emit_token(end_of_file_token());
1318  return;
1319  }
1320 
1321  switch (cp)
1322  {
1323  case U'\x9': // TAB
1324  case U'\xA': // LF
1325  case U'\xC': // FF
1326  case U'\x20': // SPACE
1327  return;
1328  case U'/':
1329  change_state(self_closing_start_tag_state);
1330  return;
1331  case U'=':
1332  change_state(before_attribute_value_state);
1333  return;
1334  case U'>':
1335  change_state(data_state);
1336  emit_token(current_tag_token());
1337  return;
1338  }
1339 
1340  create_attribute();
1341  reconsume(attribute_name_state);
1342  }
1343 
1346  {
1347  char32_t cp = consume();
1348 
1349  if (!eof())
1350  {
1351  switch (cp)
1352  {
1353  case U'\x9': // TAB
1354  case U'\xA': // LF
1355  case U'\xC': // FF
1356  case U'\x20': // SPACE
1357  return;
1358  case U'"':
1359  change_state(attribute_value_double_quoted_state);
1360  return;
1361  case U'\'':
1362  change_state(attribute_value_single_quoted_state);
1363  return;
1364  case U'>':
1365  report_error(error_name::missing_attribute_value);
1366  change_state(data_state);
1367  emit_token(current_tag_token());
1368  return;
1369  }
1370  }
1371 
1372  reconsume(attribute_value_unquoted_state);
1373  }
1374 
1377  {
1378  char32_t cp = consume();
1379 
1380  if (eof())
1381  {
1382  report_error(error_name::eof_in_tag);
1383  emit_token(end_of_file_token());
1384  return;
1385  }
1386 
1387  switch (cp)
1388  {
1389  case U'"':
1390  change_state(after_attribute_value_quoted_state);
1391  return;
1392  case U'&':
1393  return_state(attribute_value_double_quoted_state);
1394  change_state(character_reference_state);
1395  return;
1396  case U'\x0':
1397  report_error(error_name::unexpected_null_character);
1398  current_attribute().m_value.push_back(U'\xFFFD');
1399  return;
1400  }
1401 
1402  current_attribute().m_value.push_back(cp);
1403  }
1404 
1407  {
1408  char32_t cp = consume();
1409 
1410  if (eof())
1411  {
1412  report_error(error_name::eof_in_tag);
1413  emit_token(end_of_file_token());
1414  return;
1415  }
1416 
1417  switch (cp)
1418  {
1419  case U'\'':
1420  change_state(after_attribute_value_quoted_state);
1421  return;
1422  case U'&':
1423  return_state(attribute_value_single_quoted_state);
1424  change_state(character_reference_state);
1425  return;
1426  case U'\x0':
1427  report_error(error_name::unexpected_null_character);
1428  current_attribute().m_value.push_back(U'\xFFFD');
1429  return;
1430  }
1431 
1432  current_attribute().m_value.push_back(cp);
1433  }
1434 
1437  {
1438  char32_t cp = consume();
1439 
1440  if (eof())
1441  {
1442  report_error(error_name::eof_in_tag);
1443  emit_token(end_of_file_token());
1444  return;
1445  }
1446 
1447  switch (cp)
1448  {
1449  case U'\x9': // TAB
1450  case U'\xA': // LF
1451  case U'\xC': // FF
1452  case U'\x20': // SPACE
1453  change_state(before_attribute_name_state);
1454  return;
1455  case U'&':
1456  return_state(attribute_value_unquoted_state);
1457  change_state(character_reference_state);
1458  return;
1459  case U'>':
1460  change_state(data_state);
1461  emit_token(current_tag_token());
1462  return;
1463  case U'\x0':
1464  report_error(error_name::unexpected_null_character);
1465  current_attribute().m_value.push_back(U'\xFFFD');
1466  return;
1467  case U'"':
1468  case U'\'':
1469  case U'<':
1470  case U'=':
1471  case U'`':
1472  report_error(error_name::unexpected_character_in_unquoted_attribute_value);
1473  goto AnythingElse;
1474  }
1475 
1476  AnythingElse:
1477  current_attribute().m_value.push_back(cp);
1478  }
1479 
1482  {
1483  char32_t cp = consume();
1484 
1485  if (eof())
1486  {
1487  report_error(error_name::eof_in_tag);
1488  emit_token(end_of_file_token());
1489  return;
1490  }
1491 
1492  switch (cp)
1493  {
1494  case U'\x9': // TAB
1495  case U'\xA': // LF
1496  case U'\xC': // FF
1497  case U'\x20': // SPACE
1498  change_state(before_attribute_name_state);
1499  return;
1500  case U'/':
1501  change_state(self_closing_start_tag_state);
1502  return;
1503  case U'>':
1504  change_state(data_state);
1505  emit_token(current_tag_token());
1506  return;
1507  }
1508 
1509  report_error(error_name::missing_whitespace_between_attributes);
1510  reconsume(before_attribute_name_state);
1511  }
1512 
1515  {
      // NOTE(review): the signature line is not in this listing; the self-closing flag and the
      // unexpected_solidus_in_tag error suggest the "self-closing start tag" state handler — confirm.
      // Consumes one code point; only '>' completes the tag as self-closing.
1516  char32_t cp = consume();
1517 
1518  if (eof())
1519  {
1520  report_error(error_name::eof_in_tag);
1521  emit_token(end_of_file_token());
1522  return;
1523  }
1524 
1525  if (cp == U'>')
1526  {
      // Mark the current tag token as self-closing, return to data_state, then emit it.
1527  current_tag_token().m_self_closing_flag = true;
1528  change_state(data_state);
1529  emit_token(current_tag_token());
1530  return;
1531  }
1532 
      // Anything else: report the error and continue in before_attribute_name_state via reconsume().
1533  report_error(error_name::unexpected_solidus_in_tag);
1534  reconsume(before_attribute_name_state);
1535  }
1536 
1539  {
      // NOTE(review): the signature line is not in this listing; the absence of an EOF error and
      // the accumulation into the comment token suggest the "bogus comment" state handler — confirm.
      // Consumes one code point and accumulates it into the current comment token until '>' or EOF.
1540  char32_t cp = consume();
1541 
1542  if (eof())
1543  {
      // No error is reported here: the comment collected so far is emitted, then end-of-file.
1544  emit_token(current_comment_token());
1545  emit_token(end_of_file_token());
1546  return;
1547  }
1548 
1549  switch (cp)
1550  {
1551  case U'>':
1552  change_state(data_state);
1553  emit_token(current_comment_token());
1554  return;
1555  case U'\x0':
      // U+0000 is reported and stored in the comment data as U+FFFD.
1556  report_error(error_name::unexpected_null_character);
1557  current_comment_token().m_data.push_back(U'\xFFFD');
1558  return;
1559  }
1560 
      // Every other code point is appended to the comment data unchanged.
1561  current_comment_token().m_data.push_back(cp);
1562  }
1563 
1566  {
      // NOTE(review): the signature line is not in this listing; matching "--", "doctype" and
      // "[CDATA[" suggests the "markup declaration open" state handler — confirm.
      // n is the length of the longest keyword tested below (std::size of a literal counts the
      // terminating NUL, hence the "- 1").
1567  std::size_t constexpr n = std::max({ std::size(U"--") - 1, std::size(U"doctype") - 1, std::size(U"[CDATA[") - 1 });
      // Nothing is processed when fill(n) reports failure (presumably: not enough look-ahead yet).
1568  if (!fill(n)) return;
1569 
      // "--": start of a comment.
1570  if (match(U"--", false, false))
1571  {
1572  consume(std::size(U"--") - 1);
1573  create_comment_token();
1574  change_state(comment_start_state);
1575  flush_code_point();
1576  return;
1577  }
1578 
      // "doctype": the last argument of match() differs from the other two calls
      // (presumably it selects case-insensitive comparison — confirm against match()).
1579  if (match(U"doctype", false, true))
1580  {
1581  consume(std::size(U"doctype") - 1);
1582  change_state(DOCTYPE_state);
1583  flush_code_point();
1584  return;
1585  }
1586 
1587  if (match(U"[CDATA[", false, false))
1588  {
1589  consume(std::size(U"[CDATA[") - 1);
1590 
      // The derived class (this_type) owns m_stack; P is used only to test whether it is empty.
1591  this_type const* P = static_cast<this_type const*>(this);
1592 
1593  if (!P->m_stack.empty())
1594  {
      // NOTE(review): the WHATWG tokenizer enters the CDATA section state only when the adjusted
      // current node is NOT an element in the HTML namespace. Here CDATA_section_state is entered
      // when in_html_namespace() returns true — confirm what that helper reports; the condition
      // may be inverted.
1595  if (in_html_namespace())
1596  {
1597  change_state(CDATA_section_state);
1598  flush_code_point();
1599  return;
1600  }
1601  }
1602 
      // Otherwise "[CDATA[" is treated as the beginning of a bogus comment whose data starts with that text.
1603  report_error(error_name::cdata_in_html_content);
1604  create_comment_token(U"[CDATA[");
1605  change_state(bogus_comment_state);
1606  flush_code_point();
1607  return;
1608  }
1609 
      // None of the keywords matched: report it and continue as a bogus comment with empty data.
1610  report_error(error_name::incorrectly_opened_comment);
1611  create_comment_token();
1612  change_state(bogus_comment_state);
1613  flush_code_point();
1614  }
1615 
1618  {
      // NOTE(review): the signature line is not in this listing; the transitions suggest the
      // "comment start" state handler — confirm.
      // Consumes one code point directly after the comment opener.
1619  char32_t cp = consume();
1620 
1621  if (!eof())
1622  {
1623  switch (cp)
1624  {
1625  case U'-':
1626  change_state(comment_start_dash_state);
1627  return;
1628  case U'>':
      // The comment is closed before it has any data: report it and emit the comment token as is.
1629  report_error(error_name::abrupt_closing_of_empty_comment);
1630  change_state(data_state);
1631  emit_token(current_comment_token());
1632  return;
1633  }
1634  }
1635 
      // EOF and every other code point are left to comment_state via reconsume().
1636  reconsume(comment_state);
1637  }
1638 
1641  {
      // NOTE(review): the signature line is not in this listing; the transitions suggest the
      // "comment start dash" state handler (one '-' seen at the start of the comment) — confirm.
1642  char32_t cp = consume();
1643 
1644  if (eof())
1645  {
      // Input ended inside a comment: report it, emit the comment collected so far, then end-of-file.
1646  report_error(error_name::eof_in_comment);
1647  emit_token(current_comment_token());
1648  emit_token(end_of_file_token());
1649  return;
1650  }
1651 
1652  switch (cp)
1653  {
1654  case U'-':
1655  change_state(comment_end_state);
1656  return;
1657  case U'>':
1658  report_error(error_name::abrupt_closing_of_empty_comment);
1659  change_state(data_state);
1660  emit_token(current_comment_token());
1661  return;
1662  }
1663 
      // The '-' that led to this handler was not stored yet; append it to the comment data
      // before continuing in comment_state via reconsume().
1664  current_comment_token().m_data.push_back(U'-');
1665  reconsume(comment_state);
1666  }
1667 
1670  {
      // NOTE(review): the signature line is not in this listing; the transitions suggest the
      // "comment" state handler — confirm.
      // Consumes one code point of comment content and appends it to the current comment token,
      // except for '<' and '-' which additionally/alternatively switch to follow-up states.
1671  char32_t cp = consume();
1672 
1673  if (eof())
1674  {
      // Input ended inside a comment: report it, emit the comment collected so far, then end-of-file.
1675  report_error(error_name::eof_in_comment);
1676  emit_token(current_comment_token());
1677  emit_token(end_of_file_token());
1678  return;
1679  }
1680 
1681  switch (cp)
1682  {
1683  case U'<':
      // '<' is stored in the comment data and the following code points are examined separately.
1684  current_comment_token().m_data.push_back(cp);
1685  change_state(comment_less_than_sign_state);
1686  return;
1687  case U'-':
      // '-' is not stored here; comment_end_dash_state decides whether it belongs to the data.
1688  change_state(comment_end_dash_state);
1689  return;
1690  case U'\x0':
1691  report_error(error_name::unexpected_null_character);
      // U+0000 becomes U+FFFD inside the comment data. It must not be emitted as a separate
      // character token, which would leak comment content into the token stream.
1692  current_comment_token().m_data.push_back(U'\xFFFD');
1693  return;
1694  }
1695 
      // Every other code point is appended to the comment data unchanged.
1696  current_comment_token().m_data.push_back(cp);
1697  }
1698 
1701  {
      // NOTE(review): the signature line is not in this listing; the transitions suggest the
      // "comment less-than sign" state handler ('<' seen inside a comment) — confirm.
1702  char32_t cp = consume();
1703 
1704  if (!eof())
1705  {
1706  switch (cp)
1707  {
1708  case U'!':
      // "<!" inside a comment: store the '!' and keep tracking the sequence.
1709  current_comment_token().m_data.push_back(cp);
1710  change_state(comment_less_than_sign_bang_state);
1711  return;
1712  case U'<':
      // Another '<': store it and stay in this state (no change_state call).
1713  current_comment_token().m_data.push_back(cp);
1714  return;
1715  }
1716  }
1717 
      // EOF and every other code point are left to comment_state via reconsume().
1718  reconsume(comment_state);
1719  }
1720 
1723  {
      // NOTE(review): the signature line is not in this listing; the transitions suggest the
      // "comment less-than sign bang" state handler ("<!" seen inside a comment) — confirm.
1724  char32_t cp = consume();
1725 
      // Only a '-' continues the sequence; the '-' is not appended to the comment data here.
1726  if (!eof() && cp == U'-')
1727  {
1728  change_state(comment_less_than_sign_bang_dash_state);
1729  return;
1730  }
1731 
      // EOF and every other code point are left to comment_state via reconsume().
1732  reconsume(comment_state);
1733  }
1734 
1737  {
      // NOTE(review): the signature line is not in this listing; the transitions suggest the
      // "comment less-than sign bang dash" state handler ("<!-" seen inside a comment) — confirm.
1738  char32_t cp = consume();
1739 
1740  if (!eof() && cp == U'-')
1741  {
1742  change_state(comment_less_than_sign_bang_dash_dash_state);
1743  return;
1744  }
1745 
      // Unlike the preceding "<" / "<!" handlers this one falls back to comment_end_dash_state,
      // not comment_state (one '-' has already been consumed without being stored).
1746  reconsume(comment_end_dash_state);
1747  }
1748 
1751  {
      // NOTE(review): the signature line is not in this listing; the nested_comment error suggests
      // the "comment less-than sign bang dash dash" state handler ("<!--" inside a comment) — confirm.
      // All three paths continue in comment_end_state via reconsume(); they differ only in
      // whether nested_comment is reported.
1752  char32_t cp = consume();
1753 
1754  if (eof())
1755  {
1756  reconsume(comment_end_state);
1757  return;
1758  }
1759 
1760  if (cp == U'>')
1761  {
1762  reconsume(comment_end_state);
1763  return;
1764  }
1765 
      // Any code point other than '>' (and not EOF) is reported as a nested comment.
1766  report_error(error_name::nested_comment);
1767  reconsume(comment_end_state);
1768  }
1769 
1772  {
      // NOTE(review): the signature line is not in this listing; the transitions suggest the
      // "comment end dash" state handler (one '-' seen inside comment content) — confirm.
1773  char32_t cp = consume();
1774 
1775  if (eof())
1776  {
1777  report_error(error_name::eof_in_comment);
1778  emit_token(current_comment_token());
1779  emit_token(end_of_file_token());
1780  return;
1781  }
1782 
1783  if (cp == U'-')
1784  {
1785  change_state(comment_end_state);
1786  return;
1787  }
1788 
      // The pending '-' turned out to be ordinary content: store it, then continue in
      // comment_state via reconsume().
1789  current_comment_token().m_data.push_back(U'-');
1790  reconsume(comment_state);
1791  }
1792 
1795  {
      // NOTE(review): the signature line is not in this listing; the transitions suggest the
      // "comment end" state handler ("--" seen inside a comment) — confirm.
1796  char32_t cp = consume();
1797 
1798  if (eof())
1799  {
1800  report_error(error_name::eof_in_comment);
1801  emit_token(current_comment_token());
1802  emit_token(end_of_file_token());
1803  return;
1804  }
1805 
1806  switch (cp)
1807  {
1808  case U'>':
      // Regular end of the comment: back to data_state, then emit the comment token.
1809  change_state(data_state);
1810  emit_token(current_comment_token());
1811  return;
1812  case U'!':
1813  change_state(comment_end_bang_state);
1814  return;
1815  case U'-':
      // One more '-': store a single '-' and stay in this state.
1816  current_comment_token().m_data.push_back(U'-');
1817  return;
1818  }
1819 
      // The two pending '-' were ordinary content: store both, then continue in comment_state.
1820  current_comment_token().m_data.push_back(U'-');
1821  current_comment_token().m_data.push_back(U'-');
1822  reconsume(comment_state);
1823  }
1824 
1827  {
      // NOTE(review): the signature line is not in this listing; the transitions suggest the
      // "comment end bang" state handler ("--!" seen inside a comment) — confirm.
1828  char32_t cp = consume();
1829 
1830  if (eof())
1831  {
1832  report_error(error_name::eof_in_comment);
1833  emit_token(current_comment_token());
1834  emit_token(end_of_file_token());
1835  return;
1836  }
1837 
1838  switch (cp)
1839  {
1840  case U'-':
      // "--!" was ordinary content: store it; the new '-' is tracked by comment_end_dash_state.
1841  current_comment_token().m_data.push_back(U'-');
1842  current_comment_token().m_data.push_back(U'-');
1843  current_comment_token().m_data.push_back(U'!');
1844  change_state(comment_end_dash_state);
1845  return;
1846  case U'>':
      // "--!>" closes the comment, but is reported as an incorrect closing sequence.
1847  report_error(error_name::incorrectly_closed_comment);
1848  change_state(data_state);
1849  emit_token(current_comment_token());
1850  return;
1851  }
1852 
      // Anything else: store "--!" and continue in comment_state via reconsume().
1853  current_comment_token().m_data.push_back(U'-');
1854  current_comment_token().m_data.push_back(U'-');
1855  current_comment_token().m_data.push_back(U'!');
1856  reconsume(comment_state);
1857  }
1858 
1861  {
      // NOTE(review): the signature line is not in this listing; the transitions and the
      // missing_whitespace_before_doctype_name error suggest the "DOCTYPE" state handler — confirm.
1862  char32_t cp = consume();
1863 
1864  if (eof())
1865  {
      // No DOCTYPE token exists yet at this point, so one is created just to be emitted
      // with the force-quirks flag set.
1866  report_error(error_name::eof_in_doctype);
1867  DOCTYPE_token& d = create_DOCTYPE_token();
1868  d.m_force_quirks_flag = true;
1869  emit_token(d);
1870  emit_token(end_of_file_token());
1871  return;
1872  }
1873 
1874  switch (cp)
1875  {
1876  case U'\x9': // TAB
1877  case U'\xA': // LF
1878  case U'\xC': // FF
1879  case U'\x20': // SPACE
1880  change_state(before_DOCTYPE_name_state);
1881  return;
1882  case U'>':
      // '>' is not an error here; it is handed to before_DOCTYPE_name_state via reconsume().
1883  reconsume(before_DOCTYPE_name_state);
1884  return;
1885  }
1886 
      // Any other code point: same target state, but the missing whitespace is reported first.
1887  report_error(error_name::missing_whitespace_before_doctype_name);
1888  reconsume(before_DOCTYPE_name_state);
1889  }
1890 
1893  {
      // NOTE(review): the signature line is not in this listing; the transitions suggest the
      // "before DOCTYPE name" state handler — confirm.
      // Skips whitespace; the first other code point creates the DOCTYPE token and starts its name.
1894  char32_t cp = consume();
1895 
1896  if (eof())
1897  {
1898  report_error(error_name::eof_in_doctype);
1899  DOCTYPE_token& d = create_DOCTYPE_token();
1900  d.m_force_quirks_flag = true;
1901  emit_token(d);
1902  emit_token(end_of_file_token());
1903  return;
1904  }
1905 
1906  switch (cp)
1907  {
1908  case U'\x9': // TAB
1909  case U'\xA': // LF
1910  case U'\xC': // FF
1911  case U'\x20': // SPACE
      // Whitespace is ignored; the state does not change.
1912  return;
1913  case U'\x0':
      // U+0000 starts the name as U+FFFD.
1914  report_error(error_name::unexpected_null_character);
1915  create_DOCTYPE_token();
1916  current_DOCTYPE_token().m_name = U'\xFFFD';
1917  change_state(DOCTYPE_name_state);
1918  return;
1919  case U'>':
      // DOCTYPE without a name: emitted with the force-quirks flag set.
1920  report_error(error_name::missing_doctype_name);
1921  create_DOCTYPE_token();
1922  current_DOCTYPE_token().m_force_quirks_flag = true;
1923  change_state(data_state);
1924  emit_token(current_DOCTYPE_token());
1925  return;
1926  }
1927 
1928  if (is_ascii_upper_alpha(cp))
1929  {
      // Adding 0x20 maps an ASCII upper-case letter to its lower-case form.
1930  create_DOCTYPE_token();
1931  current_DOCTYPE_token().m_name = cp + 0x20;
1932  change_state(DOCTYPE_name_state);
1933  return;
1934  }
1935 
      // Any other code point becomes the first character of the name unchanged.
1936  DOCTYPE_token& d = create_DOCTYPE_token();
1937  d.m_name = cp;
1938  change_state(DOCTYPE_name_state);
1939  }
1940 
1943  {
      // NOTE(review): the signature line is not in this listing; the transitions suggest the
      // "DOCTYPE name" state handler — confirm.
      // Consumes one code point and appends it to the current DOCTYPE token's name until
      // whitespace, '>' or EOF ends the name.
1944  char32_t cp = consume();
1945 
1946  if (eof())
1947  {
1948  report_error(error_name::eof_in_doctype);
1949  current_DOCTYPE_token().m_force_quirks_flag = true;
1950  emit_token(current_DOCTYPE_token());
1951  emit_token(end_of_file_token());
1952  return;
1953  }
1954 
1955  switch (cp)
1956  {
1957  case U'\x9': // TAB
1958  case U'\xA': // LF
1959  case U'\xC': // FF
1960  case U'\x20': // SPACE
1961  change_state(after_DOCTYPE_name_state);
1962  return;
1963  case U'>':
1964  change_state(data_state);
1965  emit_token(current_DOCTYPE_token());
1966  return;
1967  case U'\x0':
1968  report_error(error_name::unexpected_null_character);
1969  current_DOCTYPE_token().m_name.push_back(U'\xFFFD');
1970  return;
1971  }
1972 
1973  if (is_ascii_upper_alpha(cp))
1974  {
      // ASCII upper-case letters are stored in lower case (+0x20).
1975  current_DOCTYPE_token().m_name.push_back(cp + 0x20);
1976  return;
1977  }
1978 
1979  current_DOCTYPE_token().m_name.push_back(cp);
1980  }
1981 
1984  {
      // NOTE(review): the signature line is not in this listing; matching "public"/"system" after
      // the name suggests the "after DOCTYPE name" state handler — confirm.
      // Both keywords have the same length; n is that length without the terminating NUL.
1985  std::size_t constexpr n = std::max(std::size(U"public") - 1, std::size(U"system") - 1);
      // Nothing is processed when fill(n) reports failure (presumably: not enough look-ahead yet).
1986  if (!fill(n)) return;
1987 
1988  char32_t cp = consume();
1989 
1990  if (eof())
1991  {
      // NOTE(review): this path returns without calling flush_code_point(), unlike the other
      // exits of this handler — confirm that this is intended.
1992  report_error(error_name::eof_in_doctype);
1993  current_DOCTYPE_token().m_force_quirks_flag = true;
1994  emit_token(current_DOCTYPE_token());
1995  emit_token(end_of_file_token());
1996  return;
1997  }
1998 
1999  switch (cp)
2000  {
2001  case U'\x9': // TAB
2002  case U'\xA': // LF
2003  case U'\xC': // FF
2004  case U'\x20': // SPACE
      // Whitespace is ignored; the state does not change.
2005  flush_code_point();
2006  return;
2007  case U'>':
2008  change_state(data_state);
2009  emit_token(current_DOCTYPE_token());
2010  flush_code_point();
2011  return;
2012  }
2013 
      // The first code point of the keyword has already been consumed above, so only
      // std::size(...) - 2 further code points are consumed (one less for the NUL, one less for cp).
      // Presumably the first `true` makes match() start at the already consumed code point and
      // the second selects case-insensitive comparison — confirm against match().
2014  if (match(U"public", true, true))
2015  {
2016  consume(std::size(U"public") - 2);
2017  change_state(after_DOCTYPE_public_keyword_state);
2018  flush_code_point();
2019  return;
2020  }
2021 
2022  if (match(U"system", true, true))
2023  {
2024  consume(std::size(U"system") - 2);
2025  change_state(after_DOCTYPE_system_keyword_state);
2026  flush_code_point();
2027  return;
2028  }
2029 
      // Neither keyword follows the name: force quirks and skip the rest as a bogus DOCTYPE.
2030  report_error(error_name::invalid_character_sequence_after_doctype_name);
2031  current_DOCTYPE_token().m_force_quirks_flag = true;
2032  reconsume(bogus_DOCTYPE_state);
2033  flush_code_point();
2034  }
2035 
2038  {
      // NOTE(review): the signature line is not in this listing; the
      // missing_whitespace_after_doctype_public_keyword error suggests the
      // "after DOCTYPE public keyword" state handler — confirm.
2039  char32_t cp = consume();
2040 
2041  if (eof())
2042  {
2043  report_error(error_name::eof_in_doctype);
2044  current_DOCTYPE_token().m_force_quirks_flag = true;
2045  emit_token(current_DOCTYPE_token());
2046  emit_token(end_of_file_token());
2047  return;
2048  }
2049 
2050  switch (cp)
2051  {
2052  case U'\x9': // TAB
2053  case U'\xA': // LF
2054  case U'\xC': // FF
2055  case U'\x20': // SPACE
2056  change_state(before_DOCTYPE_public_identifier_state);
2057  return;
2058  case U'"':
      // A quote directly after the keyword is accepted but reported; the public identifier is
      // reset to an empty string before it is collected.
2059  report_error(error_name::missing_whitespace_after_doctype_public_keyword);
2060  current_DOCTYPE_token().m_public_identifier.clear();
2061  change_state(DOCTYPE_public_identifier_double_quoted_state);
2062  return;
2063  case U'\'':
2064  report_error(error_name::missing_whitespace_after_doctype_public_keyword);
2065  current_DOCTYPE_token().m_public_identifier.clear();
2066  change_state(DOCTYPE_public_identifier_single_quoted_state);
2067  return;
2068  case U'>':
      // DOCTYPE ends without a public identifier: emitted with the force-quirks flag set.
2069  report_error(error_name::missing_doctype_public_identifier);
2070  current_DOCTYPE_token().m_force_quirks_flag = true;
2071  change_state(data_state);
2072  emit_token(current_DOCTYPE_token());
2073  return;
2074  }
2075 
      // Anything else: force quirks and skip the rest as a bogus DOCTYPE.
2076  report_error(error_name::missing_quote_before_doctype_public_identifier);
2077  current_DOCTYPE_token().m_force_quirks_flag = true;
2078  reconsume(bogus_DOCTYPE_state);
2079  }
2080 
2083  {
      // NOTE(review): the signature line is not in this listing; the transitions suggest the
      // "before DOCTYPE public identifier" state handler — confirm.
      // Skips whitespace and waits for the opening quote of the public identifier.
2084  char32_t cp = consume();
2085 
2086  if (eof())
2087  {
2088  report_error(error_name::eof_in_doctype);
2089  current_DOCTYPE_token().m_force_quirks_flag = true;
2090  emit_token(current_DOCTYPE_token());
2091  emit_token(end_of_file_token());
2092  return;
2093  }
2094 
2095  switch (cp)
2096  {
2097  case U'\x9': // TAB
2098  case U'\xA': // LF
2099  case U'\xC': // FF
2100  case U'\x20': // SPACE
      // Whitespace is ignored; the state does not change.
2101  return;
2102  case U'"':
      // Opening quote: reset the public identifier to an empty string and start collecting it.
2103  current_DOCTYPE_token().m_public_identifier.clear();
2104  change_state(DOCTYPE_public_identifier_double_quoted_state);
2105  return;
2106  case U'\'':
2107  current_DOCTYPE_token().m_public_identifier.clear();
2108  change_state(DOCTYPE_public_identifier_single_quoted_state);
2109  return;
2110  case U'>':
2111  report_error(error_name::missing_doctype_public_identifier);
2112  current_DOCTYPE_token().m_force_quirks_flag = true;
2113  change_state(data_state);
2114  emit_token(current_DOCTYPE_token());
2115  return;
2116  }
2117 
      // Anything else: force quirks and skip the rest as a bogus DOCTYPE.
2118  report_error(error_name::missing_quote_before_doctype_public_identifier);
2119  current_DOCTYPE_token().m_force_quirks_flag = true;
2120  reconsume(bogus_DOCTYPE_state);
2121  }
2122 
2125  {
      // NOTE(review): the signature line is not in this listing; the closing '"' suggests the
      // "DOCTYPE public identifier (double-quoted)" state handler — confirm.
      // Collects the public identifier one code point at a time until the closing '"'.
2126  char32_t cp = consume();
2127 
2128  if (eof())
2129  {
2130  report_error(error_name::eof_in_doctype);
2131  current_DOCTYPE_token().m_force_quirks_flag = true;
2132  emit_token(current_DOCTYPE_token());
2133  emit_token(end_of_file_token());
2134  return;
2135  }
2136 
2137  switch (cp)
2138  {
2139  case U'"':
2140  change_state(after_DOCTYPE_public_identifier_state);
2141  return;
2142  case U'\x0':
2143  report_error(error_name::unexpected_null_character);
2144  current_DOCTYPE_token().m_public_identifier.push_back(U'\xFFFD');
2145  return;
2146  case U'>':
      // '>' before the closing quote ends the DOCTYPE early: emitted with force-quirks set.
2147  report_error(error_name::abrupt_doctype_public_identifier);
2148  current_DOCTYPE_token().m_force_quirks_flag = true;
2149  change_state(data_state);
2150  emit_token(current_DOCTYPE_token());
2151  return;
2152  }
2153 
2154  current_DOCTYPE_token().m_public_identifier.push_back(cp);
2155  }
2156 
2159  {
      // NOTE(review): the signature line is not in this listing; the closing '\'' suggests the
      // "DOCTYPE public identifier (single-quoted)" state handler — confirm.
      // Collects the public identifier one code point at a time until the closing '\''.
2160  char32_t cp = consume();
2161 
2162  if (eof())
2163  {
2164  report_error(error_name::eof_in_doctype);
2165  current_DOCTYPE_token().m_force_quirks_flag = true;
2166  emit_token(current_DOCTYPE_token());
2167  emit_token(end_of_file_token());
2168  return;
2169  }
2170 
2171  switch (cp)
2172  {
2173  case U'\'':
2174  change_state(after_DOCTYPE_public_identifier_state);
2175  return;
2176  case U'\x0':
2177  report_error(error_name::unexpected_null_character);
2178  current_DOCTYPE_token().m_public_identifier.push_back(U'\xFFFD');
2179  return;
2180  case U'>':
      // '>' before the closing quote ends the DOCTYPE early: emitted with force-quirks set.
2181  report_error(error_name::abrupt_doctype_public_identifier);
2182  current_DOCTYPE_token().m_force_quirks_flag = true;
2183  change_state(data_state);
2184  emit_token(current_DOCTYPE_token());
2185  return;
2186  }
2187 
2188  current_DOCTYPE_token().m_public_identifier.push_back(cp);
2189  }
2190 
2193  {
      // NOTE(review): the signature line is not in this listing; the
      // missing_whitespace_between_doctype_public_and_system_identifiers error suggests the
      // "after DOCTYPE public identifier" state handler — confirm.
2194  char32_t cp = consume();
2195 
2196  if (eof())
2197  {
2198  report_error(error_name::eof_in_doctype);
2199  current_DOCTYPE_token().m_force_quirks_flag = true;
2200  emit_token(current_DOCTYPE_token());
2201  emit_token(end_of_file_token());
2202  return;
2203  }
2204 
2205  switch (cp)
2206  {
2207  case U'\x9': // TAB
2208  case U'\xA': // LF
2209  case U'\xC': // FF
2210  case U'\x20': // SPACE
2211  change_state(between_DOCTYPE_public_and_system_identifiers_state);
2212  return;
2213  case U'>':
      // DOCTYPE with a public identifier only: emitted without setting force-quirks.
2214  change_state(data_state);
2215  emit_token(current_DOCTYPE_token());
2216  return;
2217  case U'"':
      // A system identifier quote directly after the public identifier is accepted but reported.
2218  report_error(error_name::missing_whitespace_between_doctype_public_and_system_identifiers);
2219  current_DOCTYPE_token().m_system_identifier.clear();
2220  change_state(DOCTYPE_system_identifier_double_quoted_state);
2221  return;
2222  case U'\'':
2223  report_error(error_name::missing_whitespace_between_doctype_public_and_system_identifiers);
2224  current_DOCTYPE_token().m_system_identifier.clear();
2225  change_state(DOCTYPE_system_identifier_single_quoted_state);
2226  return;
2227  }
2228 
      // Anything else: force quirks and skip the rest as a bogus DOCTYPE.
2229  report_error(error_name::missing_quote_before_doctype_system_identifier);
2230  current_DOCTYPE_token().m_force_quirks_flag = true;
2231  reconsume(bogus_DOCTYPE_state);
2232  }
2233 
2236  {
      // NOTE(review): the signature line is not in this listing; the transitions suggest the
      // "between DOCTYPE public and system identifiers" state handler — confirm.
      // Skips whitespace and waits for '>' or the opening quote of the system identifier.
2237  char32_t cp = consume();
2238 
2239  if (eof())
2240  {
2241  report_error(error_name::eof_in_doctype);
2242  current_DOCTYPE_token().m_force_quirks_flag = true;
2243  emit_token(current_DOCTYPE_token());
2244  emit_token(end_of_file_token());
2245  return;
2246  }
2247 
2248  switch (cp)
2249  {
2250  case U'\x9': // TAB
2251  case U'\xA': // LF
2252  case U'\xC': // FF
2253  case U'\x20': // SPACE
      // Whitespace is ignored; the state does not change.
2254  return;
2255  case U'>':
2256  change_state(data_state);
2257  emit_token(current_DOCTYPE_token());
2258  return;
2259  case U'"':
      // Opening quote: reset the system identifier to an empty string and start collecting it.
2260  current_DOCTYPE_token().m_system_identifier.clear();
2261  change_state(DOCTYPE_system_identifier_double_quoted_state);
2262  return;
2263  case U'\'':
2264  current_DOCTYPE_token().m_system_identifier.clear();
2265  change_state(DOCTYPE_system_identifier_single_quoted_state);
2266  return;
2267  }
2268 
      // Anything else: force quirks and skip the rest as a bogus DOCTYPE.
2269  report_error(error_name::missing_quote_before_doctype_system_identifier);
2270  current_DOCTYPE_token().m_force_quirks_flag = true;
2271  reconsume(bogus_DOCTYPE_state);
2272  }
2273 
2276  {
      // NOTE(review): the signature line is not in this listing; the
      // missing_whitespace_after_doctype_system_keyword error suggests the
      // "after DOCTYPE system keyword" state handler — confirm.
2277  char32_t cp = consume();
2278 
2279  if (eof())
2280  {
2281  report_error(error_name::eof_in_doctype);
2282  current_DOCTYPE_token().m_force_quirks_flag = true;
2283  emit_token(current_DOCTYPE_token());
2284  emit_token(end_of_file_token());
2285  return;
2286  }
2287 
2288  switch (cp)
2289  {
2290  case U'\x9': // TAB
2291  case U'\xA': // LF
2292  case U'\xC': // FF
2293  case U'\x20': // SPACE
2294  change_state(before_DOCTYPE_system_identifier_state);
2295  return;
2296  case U'"':
      // A quote directly after the keyword is accepted but reported; the system identifier is
      // reset to an empty string before it is collected.
2297  report_error(error_name::missing_whitespace_after_doctype_system_keyword);
2298  current_DOCTYPE_token().m_system_identifier.clear();
2299  change_state(DOCTYPE_system_identifier_double_quoted_state);
2300  return;
2301  case U'\'':
2302  report_error(error_name::missing_whitespace_after_doctype_system_keyword);
2303  current_DOCTYPE_token().m_system_identifier.clear();
2304  change_state(DOCTYPE_system_identifier_single_quoted_state);
2305  return;
2306  case U'>':
      // DOCTYPE ends without a system identifier: emitted with the force-quirks flag set.
2307  report_error(error_name::missing_doctype_system_identifier);
2308  current_DOCTYPE_token().m_force_quirks_flag = true;
2309  change_state(data_state);
2310  emit_token(current_DOCTYPE_token());
2311  return;
2312  }
2313 
      // Anything else: force quirks and skip the rest as a bogus DOCTYPE.
2314  report_error(error_name::missing_quote_before_doctype_system_identifier);
2315  current_DOCTYPE_token().m_force_quirks_flag = true;
2316  reconsume(bogus_DOCTYPE_state);
2317  }
2318 
2321  {
      // NOTE(review): the signature line is not in this listing; the transitions suggest the
      // "before DOCTYPE system identifier" state handler — confirm.
      // Skips whitespace and waits for the opening quote of the system identifier.
2322  char32_t cp = consume();
2323 
2324  if (eof())
2325  {
2326  report_error(error_name::eof_in_doctype);
2327  current_DOCTYPE_token().m_force_quirks_flag = true;
2328  emit_token(current_DOCTYPE_token());
2329  emit_token(end_of_file_token());
2330  return;
2331  }
2332 
2333  switch (cp)
2334  {
2335  case U'\x9': // TAB
2336  case U'\xA': // LF
2337  case U'\xC': // FF
2338  case U'\x20': // SPACE
      // Whitespace is ignored; the state does not change.
2339  return;
2340  case U'"':
      // Opening quote: reset the system identifier to an empty string and start collecting it.
2341  current_DOCTYPE_token().m_system_identifier.clear();
2342  change_state(DOCTYPE_system_identifier_double_quoted_state);
2343  return;
2344  case U'\'':
2345  current_DOCTYPE_token().m_system_identifier.clear();
2346  change_state(DOCTYPE_system_identifier_single_quoted_state);
2347  return;
2348  case U'>':
2349  report_error(error_name::missing_doctype_system_identifier);
2350  current_DOCTYPE_token().m_force_quirks_flag = true;
2351  change_state(data_state);
2352  emit_token(current_DOCTYPE_token());
2353  return;
2354  }
2355 
      // Anything else: force quirks and skip the rest as a bogus DOCTYPE.
2356  report_error(error_name::missing_quote_before_doctype_system_identifier);
2357  current_DOCTYPE_token().m_force_quirks_flag = true;
2358  reconsume(bogus_DOCTYPE_state);
2359  }
2360 
2363  {
      // NOTE(review): the signature line is not in this listing; the closing '"' suggests the
      // "DOCTYPE system identifier (double-quoted)" state handler — confirm.
      // Collects the system identifier one code point at a time until the closing '"'.
2364  char32_t cp = consume();
2365 
2366  if (eof())
2367  {
2368  report_error(error_name::eof_in_doctype);
2369  current_DOCTYPE_token().m_force_quirks_flag = true;
2370  emit_token(current_DOCTYPE_token());
2371  emit_token(end_of_file_token());
2372  return;
2373  }
2374 
2375  switch (cp)
2376  {
2377  case U'"':
2378  change_state(after_DOCTYPE_system_identifier_state);
2379  return;
2380  case U'\x0':
2381  report_error(error_name::unexpected_null_character);
2382  current_DOCTYPE_token().m_system_identifier.push_back(U'\xFFFD');
2383  return;
2384  case U'>':
      // '>' before the closing quote ends the DOCTYPE early: emitted with force-quirks set.
2385  report_error(error_name::abrupt_doctype_system_identifier);
2386  current_DOCTYPE_token().m_force_quirks_flag = true;
2387  change_state(data_state);
2388  emit_token(current_DOCTYPE_token());
2389  return;
2390  }
2391 
2392  current_DOCTYPE_token().m_system_identifier.push_back(cp);
2393  }
2394 
2397  {
      // NOTE(review): the signature line is not in this listing; the closing '\'' suggests the
      // "DOCTYPE system identifier (single-quoted)" state handler — confirm.
      // Collects the system identifier one code point at a time until the closing '\''.
2398  char32_t cp = consume();
2399 
2400  if (eof())
2401  {
2402  report_error(error_name::eof_in_doctype);
2403  current_DOCTYPE_token().m_force_quirks_flag = true;
2404  emit_token(current_DOCTYPE_token());
2405  emit_token(end_of_file_token());
2406  return;
2407  }
2408 
2409  switch (cp)
2410  {
2411  case U'\'':
2412  change_state(after_DOCTYPE_system_identifier_state);
2413  return;
2414  case U'\x0':
2415  report_error(error_name::unexpected_null_character);
2416  current_DOCTYPE_token().m_system_identifier.push_back(U'\xFFFD');
2417  return;
2418  case U'>':
      // '>' before the closing quote ends the DOCTYPE early: emitted with force-quirks set.
2419  report_error(error_name::abrupt_doctype_system_identifier);
2420  current_DOCTYPE_token().m_force_quirks_flag = true;
2421  change_state(data_state);
2422  emit_token(current_DOCTYPE_token());
2423  return;
2424  }
2425 
2426  current_DOCTYPE_token().m_system_identifier.push_back(cp);
2427  }
2428 
2431  {
      // NOTE(review): the signature line is not in this listing; the
      // unexpected_character_after_doctype_system_identifier error suggests the
      // "after DOCTYPE system identifier" state handler — confirm.
2432  char32_t cp = consume();
2433 
2434  if (eof())
2435  {
2436  report_error(error_name::eof_in_doctype);
2437  current_DOCTYPE_token().m_force_quirks_flag = true;
2438  emit_token(current_DOCTYPE_token());
2439  emit_token(end_of_file_token());
2440  return;
2441  }
2442 
2443  switch (cp)
2444  {
2445  case U'\x9': // TAB
2446  case U'\xA': // LF
2447  case U'\xC': // FF
2448  case U'\x20': // SPACE
      // Whitespace is ignored; the state does not change.
2449  return;
2450  case U'>':
2451  change_state(data_state);
2452  emit_token(current_DOCTYPE_token());
2453  return;
2454  }
2455 
      // Unlike the other DOCTYPE error paths, this one does not set the force-quirks flag.
2456  report_error(error_name::unexpected_character_after_doctype_system_identifier);
2457  reconsume(bogus_DOCTYPE_state);
2458  }
2459 
2462  {
2463  char32_t cp = consume();
2464 
2465  if (eof())
2466  {
2467  emit_token(current_DOCTYPE_token());
2468  emit_token(end_of_file_token());
2469  return;
2470  }
2471 
2472  switch (cp)
2473  {
2474  case U'>':
2475  change_state(data_state);
2476  emit_token(current_DOCTYPE_token());
2477  return;
2478  case U'\x0':
2479  report_error(error_name::unexpected_null_character);
2480  return;
2481  }
2482  }
2483 
2486  {
2487  char32_t cp = consume();
2488 
2489  if (eof())
2490  {
2491  report_error(error_name::eof_in_cdata);
2492  emit_token(end_of_file_token());
2493  return;
2494  }
2495 
2496  if (cp == U']')
2497  {
2498  change_state(CDATA_section_bracket_state);
2499  return;
2500  }
2501 
2502  emit_token(cp);
2503  }
2504 
2507  {
2508  char32_t cp = consume();
2509 
2510  if (!eof() && cp == U']')
2511  {
2512  change_state(CDATA_section_end_state);
2513  return;
2514  }
2515 
2516  emit_token(U']');
2517  reconsume(CDATA_section_state);
2518  }
2519 
2522  {
2523  char32_t cp = consume();
2524 
2525  if (!eof())
2526  {
2527  switch (cp)
2528  {
2529  case U']':
2530  emit_token(U']');
2531  return;
2532  case U'>':
2533  change_state(data_state);
2534  return;
2535  }
2536  }
2537 
2538  emit_token(U']');
2539  emit_token(U']');
2540  reconsume(CDATA_section_state);
2541  }
2542 
2545  {
2546  m_temporary_buffer.clear();
2547  m_temporary_buffer.push_back(U'&');
2548 
2549  char32_t cp = consume();
2550 
2551  if (!eof())
2552  {
2553  if (is_ascii_alphanumeric(cp))
2554  {
2555  reconsume(named_character_reference_state);
2556  return;
2557  }
2558 
2559  if (cp == U'#')
2560  {
2561  m_temporary_buffer.push_back(cp);
2562  change_state(numeric_character_reference_state);
2563  return;
2564  }
2565  }
2566 
2567  flush_code_points_consumed_as_character_reference();
2568  reconsume(return_state());
2569  }
2570 
2573  {
2574  if (!fill(named_character_reference_max_length + 1)) return;
2575 
2576  std::uint32_t len = 0;
2577  std::array<char32_t, 2> a = match_named_character_reference(len);
2578 
2579  if (len != 0) // matched
2580  {
2581  char32_t tail = *(begin() + len - 1);
2582 
2583  auto it1 = begin();
2584  auto it2 = begin() + len;
2585  while (it1 != it2) m_temporary_buffer.push_back(*it1++);
2586  consume(len);
2587 
2588  if (begin() != end())
2589  {
2590  char32_t cp = next_input_character();
2591  if (consumed_as_part_of_attribute() && tail != U';' && (cp == U'=' || is_ascii_alphanumeric(cp)))
2592  {
2593  flush_code_points_consumed_as_character_reference();
2594  change_state(return_state());
2595  flush_code_point();
2596  return;
2597  }
2598  }
2599 
2600  if (tail != U';') report_error(error_name::missing_semicolon_after_character_reference);
2601 
2602  m_temporary_buffer.clear();
2603  m_temporary_buffer.push_back(a[0]);
2604  if (a[1] != 0) m_temporary_buffer.push_back(a[1]);
2605 
2606  flush_code_points_consumed_as_character_reference();
2607  change_state(return_state());
2608  flush_code_point();
2609  return;
2610  }
2611 
2612  flush_code_points_consumed_as_character_reference();
2613  change_state(ambiguous_ampersand_state);
2614 
2615  flush_code_point();
2616  }
2617 
2620  {
2621  char32_t cp = consume();
2622 
2623  if (!eof())
2624  {
2625  if (is_ascii_alphanumeric(cp))
2626  {
2627  if (consumed_as_part_of_attribute()) current_attribute().m_value.push_back(cp);
2628  else emit_token(cp);
2629  return;
2630  }
2631 
2632  if (cp == U';')
2633  {
2634  report_error(error_name::unknown_named_character_reference);
2635  reconsume(return_state());
2636  return;
2637  }
2638  }
2639 
2640  reconsume(return_state());
2641  }
2642 
2645  {
2646  m_character_reference_code = 0;
2647 
2648  char32_t cp = consume();
2649 
2650  if (!eof())
2651  {
2652  switch (cp)
2653  {
2654  case U'x':
2655  case U'X':
2656  m_temporary_buffer.push_back(cp);
2657  change_state(hexadecimal_character_reference_start_state);
2658  return;
2659  }
2660  }
2661 
2662  reconsume(decimal_character_reference_start_state);
2663  }
2664 
2667  {
2668  char32_t cp = consume();
2669 
2670  if (!eof() && is_ascii_hex_digit(cp))
2671  {
2672  reconsume(hexadecimal_character_reference_state);
2673  return;
2674  }
2675 
2676  report_error(error_name::absence_of_digits_in_numeric_character_reference);
2677  flush_code_points_consumed_as_character_reference();
2678  reconsume(return_state());
2679  }
2680 
2683  {
2684  char32_t cp = consume();
2685 
2686  if (!eof() && is_ascii_digit(cp))
2687  {
2688  reconsume(decimal_character_reference_state);
2689  return;
2690  }
2691 
2692  report_error(error_name::absence_of_digits_in_numeric_character_reference);
2693  flush_code_points_consumed_as_character_reference();
2694  reconsume(return_state());
2695  }
2696 
2699  {
2700  char32_t cp = consume();
2701 
2702  if (!eof())
2703  {
2704  if (is_ascii_hex_digit(cp))
2705  {
2706  char32_t c = cp;
2707 
2708  if (is_ascii_digit(cp)) c -= 0x30;
2709  else if (is_ascii_upper_hex_digit(cp)) c -= 0x37;
2710  else c -= 0x57;
2711 
2712  m_character_reference_code = (m_character_reference_code * 16) + c;
2713 
2714  return;
2715  }
2716 
2717  if (cp == U';')
2718  {
2719  change_state(numeric_character_reference_end_state);
2720  return;
2721  }
2722  }
2723 
2724  report_error(error_name::missing_semicolon_after_character_reference);
2725  reconsume(numeric_character_reference_end_state);
2726  }
2727 
2730  {
2731  char32_t cp = consume();
2732 
2733  if (!eof())
2734  {
2735  if (is_ascii_digit(cp))
2736  {
2737  m_character_reference_code = (m_character_reference_code * 10) + (cp - 0x30);
2738  return;
2739  }
2740 
2741  if (cp == U';')
2742  {
2743  change_state(numeric_character_reference_end_state);
2744  return;
2745  }
2746  }
2747 
2748  report_error(error_name::missing_semicolon_after_character_reference);
2749  reconsume(numeric_character_reference_end_state);
2750  }
2751 
2754  {
2755  char32_t c = m_character_reference_code;
2756 
2757  if (c == 0x0)
2758  {
2759  report_error(error_name::null_character_reference);
2760  m_character_reference_code = U'\xFFFD';
2761  }
2762 
2763  if(0x10FFFF < c)
2764  {
2765  report_error(error_name::character_reference_outside_unicode_range);
2766  m_character_reference_code = U'\xFFFD';
2767  }
2768 
2769  if (is_surrogate(c))
2770  {
2771  report_error(error_name::surrogate_character_reference);
2772  m_character_reference_code = U'\xFFFD';
2773  }
2774 
2775  if (is_noncharacter(c))
2776  {
2777  report_error(error_name::noncharacter_character_reference);
2778  }
2779 
2780  if (c == 0xD || (is_control(c) && !is_ascii_white_space(c)))
2781  {
2782  report_error(error_name::control_character_reference);
2783  }
2784 
2785  auto it = character_reference_code_tbl.find(c);
2786  if (it != character_reference_code_tbl.end()) m_character_reference_code = it->second;
2787 
2788  m_temporary_buffer.assign(1, m_character_reference_code);
2789  flush_code_points_consumed_as_character_reference();
2790  change_state(return_state());
2791  }
2792 
2793  // State identifiers -----------------------------------------------------------
      // Each constant is a pointer to the tokenizer member function (on_*_state) that handles
      // the state of the same name; these are the values passed to change_state()/reconsume().
2794 
2795  static state_type constexpr data_state = &tokenizer::on_data_state;
2796  static state_type constexpr RCDATA_state = &tokenizer::on_RCDATA_state;
2797  static state_type constexpr RAWTEXT_state = &tokenizer::on_RAWTEXT_state;
2798  static state_type constexpr script_data_state = &tokenizer::on_script_data_state;
2799  static state_type constexpr PLAINTEXT_state = &tokenizer::on_PLAINTEXT_state;
2800  static state_type constexpr tag_open_state = &tokenizer::on_tag_open_state;
2801  static state_type constexpr end_tag_open_state = &tokenizer::on_end_tag_open_state;
2802  static state_type constexpr tag_name_state = &tokenizer::on_tag_name_state;
2803  static state_type constexpr RCDATA_less_than_sign_state = &tokenizer::on_RCDATA_less_than_sign_state;
2804  static state_type constexpr RCDATA_end_tag_open_state = &tokenizer::on_RCDATA_end_tag_open_state;
2805  static state_type constexpr RCDATA_end_tag_name_state = &tokenizer::on_RCDATA_end_tag_name_state;
2806  static state_type constexpr RAWTEXT_less_than_sign_state = &tokenizer::on_RAWTEXT_less_than_sign_state;
2807  static state_type constexpr RAWTEXT_end_tag_open_state = &tokenizer::on_RAWTEXT_end_tag_open_state;
2808  static state_type constexpr RAWTEXT_end_tag_name_state = &tokenizer::on_RAWTEXT_end_tag_name_state;
2809  static state_type constexpr script_data_less_than_sign_state = &tokenizer::on_script_data_less_than_sign_state;
2810  static state_type constexpr script_data_end_tag_open_state = &tokenizer::on_script_data_end_tag_open_state;
2811  static state_type constexpr script_data_end_tag_name_state = &tokenizer::on_script_data_end_tag_name_state;
2812  static state_type constexpr script_data_escape_start_state = &tokenizer::on_script_data_escape_start_state;
2813  static state_type constexpr script_data_escape_start_dash_state = &tokenizer::on_script_data_escape_start_dash_state;
2814  static state_type constexpr script_data_escaped_state = &tokenizer::on_script_data_escaped_state;
2815  static state_type constexpr script_data_escaped_dash_state = &tokenizer::on_script_data_escaped_dash_state;
2816  static state_type constexpr script_data_escaped_dash_dash_state = &tokenizer::on_script_data_escaped_dash_dash_state;
2817  static state_type constexpr script_data_escaped_less_than_sign_state = &tokenizer::on_script_data_escaped_less_than_sign_state;
2818  static state_type constexpr script_data_escaped_end_tag_open_state = &tokenizer::on_script_data_escaped_end_tag_open_state;
2819  static state_type constexpr script_data_escaped_end_tag_name_state = &tokenizer::on_script_data_escaped_end_tag_name_state;
2820  static state_type constexpr script_data_double_escape_start_state = &tokenizer::on_script_data_double_escape_start_state;
2821  static state_type constexpr script_data_double_escaped_state = &tokenizer::on_script_data_double_escaped_state;
2822  static state_type constexpr script_data_double_escaped_dash_state = &tokenizer::on_script_data_double_escaped_dash_state;
2823  static state_type constexpr script_data_double_escaped_dash_dash_state = &tokenizer::on_script_data_double_escaped_dash_dash_state;
2824  static state_type constexpr script_data_double_escaped_less_than_sign_state = &tokenizer::on_script_data_double_escaped_less_than_sign_state;
2825  static state_type constexpr script_data_double_escape_end_state = &tokenizer::on_script_data_double_escape_end_state;
2826  static state_type constexpr before_attribute_name_state = &tokenizer::on_before_attribute_name_state;
2827  static state_type constexpr attribute_name_state = &tokenizer::on_attribute_name_state;
2828  static state_type constexpr after_attribute_name_state = &tokenizer::on_after_attribute_name_state;
2829  static state_type constexpr before_attribute_value_state = &tokenizer::on_before_attribute_value_state;
2830  static state_type constexpr attribute_value_double_quoted_state = &tokenizer::on_attribute_value_double_quoted_state;
2831  static state_type constexpr attribute_value_single_quoted_state = &tokenizer::on_attribute_value_single_quoted_state;
2832  static state_type constexpr attribute_value_unquoted_state = &tokenizer::on_attribute_value_unquoted_state;
2833  static state_type constexpr after_attribute_value_quoted_state = &tokenizer::on_after_attribute_value_quoted_state;
2834  static state_type constexpr self_closing_start_tag_state = &tokenizer::on_self_closing_start_tag_state;
2835  static state_type constexpr bogus_comment_state = &tokenizer::on_bogus_comment_state;
2836  static state_type constexpr markup_declaration_open_state = &tokenizer::on_markup_declaration_open_state;
2837  static state_type constexpr comment_start_state = &tokenizer::on_comment_start_state;
2838  static state_type constexpr comment_start_dash_state = &tokenizer::on_comment_start_dash_state;
2839  static state_type constexpr comment_state = &tokenizer::on_comment_state;
2840  static state_type constexpr comment_less_than_sign_state = &tokenizer::on_comment_less_than_sign_state;
2841  static state_type constexpr comment_less_than_sign_bang_state = &tokenizer::on_comment_less_than_sign_bang_state;
2842  static state_type constexpr comment_less_than_sign_bang_dash_state = &tokenizer::on_comment_less_than_sign_bang_dash_state;
2843  static state_type constexpr comment_less_than_sign_bang_dash_dash_state = &tokenizer::on_comment_less_than_sign_bang_dash_dash_state;
2844  static state_type constexpr comment_end_dash_state = &tokenizer::on_comment_end_dash_state;
2845  static state_type constexpr comment_end_state = &tokenizer::on_comment_end_state;
2846  static state_type constexpr comment_end_bang_state = &tokenizer::on_comment_end_bang_state;
2847  static state_type constexpr DOCTYPE_state = &tokenizer::on_DOCTYPE_state;
2848  static state_type constexpr before_DOCTYPE_name_state = &tokenizer::on_before_DOCTYPE_name_state;
2849  static state_type constexpr DOCTYPE_name_state = &tokenizer::on_DOCTYPE_name_state;
2850  static state_type constexpr after_DOCTYPE_name_state = &tokenizer::on_after_DOCTYPE_name_state;
2851  static state_type constexpr after_DOCTYPE_public_keyword_state = &tokenizer::on_after_DOCTYPE_public_keyword_state;
2852  static state_type constexpr before_DOCTYPE_public_identifier_state = &tokenizer::on_before_DOCTYPE_public_identifier_state;
2853  static state_type constexpr DOCTYPE_public_identifier_double_quoted_state = &tokenizer::on_DOCTYPE_public_identifier_double_quoted_state;
2854  static state_type constexpr DOCTYPE_public_identifier_single_quoted_state = &tokenizer::on_DOCTYPE_public_identifier_single_quoted_state;
2855  static state_type constexpr after_DOCTYPE_public_identifier_state = &tokenizer::on_after_DOCTYPE_public_identifier_state;
2856  static state_type constexpr between_DOCTYPE_public_and_system_identifiers_state = &tokenizer::on_between_DOCTYPE_public_and_system_identifiers_state;
2857  static state_type constexpr after_DOCTYPE_system_keyword_state = &tokenizer::on_after_DOCTYPE_system_keyword_state;
2858  static state_type constexpr before_DOCTYPE_system_identifier_state = &tokenizer::on_before_DOCTYPE_system_identifier_state;
2859  static state_type constexpr DOCTYPE_system_identifier_double_quoted_state = &tokenizer::on_DOCTYPE_system_identifier_double_quoted_state;
2860  static state_type constexpr DOCTYPE_system_identifier_single_quoted_state = &tokenizer::on_DOCTYPE_system_identifier_single_quoted_state;
2861  static state_type constexpr after_DOCTYPE_system_identifier_state = &tokenizer::on_after_DOCTYPE_system_identifier_state;
2862  static state_type constexpr bogus_DOCTYPE_state = &tokenizer::on_bogus_DOCTYPE_state;
2863  static state_type constexpr CDATA_section_state = &tokenizer::on_CDATA_section_state;
2864  static state_type constexpr CDATA_section_bracket_state = &tokenizer::on_CDATA_section_bracket_state;
2865  static state_type constexpr CDATA_section_end_state = &tokenizer::on_CDATA_section_end_state;
2866  static state_type constexpr character_reference_state = &tokenizer::on_character_reference_state;
2867  static state_type constexpr named_character_reference_state = &tokenizer::on_named_character_reference_state;
2868  static state_type constexpr ambiguous_ampersand_state = &tokenizer::on_ambiguous_ampersand_state;
2869  static state_type constexpr numeric_character_reference_state = &tokenizer::on_numeric_character_reference_state;
2870  static state_type constexpr hexadecimal_character_reference_start_state = &tokenizer::on_hexadecimal_character_reference_start_state;
2871  static state_type constexpr decimal_character_reference_start_state = &tokenizer::on_decimal_character_reference_start_state;
2872  static state_type constexpr hexadecimal_character_reference_state = &tokenizer::on_hexadecimal_character_reference_state;
2873  static state_type constexpr decimal_character_reference_state = &tokenizer::on_decimal_character_reference_state;
2874  static state_type constexpr numeric_character_reference_end_state = &tokenizer::on_numeric_character_reference_end_state;
2875  };
2876 }
wordring::whatwg::html::parsing::end_tag_token
Definition: whatwg/html/parsing/token.hpp:185
wordring::whatwg::html::parsing::tokenizer::on_DOCTYPE_system_identifier_double_quoted_state
void on_DOCTYPE_system_identifier_double_quoted_state()
Definition: whatwg/html/parsing/tokenization.hpp:2362
wordring::whatwg::html::parsing::tokenizer::on_DOCTYPE_public_identifier_single_quoted_state
void on_DOCTYPE_public_identifier_single_quoted_state()
Definition: whatwg/html/parsing/tokenization.hpp:2158
wordring::whatwg::html::parsing::tokenizer::unify_attribute
void unify_attribute()
属性の重複を削る
Definition: whatwg/html/parsing/tokenization.hpp:204
wordring::whatwg::html::parsing::tokenizer::create_attribute
token_attribute & create_attribute()
現在のタグ・トークン上で新しい属性を開始する
Definition: whatwg/html/parsing/tokenization.hpp:162
wordring::whatwg::html::parsing::input_stream::match_named_character_reference
std::array< char32_t, 2 > match_named_character_reference(std::uint32_t &len)
名前付き文字参照とストリーム・バッファ内の文字列を比較する
Definition: whatwg/html/parsing/input_stream.hpp:332
wordring::whatwg::html::parsing::token_attribute
Definition: whatwg/html/parsing/token.hpp:52
wordring::whatwg::html::parsing::tokenizer::on_RAWTEXT_end_tag_name_state
void on_RAWTEXT_end_tag_name_state()
Definition: whatwg/html/parsing/tokenization.hpp:666
wordring::whatwg::html::parsing::tokenizer::on_RAWTEXT_state
void on_RAWTEXT_state()
Definition: whatwg/html/parsing/tokenization.hpp:363
wordring::whatwg::html::parsing::tokenizer::on_comment_end_state
void on_comment_end_state()
Definition: whatwg/html/parsing/tokenization.hpp:1794
wordring::whatwg::html::parsing::input_stream::eof
bool eof() const
ストリーム終端に達しているか調べる
Definition: whatwg/html/parsing/input_stream.hpp:221
wordring::whatwg::html::parsing::tokenizer::on_after_DOCTYPE_system_keyword_state
void on_after_DOCTYPE_system_keyword_state()
Definition: whatwg/html/parsing/tokenization.hpp:2275
wordring::whatwg::html::parsing::tokenizer::in_html_namespace
bool in_html_namespace() const
カレント・ノードがHTML名前空間に属するか調べる
Definition: whatwg/html/parsing/tokenization.hpp:194
wordring::whatwg::html::parsing::tokenizer::on_before_DOCTYPE_system_identifier_state
void on_before_DOCTYPE_system_identifier_state()
Definition: whatwg/html/parsing/tokenization.hpp:2320
wordring::whatwg::html::parsing::tokenizer::on_RCDATA_state
void on_RCDATA_state()
Definition: whatwg/html/parsing/tokenization.hpp:333
wordring::whatwg::html::parsing::tokenizer::on_before_DOCTYPE_public_identifier_state
void on_before_DOCTYPE_public_identifier_state()
Definition: whatwg/html/parsing/tokenization.hpp:2082
wordring::whatwg::html::parsing::tokenizer::on_attribute_value_single_quoted_state
void on_attribute_value_single_quoted_state()
Definition: whatwg/html/parsing/tokenization.hpp:1406
wordring::whatwg::html::parsing::tokenizer::on_attribute_value_unquoted_state
void on_attribute_value_unquoted_state()
Definition: whatwg/html/parsing/tokenization.hpp:1436
wordring::whatwg::html::parsing::tokenizer::on_comment_less_than_sign_bang_state
void on_comment_less_than_sign_bang_state()
Definition: whatwg/html/parsing/tokenization.hpp:1722
wordring::whatwg::html::parsing::tokenizer::on_script_data_escaped_dash_dash_state
void on_script_data_escaped_dash_dash_state()
Definition: whatwg/html/parsing/tokenization.hpp:903
wordring::whatwg::html::parsing::input_stream::consume
value_type consume()
次の入力文字を消費する
Definition: whatwg/html/parsing/input_stream.hpp:241
wordring::whatwg::html::parsing::tokenizer::on_tag_open_state
void on_tag_open_state()
Definition: whatwg/html/parsing/tokenization.hpp:436
wordring::whatwg::html::parsing::tokenizer::on_script_data_double_escape_end_state
void on_script_data_double_escape_end_state()
Definition: whatwg/html/parsing/tokenization.hpp:1190
wordring::whatwg::html::parsing::input_stream::flush_code_point
void flush_code_point()
ストリーム・バッファ内のコード・ポイントをすべて発送する
Definition: whatwg/html/parsing/input_stream.hpp:177
wordring::whatwg::html::parsing::tokenizer::on_comment_less_than_sign_bang_dash_dash_state
void on_comment_less_than_sign_bang_dash_dash_state()
Definition: whatwg/html/parsing/tokenization.hpp:1750
wordring::whatwg::html::parsing::token_attribute_list
Definition: whatwg/html/parsing/token.hpp:77
wordring::whatwg::html::parsing::tokenizer::on_script_data_escaped_state
void on_script_data_escaped_state()
Definition: whatwg/html/parsing/tokenization.hpp:839
wordring::whatwg::html::parsing::tokenizer::on_DOCTYPE_state
void on_DOCTYPE_state()
Definition: whatwg/html/parsing/tokenization.hpp:1860
wordring::whatwg::html::parsing::tokenizer::on_RAWTEXT_end_tag_open_state
void on_RAWTEXT_end_tag_open_state()
Definition: whatwg/html/parsing/tokenization.hpp:649
wordring::whatwg::html::parsing::tokenizer::on_attribute_name_state
void on_attribute_name_state()
Definition: whatwg/html/parsing/tokenization.hpp:1263
wordring::whatwg::html::parsing::tokenizer::on_PLAINTEXT_state
void on_PLAINTEXT_state()
Definition: whatwg/html/parsing/tokenization.hpp:415
wordring::whatwg::html::parsing::start_tag_token
Definition: whatwg/html/parsing/token.hpp:172
wordring::whatwg::html::parsing::tokenizer::on_after_DOCTYPE_public_identifier_state
void on_after_DOCTYPE_public_identifier_state()
Definition: whatwg/html/parsing/tokenization.hpp:2192
wordring::whatwg::html::parsing::tokenizer::on_markup_declaration_open_state
void on_markup_declaration_open_state()
Definition: whatwg/html/parsing/tokenization.hpp:1565
wordring::whatwg::html::parsing::tokenizer::on_named_character_reference_state
void on_named_character_reference_state()
Definition: whatwg/html/parsing/tokenization.hpp:2572
wordring::whatwg::html::parsing::tokenizer::on_after_attribute_name_state
void on_after_attribute_name_state()
Definition: whatwg/html/parsing/tokenization.hpp:1310
wordring::whatwg::html::parsing::tokenizer::on_RAWTEXT_less_than_sign_state
void on_RAWTEXT_less_than_sign_state()
Definition: whatwg/html/parsing/tokenization.hpp:633
wordring::whatwg::html::parsing::tokenizer::on_comment_end_bang_state
void on_comment_end_bang_state()
Definition: whatwg/html/parsing/tokenization.hpp:1826
wordring::whatwg::html::parsing::input_stream::reconsume
void reconsume()
現在の入力文字を再消費する
Definition: whatwg/html/parsing/input_stream.hpp:270
wordring::whatwg::html::parsing::end_of_file_token
Definition: whatwg/html/parsing/token.hpp:229
wordring::whatwg::html::parsing::tokenizer::on_DOCTYPE_public_identifier_double_quoted_state
void on_DOCTYPE_public_identifier_double_quoted_state()
Definition: whatwg/html/parsing/tokenization.hpp:2124
wordring::whatwg::html::parsing::tokenizer::on_bogus_comment_state
void on_bogus_comment_state()
Definition: whatwg/html/parsing/tokenization.hpp:1538
wordring::whatwg::html::parsing::tokenizer::on_numeric_character_reference_end_state
void on_numeric_character_reference_end_state()
Definition: whatwg/html/parsing/tokenization.hpp:2753
wordring::whatwg::html::parsing::tokenizer::clear
void clear()
初期状態に戻し、再利用可能とする
Definition: whatwg/html/parsing/tokenization.hpp:92
wordring::whatwg::html::parsing::tokenizer::on_comment_less_than_sign_state
void on_comment_less_than_sign_state()
Definition: whatwg/html/parsing/tokenization.hpp:1700
wordring::whatwg::html::parsing::tokenizer::on_comment_less_than_sign_bang_dash_state
void on_comment_less_than_sign_bang_dash_state()
Definition: whatwg/html/parsing/tokenization.hpp:1736
wordring::whatwg::html::parsing::tokenizer::on_script_data_state
void on_script_data_state()
Definition: whatwg/html/parsing/tokenization.hpp:389
wordring::whatwg::html::parsing::tokenizer::on_script_data_less_than_sign_state
void on_script_data_less_than_sign_state()
Definition: whatwg/html/parsing/tokenization.hpp:715
wordring::whatwg::html::parsing::tokenizer::on_RCDATA_end_tag_name_state
void on_RCDATA_end_tag_name_state()
Definition: whatwg/html/parsing/tokenization.hpp:584
wordring::whatwg::html::parsing::tokenizer::m_current_tag_token_id
std::uint32_t m_current_tag_token_id
現在のタグ・トークンを識別する
Definition: whatwg/html/parsing/tokenization.hpp:73
wordring::whatwg::html::parsing::tokenizer::on_before_attribute_name_state
void on_before_attribute_name_state()
Definition: whatwg/html/parsing/tokenization.hpp:1229
wordring::whatwg::html::parsing::tokenizer::on_script_data_double_escaped_state
void on_script_data_double_escaped_state()
Definition: whatwg/html/parsing/tokenization.hpp:1074
wordring::whatwg::html::parsing::tokenizer::on_script_data_end_tag_open_state
void on_script_data_end_tag_open_state()
Definition: whatwg/html/parsing/tokenization.hpp:740
wordring::whatwg::html::parsing::tokenizer::on_script_data_escaped_end_tag_open_state
void on_script_data_escaped_end_tag_open_state()
Definition: whatwg/html/parsing/tokenization.hpp:966
wordring::whatwg::html::parsing::tokenizer::on_script_data_escaped_less_than_sign_state
void on_script_data_escaped_less_than_sign_state()
Definition: whatwg/html/parsing/tokenization.hpp:939
wordring::whatwg::html::parsing::tokenizer::on_DOCTYPE_system_identifier_single_quoted_state
void on_DOCTYPE_system_identifier_single_quoted_state()
Definition: whatwg/html/parsing/tokenization.hpp:2396
wordring::whatwg::html::parsing::tokenizer::on_character_reference_state
void on_character_reference_state()
Definition: whatwg/html/parsing/tokenization.hpp:2544
wordring::whatwg::html::parsing::tokenizer::on_comment_state
void on_comment_state()
Definition: whatwg/html/parsing/tokenization.hpp:1669
wordring::whatwg::html::parsing::tokenizer::on_numeric_character_reference_state
void on_numeric_character_reference_state()
Definition: whatwg/html/parsing/tokenization.hpp:2644
wordring::whatwg::html::parsing::tokenizer::on_before_DOCTYPE_name_state
void on_before_DOCTYPE_name_state()
Definition: whatwg/html/parsing/tokenization.hpp:1892
wordring::whatwg::html::parsing::tokenizer::on_hexadecimal_character_reference_state
void on_hexadecimal_character_reference_state()
Definition: whatwg/html/parsing/tokenization.hpp:2698
wordring::whatwg::html::parsing::tokenizer::on_script_data_escaped_end_tag_name_state
void on_script_data_escaped_end_tag_name_state()
Definition: whatwg/html/parsing/tokenization.hpp:986
wordring::whatwg::html::parsing::tokenizer::on_script_data_double_escaped_dash_dash_state
void on_script_data_double_escaped_dash_dash_state()
Definition: whatwg/html/parsing/tokenization.hpp:1138
wordring::whatwg::html::parsing::tokenizer::on_script_data_end_tag_name_state
void on_script_data_end_tag_name_state()
Definition: whatwg/html/parsing/tokenization.hpp:760
wordring::whatwg::html::parsing::input_stream::match
bool match(std::u32string_view label, bool with_current, bool case_insensitive)
与えられた文字列とストリーム・バッファ内の文字列を比較する
Definition: whatwg/html/parsing/input_stream.hpp:293
wordring::whatwg::html::parsing::tokenizer::on_tag_name_state
void on_tag_name_state()
Definition: whatwg/html/parsing/tokenization.hpp:509
wordring::whatwg::html::parsing::tokenizer::on_end_tag_open_state
void on_end_tag_open_state()
Definition: whatwg/html/parsing/tokenization.hpp:476
wordring::whatwg::html::parsing::input_stream::clear
void clear()
初期状態に戻し、再利用可能とする
Definition: whatwg/html/parsing/input_stream.hpp:78
wordring::whatwg::html::parsing::tokenizer::on_bogus_DOCTYPE_state
void on_bogus_DOCTYPE_state()
Definition: whatwg/html/parsing/tokenization.hpp:2461
wordring::whatwg::html::parsing::input_stream::next_input_character
value_type next_input_character() const
次の入力文字を返す
Definition: whatwg/html/parsing/input_stream.hpp:211
wordring::whatwg::html::parsing::tokenizer::on_script_data_escaped_dash_state
void on_script_data_escaped_dash_state()
Definition: whatwg/html/parsing/tokenization.hpp:870
wordring::whatwg::html::parsing::tokenizer::on_after_DOCTYPE_system_identifier_state
void on_after_DOCTYPE_system_identifier_state()
Definition: whatwg/html/parsing/tokenization.hpp:2430
wordring::whatwg::html::parsing::input_stream::named_character_reference
std::array< char32_t, 2 > named_character_reference(std::uint32_t idx)
名前付き文字参照のコード・ポイントを取得する
Definition: whatwg/html/parsing/input_stream.hpp:383
wordring::whatwg::html::parsing::DOCTYPE_token
Definition: whatwg/html/parsing/token.hpp:22
wordring::whatwg::html::parsing::tokenizer::on_decimal_character_reference_state
void on_decimal_character_reference_state()
Definition: whatwg/html/parsing/tokenization.hpp:2729
wordring::whatwg::html::parsing::tokenizer::on_decimal_character_reference_start_state
void on_decimal_character_reference_start_state()
Definition: whatwg/html/parsing/tokenization.hpp:2682
wordring::whatwg::html::parsing::tokenizer::on_after_attribute_value_quoted_state
void on_after_attribute_value_quoted_state()
Definition: whatwg/html/parsing/tokenization.hpp:1481
wordring::whatwg::html::parsing::tokenizer::on_CDATA_section_state
void on_CDATA_section_state()
Definition: whatwg/html/parsing/tokenization.hpp:2485
wordring::whatwg::html::parsing::input_stream
HTML5 パーサー用のユニコード・コード・ポイント入力ストリーム
Definition: whatwg/html/parsing/input_stream.hpp:32
wordring::whatwg::html::parsing::tokenizer::on_DOCTYPE_name_state
void on_DOCTYPE_name_state()
Definition: whatwg/html/parsing/tokenization.hpp:1942
wordring::whatwg::html::parsing::tokenizer::on_data_state
void on_data_state()
Definition: whatwg/html/parsing/tokenization.hpp:303
wordring::whatwg::html::parsing::input_stream::fill
bool fill(std::uint32_t n)
バッファに指定文字数貯まっているか調べる
Definition: whatwg/html/parsing/input_stream.hpp:229
wordring::whatwg::html::parsing::character_token
Definition: whatwg/html/parsing/token.hpp:217
wordring::whatwg::html::parsing::tokenizer::on_script_data_escape_start_dash_state
void on_script_data_escape_start_dash_state()
Definition: whatwg/html/parsing/tokenization.hpp:824
wordring::whatwg::html::parsing::tokenizer::on_after_DOCTYPE_name_state
void on_after_DOCTYPE_name_state()
Definition: whatwg/html/parsing/tokenization.hpp:1983
wordring::whatwg::html::parsing::token_attribute_list::create
token_attribute & create()
新しい属性を開始する
Definition: whatwg/html/parsing/token.hpp:92
wordring::whatwg::html::parsing::tokenizer::on_script_data_double_escaped_dash_state
void on_script_data_double_escaped_dash_state()
Definition: whatwg/html/parsing/tokenization.hpp:1105
wordring::whatwg::html::parsing::input_stream::report_error
void report_error(error_name ec=static_cast< error_name >(0))
エラー報告する
Definition: whatwg/html/parsing/input_stream.hpp:97
wordring::whatwg::html::parsing::tokenizer::on_hexadecimal_character_reference_start_state
void on_hexadecimal_character_reference_start_state()
Definition: whatwg/html/parsing/tokenization.hpp:2666
wordring::whatwg::html::parsing::tokenizer::on_ambiguous_ampersand_state
void on_ambiguous_ampersand_state()
Definition: whatwg/html/parsing/tokenization.hpp:2619
wordring::whatwg::html::parsing::tokenizer::on_comment_start_state
void on_comment_start_state()
Definition: whatwg/html/parsing/tokenization.hpp:1617
wordring::whatwg::html::parsing
wordring::whatwg::html::parsing::tokenizer::on_CDATA_section_end_state
void on_CDATA_section_end_state()
Definition: whatwg/html/parsing/tokenization.hpp:2521
wordring::whatwg::html::parsing::tokenizer::on_script_data_double_escaped_less_than_sign_state
void on_script_data_double_escaped_less_than_sign_state()
Definition: whatwg/html/parsing/tokenization.hpp:1174
wordring::whatwg::html::parsing::tokenizer::on_script_data_escape_start_state
void on_script_data_escape_start_state()
Definition: whatwg/html/parsing/tokenization.hpp:809
wordring::whatwg::html::parsing::tokenizer::on_attribute_value_double_quoted_state
void on_attribute_value_double_quoted_state()
Definition: whatwg/html/parsing/tokenization.hpp:1376
wordring::whatwg::html::parsing::tokenizer::on_before_attribute_value_state
void on_before_attribute_value_state()
Definition: whatwg/html/parsing/tokenization.hpp:1345
wordring::whatwg::html::parsing::tag_token
Definition: whatwg/html/parsing/token.hpp:121
wordring::whatwg::html::parsing::tokenizer::on_RCDATA_less_than_sign_state
void on_RCDATA_less_than_sign_state()
Definition: whatwg/html/parsing/tokenization.hpp:551
wordring::whatwg::html::parsing::tokenizer::on_script_data_double_escape_start_state
void on_script_data_double_escape_start_state()
Definition: whatwg/html/parsing/tokenization.hpp:1035
wordring::whatwg::html::parsing::tokenizer::current_tag_token
tag_token & current_tag_token()
現在のタグ・トークンを返す
Definition: whatwg/html/parsing/tokenization.hpp:143
wordring::whatwg::html::parsing::comment_token
Definition: whatwg/html/parsing/token.hpp:205
wordring::whatwg::html::parsing::tokenizer::current_attribute
token_attribute & current_attribute()
現在の属性を返す
Definition: whatwg/html/parsing/tokenization.hpp:169
wordring::whatwg::html::parsing::tokenizer::on_self_closing_start_tag_state
void on_self_closing_start_tag_state()
Definition: whatwg/html/parsing/tokenization.hpp:1514
wordring::whatwg::html::parsing::tokenizer::on_comment_start_dash_state
void on_comment_start_dash_state()
Definition: whatwg/html/parsing/tokenization.hpp:1640
wordring::whatwg::html::parsing::tokenizer::on_RCDATA_end_tag_open_state
void on_RCDATA_end_tag_open_state()
Definition: whatwg/html/parsing/tokenization.hpp:567
wordring::whatwg::html::parsing::tokenizer::on_between_DOCTYPE_public_and_system_identifiers_state
void on_between_DOCTYPE_public_and_system_identifiers_state()
Definition: whatwg/html/parsing/tokenization.hpp:2235
wordring::whatwg::html::parsing::tokenizer
Definition: whatwg/html/parsing/tokenization.hpp:29
wordring::whatwg::html::parsing::tokenizer::on_comment_end_dash_state
void on_comment_end_dash_state()
Definition: whatwg/html/parsing/tokenization.hpp:1771
wordring::whatwg::html::parsing::tokenizer::on_CDATA_section_bracket_state
void on_CDATA_section_bracket_state()
Definition: whatwg/html/parsing/tokenization.hpp:2506
wordring::whatwg::html::parsing::input_stream::current_input_character
value_type current_input_character() const
現在の入力文字を返す
Definition: whatwg/html/parsing/input_stream.hpp:200
wordring::whatwg::html::parsing::tokenizer::on_after_DOCTYPE_public_keyword_state
void on_after_DOCTYPE_public_keyword_state()
Definition: whatwg/html/parsing/tokenization.hpp:2037