libwordring
wwwc/css_syntax/token.hpp
1 #pragma once
2 
3 #include <cassert>
4 #include <memory>
5 #include <string>
6 #include <variant>
7 #include <vector>
8 
9 namespace wordring::wwwc::css
10 {
11  class syntax_primitive;
12 
13  // --------------------------------------------------------------------------------------------
14  // 4. Tokenization
15  //
16  // https://drafts.csswg.org/css-syntax-3/#tokenization
17  // https://triple-underscore.github.io/css-syntax-ja.html#tokenization
18  // --------------------------------------------------------------------------------------------
19 
20  enum class syntax_primitive_name : std::size_t
21  {
22  Token = 1,
23 
24  IdentToken,
27  HashToken,
28  StringToken,
30  UrlToken,
31  BadUrlToken,
32  DelimToken,
33  NumberToken,
37  CdoToken,
38  CdcToken,
39  ColonToken,
41  CommaToken,
48  EofToken,
49 
50  SyntaxItem,
51 
52  AtRule,
54  Declaration,
57  Function,
58  SimpleBlock,
59  };
60 
66  enum class number_type_flag_name { integer = 1, number };
67 
/// Identifier token.
struct ident_token
{
    /// Value carried by the token, stored as UTF-32 text.
    std::u32string m_value;
};

/// Function token.
struct function_token
{
    /// Value carried by the token, stored as UTF-32 text.
    std::u32string m_value;
};

/// At-keyword token ("@xxx").
struct at_keyword_token
{
    /// Value carried by the token, stored as UTF-32 text.
    std::u32string m_value;
};
76 
78  struct hash_token
79  {
80  enum class type_flag_name
81  {
82  id = 1,
84  };
85 
86  std::u32string m_value;
88  };
89 
/// String token.
struct string_token
{
    /// Value carried by the token, stored as UTF-32 text.
    std::u32string m_value;
};

/// Bad-string token; a pure marker with no payload.
struct bad_string_token
{
};

/// URL token.
struct url_token
{
    /// Value carried by the token, stored as UTF-32 text.
    std::u32string m_value;
};

/// Bad-URL token; a pure marker with no payload.
struct bad_url_token
{
};

/// Delimiter token.
struct delim_token
{
    /// The single code point carried by the token; defaults to 0.
    char32_t m_value{ 0 };
};
104 
107  {
108  double m_value = 0;
109  number_type_flag_name m_type_flag = number_type_flag_name::integer;
110  };
111 
/// Percentage token (a number followed by '%').
struct percentage_token
{
    /// Numeric value of the token. Zero-initialised so that a
    /// default-constructed token never exposes an indeterminate double,
    /// matching number_token::m_value and dimension_token::m_value.
    double m_value = 0;
};
114 
116  {
117  double m_value = 0;
118  std::u32string m_unit;
119  number_type_flag_name m_type_flag = number_type_flag_name::integer;
120  };
121 
/// Whitespace token; a pure marker with no payload.
struct whitespace_token
{
};

/// "<!--" token.
struct CDO_token
{
};

/// "-->" token.
struct CDC_token
{
};

/// ':' token.
struct colon_token
{
};

/// ';' token.
struct semicolon_token
{
};

/// ',' token.
struct comma_token
{
};

/// '[' token.
struct open_square_token
{
};
142 
145 
/// '(' token; a pure marker with no payload.
struct open_paren_token
{
};

/// ')' token.
struct close_paren_token
{
};

/// '{' token.
struct open_curly_token
{
};

/// '}' token.
struct close_curly_token
{
};

/// EOF token.
struct eof_token
{
};
160 
161  // --------------------------------------------------------------------------------------------
162  // 5. Parsing
163  //
164  // https://drafts.csswg.org/css-syntax-3/#parsing
165  // https://triple-underscore.github.io/css-syntax-ja.html#parsing
166  // --------------------------------------------------------------------------------------------
167 
168  class component_value;
169 
172  struct function
173  {
174  std::u32string m_name;
175  std::vector<component_value> m_value;
176  };
177 
181  {
182  char32_t m_associated_token = 0;
183  std::vector<component_value> m_value;
184  };
185 
187  {
188  using value_type = std::variant<
189  ident_token,
191  hash_token,
192  string_token,
194  url_token,
196  delim_token,
197  number_token,
201  CDO_token,
202  CDC_token,
203  colon_token,
205  comma_token,
209  eof_token>;
210  };
211 
219  {
220  public:
222  {
223  container_fn() = default;
224  container_fn(container_fn const& rhs);
225  container_fn(container_fn&& rhs) = default;
226  container_fn(function const& fn);
227  ~container_fn();
228 
229  container_fn& operator=(container_fn const& rhs);
230  container_fn& operator=(container_fn&& rhs) = default;
231 
232  std::unique_ptr<function> m_value;
233  };
234 
236  {
237  container_sb() = default;
238  container_sb(container_sb const& rhs);
239  container_sb(container_sb&& rhs) = default;
240  container_sb(simple_block const& fn);
241  ~container_sb();
242 
243  container_sb& operator=(container_sb const& rhs);
244  container_sb& operator=(container_sb&& rhs) = default;
245 
246  std::unique_ptr<simple_block> m_value;
247  };
248 
249  public:
250  using value_type = std::variant<
251  std::monostate,
252  ident_token,
254  hash_token,
255  string_token,
257  url_token,
259  delim_token,
260  number_token,
264  CDO_token,
265  CDC_token,
266  colon_token,
268  comma_token,
272  eof_token,
273 
274  container_fn,
275  container_sb>;
276 
277  public:
278  component_value() = default;
279  component_value(component_value const&) = default;
280  component_value(component_value&&) = default;
281 
282  explicit component_value(function const& value)
283  : m_value(container_fn(value))
284  {
285  }
286 
287  explicit component_value(simple_block const& value)
288  : m_value(container_sb(value))
289  {
290  }
291 
292  explicit component_value(syntax_primitive const& value);
293 
294  template <typename T>
295  explicit component_value(T const& value)
296  : m_value(value)
297  {
298  }
299 
300  ~component_value();
301 
302  component_value& operator=(component_value const&) = default;
303 
304  syntax_primitive_name type() const
305  {
306  switch (m_value.index())
307  {
308  case 1: return syntax_primitive_name::IdentToken;
310  case 3: return syntax_primitive_name::HashToken;
311  case 4: return syntax_primitive_name::StringToken;
313  case 6: return syntax_primitive_name::UrlToken;
314  case 7: return syntax_primitive_name::BadUrlToken;
315  case 8: return syntax_primitive_name::DelimToken;
316  case 9: return syntax_primitive_name::NumberToken;
320  case 13: return syntax_primitive_name::CdoToken;
321  case 14: return syntax_primitive_name::CdcToken;
322  case 15: return syntax_primitive_name::ColonToken;
324  case 17: return syntax_primitive_name::CommaToken;
328  case 21: return syntax_primitive_name::EofToken;
329 
330  case 22: return syntax_primitive_name::Function;
331  case 23: return syntax_primitive_name::SimpleBlock;
332 
333  default:
334  break;
335  }
336  return static_cast<syntax_primitive_name>(0);
337  }
338 
339  bool has_value() const { return m_value.index() != 0; }
340 
341  template <typename T>
342  T& get()
343  {
344  if constexpr (std::is_same_v<T, function>)
345  {
346  assert(std::holds_alternative<container_fn>(m_value));
347  return *std::get_if<container_fn>(&m_value)->m_value;
348  }
349  else if constexpr (std::is_same_v<T, simple_block>)
350  {
351  assert(std::holds_alternative<container_sb>(m_value));
352  return *std::get_if<container_sb>(&m_value)->m_value;
353  }
354  else
355  {
356  assert(std::holds_alternative<T>(m_value));
357  return *std::get_if<T>(&m_value);
358  }
359  }
360 
361  template <typename T>
362  T const& get() const
363  {
364  if constexpr (std::is_same_v<T, function>)
365  {
366  assert(std::holds_alternative<container_fn>(m_value));
367  return *std::get_if<container_fn>(&m_value)->m_value;
368  }
369  else if constexpr (std::is_same_v<T, simple_block>)
370  {
371  assert(std::holds_alternative<container_sb>(m_value));
372  return *std::get_if<container_sb>(&m_value)->m_value;
373  }
374  else
375  {
376  assert(std::holds_alternative<T>(m_value));
377  return *std::get_if<T>(&m_value);
378  }
379  }
380 
381  private:
382  value_type m_value;
383  };
384 
409  struct at_rule
410  {
411  at_rule()
412  : m_block()
413  {
414  }
415 
416  at_rule(at_rule const& rhs)
417  : m_name(rhs.m_name)
418  , m_prelude(rhs.m_prelude)
419  , m_block()
420  {
421  if (rhs.m_block) m_block = std::make_unique<simple_block>(*rhs.m_block.get());
422  }
423 
424  at_rule(at_rule&& rhs) = default;
425 
426  at_rule& operator=(at_rule const& rhs)
427  {
428  m_name = rhs.m_name;
429  m_prelude = rhs.m_prelude;
430  m_block = std::make_unique<simple_block>(*rhs.m_block);
431 
432  return *this;
433  }
434 
435  at_rule& operator=(at_rule&& rhs) = default;
436 
437  ~at_rule();
438 
439  std::u32string m_name;
440  std::vector<component_value> m_prelude;
441  std::unique_ptr<simple_block> m_block;
442  };
443 
459  {
460  std::vector<component_value> m_prelude;
461  simple_block m_block;
462  };
463 
473  struct declaration
474  {
475  std::u32string m_name;
476  std::vector<component_value> m_value;
477  bool m_important_flag = false;
478  };
479 
483  {
484  using value_type = std::variant<
485  std::monostate,
486  ident_token,
489  hash_token,
490  string_token,
492  url_token,
494  delim_token,
495  number_token,
499  CDO_token,
500  CDC_token,
501  colon_token,
503  comma_token,
510  eof_token,
511 
513  function,
514  simple_block,
516  at_rule,
518  declaration>;
519 
520  public:
521  syntax_primitive() = default;
522  syntax_primitive(syntax_primitive const& rhs) = default;
523  syntax_primitive(syntax_primitive&& rhs) = default;
524 
525  template <typename T>
526  syntax_primitive(T const& token)
527  : m_value(token)
528  {
529  }
530 
531  syntax_primitive(component_value const& token);
532 
533  void operator=(syntax_primitive const& rhs) { m_value = rhs.m_value; }
534 
535  syntax_primitive_name type() const
536  {
537  switch (m_value.index())
538  {
539  case 1: return syntax_primitive_name::IdentToken;
542  case 4: return syntax_primitive_name::HashToken;
543  case 5: return syntax_primitive_name::StringToken;
545  case 7: return syntax_primitive_name::UrlToken;
546  case 8: return syntax_primitive_name::BadUrlToken;
547  case 9: return syntax_primitive_name::DelimToken;
548  case 10: return syntax_primitive_name::NumberToken;
552  case 14: return syntax_primitive_name::CdoToken;
553  case 15: return syntax_primitive_name::CdcToken;
554  case 16: return syntax_primitive_name::ColonToken;
556  case 18: return syntax_primitive_name::CommaToken;
563  case 25: return syntax_primitive_name::EofToken;
564 
566  case 27: return syntax_primitive_name::Function;
567  case 28: return syntax_primitive_name::SimpleBlock;
569  case 30: return syntax_primitive_name::AtRule;
570  case 31: return syntax_primitive_name::QualifiedRule;
571  case 32: return syntax_primitive_name::Declaration;
572 
573  default:
574  break;
575  }
576  return static_cast<syntax_primitive_name>(0);
577  }
578 
579  bool has_value() const { return m_value.index() != 0; }
580 
581  template <typename T>
582  T& get()
583  {
584  assert(std::holds_alternative<T>(m_value));
585  return *std::get_if<T>(&m_value);
586  }
587 
588  template <typename T>
589  T const& get() const
590  {
591  assert(std::holds_alternative<T>(m_value));
592  return *std::get_if<T>(&m_value);
593  }
594 
595  private:
596  value_type m_value;
597  };
598 
602  {
603  public:
604  using container = std::vector<syntax_primitive>;
605  //using iterator = typename container::iterator;
606  using const_iterator = typename container::const_iterator;
608 
609  public:
610  syntax_primitive_stream() = default;
613 
614  syntax_primitive_stream(container& c);
615 
616  syntax_primitive_stream(const_iterator first, const_iterator last);
617 
618  operator bool() const;
619 
620  bool operator !() const;
621 
622  std::size_t size() const;
623 
624  const_iterator begin() const;
625 
626  const_iterator end() const;
627 
628  syntax_primitive const& current() const;
629 
634  syntax_primitive const& consume();
635 
636  void reconsume();
637 
638  void advance(const_iterator it);
639 
640  bool skip_whitespace();
641 
642  private:
643  const_iterator m_first;
644  const_iterator m_last;
645 
646  syntax_primitive m_eof;
647  };
648 }
wordring::wwwc::css::syntax_primitive_name::PercentageToken
@ PercentageToken
数字%
wordring::wwwc::css::syntax_primitive_name::BadStringToken
@ BadStringToken
悪い文字列
wordring::wwwc::css::component_value::container_sb
Definition: wwwc/css_syntax/token.hpp:235
wordring::wwwc::css::string_token
文字列トークン
Definition: wwwc/css_syntax/token.hpp:91
wordring::wwwc::css::number_token
数値トークン
Definition: wwwc/css_syntax/token.hpp:106
wordring::wwwc::css::function_token
関数トークン
Definition: wwwc/css_syntax/token.hpp:72
wordring::wwwc::css::close_curly_token
'}' トークン
Definition: wwwc/css_syntax/token.hpp:156
wordring::wwwc::css::syntax_primitive_name::AtRule
@ AtRule
at-rule https://triple-underscore.github.io/css-syntax-ja.html#at-rule
wordring::wwwc::css::syntax_primitive_name::HashToken
@ HashToken
#xxx
wordring::wwwc::css::component_value::container_fn
Definition: wwwc/css_syntax/token.hpp:221
wordring::wwwc::css::string_token::m_value
std::u32string m_value
Definition: wwwc/css_syntax/token.hpp:91
wordring::wwwc::css::colon_token
':' トークン
Definition: wwwc/css_syntax/token.hpp:132
wordring::wwwc::css::declaration
宣言を表現する AST ノード
Definition: wwwc/css_syntax/token.hpp:473
wordring::wwwc::css::syntax_primitive_name::OpenCurlyToken
@ OpenCurlyToken
'{'
wordring::wwwc::css::syntax_primitive
トークンや構文アイテムを表現するクラス
Definition: wwwc/css_syntax/token.hpp:482
wordring::wwwc::css::number_type_flag_name
number_type_flag_name
整数と小数を識別するためのフラグ値
Definition: wwwc/css_syntax/token.hpp:66
wordring::wwwc::css::percentage_token::m_value
double m_value
Definition: wwwc/css_syntax/token.hpp:113
wordring::wwwc::css::syntax_primitive_name::CloseSquareToken
@ CloseSquareToken
']'
wordring::wwwc::css::syntax_primitive_name::WhitespaceToken
@ WhitespaceToken
空白文字
wordring::wwwc::css::url_token
URL トークン
Definition: wwwc/css_syntax/token.hpp:97
wordring::wwwc::css::open_curly_token
'{' トークン
Definition: wwwc/css_syntax/token.hpp:153
wordring::wwwc::css::syntax_primitive_name::FunctionToken
@ FunctionToken
関数
wordring::wwwc::css::comma_token
',' トークン
Definition: wwwc/css_syntax/token.hpp:138
wordring::wwwc::css::syntax_primitive_name::SimpleBlock
@ SimpleBlock
simple block
wordring::wwwc::css::open_paren_token
'(' トークン
Definition: wwwc/css_syntax/token.hpp:147
wordring::wwwc::css::CDC_token
"-->" トークン
Definition: wwwc/css_syntax/token.hpp:129
wordring::wwwc::css::open_square_token
'[' トークン
Definition: wwwc/css_syntax/token.hpp:141
wordring::wwwc::css::syntax_primitive_name::Declaration
@ Declaration
declaration
wordring::wwwc::css::number_token::m_type_flag
number_type_flag_name m_type_flag
Definition: wwwc/css_syntax/token.hpp:109
wordring::wwwc::css::preserved_tokens
Definition: wwwc/css_syntax/token.hpp:186
wordring::wwwc::css::syntax_primitive_name::CloseParenToken
@ CloseParenToken
')'
wordring::wwwc::css::url_token::m_value
std::u32string m_value
Definition: wwwc/css_syntax/token.hpp:97
wordring::wwwc::css
wordring::wwwc::css::syntax_primitive_name::AtKeywordToken
@ AtKeywordToken
@xxx
wordring::wwwc::css::syntax_primitive_name::OpenSquareToken
@ OpenSquareToken
'['
wordring::wwwc::css::at_keyword_token
@xxxトークン
Definition: wwwc/css_syntax/token.hpp:75
wordring::wwwc::css::hash_token
#xxxトークン
Definition: wwwc/css_syntax/token.hpp:78
wordring::wwwc::css::syntax_primitive_name::EofToken
@ EofToken
EOF
wordring::wwwc::css::CDO_token
"<!--" トークン
Definition: wwwc/css_syntax/token.hpp:126
wordring::wwwc::css::syntax_primitive_name::StringToken
@ StringToken
文字列
wordring::wwwc::css::syntax_primitive_name::SemicolonToken
@ SemicolonToken
';'
wordring::wwwc::css::syntax_primitive_name::QualifiedRule
@ QualifiedRule
qualified rule https://triple-underscore.github.io/css-syntax-ja.html#qualified-rule
wordring::wwwc::css::function
関数を表現する構文アイテム
Definition: wwwc/css_syntax/token.hpp:172
wordring::wwwc::css::close_paren_token
')' トークン
Definition: wwwc/css_syntax/token.hpp:150
wordring::wwwc::css::whitespace_token
空白文字トークン
Definition: wwwc/css_syntax/token.hpp:123
wordring::wwwc::css::semicolon_token
';' トークン
Definition: wwwc/css_syntax/token.hpp:135
wordring::wwwc::css::dimension_token::m_type_flag
number_type_flag_name m_type_flag
Definition: wwwc/css_syntax/token.hpp:119
wordring::wwwc::css::bad_string_token
異常文字列トークン
Definition: wwwc/css_syntax/token.hpp:94
wordring::wwwc::css::function_token::m_value
std::u32string m_value
Definition: wwwc/css_syntax/token.hpp:72
wordring::wwwc::css::eof_token
EOF トークン
Definition: wwwc/css_syntax/token.hpp:159
wordring::wwwc::css::delim_token
区切り文字トークン
Definition: wwwc/css_syntax/token.hpp:103
wordring::wwwc::css::delim_token::m_value
char32_t m_value
Definition: wwwc/css_syntax/token.hpp:103
wordring::wwwc::css::syntax_primitive_name::PreservedTokens
@ PreservedTokens
preserved tokens
wordring::wwwc::css::syntax_primitive_name::CloseCurlyToken
@ CloseCurlyToken
'}'
wordring::wwwc::css::syntax_primitive_name
syntax_primitive_name
Definition: wwwc/css_syntax/token.hpp:20
wordring::wwwc::css::hash_token::type_flag_name::unrestricted
@ unrestricted
wordring::wwwc::css::syntax_primitive_name::CdcToken
@ CdcToken
"-->"
wordring::wwwc::css::hash_token::type_flag_name
type_flag_name
Definition: wwwc/css_syntax/token.hpp:80
wordring::wwwc::css::syntax_primitive_name::DimensionToken
@ DimensionToken
数字 単位
wordring::wwwc::css::dimension_token::m_unit
std::u32string m_unit
Definition: wwwc/css_syntax/token.hpp:118
wordring::wwwc::css::syntax_primitive_name::NumberToken
@ NumberToken
数字
wordring::wwwc::css::syntax_primitive_name::BadUrlToken
@ BadUrlToken
悪いURL
wordring::wwwc::css::syntax_primitive_name::Token
@ Token
これ以降 Rule までトークン
wordring::wwwc::css::dimension_token::m_value
double m_value
Definition: wwwc/css_syntax/token.hpp:117
wordring::wwwc::css::syntax_primitive_name::ComponentValue
@ ComponentValue
component value
wordring::wwwc::css::syntax_primitive_name::Function
@ Function
function
wordring::wwwc::css::syntax_primitive_stream
syntax_primitive の配列をストリーム化するアダプタ
Definition: wwwc/css_syntax/token.hpp:601
wordring::wwwc::css::syntax_primitive_name::CdoToken
@ CdoToken
"<!--"
wordring::wwwc::css::syntax_primitive_name::CommaToken
@ CommaToken
','
wordring::wwwc::css::syntax_primitive_name::DelimToken
@ DelimToken
区切り文字
wordring::wwwc::css::bad_url_token
異常 URL トークン
Definition: wwwc/css_syntax/token.hpp:100
wordring::wwwc::css::component_value
CSSコンポーネント値を表現する AST ノード
Definition: wwwc/css_syntax/token.hpp:218
wordring::wwwc::css::simple_block
単純ブロックを表現する AST ノード
Definition: wwwc/css_syntax/token.hpp:180
wordring::wwwc::css::syntax_primitive_name::SyntaxItem
@ SyntaxItem
これ以降、構文アイテム
wordring::wwwc::css::qualified_rule
修飾規則を表現する AST ノード
Definition: wwwc/css_syntax/token.hpp:458
wordring::wwwc::css::dimension_token
Definition: wwwc/css_syntax/token.hpp:115
wordring::wwwc::css::syntax_primitive_name::ColonToken
@ ColonToken
':'
wordring::wwwc::css::ident_token
識別子トークン
Definition: wwwc/css_syntax/token.hpp:69
wordring::wwwc::css::ident_token::m_value
std::u32string m_value
Definition: wwwc/css_syntax/token.hpp:69
wordring::wwwc::css::syntax_primitive_name::OpenParenToken
@ OpenParenToken
'('
wordring::wwwc::css::at_rule
@規則を表現する AST ノード
Definition: wwwc/css_syntax/token.hpp:409
wordring::wwwc::css::percentage_token
数値 % トークン
Definition: wwwc/css_syntax/token.hpp:113
wordring::wwwc::css::number_token::m_value
double m_value
Definition: wwwc/css_syntax/token.hpp:108
wordring::wwwc::css::syntax_primitive_name::IdentToken
@ IdentToken
識別子
wordring::wwwc::css::syntax_primitive_name::UrlToken
@ UrlToken
URL
wordring::wwwc::css::close_square_token
']' トークン
Definition: wwwc/css_syntax/token.hpp:144
wordring::wwwc::css::syntax_primitive_stream::consume
syntax_primitive const & consume()
トークンを一つ消費する