libwordring
stable_trie_base.hpp
1 #pragma once
2 
3 #include <wordring/trie/stable_trie_base_iterator.hpp>
4 #include <wordring/trie/trie_heap.hpp>
5 
6 #include <algorithm>
7 #include <cassert>
8 #include <climits>
9 #include <cstdint>
10 #include <istream>
11 #include <iterator>
12 #include <memory>
13 #include <ostream>
14 #include <stdexcept>
15 #include <type_traits>
16 #include <utility>
17 
18 namespace wordring::detail
19 {
20  // ------------------------------------------------------------------------
21  // stable_trie_base
22  // ------------------------------------------------------------------------
23 
63  template <typename Allocator = std::allocator<trie_node>>
64  class stable_trie_base : public trie_heap<Allocator>
65  {
66  template <typename Allocator1>
67  friend std::ostream& operator<<(std::ostream&, stable_trie_base<Allocator1> const&);
68 
69  template <typename Allocator1>
70  friend std::istream& operator>>(std::istream&, stable_trie_base<Allocator1>&);
71 
72  protected:
74 
75  using typename base_type::container;
76  using typename base_type::label_vector;
77  using typename base_type::index_type;
78  using typename base_type::node_type;
79 
80  using base_type::null_value;
81 
82  public:
83  using label_type = typename base_type::label_type;
84  using value_type = std::uint32_t;
85  using size_type = typename container::size_type;
86  using allocator_type = Allocator;
88  using const_reference = trie_value_proxy const;
90 
91  public:
92  using typename base_type::serialize_iterator;
93 
95  using base_type::ibegin;
96  using base_type::iend;
97  using base_type::clear;
98 
99  protected:
100  using base_type::limit;
101  using base_type::free;
102  using base_type::has_child;
103  using base_type::has_null;
104  using base_type::at;
105  using base_type::add;
106 
107  using base_type::m_c;
108 
109  public:
118  : base_type()
119  {
120  }
121 
131  explicit stable_trie_base(allocator_type const& alloc)
132  : base_type(alloc)
133  {
134  }
135 
157  template <typename InputIterator, typename std::enable_if_t<std::is_integral_v<typename std::iterator_traits<InputIterator>::value_type>, std::nullptr_t> = nullptr>
158  stable_trie_base(InputIterator first, InputIterator last, allocator_type const& alloc = allocator_type())
159  : base_type(alloc)
160  {
161  assign(first, last);
162  }
163 
180  template <typename InputIterator, typename std::enable_if_t<std::negation_v<std::is_integral<typename std::iterator_traits<InputIterator>::value_type>>, std::nullptr_t> = nullptr>
181  stable_trie_base(InputIterator first, InputIterator last, allocator_type const& alloc = allocator_type())
182  : base_type(alloc)
183  {
184  assign(first, last);
185  }
186 
199  stable_trie_base(std::initializer_list<trie_node> il, allocator_type const& alloc = allocator_type())
200  : base_type(il, alloc)
201  {
202  }
203 
227  template <typename InputIterator, typename std::enable_if_t<std::is_integral_v<typename std::iterator_traits<InputIterator>::value_type>, std::nullptr_t> = nullptr>
228  void assign(InputIterator first, InputIterator last)
229  {
230  base_type::assign(first, last);
231  }
232 
249  template <typename InputIterator, typename std::enable_if_t<std::negation_v<std::is_integral<typename std::iterator_traits<InputIterator>::value_type>>, std::nullptr_t> = nullptr>
250  void assign(InputIterator first, InputIterator last)
251  {
252  clear();
253  while (first != last)
254  {
255  insert(*first);
256  ++first;
257  }
258  }
259 
260  // 要素アクセス --------------------------------------------------------
261 
279  reference at(const_iterator pos, index_type& idx)
280  {
281  node_type* d = m_c.data();
282 
283  idx = (d + pos.m_index)->m_base + null_value;
284  assert(1 < idx && idx < limit());
285 
286  return reference(d + idx);
287  }
288 
303  {
304  index_type idx = 0;
305  return at(pos, idx);
306  }
307 
316  const_reference at(const_iterator pos) const
317  {
318  return const_cast<stable_trie_base*>(this)->at(pos);
319  }
320 
334  template <typename InputIterator>
335  reference at(InputIterator first, InputIterator last)
336  {
337  auto it = find(first, last);
338  if (it == cend()) throw std::out_of_range("");
339 
340  return at(it);
341  }
342 
352  template <typename InputIterator>
353  const_reference at(InputIterator first, InputIterator last) const
354  {
355  return const_cast<stable_trie_base*>(this)->at(first, last);
356  }
357 
370  template <typename Key>
371  reference at(Key const& key)
372  {
373  return at(std::begin(key), std::end(key));
374  }
375 
386  template <typename Key>
387  const_reference at(Key const& key) const
388  {
389  return at(std::begin(key), std::end(key));
390  }
391 
402  template <typename Key>
403  reference operator[](Key const& key)
404  {
405  auto it = find(key);
406  if (it == cend()) it = insert(key);
407 
408  node_type* d = m_c.data();
409  index_type idx = (d + it.m_index)->m_base + null_value;
410  assert(1 < idx && idx < limit());
411 
412  return reference(d + idx);
413  }
414 
415  // イテレータ ----------------------------------------------------------
416 
423  const_iterator begin() const noexcept { return const_iterator(m_c, 1); }
424 
431  const_iterator cbegin() const noexcept { return const_iterator(m_c, 1); }
432 
440  const_iterator end() const noexcept { return const_iterator(m_c, 0); }
441 
449  const_iterator cend() const noexcept { return const_iterator(m_c, 0); }
450 
451  // 容量 ---------------------------------------------------------------
452 
457  bool empty() const noexcept { return size() == 0; }
458 
463  size_type size() const noexcept { return m_c.front().m_base; }
464 
469  static constexpr size_type max_size() noexcept
470  {
471  return std::numeric_limits<std::int32_t>::max() / sizeof(node_type);
472  }
473 
474  // 変更 ---------------------------------------------------------------
475 
490  template <typename InputIterator>
491  const_iterator insert(InputIterator first, InputIterator last, value_type value = 0)
492  {
493  assert(value <= static_cast<value_type>(std::numeric_limits<int32_t>::max()));
494 
495  if (first == last) return cend();
496 
497  index_type parent = 1;
498 
499  // 登録済み遷移をスキップする
500  for (index_type idx = at(parent, static_cast<std::uint8_t>(*first)); idx != 0; idx = at(parent, static_cast<std::uint8_t>(*first)))
501  {
502  parent = idx;
503  ++first;
504  if (first == last) break;
505  }
506 
507  if (!has_null(parent) || first != last)
508  {
509  // 新規文字列による遷移を追加
510  while (first != last)
511  {
512  std::uint16_t label = static_cast<std::uint8_t>(*first++);
513  index_type base = add(parent, label);
514  parent = base + label;
515  }
516  assert(parent != 1);
517 
518  // 終端の空遷移を追加
519  index_type idx = add(parent, null_value) + null_value;
520  (m_c.data() + idx)->m_base = -static_cast<index_type>(value);
521 
522  ++m_c.front().m_base;
523  }
524 
525  return const_iterator(m_c, parent);
526  }
527 
539  template <typename Key>
540  const_iterator insert(Key const& key, value_type value = 0)
541  {
542  return insert(std::begin(key), std::end(key), value);
543  }
544 
552  {
553  index_type idx = pos.m_index;
554  assert(0 <= idx && idx < limit());
555 
556  if (idx <= 1 || !has_null(idx)) return;
557  assert(has_null(idx));
558 
559  index_type term = (m_c.data() + idx)->m_base + null_value;
560  assert((m_c.data() + term)->m_check == idx);
561  free(term);
562 
563  while (idx != 1 && !has_null(idx) && !has_child(idx))
564  {
565  index_type parent = (m_c.data() + idx)->m_check;
566  free(idx);
567  idx = parent;
568  }
569 
570  --m_c.front().m_base;
571  if (empty()) clear();
572  }
573 
581  template <typename InputIterator>
582  void erase(InputIterator first, InputIterator last)
583  {
584  erase(find(first, last));
585  }
586 
593  template <typename Key>
594  void erase(Key const& key)
595  {
596  erase(std::begin(key), std::end(key));
597  }
598 
599  void swap(stable_trie_base& other)
600  {
601  base_type::swap(other);
602  }
603 
604  // 検索 ---------------------------------------------------------------
605 
606  protected:
621  template <typename InputIterator>
622  auto lookup(InputIterator first, InputIterator last, std::uint32_t& i) const
623  {
624  assert(i == 0);
625 
626  index_type parent = 1;
627 
628  while (first != last)
629  {
630  std::uint16_t ch = static_cast<std::uint8_t>(*first);
631  index_type idx = at(parent, ch);
632  if (idx == 0) break;
633  ++first;
634  ++i;
635  parent = idx;
636  assert(1 <= parent && parent < limit());
637  }
638 
639  return std::make_pair(const_iterator(m_c, parent), first);
640  }
641 
642  public:
652  template <typename InputIterator>
653  auto lookup(InputIterator first, InputIterator last) const
654  {
655  std::uint32_t i = 0;
656  return lookup(first, last, i);
657  }
658 
667  template <typename Key>
668  const_iterator search(Key const& key) const
669  {
670  auto it1 = std::begin(key);
671  auto it2 = std::end(key);
672 
673  auto pair = lookup(it1, it2);
674 
675  return (pair.second == it2)
676  ? pair.first
677  : cend();
678  }
679 
689  template <typename InputIterator>
690  const_iterator find(InputIterator first, InputIterator last) const
691  {
692  auto pair = lookup(first, last);
693 
694  auto it = pair.second;
695  index_type idx = pair.first.m_index;
696 
697  return (it == last && has_null(idx))
698  ? pair.first
699  : cend();
700  }
701 
710  template <typename Key>
711  const_iterator find(Key const& key) const
712  {
713  return find(std::begin(key), std::end(key));
714  }
715 
723  template <typename InputIterator>
724  bool contains(InputIterator first, InputIterator last) const
725  {
726  auto pair = lookup(first, last);
727 
728  auto it = pair.second;
729  index_type idx = pair.first.m_index;
730 
731  return !(it != last || !has_null(idx));
732  }
733 
740  template <typename Key>
741  bool contains(Key const& key) const
742  {
743  return contains(std::begin(key), std::end(key));
744  }
745  };
746 
751  template <typename Allocator1>
752  inline std::ostream& operator<<(std::ostream& os, stable_trie_base<Allocator1> const& trie)
753  {
754  typename stable_trie_base<Allocator1>::base_type const& heap = trie;
755  return os << heap;
756  }
757 
762  template <typename Allocator1>
763  inline std::istream& operator>>(std::istream& is, stable_trie_base<Allocator1>& trie)
764  {
766  return is >> heap;
767  }
768 }
wordring::detail::stable_trie_base::cbegin
const_iterator cbegin() const noexcept
根を指すイテレータを返す
Definition: stable_trie_base.hpp:431
wordring::detail::stable_trie_base::at
reference at(Key const &key)
葉の値への参照を返す
Definition: stable_trie_base.hpp:371
wordring::detail::stable_trie_base::stable_trie_base
stable_trie_base(std::initializer_list< trie_node > il, allocator_type const &alloc=allocator_type())
初期化子リストからの構築
Definition: stable_trie_base.hpp:199
wordring::detail::trie_heap::has_null
bool has_null(index_type parent) const
Definition: trie_heap.hpp:690
wordring::detail::stable_trie_base::assign
void assign(InputIterator first, InputIterator last)
直列化データからの割り当て
Definition: stable_trie_base.hpp:228
wordring::detail::trie_heap::free
index_type free(index_type idx, index_type before=0)
Definition: trie_heap.hpp:526
wordring::detail::stable_trie_base::at
const_reference at(const_iterator pos) const
葉の値への参照を返す
Definition: stable_trie_base.hpp:316
wordring::detail::stable_trie_base::lookup
auto lookup(InputIterator first, InputIterator last, std::uint32_t &i) const
部分一致検索
Definition: stable_trie_base.hpp:622
wordring::detail::stable_trie_base::operator<<
friend std::ostream & operator<<(std::ostream &, stable_trie_base< Allocator1 > const &)
ストリームへ出力する
Definition: stable_trie_base.hpp:752
wordring::detail::trie_heap::has_child
bool has_child(index_type parent) const
Definition: trie_heap.hpp:664
wordring::detail::stable_trie_base::lookup
auto lookup(InputIterator first, InputIterator last) const
部分一致検索
Definition: stable_trie_base.hpp:653
wordring::detail::stable_trie_base::erase
void erase(const_iterator pos)
キー文字列を削除する
Definition: stable_trie_base.hpp:551
wordring::detail::stable_trie_base::at
reference at(const_iterator pos, index_type &idx)
葉の値への参照を返す
Definition: stable_trie_base.hpp:279
wordring::trie
basic_trie< Label, detail::trie_base< Allocator > > trie
メモリー使用量削減を目標とする汎用Trie
Definition: trie.hpp:1026
wordring::detail::stable_trie_base::max_size
static constexpr size_type max_size() noexcept
キー文字列の格納可能な最大数を調べる
Definition: stable_trie_base.hpp:469
wordring::detail::stable_trie_base::stable_trie_base
stable_trie_base()
空のコンテナを構築する
Definition: stable_trie_base.hpp:117
wordring::static_vector< std::uint16_t, 257 >
wordring::detail::stable_trie_base::end
const_iterator end() const noexcept
根の終端を指すイテレータを返す
Definition: stable_trie_base.hpp:440
wordring::detail::operator<<
std::ostream & operator<<(std::ostream &os, stable_trie_base< Allocator1 > const &trie)
ストリームへ出力する
Definition: stable_trie_base.hpp:752
wordring::detail::const_stable_trie_base_iterator
Definition: stable_trie_base_iterator.hpp:8
wordring::detail::stable_trie_base::at
reference at(InputIterator first, InputIterator last)
葉の値への参照を返す
Definition: stable_trie_base.hpp:335
wordring::detail::trie_heap::assign
void assign(InputIterator first, InputIterator last)
直列化データから割り当てる
Definition: trie_heap.hpp:260
wordring::detail::stable_trie_base::find
const_iterator find(Key const &key) const
完全一致検索
Definition: stable_trie_base.hpp:711
wordring::detail::trie_heap::clear
void clear() noexcept
すべての要素を削除する
Definition: trie_heap.hpp:364
wordring::detail::stable_trie_base::search
const_iterator search(Key const &key) const
前方一致検索
Definition: stable_trie_base.hpp:668
wordring::detail::stable_trie_base::operator>>
friend std::istream & operator>>(std::istream &, stable_trie_base< Allocator1 > &)
ストリームから入力する
Definition: stable_trie_base.hpp:763
wordring::detail::stable_trie_base::erase
void erase(Key const &key)
キー文字列を削除する
Definition: stable_trie_base.hpp:594
wordring::detail::stable_trie_base::insert
const_iterator insert(Key const &key, value_type value=0)
キー文字列を挿入する
Definition: stable_trie_base.hpp:540
wordring::basic_trie
任意の整数型をラベルとして用いることが出来る汎用Trie
Definition: trie.hpp:130
wordring::detail::trie_heap::get_allocator
allocator_type get_allocator() const
コンテナに関連付けられているアロケータを返す
Definition: trie_heap.hpp:226
wordring::detail::trie_node
Definition: trie_heap.hpp:23
wordring::detail::stable_trie_base::erase
void erase(InputIterator first, InputIterator last)
キー文字列を削除する
Definition: stable_trie_base.hpp:582
wordring::detail::stable_trie_base::contains
bool contains(InputIterator first, InputIterator last) const
キー文字列が格納されているか調べる
Definition: stable_trie_base.hpp:724
wordring::detail::stable_trie_base::insert
const_iterator insert(InputIterator first, InputIterator last, value_type value=0)
キー文字列を挿入する
Definition: stable_trie_base.hpp:491
wordring::detail::trie_heap::ibegin
serialize_iterator ibegin() const
直列化用のイテレータを返す
Definition: trie_heap.hpp:342
wordring::detail
wordring::detail::stable_trie_base::stable_trie_base
stable_trie_base(InputIterator first, InputIterator last, allocator_type const &alloc=allocator_type())
直列化データからの構築
Definition: stable_trie_base.hpp:158
wordring::detail::stable_trie_base::contains
bool contains(Key const &key) const
キー文字列が格納されているか調べる
Definition: stable_trie_base.hpp:741
wordring::detail::stable_trie_base::operator[]
reference operator[](Key const &key)
葉の値への参照を返す
Definition: stable_trie_base.hpp:403
wordring::detail::trie_heap
ダブル・アレイによるTrie実装のメモリー管理を行う
Definition: trie_heap.hpp:200
wordring::detail::trie_heap::at
index_type at(index_type parent, std::uint16_t label) const
Definition: trie_heap.hpp:766
wordring::detail::trie_value_proxy
Definition: trie_heap.hpp:40
wordring::detail::stable_trie_base::cend
const_iterator cend() const noexcept
根の終端を指すイテレータを返す
Definition: stable_trie_base.hpp:449
wordring::detail::stable_trie_base::at
const_reference at(InputIterator first, InputIterator last) const
葉の値への参照を返す
Definition: stable_trie_base.hpp:353
wordring::detail::stable_trie_base::stable_trie_base
stable_trie_base(allocator_type const &alloc)
アロケータを指定して空のコンテナを構築する
Definition: stable_trie_base.hpp:131
wordring::detail::trie_heap::iend
serialize_iterator iend() const
直列化用のイテレータを返す
Definition: trie_heap.hpp:353
wordring::detail::stable_trie_base::empty
bool empty() const noexcept
キー文字列を格納していないことを調べる
Definition: stable_trie_base.hpp:457
wordring::detail::stable_trie_base::size
size_type size() const noexcept
格納しているキー文字列数を調べる
Definition: stable_trie_base.hpp:463
wordring::detail::stable_trie_base::begin
const_iterator begin() const noexcept
根を指すイテレータを返す
Definition: stable_trie_base.hpp:423
wordring::detail::stable_trie_base
挿入や削除によって葉を指すイテレータが無効とならない安定なTrie木の実装
Definition: stable_trie_base.hpp:64
wordring::detail::operator>>
std::istream & operator>>(std::istream &is, stable_trie_base< Allocator1 > &trie)
ストリームから入力する
Definition: stable_trie_base.hpp:763
wordring::detail::stable_trie_base::at
const_reference at(Key const &key) const
葉の値への参照を返す
Definition: stable_trie_base.hpp:387
wordring::detail::trie_heap_serialize_iterator
Definition: trie_heap.hpp:78
wordring::detail::stable_trie_base::at
reference at(const_iterator pos)
葉の値への参照を返す
Definition: stable_trie_base.hpp:302
wordring::detail::stable_trie_base::find
const_iterator find(InputIterator first, InputIterator last) const
完全一致検索
Definition: stable_trie_base.hpp:690