summaryrefslogtreecommitdiffstats
path: root/src/parser/string_pool.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/parser/string_pool.cpp')
-rw-r--r--src/parser/string_pool.cpp137
1 files changed, 137 insertions, 0 deletions
diff --git a/src/parser/string_pool.cpp b/src/parser/string_pool.cpp
new file mode 100644
index 0000000..e438da5
--- /dev/null
+++ b/src/parser/string_pool.cpp
@@ -0,0 +1,137 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <orcus/string_pool.hpp>
+#include <orcus/exception.hpp>
+
+#include <iostream>
+#include <unordered_set>
+#include <vector>
+#include <memory>
+#include <cassert>
+#include <algorithm>
+#include <string_view>
+
+#include <boost/pool/object_pool.hpp>
+
+namespace orcus {
+
+using std::cout;
+using std::endl;
+
+using string_set_type = std::unordered_set<std::string_view>;
+using string_store_type = boost::object_pool<std::string>;
+using string_stores_type = std::vector<std::unique_ptr<string_store_type>>;
+
+struct string_pool::impl
+{
+ string_stores_type m_stores;
+ string_set_type m_set;
+
+ impl()
+ {
+ // first element is the active store used for the current instance.
+ m_stores.push_back(std::make_unique<string_store_type>(256, 0));
+ }
+};
+
+string_pool::string_pool() : mp_impl(std::make_unique<impl>()) {}
+
+string_pool::string_pool(string_pool&& other) : mp_impl(std::move(other.mp_impl)) {}
+
+string_pool::~string_pool() = default;
+
+std::pair<std::string_view, bool> string_pool::intern(std::string_view str)
+{
+ if (str.empty())
+ return std::pair<std::string_view, bool>(std::string_view(), false);
+
+ string_set_type::const_iterator itr = mp_impl->m_set.find(str);
+ if (itr == mp_impl->m_set.end())
+ {
+ // This string has not been interned. Intern it.
+ string_store_type& store = *mp_impl->m_stores[0];
+ std::string* p = store.construct(str);
+ if (!p)
+ throw general_error("failed to intern a new string instance.");
+
+ std::pair<string_set_type::iterator,bool> r =
+ mp_impl->m_set.emplace(p->data(), p->size());
+ if (!r.second)
+ throw general_error("failed to intern a new string instance.");
+
+ std::string_view ps = *r.first;
+ assert(ps == str);
+
+ return std::pair<std::string_view, bool>(ps, true);
+ }
+
+ // This string has already been interned.
+
+ std::string_view stored_str = *itr;
+ assert(stored_str == str);
+ return std::pair<std::string_view, bool>(stored_str, false);
+}
+
+std::vector<std::string_view> string_pool::get_interned_strings() const
+{
+ std::vector<std::string_view> sorted;
+ sorted.reserve(mp_impl->m_set.size());
+
+ for (std::string_view ps : mp_impl->m_set)
+ sorted.push_back(ps);
+
+ std::sort(sorted.begin(), sorted.end());
+
+ return sorted;
+}
+
+void string_pool::dump() const
+{
+ auto sorted = get_interned_strings();
+
+ cout << "interned string count: " << sorted.size() << endl;
+
+ // Dump them all to stdout.
+ size_t counter = 0;
+ for (std::string_view s : sorted)
+ cout << counter++ << ": '" << s << "'" << endl;
+}
+
+void string_pool::clear()
+{
+ mp_impl = std::make_unique<impl>();
+}
+
+size_t string_pool::size() const
+{
+ return mp_impl->m_set.size();
+}
+
+void string_pool::swap(string_pool& other)
+{
+ std::swap(mp_impl, other.mp_impl);
+}
+
+void string_pool::merge(string_pool& other)
+{
+ while (!other.mp_impl->m_stores.empty())
+ {
+ mp_impl->m_stores.push_back(
+ std::move(other.mp_impl->m_stores.back()));
+ other.mp_impl->m_stores.pop_back();
+ }
+
+ for (std::string_view p : other.mp_impl->m_set)
+ mp_impl->m_set.insert(p);
+
+ other.mp_impl->m_set.clear();
+}
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */