summaryrefslogtreecommitdiffstats
path: root/xpcom/ds/test/test_dafsa.py
diff options
context:
space:
mode:
Diffstat (limited to 'xpcom/ds/test/test_dafsa.py')
-rw-r--r--xpcom/ds/test/test_dafsa.py546
1 files changed, 546 insertions, 0 deletions
diff --git a/xpcom/ds/test/test_dafsa.py b/xpcom/ds/test/test_dafsa.py
new file mode 100644
index 0000000000..9becdd6c06
--- /dev/null
+++ b/xpcom/ds/test/test_dafsa.py
@@ -0,0 +1,546 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import unittest
+from io import StringIO
+
+import mozunit
+from incremental_dafsa import Dafsa, Node
+
+
+def _node_to_string(node: Node, prefix, buffer, cache):
+ if not node.is_end_node:
+ prefix += (
+ str(ord(node.character)) if ord(node.character) < 10 else node.character
+ )
+ else:
+ prefix += "$"
+ cached = cache.get(id(node))
+ buffer.write("{}{}".format(prefix, "=" if cached else "").strip() + "\n")
+
+ if not cached:
+ cache[id(node)] = node
+ if node:
+ for node in sorted(node.children.values(), key=lambda n: n.character):
+ _node_to_string(node, prefix, buffer, cache)
+
+
+def _dafsa_to_string(dafsa: Dafsa):
+ """Encodes the dafsa into a string notation.
+
+ Each node is printed on its own line with all the nodes that precede it.
+ The end node is designated with the "$" character.
+ If it joins into an existing node, the end of the line is adorned with a "=".
+ Though this doesn't carry information about which other prefix it has joined with,
+ it has seemed to be precise enough for testing.
+
+ For example, with the input data of:
+ * a1
+ * ac1
+ * bc1
+
+ [root] --- a ---- 1 --- [end]
+ | | /
+ -- b -- c---
+
+ The output will be:
+ a
+ a1
+ a1$ <- end node was found
+ ac
+ ac1= <- joins with the "a1" prefix
+ b
+ bc= <- joins with the "ac" prefix
+ """
+ buffer = StringIO()
+ cache = {}
+
+ for node in sorted(dafsa.root_node.children.values(), key=lambda n: n.character):
+ _node_to_string(node, "", buffer, cache)
+ return buffer.getvalue().strip()
+
+
+def _to_words(data):
+ return [line.strip() for line in data.strip().split("\n")]
+
+
+def _assert_dafsa(data, expected):
+ words = _to_words(data)
+ dafsa = Dafsa.from_tld_data(words)
+
+ expected = expected.strip()
+ expected = "\n".join([line.strip() for line in expected.split("\n")])
+ as_string = _dafsa_to_string(dafsa)
+ assert as_string == expected
+
+
+class TestDafsa(unittest.TestCase):
+ def test_1(self):
+ _assert_dafsa(
+ """
+ a1
+ ac1
+ acc1
+ bd1
+ bc1
+ bcc1
+ """,
+ """
+ a
+ a1
+ a1$
+ ac
+ ac1=
+ acc
+ acc1=
+ b
+ bc=
+ bd
+ bd1=
+ """,
+ )
+
+ def test_2(self):
+ _assert_dafsa(
+ """
+ ab1
+ b1
+ bb1
+ bbb1
+ """,
+ """
+ a
+ ab
+ ab1
+ ab1$
+ b
+ b1=
+ bb
+ bb1=
+ bbb=
+ """,
+ )
+
+ def test_3(self):
+ _assert_dafsa(
+ """
+ a.ca1
+ a.com1
+ c.corg1
+ b.ca1
+ b.com1
+ b.corg1
+ """,
+ """
+ a
+ a.
+ a.c
+ a.ca
+ a.ca1
+ a.ca1$
+ a.co
+ a.com
+ a.com1=
+ b
+ b.
+ b.c
+ b.ca=
+ b.co
+ b.com=
+ b.cor
+ b.corg
+ b.corg1=
+ c
+ c.
+ c.c
+ c.co
+ c.cor=
+ """,
+ )
+
+ def test_4(self):
+ _assert_dafsa(
+ """
+ acom1
+ bcomcom1
+ acomcom1
+ """,
+ """
+ a
+ ac
+ aco
+ acom
+ acom1
+ acom1$
+ acomc
+ acomco
+ acomcom
+ acomcom1=
+ b
+ bc
+ bco
+ bcom
+ bcomc=
+ """,
+ )
+
+ def test_5(self):
+ _assert_dafsa(
+ """
+ a.d1
+ a.c.d1
+ b.d1
+ b.c.d1
+ """,
+ """
+ a
+ a.
+ a.c
+ a.c.
+ a.c.d
+ a.c.d1
+ a.c.d1$
+ a.d=
+ b
+ b.=
+ """,
+ )
+
+ def test_6(self):
+ _assert_dafsa(
+ """
+ a61
+ a661
+ b61
+ b661
+ """,
+ """
+ a
+ a6
+ a61
+ a61$
+ a66
+ a661=
+ b
+ b6=
+ """,
+ )
+
+ def test_7(self):
+ _assert_dafsa(
+ """
+ a61
+ a6661
+ b61
+ b6661
+ """,
+ """
+ a
+ a6
+ a61
+ a61$
+ a66
+ a666
+ a6661=
+ b
+ b6=
+ """,
+ )
+
+ def test_8(self):
+ _assert_dafsa(
+ """
+ acc1
+ bc1
+ bccc1
+ """,
+ """
+ a
+ ac
+ acc
+ acc1
+ acc1$
+ b
+ bc
+ bc1=
+ bcc=
+ """,
+ )
+
+ def test_9(self):
+ _assert_dafsa(
+ """
+ acc1
+ bc1
+ bcc1
+ """,
+ """
+ a
+ ac
+ acc
+ acc1
+ acc1$
+ b
+ bc
+ bc1=
+ bcc=
+ """,
+ )
+
+ def test_10(self):
+ _assert_dafsa(
+ """
+ acc1
+ cc1
+ cccc1
+ """,
+ """
+ a
+ ac
+ acc
+ acc1
+ acc1$
+ c
+ cc
+ cc1=
+ ccc=
+ """,
+ )
+
+ def test_11(self):
+ _assert_dafsa(
+ """
+ ac1
+ acc1
+ bc1
+ bcc1
+ """,
+ """
+ a
+ ac
+ ac1
+ ac1$
+ acc
+ acc1=
+ b
+ bc=
+ """,
+ )
+
+ def test_12(self):
+ _assert_dafsa(
+ """
+ acd1
+ bcd1
+ bcdd1
+ """,
+ """
+ a
+ ac
+ acd
+ acd1
+ acd1$
+ b
+ bc
+ bcd
+ bcd1=
+ bcdd=
+ """,
+ )
+
+ def test_13(self):
+ _assert_dafsa(
+ """
+ ac1
+ acc1
+ bc1
+ bcc1
+ bccc1
+ """,
+ """
+ a
+ ac
+ ac1
+ ac1$
+ acc
+ acc1=
+ b
+ bc
+ bc1=
+ bcc=
+ """,
+ )
+
+ def test_14(self):
+ _assert_dafsa(
+ """
+ acc1
+ acccc1
+ bcc1
+ bcccc1
+ bcccccc1
+ """,
+ """
+ a
+ ac
+ acc
+ acc1
+ acc1$
+ accc
+ acccc
+ acccc1=
+ b
+ bc
+ bcc
+ bcc1=
+ bccc=
+ """,
+ )
+
+ def test_15(self):
+ _assert_dafsa(
+ """
+ ac1
+ bc1
+ acac1
+ """,
+ """
+ a
+ ac
+ ac1
+ ac1$
+ aca
+ acac
+ acac1=
+ b
+ bc=
+ """,
+ )
+
+ def test_16(self):
+ _assert_dafsa(
+ """
+ bat1
+ t1
+ tbat1
+ """,
+ """
+ b
+ ba
+ bat
+ bat1
+ bat1$
+ t
+ t1=
+ tb=
+ """,
+ )
+
+ def test_17(self):
+ _assert_dafsa(
+ """
+ acow1
+ acat1
+ t1
+ tcat1
+ acatcat1
+ """,
+ """
+ a
+ ac
+ aca
+ acat
+ acat1
+ acat1$
+ acatc
+ acatca
+ acatcat
+ acatcat1=
+ aco
+ acow
+ acow1=
+ t=
+ """,
+ )
+
+ def test_18(self):
+ _assert_dafsa(
+ """
+ bc1
+ abc1
+ abcxyzc1
+ """,
+ """
+ a
+ ab
+ abc
+ abc1
+ abc1$
+ abcx
+ abcxy
+ abcxyz
+ abcxyzc
+ abcxyzc1=
+ b
+ bc=
+ """,
+ )
+
+ def test_19(self):
+ _assert_dafsa(
+ """
+ a.z1
+ a.y1
+ c.z1
+ d.z1
+ d.y1
+ """,
+ """
+ a
+ a.
+ a.y
+ a.y1
+ a.y1$
+ a.z
+ a.z1=
+ c
+ c.
+ c.z=
+ d
+ d.=
+ """,
+ )
+
+ def test_20(self):
+ _assert_dafsa(
+ """
+ acz1
+ acy1
+ accz1
+ acccz1
+ bcz1
+ bcy1
+ bccz1
+ bcccz1
+ """,
+ """
+ a
+ ac
+ acc
+ accc
+ acccz
+ acccz1
+ acccz1$
+ accz=
+ acy
+ acy1=
+ acz=
+ b
+ bc=
+ """,
+ )
+
+
+if __name__ == "__main__":
+ mozunit.main()