From 36d22d82aa202bb199967e9512281e9a53db42c9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 21:33:14 +0200 Subject: Adding upstream version 115.7.0esr. Signed-off-by: Daniel Baumann --- xpcom/tests/gtest/TestTokenizer.cpp | 1447 +++++++++++++++++++++++++++++++++++ 1 file changed, 1447 insertions(+) create mode 100644 xpcom/tests/gtest/TestTokenizer.cpp (limited to 'xpcom/tests/gtest/TestTokenizer.cpp') diff --git a/xpcom/tests/gtest/TestTokenizer.cpp b/xpcom/tests/gtest/TestTokenizer.cpp new file mode 100644 index 0000000000..1457b82fff --- /dev/null +++ b/xpcom/tests/gtest/TestTokenizer.cpp @@ -0,0 +1,1447 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mozilla/Tokenizer.h" +#include "mozilla/IncrementalTokenizer.h" +#include "mozilla/Unused.h" +#include "gtest/gtest.h" + +using namespace mozilla; + +template +static bool IsOperator(Char const c) { + return c == '+' || c == '*'; +} + +static bool HttpHeaderCharacter(char const c) { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || + (c >= '0' && c <= '9') || (c == '_') || (c == '-'); +} + +TEST(Tokenizer, HTTPResponse) +{ + Tokenizer::Token t; + + // Real life test, HTTP response + + Tokenizer p( + nsLiteralCString("HTTP/1.0 304 Not modified\r\n" + "ETag: hallo\r\n" + "Content-Length: 16\r\n" + "\r\n" + "This is the body")); + + EXPECT_TRUE(p.CheckWord("HTTP")); + EXPECT_TRUE(p.CheckChar('/')); + EXPECT_TRUE(p.Check(Tokenizer::TOKEN_INTEGER, t)); + EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_INTEGER); + EXPECT_TRUE(t.AsInteger() == 1); + EXPECT_TRUE(p.CheckChar('.')); + EXPECT_TRUE(p.Check(Tokenizer::TOKEN_INTEGER, t)); + EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_INTEGER); + EXPECT_TRUE(t.AsInteger() == 0); + p.SkipWhites(); + + EXPECT_TRUE(p.Check(Tokenizer::TOKEN_INTEGER, t)); + EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_INTEGER); + EXPECT_TRUE(t.AsInteger() == 304); + p.SkipWhites(); + + p.Record(); + while (p.Next(t) && t.Type() != Tokenizer::TOKEN_EOL) { + ; + } + EXPECT_FALSE(p.HasFailed()); + nsAutoCString h; + p.Claim(h); + EXPECT_TRUE(h == "Not modified"); + + p.Record(); + while (p.CheckChar(HttpHeaderCharacter)) { + ; + } + p.Claim(h, Tokenizer::INCLUDE_LAST); + EXPECT_TRUE(h == "ETag"); + p.SkipWhites(); + EXPECT_TRUE(p.CheckChar(':')); + p.SkipWhites(); + p.Record(); + while (p.Next(t) && t.Type() != Tokenizer::TOKEN_EOL) { + ; + } + EXPECT_FALSE(p.HasFailed()); + p.Claim(h); + EXPECT_TRUE(h == "hallo"); + + p.Record(); + while (p.CheckChar(HttpHeaderCharacter)) { + ; + } + p.Claim(h, Tokenizer::INCLUDE_LAST); + EXPECT_TRUE(h == "Content-Length"); + p.SkipWhites(); + EXPECT_TRUE(p.CheckChar(':')); + p.SkipWhites(); + EXPECT_TRUE(p.Check(Tokenizer::TOKEN_INTEGER, t)); + EXPECT_TRUE(t.AsInteger() == 16); + EXPECT_TRUE(p.CheckEOL()); + + EXPECT_TRUE(p.CheckEOL()); + + p.Record(); + while (p.Next(t) && t.Type() != Tokenizer::TOKEN_EOF) { + ; + } + nsAutoCString b; + p.Claim(b); + EXPECT_TRUE(b == "This is the body"); +} + +TEST(Tokenizer, Main) +{ + Tokenizer::Token t; + + // Synthetic code-specific test + + Tokenizer p("test123 ,15 \t*\r\n%xx,-15\r\r"_ns); + + EXPECT_TRUE(p.Next(t)); + EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_WORD); + EXPECT_TRUE(t.AsString() == "test123"); + + Tokenizer::Token u; + EXPECT_FALSE(p.Check(u)); + + EXPECT_FALSE(p.CheckChar('!')); + + EXPECT_FALSE(p.Check(Tokenizer::Token::Number(123))); + + EXPECT_TRUE(p.CheckWhite()); + + EXPECT_TRUE(p.CheckChar(',')); + + EXPECT_TRUE(p.Check(Tokenizer::Token::Number(15))); + + p.Rollback(); + EXPECT_TRUE(p.Check(Tokenizer::Token::Number(15))); + + p.Rollback(); + EXPECT_TRUE(p.Next(t)); + EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_INTEGER); + EXPECT_TRUE(t.AsInteger() == 15); + + EXPECT_FALSE(p.CheckChar(IsOperator)); + + EXPECT_TRUE(p.CheckWhite()); + + p.SkipWhites(); + + EXPECT_FALSE(p.CheckWhite()); + + p.Rollback(); + + EXPECT_TRUE(p.CheckWhite()); + EXPECT_TRUE(p.CheckWhite()); + + p.Record(Tokenizer::EXCLUDE_LAST); + + EXPECT_TRUE(p.CheckChar(IsOperator)); + + p.Rollback(); + + EXPECT_TRUE(p.Next(t)); + EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_CHAR); + EXPECT_TRUE(t.AsChar() == '*'); + + EXPECT_TRUE(p.Next(t)); + EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_EOL); + + EXPECT_TRUE(p.Next(t)); + EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_CHAR); + EXPECT_TRUE(t.AsChar() == '%'); + + nsAutoCString claim; + p.Claim(claim, Tokenizer::EXCLUDE_LAST); + EXPECT_TRUE(claim == "*\r\n"); + p.Claim(claim, Tokenizer::INCLUDE_LAST); + EXPECT_TRUE(claim == "*\r\n%"); + + p.Rollback(); + EXPECT_TRUE(p.CheckChar('%')); + + p.Record(Tokenizer::INCLUDE_LAST); + + EXPECT_FALSE(p.CheckWord("xy")); + + EXPECT_TRUE(p.CheckWord("xx")); + + p.Claim(claim, Tokenizer::INCLUDE_LAST); + EXPECT_TRUE(claim == "%xx"); + + EXPECT_TRUE(p.Next(t)); + EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_CHAR); + EXPECT_TRUE(t.AsChar() == ','); + + EXPECT_TRUE(p.CheckChar('-')); + + EXPECT_TRUE(p.Next(t)); + EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_INTEGER); + EXPECT_TRUE(t.AsInteger() == 15); + + EXPECT_TRUE(p.Next(t)); + EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_EOL); + + EXPECT_TRUE(p.Next(t)); + EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_EOL); + + EXPECT_TRUE(p.Next(t)); + EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_EOF); + + EXPECT_FALSE(p.Next(t)); + + p.Rollback(); + EXPECT_TRUE(p.Next(t)); + EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_EOF); + + EXPECT_FALSE(p.Next(t)); + + p.Rollback(); + EXPECT_TRUE(p.CheckEOF()); + + EXPECT_FALSE(p.CheckEOF()); +} + +TEST(Tokenizer, Main16) +{ + Tokenizer16::Token t; + + // Synthetic code-specific test + + Tokenizer16 p(u"test123 ,15 \t*\r\n%xx,-15\r\r"_ns); + + EXPECT_TRUE(p.Next(t)); + EXPECT_TRUE(t.Type() == Tokenizer16::TOKEN_WORD); + EXPECT_TRUE(t.AsString() == u"test123"_ns); + + Tokenizer16::Token u; + EXPECT_FALSE(p.Check(u)); + + EXPECT_FALSE(p.CheckChar('!')); + + EXPECT_FALSE(p.Check(Tokenizer16::Token::Number(123))); + + EXPECT_TRUE(p.CheckWhite()); + + EXPECT_TRUE(p.CheckChar(',')); + + EXPECT_TRUE(p.Check(Tokenizer16::Token::Number(15))); + + p.Rollback(); + EXPECT_TRUE(p.Check(Tokenizer16::Token::Number(15))); + + p.Rollback(); + EXPECT_TRUE(p.Next(t)); + EXPECT_TRUE(t.Type() == Tokenizer16::TOKEN_INTEGER); + EXPECT_TRUE(t.AsInteger() == 15); + + EXPECT_FALSE(p.CheckChar(IsOperator)); + + EXPECT_TRUE(p.CheckWhite()); + + p.SkipWhites(); + + EXPECT_FALSE(p.CheckWhite()); + + p.Rollback(); + + EXPECT_TRUE(p.CheckWhite()); + EXPECT_TRUE(p.CheckWhite()); + + p.Record(Tokenizer16::EXCLUDE_LAST); + + EXPECT_TRUE(p.CheckChar(IsOperator)); + + p.Rollback(); + + EXPECT_TRUE(p.Next(t)); + EXPECT_TRUE(t.Type() == Tokenizer16::TOKEN_CHAR); + EXPECT_TRUE(t.AsChar() == '*'); + + EXPECT_TRUE(p.Next(t)); + EXPECT_TRUE(t.Type() == Tokenizer16::TOKEN_EOL); + + EXPECT_TRUE(p.Next(t)); + EXPECT_TRUE(t.Type() == Tokenizer16::TOKEN_CHAR); + EXPECT_TRUE(t.AsChar() == '%'); + + nsAutoString claim; + p.Claim(claim, Tokenizer16::EXCLUDE_LAST); + EXPECT_TRUE(claim == u"*\r\n"_ns); + p.Claim(claim, Tokenizer16::INCLUDE_LAST); + EXPECT_TRUE(claim == u"*\r\n%"_ns); + + p.Rollback(); + EXPECT_TRUE(p.CheckChar('%')); + + p.Record(Tokenizer16::INCLUDE_LAST); + + EXPECT_FALSE(p.CheckWord(u"xy"_ns)); + + EXPECT_TRUE(p.CheckWord(u"xx"_ns)); + + p.Claim(claim, Tokenizer16::INCLUDE_LAST); + EXPECT_TRUE(claim == u"%xx"_ns); + + EXPECT_TRUE(p.Next(t)); + EXPECT_TRUE(t.Type() == Tokenizer16::TOKEN_CHAR); + EXPECT_TRUE(t.AsChar() == ','); + + EXPECT_TRUE(p.CheckChar('-')); + + EXPECT_TRUE(p.Next(t)); + EXPECT_TRUE(t.Type() == Tokenizer16::TOKEN_INTEGER); + EXPECT_TRUE(t.AsInteger() == 15); + + EXPECT_TRUE(p.Next(t)); + EXPECT_TRUE(t.Type() == Tokenizer16::TOKEN_EOL); + + EXPECT_TRUE(p.Next(t)); + EXPECT_TRUE(t.Type() == Tokenizer16::TOKEN_EOL); + + EXPECT_TRUE(p.Next(t)); + EXPECT_TRUE(t.Type() == Tokenizer16::TOKEN_EOF); + + EXPECT_FALSE(p.Next(t)); + + p.Rollback(); + EXPECT_TRUE(p.Next(t)); + EXPECT_TRUE(t.Type() == Tokenizer16::TOKEN_EOF); + + EXPECT_FALSE(p.Next(t)); + + p.Rollback(); + EXPECT_TRUE(p.CheckEOF()); + + EXPECT_FALSE(p.CheckEOF()); +} + +TEST(Tokenizer, SingleWord) +{ + // Single word with numbers in it test + + Tokenizer p("test123"_ns); + + EXPECT_TRUE(p.CheckWord("test123")); + EXPECT_TRUE(p.CheckEOF()); +} + +TEST(Tokenizer, EndingAfterNumber) +{ + // An end handling after a number + + Tokenizer p("123"_ns); + + EXPECT_TRUE(p.Check(Tokenizer::Token::Number(123))); + EXPECT_TRUE(p.CheckEOF()); +} + +TEST(Tokenizer, BadInteger) +{ + Tokenizer::Token t; + + // A bad integer test + + Tokenizer p("189234891274981758617846178651647620587135"_ns); + + EXPECT_TRUE(p.Next(t)); + EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_ERROR); + EXPECT_TRUE(p.CheckEOF()); +} + +TEST(Tokenizer, CheckExpectedTokenValue) +{ + Tokenizer::Token t; + + // Check expected token value test + + Tokenizer p("blue velvet"_ns); + + EXPECT_FALSE(p.Check(Tokenizer::TOKEN_INTEGER, t)); + + EXPECT_TRUE(p.Check(Tokenizer::TOKEN_WORD, t)); + EXPECT_TRUE(t.AsString() == "blue"); + + EXPECT_FALSE(p.Check(Tokenizer::TOKEN_WORD, t)); + + EXPECT_TRUE(p.CheckWhite()); + + EXPECT_TRUE(p.Check(Tokenizer::TOKEN_WORD, t)); + EXPECT_TRUE(t.AsString() == "velvet"); + + EXPECT_TRUE(p.CheckEOF()); + + EXPECT_FALSE(p.Next(t)); +} + +TEST(Tokenizer, HasFailed) +{ + Tokenizer::Token t; + + // HasFailed test + + Tokenizer p1("a b"_ns); + + while (p1.Next(t) && t.Type() != Tokenizer::TOKEN_CHAR) { + ; + } + EXPECT_TRUE(p1.HasFailed()); + + Tokenizer p2("a b ?!c"_ns); + + EXPECT_FALSE(p2.CheckChar('c')); + EXPECT_TRUE(p2.HasFailed()); + EXPECT_TRUE(p2.CheckChar(HttpHeaderCharacter)); + EXPECT_FALSE(p2.HasFailed()); + p2.SkipWhites(); + EXPECT_FALSE(p2.HasFailed()); + EXPECT_FALSE(p2.CheckChar('c')); + EXPECT_TRUE(p2.HasFailed()); + EXPECT_TRUE(p2.Next(t)); + EXPECT_FALSE(p2.HasFailed()); + EXPECT_TRUE(p2.Next(t)); + EXPECT_FALSE(p2.HasFailed()); + EXPECT_FALSE(p2.CheckChar('c')); + EXPECT_TRUE(p2.HasFailed()); + EXPECT_TRUE(p2.Check(Tokenizer::TOKEN_CHAR, t)); + EXPECT_FALSE(p2.HasFailed()); + EXPECT_FALSE(p2.CheckChar('#')); + EXPECT_TRUE(p2.HasFailed()); + t = Tokenizer::Token::Char('!'); + EXPECT_TRUE(p2.Check(t)); + EXPECT_FALSE(p2.HasFailed()); + + while (p2.Next(t) && t.Type() != Tokenizer::TOKEN_CHAR) { + ; + } + EXPECT_TRUE(p2.HasFailed()); +} + +TEST(Tokenizer, Construction) +{ + { + nsCString a("test"); + Tokenizer p1(a); + EXPECT_TRUE(p1.CheckWord("test")); + EXPECT_TRUE(p1.CheckEOF()); + } + + { + nsAutoCString a("test"); + Tokenizer p1(a); + EXPECT_TRUE(p1.CheckWord("test")); + EXPECT_TRUE(p1.CheckEOF()); + } + + { + static const char _a[] = "test"; + nsDependentCString a(_a); + Tokenizer p1(a); + EXPECT_TRUE(p1.CheckWord("test")); + EXPECT_TRUE(p1.CheckEOF()); + } + + { + static const char* _a = "test"; + nsDependentCString a(_a); + Tokenizer p1(a); + EXPECT_TRUE(p1.CheckWord("test")); + EXPECT_TRUE(p1.CheckEOF()); + } + + { + Tokenizer p1(nsDependentCString("test")); + EXPECT_TRUE(p1.CheckWord("test")); + EXPECT_TRUE(p1.CheckEOF()); + } + + { + Tokenizer p1("test"_ns); + EXPECT_TRUE(p1.CheckWord("test")); + EXPECT_TRUE(p1.CheckEOF()); + } + + { + Tokenizer p1("test"); + EXPECT_TRUE(p1.CheckWord("test")); + EXPECT_TRUE(p1.CheckEOF()); + } +} + +TEST(Tokenizer, Customization) +{ + Tokenizer p1("test-custom*words and\tdefault-whites"_ns, nullptr, "-*"); + EXPECT_TRUE(p1.CheckWord("test-custom*words")); + EXPECT_TRUE(p1.CheckWhite()); + EXPECT_TRUE(p1.CheckWord("and")); + EXPECT_TRUE(p1.CheckWhite()); + EXPECT_TRUE(p1.CheckWord("default-whites")); + + Tokenizer p2("test, custom,whites"_ns, ", "); + EXPECT_TRUE(p2.CheckWord("test")); + EXPECT_TRUE(p2.CheckWhite()); + EXPECT_TRUE(p2.CheckWhite()); + EXPECT_TRUE(p2.CheckWord("custom")); + EXPECT_TRUE(p2.CheckWhite()); + EXPECT_TRUE(p2.CheckWord("whites")); + + Tokenizer p3("test, custom, whites-and#word-chars"_ns, ",", "-#"); + EXPECT_TRUE(p3.CheckWord("test")); + EXPECT_TRUE(p3.CheckWhite()); + EXPECT_FALSE(p3.CheckWhite()); + EXPECT_TRUE(p3.CheckChar(' ')); + EXPECT_TRUE(p3.CheckWord("custom")); + EXPECT_TRUE(p3.CheckWhite()); + EXPECT_FALSE(p3.CheckWhite()); + EXPECT_TRUE(p3.CheckChar(' ')); + EXPECT_TRUE(p3.CheckWord("whites-and#word-chars")); +} + +TEST(Tokenizer, ShortcutChecks) +{ + Tokenizer p("test1 test2,123"); + + nsAutoCString test1; + nsDependentCSubstring test2; + char comma; + uint32_t integer; + + EXPECT_TRUE(p.ReadWord(test1)); + EXPECT_TRUE(test1 == "test1"); + p.SkipWhites(); + EXPECT_TRUE(p.ReadWord(test2)); + EXPECT_TRUE(test2 == "test2"); + EXPECT_TRUE(p.ReadChar(&comma)); + EXPECT_TRUE(comma == ','); + EXPECT_TRUE(p.ReadInteger(&integer)); + EXPECT_TRUE(integer == 123); + EXPECT_TRUE(p.CheckEOF()); +} + +static bool ABChar(const char aChar) { return aChar == 'a' || aChar == 'b'; } + +TEST(Tokenizer, ReadCharClassified) +{ + Tokenizer p("abc"); + + char c; + EXPECT_TRUE(p.ReadChar(ABChar, &c)); + EXPECT_TRUE(c == 'a'); + EXPECT_TRUE(p.ReadChar(ABChar, &c)); + EXPECT_TRUE(c == 'b'); + EXPECT_FALSE(p.ReadChar(ABChar, &c)); + nsDependentCSubstring w; + EXPECT_TRUE(p.ReadWord(w)); + EXPECT_TRUE(w == "c"); + EXPECT_TRUE(p.CheckEOF()); +} + +TEST(Tokenizer, ClaimSubstring) +{ + Tokenizer p(" abc "); + + EXPECT_TRUE(p.CheckWhite()); + + p.Record(); + EXPECT_TRUE(p.CheckWord("abc")); + nsDependentCSubstring v; + p.Claim(v, Tokenizer::INCLUDE_LAST); + EXPECT_TRUE(v == "abc"); + EXPECT_TRUE(p.CheckWhite()); + EXPECT_TRUE(p.CheckEOF()); +} + +TEST(Tokenizer, Fragment) +{ + const char str[] = "ab;cd:10 "; + Tokenizer p(str); + nsDependentCSubstring f; + + Tokenizer::Token t1, t2; + + EXPECT_TRUE(p.Next(t1)); + EXPECT_TRUE(t1.Type() == Tokenizer::TOKEN_WORD); + EXPECT_TRUE(t1.Fragment() == "ab"); + EXPECT_TRUE(t1.Fragment().BeginReading() == &str[0]); + + p.Rollback(); + EXPECT_TRUE(p.Check(Tokenizer::TOKEN_WORD, t2)); + EXPECT_TRUE(t2.Fragment() == "ab"); + EXPECT_TRUE(t2.Fragment().BeginReading() == &str[0]); + + EXPECT_TRUE(p.Next(t1)); + EXPECT_TRUE(t1.Type() == Tokenizer::TOKEN_CHAR); + EXPECT_TRUE(t1.Fragment() == ";"); + EXPECT_TRUE(t1.Fragment().BeginReading() == &str[2]); + + p.Rollback(); + EXPECT_TRUE(p.Check(Tokenizer::TOKEN_CHAR, t2)); + EXPECT_TRUE(t2.Fragment() == ";"); + EXPECT_TRUE(t2.Fragment().BeginReading() == &str[2]); + + EXPECT_TRUE(p.Check(Tokenizer::TOKEN_WORD, t2)); + EXPECT_TRUE(t2.Fragment() == "cd"); + EXPECT_TRUE(t2.Fragment().BeginReading() == &str[3]); + + p.Rollback(); + EXPECT_TRUE(p.Next(t1)); + EXPECT_TRUE(t1.Type() == Tokenizer::TOKEN_WORD); + EXPECT_TRUE(t1.Fragment() == "cd"); + EXPECT_TRUE(t1.Fragment().BeginReading() == &str[3]); + + EXPECT_TRUE(p.Check(Tokenizer::TOKEN_CHAR, t2)); + EXPECT_TRUE(t2.Fragment() == ":"); + EXPECT_TRUE(t2.Fragment().BeginReading() == &str[5]); + + p.Rollback(); + EXPECT_TRUE(p.Next(t1)); + EXPECT_TRUE(t1.Type() == Tokenizer::TOKEN_CHAR); + EXPECT_TRUE(t1.Fragment() == ":"); + EXPECT_TRUE(t1.Fragment().BeginReading() == &str[5]); + + EXPECT_TRUE(p.Next(t1)); + EXPECT_TRUE(t1.Type() == Tokenizer::TOKEN_INTEGER); + EXPECT_TRUE(t1.Fragment() == "10"); + EXPECT_TRUE(t1.Fragment().BeginReading() == &str[6]); + + EXPECT_TRUE(p.Check(Tokenizer::TOKEN_WS, t2)); + EXPECT_TRUE(t2.Fragment() == " "); + EXPECT_TRUE(t2.Fragment().BeginReading() == &str[8]); + + EXPECT_TRUE(p.Check(Tokenizer::TOKEN_EOF, t1)); + EXPECT_TRUE(t1.Fragment() == ""); + EXPECT_TRUE(t1.Fragment().BeginReading() == &str[9]); +} + +TEST(Tokenizer, SkipWhites) +{ + Tokenizer p("Text1 \nText2 \nText3\n Text4\n "); + + EXPECT_TRUE(p.CheckWord("Text1")); + p.SkipWhites(); + EXPECT_TRUE(p.CheckEOL()); + + EXPECT_TRUE(p.CheckWord("Text2")); + p.SkipWhites(Tokenizer::INCLUDE_NEW_LINE); + + EXPECT_TRUE(p.CheckWord("Text3")); + p.SkipWhites(); + EXPECT_TRUE(p.CheckEOL()); + p.SkipWhites(); + + EXPECT_TRUE(p.CheckWord("Text4")); + p.SkipWhites(Tokenizer::INCLUDE_NEW_LINE); + EXPECT_TRUE(p.CheckEOF()); +} + +TEST(Tokenizer, SkipCustomWhites) +{ + Tokenizer p("Text1 \n\r\t.Text2 \n\r\t.", " \n\r\t."); + + EXPECT_TRUE(p.CheckWord("Text1")); + p.SkipWhites(); + EXPECT_TRUE(p.CheckWord("Text2")); + EXPECT_TRUE(p.CheckWhite()); + EXPECT_TRUE(p.CheckWhite()); + EXPECT_TRUE(p.CheckWhite()); + EXPECT_TRUE(p.CheckWhite()); + EXPECT_TRUE(p.CheckWhite()); + EXPECT_TRUE(p.CheckEOF()); +} + +TEST(Tokenizer, IntegerReading) +{ +#define INT_6_BITS 64U +#define INT_30_BITS 1073741824UL +#define INT_32_BITS 4294967295UL +#define INT_50_BITS 1125899906842624ULL +#define STR_INT_MORE_THAN_64_BITS "922337203685477580899" + + { + Tokenizer p(MOZ_STRINGIFY(INT_6_BITS)); + uint8_t u8; + uint16_t u16; + uint32_t u32; + uint64_t u64; + EXPECT_TRUE(p.ReadInteger(&u8)); + EXPECT_TRUE(u8 == INT_6_BITS); + p.Rollback(); + EXPECT_TRUE(p.ReadInteger(&u16)); + EXPECT_TRUE(u16 == INT_6_BITS); + p.Rollback(); + EXPECT_TRUE(p.ReadInteger(&u32)); + EXPECT_TRUE(u32 == INT_6_BITS); + p.Rollback(); + EXPECT_TRUE(p.ReadInteger(&u64)); + EXPECT_TRUE(u64 == INT_6_BITS); + + p.Rollback(); + + int8_t s8; + int16_t s16; + int32_t s32; + int64_t s64; + EXPECT_TRUE(p.ReadInteger(&s8)); + EXPECT_TRUE(s8 == INT_6_BITS); + p.Rollback(); + EXPECT_TRUE(p.ReadInteger(&s16)); + EXPECT_TRUE(s16 == INT_6_BITS); + p.Rollback(); + EXPECT_TRUE(p.ReadInteger(&s32)); + EXPECT_TRUE(s32 == INT_6_BITS); + p.Rollback(); + EXPECT_TRUE(p.ReadInteger(&s64)); + EXPECT_TRUE(s64 == INT_6_BITS); + + EXPECT_TRUE(p.CheckWord("U")); + EXPECT_TRUE(p.CheckEOF()); + } + + { + Tokenizer p(MOZ_STRINGIFY(INT_30_BITS)); + uint8_t u8; + uint16_t u16; + uint32_t u32; + uint64_t u64; + EXPECT_FALSE(p.ReadInteger(&u8)); + EXPECT_FALSE(p.ReadInteger(&u16)); + EXPECT_TRUE(p.ReadInteger(&u32)); + EXPECT_TRUE(u32 == INT_30_BITS); + p.Rollback(); + EXPECT_TRUE(p.ReadInteger(&u64)); + EXPECT_TRUE(u64 == INT_30_BITS); + + p.Rollback(); + + int8_t s8; + int16_t s16; + int32_t s32; + int64_t s64; + EXPECT_FALSE(p.ReadInteger(&s8)); + EXPECT_FALSE(p.ReadInteger(&s16)); + EXPECT_TRUE(p.ReadInteger(&s32)); + EXPECT_TRUE(s32 == INT_30_BITS); + p.Rollback(); + EXPECT_TRUE(p.ReadInteger(&s64)); + EXPECT_TRUE(s64 == INT_30_BITS); + EXPECT_TRUE(p.CheckWord("UL")); + EXPECT_TRUE(p.CheckEOF()); + } + + { + Tokenizer p(MOZ_STRINGIFY(INT_32_BITS)); + uint32_t u32; + int32_t s32; + EXPECT_FALSE(p.ReadInteger(&s32)); + EXPECT_TRUE(p.ReadInteger(&u32)); + EXPECT_TRUE(u32 == INT_32_BITS); + EXPECT_TRUE(p.CheckWord("UL")); + EXPECT_TRUE(p.CheckEOF()); + } + + { + Tokenizer p(MOZ_STRINGIFY(INT_50_BITS)); + uint8_t u8; + uint16_t u16; + uint32_t u32; + uint64_t u64; + EXPECT_FALSE(p.ReadInteger(&u8)); + EXPECT_FALSE(p.ReadInteger(&u16)); + EXPECT_FALSE(p.ReadInteger(&u32)); + EXPECT_TRUE(p.ReadInteger(&u64)); + EXPECT_TRUE(u64 == INT_50_BITS); + EXPECT_TRUE(p.CheckWord("ULL")); + EXPECT_TRUE(p.CheckEOF()); + } + + { + Tokenizer p(STR_INT_MORE_THAN_64_BITS); + int64_t i; + EXPECT_FALSE(p.ReadInteger(&i)); + uint64_t u; + EXPECT_FALSE(p.ReadInteger(&u)); + EXPECT_FALSE(p.CheckEOF()); + } +} + +TEST(Tokenizer, ReadUntil) +{ + Tokenizer p("Hello;test 4,"); + nsDependentCSubstring f; + EXPECT_TRUE(p.ReadUntil(Tokenizer::Token::Char(';'), f)); + EXPECT_TRUE(f == "Hello"); + p.Rollback(); + + EXPECT_TRUE( + p.ReadUntil(Tokenizer::Token::Char(';'), f, Tokenizer::INCLUDE_LAST)); + EXPECT_TRUE(f == "Hello;"); + p.Rollback(); + + EXPECT_FALSE(p.ReadUntil(Tokenizer::Token::Char('!'), f)); + EXPECT_TRUE(f == "Hello;test 4,"); + p.Rollback(); + + EXPECT_TRUE(p.ReadUntil(Tokenizer::Token::Word("test"_ns), f)); + EXPECT_TRUE(f == "Hello;"); + p.Rollback(); + + EXPECT_TRUE(p.ReadUntil(Tokenizer::Token::Word("test"_ns), f, + Tokenizer::INCLUDE_LAST)); + EXPECT_TRUE(f == "Hello;test"); + EXPECT_TRUE(p.ReadUntil(Tokenizer::Token::Char(','), f)); + EXPECT_TRUE(f == " 4"); +} + +TEST(Tokenizer, SkipUntil) +{ + { + Tokenizer p("test1,test2,,,test3"); + + p.SkipUntil(Tokenizer::Token::Char(',')); + EXPECT_TRUE(p.CheckChar(',')); + EXPECT_TRUE(p.CheckWord("test2")); + + p.SkipUntil(Tokenizer::Token::Char(',')); // must not move + EXPECT_TRUE(p.CheckChar(',')); // check the first comma of the ',,,' string + + p.Rollback(); // moves cursor back to the first comma of the ',,,' string + + p.SkipUntil( + Tokenizer::Token::Char(',')); // must not move, we are on the ',' char + EXPECT_TRUE(p.CheckChar(',')); + EXPECT_TRUE(p.CheckChar(',')); + EXPECT_TRUE(p.CheckChar(',')); + EXPECT_TRUE(p.CheckWord("test3")); + p.Rollback(); + + p.SkipUntil(Tokenizer::Token::Char(',')); + EXPECT_TRUE(p.CheckEOF()); + } + + { + Tokenizer p("test0,test1,test2"); + + p.SkipUntil(Tokenizer::Token::Char(',')); + EXPECT_TRUE(p.CheckChar(',')); + + p.SkipUntil(Tokenizer::Token::Char(',')); + p.Rollback(); + + EXPECT_TRUE(p.CheckWord("test1")); + EXPECT_TRUE(p.CheckChar(',')); + + p.SkipUntil(Tokenizer::Token::Char(',')); + p.Rollback(); + + EXPECT_TRUE(p.CheckWord("test2")); + EXPECT_TRUE(p.CheckEOF()); + } +} + +TEST(Tokenizer, Custom) +{ + Tokenizer p( + "aaaaaacustom-1\r,custom-1,Custom-1,Custom-1,00custom-2xxxx,CUSTOM-2"); + + Tokenizer::Token c1 = + p.AddCustomToken("custom-1", Tokenizer::CASE_INSENSITIVE); + Tokenizer::Token c2 = p.AddCustomToken("custom-2", Tokenizer::CASE_SENSITIVE); + + // It's expected to NOT FIND the custom token if it's not on an edge + // between other recognizable tokens. + EXPECT_TRUE(p.CheckWord("aaaaaacustom")); + EXPECT_TRUE(p.CheckChar('-')); + EXPECT_TRUE(p.Check(Tokenizer::Token::Number(1))); + EXPECT_TRUE(p.CheckEOL()); + EXPECT_TRUE(p.CheckChar(',')); + + EXPECT_TRUE(p.Check(c1)); + EXPECT_TRUE(p.CheckChar(',')); + + EXPECT_TRUE(p.Check(c1)); + EXPECT_TRUE(p.CheckChar(',')); + + p.EnableCustomToken(c1, false); + EXPECT_TRUE(p.CheckWord("Custom")); + EXPECT_TRUE(p.CheckChar('-')); + EXPECT_TRUE(p.Check(Tokenizer::Token::Number(1))); + EXPECT_TRUE(p.CheckChar(',')); + + EXPECT_TRUE(p.Check(Tokenizer::Token::Number(0))); + EXPECT_TRUE(p.Check(c2)); + EXPECT_TRUE(p.CheckWord("xxxx")); + EXPECT_TRUE(p.CheckChar(',')); + + EXPECT_TRUE(p.CheckWord("CUSTOM")); + EXPECT_TRUE(p.CheckChar('-')); + EXPECT_TRUE(p.Check(Tokenizer::Token::Number(2))); + + EXPECT_TRUE(p.CheckEOF()); +} + +TEST(Tokenizer, CustomRaw) +{ + Tokenizer p( + "aaaaaacustom-1\r,custom-1,Custom-1,Custom-1,00custom-2xxxx,CUSTOM-2"); + + Tokenizer::Token c1 = + p.AddCustomToken("custom-1", Tokenizer::CASE_INSENSITIVE); + Tokenizer::Token c2 = p.AddCustomToken("custom-2", Tokenizer::CASE_SENSITIVE); + + // In this mode it's expected to find all custom tokens among any kind of + // input. + p.SetTokenizingMode(Tokenizer::Mode::CUSTOM_ONLY); + + Tokenizer::Token t; + + EXPECT_TRUE(p.Next(t)); + EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_RAW); + EXPECT_TRUE(t.Fragment().EqualsLiteral("aaaaaa")); + + EXPECT_TRUE(p.Check(c1)); + + EXPECT_TRUE(p.Next(t)); + EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_RAW); + EXPECT_TRUE(t.Fragment().EqualsLiteral("\r,")); + + EXPECT_TRUE(p.Check(c1)); + + EXPECT_TRUE(p.Next(t)); + EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_RAW); + EXPECT_TRUE(t.Fragment().EqualsLiteral(",")); + + EXPECT_TRUE(p.Check(c1)); + + EXPECT_TRUE(p.Next(t)); + EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_RAW); + EXPECT_TRUE(t.Fragment().EqualsLiteral(",")); + + EXPECT_TRUE(p.Check(c1)); + + EXPECT_TRUE(p.Next(t)); + EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_RAW); + EXPECT_TRUE(t.Fragment().EqualsLiteral(",00")); + + EXPECT_TRUE(p.Check(c2)); + + EXPECT_TRUE(p.Next(t)); + EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_RAW); + EXPECT_TRUE(t.Fragment().EqualsLiteral("xxxx,CUSTOM-2")); + + EXPECT_TRUE(p.CheckEOF()); +} + +TEST(Tokenizer, Incremental) +{ + using Token = IncrementalTokenizer::Token; + + int test = 0; + IncrementalTokenizer i( + [&](Token const& t, IncrementalTokenizer& i) -> nsresult { + switch (++test) { + case 1: + EXPECT_TRUE(t.Equals(Token::Word("test1"_ns))); + break; + case 2: + EXPECT_TRUE(t.Equals(Token::Char(','))); + break; + case 3: + EXPECT_TRUE(t.Equals(Token::Word("test2"_ns))); + break; + case 4: + EXPECT_TRUE(t.Equals(Token::Char(','))); + break; + case 5: + EXPECT_TRUE(t.Equals(Token::Char(','))); + break; + case 6: + EXPECT_TRUE(t.Equals(Token::Char(','))); + break; + case 7: + EXPECT_TRUE(t.Equals(Token::Word("test3"_ns))); + break; + case 8: + EXPECT_TRUE(t.Equals(Token::EndOfFile())); + break; + } + + return NS_OK; + }); + + constexpr auto input = "test1,test2,,,test3"_ns; + const auto* cur = input.BeginReading(); + const auto* end = input.EndReading(); + for (; cur < end; ++cur) { + i.FeedInput(nsDependentCSubstring(cur, 1)); + } + + EXPECT_TRUE(test == 6); + i.FinishInput(); + EXPECT_TRUE(test == 8); +} + +TEST(Tokenizer, IncrementalRollback) +{ + using Token = IncrementalTokenizer::Token; + + int test = 0; + IncrementalTokenizer i( + [&](Token const& t, IncrementalTokenizer& i) -> nsresult { + switch (++test) { + case 1: + EXPECT_TRUE(t.Equals(Token::Word("test1"_ns))); + break; + case 2: + EXPECT_TRUE(t.Equals(Token::Char(','))); + break; + case 3: + EXPECT_TRUE(t.Equals(Token::Word("test2"_ns))); + i.Rollback(); // so that we get the token again + break; + case 4: + EXPECT_TRUE(t.Equals(Token::Word("test2"_ns))); + break; + case 5: + EXPECT_TRUE(t.Equals(Token::Char(','))); + break; + case 6: + EXPECT_TRUE(t.Equals(Token::Char(','))); + break; + case 7: + EXPECT_TRUE(t.Equals(Token::Char(','))); + break; + case 8: + EXPECT_TRUE(t.Equals(Token::Word("test3"_ns))); + break; + case 9: + EXPECT_TRUE(t.Equals(Token::EndOfFile())); + break; + } + + return NS_OK; + }); + + constexpr auto input = "test1,test2,,,test3"_ns; + const auto* cur = input.BeginReading(); + const auto* end = input.EndReading(); + for (; cur < end; ++cur) { + i.FeedInput(nsDependentCSubstring(cur, 1)); + } + + EXPECT_TRUE(test == 7); + i.FinishInput(); + EXPECT_TRUE(test == 9); +} + +TEST(Tokenizer, IncrementalNeedMoreInput) +{ + using Token = IncrementalTokenizer::Token; + + int test = 0; + IncrementalTokenizer i( + [&](Token const& t, IncrementalTokenizer& i) -> nsresult { + Token t2; + switch (++test) { + case 1: + EXPECT_TRUE(t.Equals(Token::Word("a"_ns))); + break; + case 2: + case 3: + case 4: + case 5: + EXPECT_TRUE(t.Equals(Token::Whitespace())); + if (i.Next(t2)) { + EXPECT_TRUE(test == 5); + EXPECT_TRUE(t2.Equals(Token::Word("bb"_ns))); + } else { + EXPECT_TRUE(test < 5); + i.NeedMoreInput(); + } + break; + case 6: + EXPECT_TRUE(t.Equals(Token::Char(','))); + break; + case 7: + EXPECT_TRUE(t.Equals(Token::Word("c"_ns))); + return NS_ERROR_FAILURE; + default: + EXPECT_TRUE(false); + break; + } + + return NS_OK; + }); + + constexpr auto input = "a bb,c"_ns; + const auto* cur = input.BeginReading(); + const auto* end = input.EndReading(); + + nsresult rv; + for (; cur < end; ++cur) { + rv = i.FeedInput(nsDependentCSubstring(cur, 1)); + if (NS_FAILED(rv)) { + break; + } + } + + EXPECT_TRUE(rv == NS_OK); + EXPECT_TRUE(test == 6); + + rv = i.FinishInput(); + EXPECT_TRUE(rv == NS_ERROR_FAILURE); + EXPECT_TRUE(test == 7); +} + +TEST(Tokenizer, IncrementalCustom) +{ + using Token = IncrementalTokenizer::Token; + + int test = 0; + Token custom; + IncrementalTokenizer i( + [&](Token const& t, IncrementalTokenizer& i) -> nsresult { + switch (++test) { + case 1: + EXPECT_TRUE(t.Equals(custom)); + break; + case 2: + EXPECT_TRUE(t.Equals(Token::Word("bla"_ns))); + break; + case 3: + EXPECT_TRUE(t.Equals(Token::EndOfFile())); + break; + } + + return NS_OK; + }, + nullptr, "-"); + + custom = i.AddCustomToken("some-test", Tokenizer::CASE_SENSITIVE); + i.FeedInput("some-"_ns); + EXPECT_TRUE(test == 0); + i.FeedInput("tes"_ns); + EXPECT_TRUE(test == 0); + i.FeedInput("tbla"_ns); + EXPECT_TRUE(test == 1); + i.FinishInput(); + EXPECT_TRUE(test == 3); +} + +TEST(Tokenizer, IncrementalCustomRaw) +{ + using Token = IncrementalTokenizer::Token; + + int test = 0; + Token custom; + IncrementalTokenizer i( + [&](Token const& t, IncrementalTokenizer& i) -> nsresult { + switch (++test) { + case 1: + EXPECT_TRUE(t.Fragment().EqualsLiteral("test1,")); + break; + case 2: + EXPECT_TRUE(t.Equals(custom)); + break; + case 3: + EXPECT_TRUE(t.Fragment().EqualsLiteral("!,,test3")); + i.Rollback(); + i.SetTokenizingMode(Tokenizer::Mode::FULL); + break; + case 4: + EXPECT_TRUE(t.Equals(Token::Char('!'))); + i.SetTokenizingMode(Tokenizer::Mode::CUSTOM_ONLY); + break; + case 5: + EXPECT_TRUE(t.Fragment().EqualsLiteral(",,test3")); + break; + case 6: + EXPECT_TRUE(t.Equals(custom)); + break; + case 7: + EXPECT_TRUE(t.Fragment().EqualsLiteral("tes")); + break; + case 8: + EXPECT_TRUE(t.Equals(Token::EndOfFile())); + break; + } + + return NS_OK; + }); + + custom = i.AddCustomToken("test2", Tokenizer::CASE_SENSITIVE); + i.SetTokenizingMode(Tokenizer::Mode::CUSTOM_ONLY); + + constexpr auto input = "test1,test2!,,test3test2tes"_ns; + const auto* cur = input.BeginReading(); + const auto* end = input.EndReading(); + for (; cur < end; ++cur) { + i.FeedInput(nsDependentCSubstring(cur, 1)); + } + + EXPECT_TRUE(test == 6); + i.FinishInput(); + EXPECT_TRUE(test == 8); +} + +TEST(Tokenizer, IncrementalCustomRemove) +{ + using Token = IncrementalTokenizer::Token; + + int test = 0; + Token custom; + IncrementalTokenizer i( + [&](Token const& t, IncrementalTokenizer& i) -> nsresult { + switch (++test) { + case 1: + EXPECT_TRUE(t.Equals(custom)); + i.RemoveCustomToken(custom); + break; + case 2: + EXPECT_FALSE(t.Equals(custom)); + break; + case 3: + EXPECT_TRUE(t.Equals(Token::EndOfFile())); + break; + } + + return NS_OK; + }); + + custom = i.AddCustomToken("custom1", Tokenizer::CASE_SENSITIVE); + + constexpr auto input = "custom1custom1"_ns; + i.FeedInput(input); + EXPECT_TRUE(test == 1); + i.FinishInput(); + EXPECT_TRUE(test == 3); +} + +TEST(Tokenizer, IncrementalBuffering1) +{ + using Token = IncrementalTokenizer::Token; + + int test = 0; + Token custom; + nsDependentCSubstring observedFragment; + IncrementalTokenizer i( + [&](Token const& t, IncrementalTokenizer& i) -> nsresult { + switch (++test) { + case 1: + EXPECT_TRUE(t.Fragment().EqualsLiteral("012")); + break; + case 2: + EXPECT_TRUE(t.Fragment().EqualsLiteral("3456789")); + break; + case 3: + EXPECT_TRUE(t.Equals(custom)); + break; + case 4: + EXPECT_TRUE(t.Fragment().EqualsLiteral("qwe")); + break; + case 5: + EXPECT_TRUE(t.Fragment().EqualsLiteral("rt")); + break; + case 6: + EXPECT_TRUE(t.Equals(Token::EndOfFile())); + break; + } + + observedFragment.Rebind(t.Fragment().BeginReading(), + t.Fragment().Length()); + return NS_OK; + }, + nullptr, nullptr, 3); + + custom = i.AddCustomToken("aaa", Tokenizer::CASE_SENSITIVE); + // This externally unused token is added only to check the internal algorithm + // does work correctly as expected when there are two different length tokens. + Unused << i.AddCustomToken("bb", Tokenizer::CASE_SENSITIVE); + i.SetTokenizingMode(Tokenizer::Mode::CUSTOM_ONLY); + + i.FeedInput("01234"_ns); + EXPECT_TRUE(test == 1); + EXPECT_TRUE(observedFragment.EqualsLiteral("012")); + + i.FeedInput("5"_ns); + EXPECT_TRUE(test == 1); + i.FeedInput("6789aa"_ns); + EXPECT_TRUE(test == 2); + EXPECT_TRUE(observedFragment.EqualsLiteral("3456789")); + + i.FeedInput("aqwert"_ns); + EXPECT_TRUE(test == 4); + EXPECT_TRUE(observedFragment.EqualsLiteral("qwe")); + + i.FinishInput(); + EXPECT_TRUE(test == 6); +} + +TEST(Tokenizer, IncrementalBuffering2) +{ + using Token = IncrementalTokenizer::Token; + + int test = 0; + Token custom; + IncrementalTokenizer i( + [&](Token const& t, IncrementalTokenizer& i) -> nsresult { + switch (++test) { + case 1: + EXPECT_TRUE(t.Fragment().EqualsLiteral("01")); + break; + case 2: + EXPECT_TRUE(t.Fragment().EqualsLiteral("234567")); + break; + case 3: + EXPECT_TRUE(t.Fragment().EqualsLiteral("89")); + break; + case 4: + EXPECT_TRUE(t.Equals(custom)); + break; + case 5: + EXPECT_TRUE(t.Fragment().EqualsLiteral("qwert")); + break; + case 6: + EXPECT_TRUE(t.Equals(Token::EndOfFile())); + break; + } + return NS_OK; + }, + nullptr, nullptr, 3); + + custom = i.AddCustomToken("aaa", Tokenizer::CASE_SENSITIVE); + // This externally unused token is added only to check the internal algorithm + // does work correctly as expected when there are two different length tokens. + Unused << i.AddCustomToken("bbbbb", Tokenizer::CASE_SENSITIVE); + i.SetTokenizingMode(Tokenizer::Mode::CUSTOM_ONLY); + + i.FeedInput("01234"_ns); + EXPECT_TRUE(test == 0); + i.FeedInput("5"_ns); + EXPECT_TRUE(test == 1); + i.FeedInput("6789aa"_ns); + EXPECT_TRUE(test == 2); + i.FeedInput("aqwert"_ns); + EXPECT_TRUE(test == 4); + i.FinishInput(); + EXPECT_TRUE(test == 6); +} + +TEST(Tokenizer, RecordAndReadUntil) +{ + Tokenizer t("aaaa,bbbb"); + t.SkipWhites(); + nsDependentCSubstring subject; + + EXPECT_TRUE(t.ReadUntil(mozilla::Tokenizer::Token::Char(','), subject)); + EXPECT_FALSE(t.CheckChar(',')); + EXPECT_TRUE(subject.Length() == 4); + EXPECT_TRUE(subject == "aaaa"); + + EXPECT_FALSE(t.ReadUntil(mozilla::Tokenizer::Token::Char(','), subject)); + EXPECT_TRUE(subject.Length() == 4); + EXPECT_TRUE(subject == "bbbb"); + + EXPECT_FALSE(t.ReadUntil(mozilla::Tokenizer::Token::Char(','), subject)); + EXPECT_TRUE(subject.Length() == 0); + + EXPECT_TRUE(t.CheckEOF()); +} + +TEST(Tokenizer, ReadIntegers) +{ + // Make sure that adding dash (the 'minus' sign) as an additional char + // doesn't break reading negative numbers. + Tokenizer t("100,-100,200,-200,4294967295,-4294967295,-2147483647", nullptr, + "-"); + + uint32_t unsigned_value32; + int32_t signed_value32; + int64_t signed_value64; + + // "100," + EXPECT_TRUE(t.ReadInteger(&unsigned_value32)); + EXPECT_TRUE(unsigned_value32 == 100); + EXPECT_TRUE(t.CheckChar(',')); + + // "-100," + EXPECT_FALSE(t.ReadInteger(&unsigned_value32)); + EXPECT_FALSE(t.CheckChar(',')); + + EXPECT_TRUE(t.ReadSignedInteger(&signed_value32)); + EXPECT_TRUE(signed_value32 == -100); + EXPECT_TRUE(t.CheckChar(',')); + + // "200," + EXPECT_TRUE(t.ReadSignedInteger(&signed_value32)); + EXPECT_TRUE(signed_value32 == 200); + EXPECT_TRUE(t.CheckChar(',')); + + // "-200," + EXPECT_TRUE(t.ReadSignedInteger(&signed_value32)); + EXPECT_TRUE(signed_value32 == -200); + EXPECT_TRUE(t.CheckChar(',')); + + // "4294967295," + EXPECT_FALSE(t.ReadSignedInteger(&signed_value32)); + EXPECT_FALSE(t.CheckChar(',')); + + EXPECT_TRUE(t.ReadInteger(&unsigned_value32)); + EXPECT_TRUE(unsigned_value32 == 4294967295UL); + EXPECT_TRUE(t.CheckChar(',')); + + // "-4294967295," + EXPECT_FALSE(t.ReadSignedInteger(&signed_value32)); + EXPECT_FALSE(t.CheckChar(',')); + + EXPECT_FALSE(t.ReadInteger(&unsigned_value32)); + EXPECT_FALSE(t.CheckChar(',')); + + EXPECT_TRUE(t.ReadSignedInteger(&signed_value64)); + EXPECT_TRUE(signed_value64 == -4294967295LL); + EXPECT_TRUE(t.CheckChar(',')); + + // "-2147483647" + EXPECT_FALSE(t.ReadInteger(&unsigned_value32)); + EXPECT_FALSE(t.CheckChar(',')); + + EXPECT_TRUE(t.ReadSignedInteger(&signed_value32)); + EXPECT_TRUE(signed_value32 == -2147483647L); + EXPECT_TRUE(t.CheckEOF()); +} + +TEST(Tokenizer, CheckPhrase) +{ + Tokenizer t("foo bar baz"); + + EXPECT_TRUE(t.CheckPhrase("foo ")); + + EXPECT_FALSE(t.CheckPhrase("barr")); + EXPECT_FALSE(t.CheckPhrase("BAR BAZ")); + EXPECT_FALSE(t.CheckPhrase("bar baz ")); + EXPECT_FALSE(t.CheckPhrase("b")); + EXPECT_FALSE(t.CheckPhrase("ba")); + EXPECT_FALSE(t.CheckPhrase("??")); + + EXPECT_TRUE(t.CheckPhrase("bar baz")); + + t.Rollback(); + EXPECT_TRUE(t.CheckPhrase("bar")); + EXPECT_TRUE(t.CheckPhrase(" baz")); + + t.Rollback(); + EXPECT_FALSE(t.CheckPhrase("\tbaz")); + EXPECT_TRUE(t.CheckPhrase(" baz")); + EXPECT_TRUE(t.CheckEOF()); +} -- cgit v1.2.3