diff options
Diffstat (limited to 'src/encoding/xml/xml_test.go')
-rw-r--r-- | src/encoding/xml/xml_test.go | 1423 |
1 files changed, 1423 insertions, 0 deletions
diff --git a/src/encoding/xml/xml_test.go b/src/encoding/xml/xml_test.go new file mode 100644 index 0000000..30fb94d --- /dev/null +++ b/src/encoding/xml/xml_test.go @@ -0,0 +1,1423 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package xml + +import ( + "bytes" + "fmt" + "io" + "reflect" + "strings" + "testing" + "unicode/utf8" +) + +type toks struct { + earlyEOF bool + t []Token +} + +func (t *toks) Token() (Token, error) { + if len(t.t) == 0 { + return nil, io.EOF + } + var tok Token + tok, t.t = t.t[0], t.t[1:] + if t.earlyEOF && len(t.t) == 0 { + return tok, io.EOF + } + return tok, nil +} + +func TestDecodeEOF(t *testing.T) { + start := StartElement{Name: Name{Local: "test"}} + tests := []struct { + name string + tokens []Token + ok bool + }{ + { + name: "OK", + tokens: []Token{ + start, + start.End(), + }, + ok: true, + }, + { + name: "Malformed", + tokens: []Token{ + start, + StartElement{Name: Name{Local: "bad"}}, + start.End(), + }, + ok: false, + }, + } + for _, tc := range tests { + for _, eof := range []bool{true, false} { + name := fmt.Sprintf("%s/earlyEOF=%v", tc.name, eof) + t.Run(name, func(t *testing.T) { + d := NewTokenDecoder(&toks{ + earlyEOF: eof, + t: tc.tokens, + }) + err := d.Decode(&struct { + XMLName Name `xml:"test"` + }{}) + if tc.ok && err != nil { + t.Fatalf("d.Decode: expected nil error, got %v", err) + } + if _, ok := err.(*SyntaxError); !tc.ok && !ok { + t.Errorf("d.Decode: expected syntax error, got %v", err) + } + }) + } + } +} + +type toksNil struct { + returnEOF bool + t []Token +} + +func (t *toksNil) Token() (Token, error) { + if len(t.t) == 0 { + if !t.returnEOF { + // Return nil, nil before returning an EOF. It's legal, but + // discouraged. + t.returnEOF = true + return nil, nil + } + return nil, io.EOF + } + var tok Token + tok, t.t = t.t[0], t.t[1:] + return tok, nil +} + +func TestDecodeNilToken(t *testing.T) { + for _, strict := range []bool{true, false} { + name := fmt.Sprintf("Strict=%v", strict) + t.Run(name, func(t *testing.T) { + start := StartElement{Name: Name{Local: "test"}} + bad := StartElement{Name: Name{Local: "bad"}} + d := NewTokenDecoder(&toksNil{ + // Malformed + t: []Token{start, bad, start.End()}, + }) + d.Strict = strict + err := d.Decode(&struct { + XMLName Name `xml:"test"` + }{}) + if _, ok := err.(*SyntaxError); !ok { + t.Errorf("d.Decode: expected syntax error, got %v", err) + } + }) + } +} + +const testInput = ` +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` + + "\r\n\t" + ` > + <hello lang="en">World <>'" 白鵬翔</hello> + <query>&何; &is-it;</query> + <goodbye /> + <outer foo:attr="value" xmlns:tag="ns4"> + <inner/> + </outer> + <tag:name> + <![CDATA[Some text here.]]> + </tag:name> +</body><!-- missing final newline -->` + +var testEntity = map[string]string{"何": "What", "is-it": "is it?"} + +var rawTokens = []Token{ + CharData("\n"), + ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, + CharData("\n"), + Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`), + CharData("\n"), + StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}}, + CharData("\n "), + StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}, + CharData("World <>'\" 白鵬翔"), + EndElement{Name{"", "hello"}}, + CharData("\n "), + StartElement{Name{"", "query"}, []Attr{}}, + CharData("What is it?"), + EndElement{Name{"", "query"}}, + CharData("\n "), + StartElement{Name{"", "goodbye"}, []Attr{}}, + EndElement{Name{"", "goodbye"}}, + CharData("\n "), + StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}}, + CharData("\n "), + StartElement{Name{"", "inner"}, []Attr{}}, + EndElement{Name{"", "inner"}}, + CharData("\n "), + EndElement{Name{"", "outer"}}, + CharData("\n "), + StartElement{Name{"tag", "name"}, []Attr{}}, + CharData("\n "), + CharData("Some text here."), + CharData("\n "), + EndElement{Name{"tag", "name"}}, + CharData("\n"), + EndElement{Name{"", "body"}}, + Comment(" missing final newline "), +} + +var cookedTokens = []Token{ + CharData("\n"), + ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, + CharData("\n"), + Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`), + CharData("\n"), + StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}}, + CharData("\n "), + StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}, + CharData("World <>'\" 白鵬翔"), + EndElement{Name{"ns2", "hello"}}, + CharData("\n "), + StartElement{Name{"ns2", "query"}, []Attr{}}, + CharData("What is it?"), + EndElement{Name{"ns2", "query"}}, + CharData("\n "), + StartElement{Name{"ns2", "goodbye"}, []Attr{}}, + EndElement{Name{"ns2", "goodbye"}}, + CharData("\n "), + StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}}, + CharData("\n "), + StartElement{Name{"ns2", "inner"}, []Attr{}}, + EndElement{Name{"ns2", "inner"}}, + CharData("\n "), + EndElement{Name{"ns2", "outer"}}, + CharData("\n "), + StartElement{Name{"ns3", "name"}, []Attr{}}, + CharData("\n "), + CharData("Some text here."), + CharData("\n "), + EndElement{Name{"ns3", "name"}}, + CharData("\n"), + EndElement{Name{"ns2", "body"}}, + Comment(" missing final newline "), +} + +const testInputAltEncoding = ` +<?xml version="1.0" encoding="x-testing-uppercase"?> +<TAG>VALUE</TAG>` + +var rawTokensAltEncoding = []Token{ + CharData("\n"), + ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("value"), + EndElement{Name{"", "tag"}}, +} + +var xmlInput = []string{ + // unexpected EOF cases + "<", + "<t", + "<t ", + "<t/", + "<!", + "<!-", + "<!--", + "<!--c-", + "<!--c--", + "<!d", + "<t></", + "<t></t", + "<?", + "<?p", + "<t a", + "<t a=", + "<t a='", + "<t a=''", + "<t/><![", + "<t/><![C", + "<t/><![CDATA[d", + "<t/><![CDATA[d]", + "<t/><![CDATA[d]]", + + // other Syntax errors + "<>", + "<t/a", + "<0 />", + "<?0 >", + // "<!0 >", // let the Token() caller handle + "</0>", + "<t 0=''>", + "<t a='&'>", + "<t a='<'>", + "<t> c;</t>", + "<t a>", + "<t a=>", + "<t a=v>", + // "<![CDATA[d]]>", // let the Token() caller handle + "<t></e>", + "<t></>", + "<t></t!", + "<t>cdata]]></t>", +} + +func TestRawToken(t *testing.T) { + d := NewDecoder(strings.NewReader(testInput)) + d.Entity = testEntity + testRawToken(t, d, testInput, rawTokens) +} + +const nonStrictInput = ` +<tag>non&entity</tag> +<tag>&unknown;entity</tag> +<tag>{</tag> +<tag>&#zzz;</tag> +<tag>&なまえ3;</tag> +<tag><-gt;</tag> +<tag>&;</tag> +<tag>&0a;</tag> +` + +var nonStrictTokens = []Token{ + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("non&entity"), + EndElement{Name{"", "tag"}}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("&unknown;entity"), + EndElement{Name{"", "tag"}}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("{"), + EndElement{Name{"", "tag"}}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("&#zzz;"), + EndElement{Name{"", "tag"}}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("&なまえ3;"), + EndElement{Name{"", "tag"}}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("<-gt;"), + EndElement{Name{"", "tag"}}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("&;"), + EndElement{Name{"", "tag"}}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("&0a;"), + EndElement{Name{"", "tag"}}, + CharData("\n"), +} + +func TestNonStrictRawToken(t *testing.T) { + d := NewDecoder(strings.NewReader(nonStrictInput)) + d.Strict = false + testRawToken(t, d, nonStrictInput, nonStrictTokens) +} + +type downCaser struct { + t *testing.T + r io.ByteReader +} + +func (d *downCaser) ReadByte() (c byte, err error) { + c, err = d.r.ReadByte() + if c >= 'A' && c <= 'Z' { + c += 'a' - 'A' + } + return +} + +func (d *downCaser) Read(p []byte) (int, error) { + d.t.Fatalf("unexpected Read call on downCaser reader") + panic("unreachable") +} + +func TestRawTokenAltEncoding(t *testing.T) { + d := NewDecoder(strings.NewReader(testInputAltEncoding)) + d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) { + if charset != "x-testing-uppercase" { + t.Fatalf("unexpected charset %q", charset) + } + return &downCaser{t, input.(io.ByteReader)}, nil + } + testRawToken(t, d, testInputAltEncoding, rawTokensAltEncoding) +} + +func TestRawTokenAltEncodingNoConverter(t *testing.T) { + d := NewDecoder(strings.NewReader(testInputAltEncoding)) + token, err := d.RawToken() + if token == nil { + t.Fatalf("expected a token on first RawToken call") + } + if err != nil { + t.Fatal(err) + } + token, err = d.RawToken() + if token != nil { + t.Errorf("expected a nil token; got %#v", token) + } + if err == nil { + t.Fatalf("expected an error on second RawToken call") + } + const encoding = "x-testing-uppercase" + if !strings.Contains(err.Error(), encoding) { + t.Errorf("expected error to contain %q; got error: %v", + encoding, err) + } +} + +func testRawToken(t *testing.T, d *Decoder, raw string, rawTokens []Token) { + lastEnd := int64(0) + for i, want := range rawTokens { + start := d.InputOffset() + have, err := d.RawToken() + end := d.InputOffset() + if err != nil { + t.Fatalf("token %d: unexpected error: %s", i, err) + } + if !reflect.DeepEqual(have, want) { + var shave, swant string + if _, ok := have.(CharData); ok { + shave = fmt.Sprintf("CharData(%q)", have) + } else { + shave = fmt.Sprintf("%#v", have) + } + if _, ok := want.(CharData); ok { + swant = fmt.Sprintf("CharData(%q)", want) + } else { + swant = fmt.Sprintf("%#v", want) + } + t.Errorf("token %d = %s, want %s", i, shave, swant) + } + + // Check that InputOffset returned actual token. + switch { + case start < lastEnd: + t.Errorf("token %d: position [%d,%d) for %T is before previous token", i, start, end, have) + case start >= end: + // Special case: EndElement can be synthesized. + if start == end && end == lastEnd { + break + } + t.Errorf("token %d: position [%d,%d) for %T is empty", i, start, end, have) + case end > int64(len(raw)): + t.Errorf("token %d: position [%d,%d) for %T extends beyond input", i, start, end, have) + default: + text := raw[start:end] + if strings.ContainsAny(text, "<>") && (!strings.HasPrefix(text, "<") || !strings.HasSuffix(text, ">")) { + t.Errorf("token %d: misaligned raw token %#q for %T", i, text, have) + } + } + lastEnd = end + } +} + +// Ensure that directives (specifically !DOCTYPE) include the complete +// text of any nested directives, noting that < and > do not change +// nesting depth if they are in single or double quotes. + +var nestedDirectivesInput = ` +<!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]> +<!DOCTYPE [<!ENTITY xlt ">">]> +<!DOCTYPE [<!ENTITY xlt "<">]> +<!DOCTYPE [<!ENTITY xlt '>'>]> +<!DOCTYPE [<!ENTITY xlt '<'>]> +<!DOCTYPE [<!ENTITY xlt '">'>]> +<!DOCTYPE [<!ENTITY xlt "'<">]> +` + +var nestedDirectivesTokens = []Token{ + CharData("\n"), + Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`), + CharData("\n"), + Directive(`DOCTYPE [<!ENTITY xlt ">">]`), + CharData("\n"), + Directive(`DOCTYPE [<!ENTITY xlt "<">]`), + CharData("\n"), + Directive(`DOCTYPE [<!ENTITY xlt '>'>]`), + CharData("\n"), + Directive(`DOCTYPE [<!ENTITY xlt '<'>]`), + CharData("\n"), + Directive(`DOCTYPE [<!ENTITY xlt '">'>]`), + CharData("\n"), + Directive(`DOCTYPE [<!ENTITY xlt "'<">]`), + CharData("\n"), +} + +func TestNestedDirectives(t *testing.T) { + d := NewDecoder(strings.NewReader(nestedDirectivesInput)) + + for i, want := range nestedDirectivesTokens { + have, err := d.Token() + if err != nil { + t.Fatalf("token %d: unexpected error: %s", i, err) + } + if !reflect.DeepEqual(have, want) { + t.Errorf("token %d = %#v want %#v", i, have, want) + } + } +} + +func TestToken(t *testing.T) { + d := NewDecoder(strings.NewReader(testInput)) + d.Entity = testEntity + + for i, want := range cookedTokens { + have, err := d.Token() + if err != nil { + t.Fatalf("token %d: unexpected error: %s", i, err) + } + if !reflect.DeepEqual(have, want) { + t.Errorf("token %d = %#v want %#v", i, have, want) + } + } +} + +func TestSyntax(t *testing.T) { + for i := range xmlInput { + d := NewDecoder(strings.NewReader(xmlInput[i])) + var err error + for _, err = d.Token(); err == nil; _, err = d.Token() { + } + if _, ok := err.(*SyntaxError); !ok { + t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i]) + } + } +} + +func TestInputLinePos(t *testing.T) { + testInput := `<root> +<?pi + ?> <elt +att += +"val"> +<![CDATA[ +]]><!-- + +--></elt> +</root>` + linePos := [][]int{ + {1, 7}, + {2, 1}, + {3, 4}, + {3, 6}, + {6, 7}, + {7, 1}, + {8, 4}, + {10, 4}, + {10, 10}, + {11, 1}, + {11, 8}, + } + dec := NewDecoder(strings.NewReader(testInput)) + for _, want := range linePos { + if _, err := dec.Token(); err != nil { + t.Errorf("Unexpected error: %v", err) + continue + } + + gotLine, gotCol := dec.InputPos() + if gotLine != want[0] || gotCol != want[1] { + t.Errorf("dec.InputPos() = %d,%d, want %d,%d", gotLine, gotCol, want[0], want[1]) + } + } +} + +type allScalars struct { + True1 bool + True2 bool + False1 bool + False2 bool + Int int + Int8 int8 + Int16 int16 + Int32 int32 + Int64 int64 + Uint int + Uint8 uint8 + Uint16 uint16 + Uint32 uint32 + Uint64 uint64 + Uintptr uintptr + Float32 float32 + Float64 float64 + String string + PtrString *string +} + +var all = allScalars{ + True1: true, + True2: true, + False1: false, + False2: false, + Int: 1, + Int8: -2, + Int16: 3, + Int32: -4, + Int64: 5, + Uint: 6, + Uint8: 7, + Uint16: 8, + Uint32: 9, + Uint64: 10, + Uintptr: 11, + Float32: 13.0, + Float64: 14.0, + String: "15", + PtrString: &sixteen, +} + +var sixteen = "16" + +const testScalarsInput = `<allscalars> + <True1>true</True1> + <True2>1</True2> + <False1>false</False1> + <False2>0</False2> + <Int>1</Int> + <Int8>-2</Int8> + <Int16>3</Int16> + <Int32>-4</Int32> + <Int64>5</Int64> + <Uint>6</Uint> + <Uint8>7</Uint8> + <Uint16>8</Uint16> + <Uint32>9</Uint32> + <Uint64>10</Uint64> + <Uintptr>11</Uintptr> + <Float>12.0</Float> + <Float32>13.0</Float32> + <Float64>14.0</Float64> + <String>15</String> + <PtrString>16</PtrString> +</allscalars>` + +func TestAllScalars(t *testing.T) { + var a allScalars + err := Unmarshal([]byte(testScalarsInput), &a) + + if err != nil { + t.Fatal(err) + } + if !reflect.DeepEqual(a, all) { + t.Errorf("have %+v want %+v", a, all) + } +} + +type item struct { + FieldA string +} + +func TestIssue569(t *testing.T) { + data := `<item><FieldA>abcd</FieldA></item>` + var i item + err := Unmarshal([]byte(data), &i) + + if err != nil || i.FieldA != "abcd" { + t.Fatal("Expecting abcd") + } +} + +func TestUnquotedAttrs(t *testing.T) { + data := "<tag attr=azAZ09:-_\t>" + d := NewDecoder(strings.NewReader(data)) + d.Strict = false + token, err := d.Token() + if _, ok := err.(*SyntaxError); ok { + t.Errorf("Unexpected error: %v", err) + } + if token.(StartElement).Name.Local != "tag" { + t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local) + } + attr := token.(StartElement).Attr[0] + if attr.Value != "azAZ09:-_" { + t.Errorf("Unexpected attribute value: %v", attr.Value) + } + if attr.Name.Local != "attr" { + t.Errorf("Unexpected attribute name: %v", attr.Name.Local) + } +} + +func TestValuelessAttrs(t *testing.T) { + tests := [][3]string{ + {"<p nowrap>", "p", "nowrap"}, + {"<p nowrap >", "p", "nowrap"}, + {"<input checked/>", "input", "checked"}, + {"<input checked />", "input", "checked"}, + } + for _, test := range tests { + d := NewDecoder(strings.NewReader(test[0])) + d.Strict = false + token, err := d.Token() + if _, ok := err.(*SyntaxError); ok { + t.Errorf("Unexpected error: %v", err) + } + if token.(StartElement).Name.Local != test[1] { + t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local) + } + attr := token.(StartElement).Attr[0] + if attr.Value != test[2] { + t.Errorf("Unexpected attribute value: %v", attr.Value) + } + if attr.Name.Local != test[2] { + t.Errorf("Unexpected attribute name: %v", attr.Name.Local) + } + } +} + +func TestCopyTokenCharData(t *testing.T) { + data := []byte("same data") + var tok1 Token = CharData(data) + tok2 := CopyToken(tok1) + if !reflect.DeepEqual(tok1, tok2) { + t.Error("CopyToken(CharData) != CharData") + } + data[1] = 'o' + if reflect.DeepEqual(tok1, tok2) { + t.Error("CopyToken(CharData) uses same buffer.") + } +} + +func TestCopyTokenStartElement(t *testing.T) { + elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}} + var tok1 Token = elt + tok2 := CopyToken(tok1) + if tok1.(StartElement).Attr[0].Value != "en" { + t.Error("CopyToken overwrote Attr[0]") + } + if !reflect.DeepEqual(tok1, tok2) { + t.Error("CopyToken(StartElement) != StartElement") + } + tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"} + if reflect.DeepEqual(tok1, tok2) { + t.Error("CopyToken(CharData) uses same buffer.") + } +} + +func TestCopyTokenComment(t *testing.T) { + data := []byte("<!-- some comment -->") + var tok1 Token = Comment(data) + tok2 := CopyToken(tok1) + if !reflect.DeepEqual(tok1, tok2) { + t.Error("CopyToken(Comment) != Comment") + } + data[1] = 'o' + if reflect.DeepEqual(tok1, tok2) { + t.Error("CopyToken(Comment) uses same buffer.") + } +} + +func TestSyntaxErrorLineNum(t *testing.T) { + testInput := "<P>Foo<P>\n\n<P>Bar</>\n" + d := NewDecoder(strings.NewReader(testInput)) + var err error + for _, err = d.Token(); err == nil; _, err = d.Token() { + } + synerr, ok := err.(*SyntaxError) + if !ok { + t.Error("Expected SyntaxError.") + } + if synerr.Line != 3 { + t.Error("SyntaxError didn't have correct line number.") + } +} + +func TestTrailingRawToken(t *testing.T) { + input := `<FOO></FOO> ` + d := NewDecoder(strings.NewReader(input)) + var err error + for _, err = d.RawToken(); err == nil; _, err = d.RawToken() { + } + if err != io.EOF { + t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err) + } +} + +func TestTrailingToken(t *testing.T) { + input := `<FOO></FOO> ` + d := NewDecoder(strings.NewReader(input)) + var err error + for _, err = d.Token(); err == nil; _, err = d.Token() { + } + if err != io.EOF { + t.Fatalf("d.Token() = _, %v, want _, io.EOF", err) + } +} + +func TestEntityInsideCDATA(t *testing.T) { + input := `<test><![CDATA[ &val=foo ]]></test>` + d := NewDecoder(strings.NewReader(input)) + var err error + for _, err = d.Token(); err == nil; _, err = d.Token() { + } + if err != io.EOF { + t.Fatalf("d.Token() = _, %v, want _, io.EOF", err) + } +} + +var characterTests = []struct { + in string + err string +}{ + {"\x12<doc/>", "illegal character code U+0012"}, + {"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"}, + {"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"}, + {"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"}, + {"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"}, + {"<doc>&abc\x01;</doc>", "invalid character entity &abc (no semicolon)"}, + {"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"}, + {"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &\uFFFE;"}, + {"<doc>&hello;</doc>", "invalid character entity &hello;"}, +} + +func TestDisallowedCharacters(t *testing.T) { + + for i, tt := range characterTests { + d := NewDecoder(strings.NewReader(tt.in)) + var err error + + for err == nil { + _, err = d.Token() + } + synerr, ok := err.(*SyntaxError) + if !ok { + t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err) + } + if synerr.Msg != tt.err { + t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg) + } + } +} + +func TestIsInCharacterRange(t *testing.T) { + invalid := []rune{ + utf8.MaxRune + 1, + 0xD800, // surrogate min + 0xDFFF, // surrogate max + -1, + } + for _, r := range invalid { + if isInCharacterRange(r) { + t.Errorf("rune %U considered valid", r) + } + } +} + +var procInstTests = []struct { + input string + expect [2]string +}{ + {`version="1.0" encoding="utf-8"`, [2]string{"1.0", "utf-8"}}, + {`version="1.0" encoding='utf-8'`, [2]string{"1.0", "utf-8"}}, + {`version="1.0" encoding='utf-8' `, [2]string{"1.0", "utf-8"}}, + {`version="1.0" encoding=utf-8`, [2]string{"1.0", ""}}, + {`encoding="FOO" `, [2]string{"", "FOO"}}, +} + +func TestProcInstEncoding(t *testing.T) { + for _, test := range procInstTests { + if got := procInst("version", test.input); got != test.expect[0] { + t.Errorf("procInst(version, %q) = %q; want %q", test.input, got, test.expect[0]) + } + if got := procInst("encoding", test.input); got != test.expect[1] { + t.Errorf("procInst(encoding, %q) = %q; want %q", test.input, got, test.expect[1]) + } + } +} + +// Ensure that directives with comments include the complete +// text of any nested directives. + +var directivesWithCommentsInput = ` +<!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]> +<!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]> +<!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]> +` + +var directivesWithCommentsTokens = []Token{ + CharData("\n"), + Directive(`DOCTYPE [ <!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`), + CharData("\n"), + Directive(`DOCTYPE [<!ENTITY go "Golang"> ]`), + CharData("\n"), + Directive(`DOCTYPE <!-> <!> [<!ENTITY go "Golang"> ]`), + CharData("\n"), +} + +func TestDirectivesWithComments(t *testing.T) { + d := NewDecoder(strings.NewReader(directivesWithCommentsInput)) + + for i, want := range directivesWithCommentsTokens { + have, err := d.Token() + if err != nil { + t.Fatalf("token %d: unexpected error: %s", i, err) + } + if !reflect.DeepEqual(have, want) { + t.Errorf("token %d = %#v want %#v", i, have, want) + } + } +} + +// Writer whose Write method always returns an error. +type errWriter struct{} + +func (errWriter) Write(p []byte) (n int, err error) { return 0, fmt.Errorf("unwritable") } + +func TestEscapeTextIOErrors(t *testing.T) { + expectErr := "unwritable" + err := EscapeText(errWriter{}, []byte{'A'}) + + if err == nil || err.Error() != expectErr { + t.Errorf("have %v, want %v", err, expectErr) + } +} + +func TestEscapeTextInvalidChar(t *testing.T) { + input := []byte("A \x00 terminated string.") + expected := "A \uFFFD terminated string." + + buff := new(strings.Builder) + if err := EscapeText(buff, input); err != nil { + t.Fatalf("have %v, want nil", err) + } + text := buff.String() + + if text != expected { + t.Errorf("have %v, want %v", text, expected) + } +} + +func TestIssue5880(t *testing.T) { + type T []byte + data, err := Marshal(T{192, 168, 0, 1}) + if err != nil { + t.Errorf("Marshal error: %v", err) + } + if !utf8.Valid(data) { + t.Errorf("Marshal generated invalid UTF-8: %x", data) + } +} + +func TestIssue8535(t *testing.T) { + + type ExampleConflict struct { + XMLName Name `xml:"example"` + Link string `xml:"link"` + AtomLink string `xml:"http://www.w3.org/2005/Atom link"` // Same name in a different name space + } + testCase := `<example> + <title>Example</title> + <link>http://example.com/default</link> <!-- not assigned --> + <link>http://example.com/home</link> <!-- not assigned --> + <ns:link xmlns:ns="http://www.w3.org/2005/Atom">http://example.com/ns</ns:link> + </example>` + + var dest ExampleConflict + d := NewDecoder(strings.NewReader(testCase)) + if err := d.Decode(&dest); err != nil { + t.Fatal(err) + } +} + +func TestEncodeXMLNS(t *testing.T) { + testCases := []struct { + f func() ([]byte, error) + want string + ok bool + }{ + {encodeXMLNS1, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, true}, + {encodeXMLNS2, `<Test><body xmlns="http://example.com/ns">hello world</body></Test>`, true}, + {encodeXMLNS3, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, true}, + {encodeXMLNS4, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, false}, + } + + for i, tc := range testCases { + if b, err := tc.f(); err == nil { + if got, want := string(b), tc.want; got != want { + t.Errorf("%d: got %s, want %s \n", i, got, want) + } + } else { + t.Errorf("%d: marshal failed with %s", i, err) + } + } +} + +func encodeXMLNS1() ([]byte, error) { + + type T struct { + XMLName Name `xml:"Test"` + Ns string `xml:"xmlns,attr"` + Body string + } + + s := &T{Ns: "http://example.com/ns", Body: "hello world"} + return Marshal(s) +} + +func encodeXMLNS2() ([]byte, error) { + + type Test struct { + Body string `xml:"http://example.com/ns body"` + } + + s := &Test{Body: "hello world"} + return Marshal(s) +} + +func encodeXMLNS3() ([]byte, error) { + + type Test struct { + XMLName Name `xml:"http://example.com/ns Test"` + Body string + } + + //s := &Test{XMLName: Name{"http://example.com/ns",""}, Body: "hello world"} is unusable as the "-" is missing + // as documentation states + s := &Test{Body: "hello world"} + return Marshal(s) +} + +func encodeXMLNS4() ([]byte, error) { + + type Test struct { + Ns string `xml:"xmlns,attr"` + Body string + } + + s := &Test{Ns: "http://example.com/ns", Body: "hello world"} + return Marshal(s) +} + +func TestIssue11405(t *testing.T) { + testCases := []string{ + "<root>", + "<root><foo>", + "<root><foo></foo>", + } + for _, tc := range testCases { + d := NewDecoder(strings.NewReader(tc)) + var err error + for { + _, err = d.Token() + if err != nil { + break + } + } + if _, ok := err.(*SyntaxError); !ok { + t.Errorf("%s: Token: Got error %v, want SyntaxError", tc, err) + } + } +} + +func TestIssue12417(t *testing.T) { + testCases := []struct { + s string + ok bool + }{ + {`<?xml encoding="UtF-8" version="1.0"?><root/>`, true}, + {`<?xml encoding="UTF-8" version="1.0"?><root/>`, true}, + {`<?xml encoding="utf-8" version="1.0"?><root/>`, true}, + {`<?xml encoding="uuu-9" version="1.0"?><root/>`, false}, + } + for _, tc := range testCases { + d := NewDecoder(strings.NewReader(tc.s)) + var err error + for { + _, err = d.Token() + if err != nil { + if err == io.EOF { + err = nil + } + break + } + } + if err != nil && tc.ok { + t.Errorf("%q: Encoding charset: expected no error, got %s", tc.s, err) + continue + } + if err == nil && !tc.ok { + t.Errorf("%q: Encoding charset: expected error, got nil", tc.s) + } + } +} + +func TestIssue20396(t *testing.T) { + + var attrError = UnmarshalError("XML syntax error on line 1: expected attribute name in element") + + testCases := []struct { + s string + wantErr error + }{ + {`<a:te:st xmlns:a="abcd"/>`, // Issue 20396 + UnmarshalError("XML syntax error on line 1: expected element name after <")}, + {`<a:te=st xmlns:a="abcd"/>`, attrError}, + {`<a:te&st xmlns:a="abcd"/>`, attrError}, + {`<a:test xmlns:a="abcd"/>`, nil}, + {`<a:te:st xmlns:a="abcd">1</a:te:st>`, + UnmarshalError("XML syntax error on line 1: expected element name after <")}, + {`<a:te=st xmlns:a="abcd">1</a:te=st>`, attrError}, + {`<a:te&st xmlns:a="abcd">1</a:te&st>`, attrError}, + {`<a:test xmlns:a="abcd">1</a:test>`, nil}, + } + + var dest string + for _, tc := range testCases { + if got, want := Unmarshal([]byte(tc.s), &dest), tc.wantErr; got != want { + if got == nil { + t.Errorf("%s: Unexpected success, want %v", tc.s, want) + } else if want == nil { + t.Errorf("%s: Unexpected error, got %v", tc.s, got) + } else if got.Error() != want.Error() { + t.Errorf("%s: got %v, want %v", tc.s, got, want) + } + } + } +} + +func TestIssue20685(t *testing.T) { + testCases := []struct { + s string + ok bool + }{ + {`<x:book xmlns:x="abcd" xmlns:y="abcd"><unclosetag>one</x:book>`, false}, + {`<x:book xmlns:x="abcd" xmlns:y="abcd">one</x:book>`, true}, + {`<x:book xmlns:x="abcd" xmlns:y="abcd">one</y:book>`, false}, + {`<x:book xmlns:y="abcd" xmlns:x="abcd">one</y:book>`, false}, + {`<x:book xmlns:x="abcd">one</y:book>`, false}, + {`<x:book>one</y:book>`, false}, + {`<xbook>one</ybook>`, false}, + } + for _, tc := range testCases { + d := NewDecoder(strings.NewReader(tc.s)) + var err error + for { + _, err = d.Token() + if err != nil { + if err == io.EOF { + err = nil + } + break + } + } + if err != nil && tc.ok { + t.Errorf("%q: Closing tag with namespace : expected no error, got %s", tc.s, err) + continue + } + if err == nil && !tc.ok { + t.Errorf("%q: Closing tag with namespace : expected error, got nil", tc.s) + } + } +} + +func tokenMap(mapping func(t Token) Token) func(TokenReader) TokenReader { + return func(src TokenReader) TokenReader { + return mapper{ + t: src, + f: mapping, + } + } +} + +type mapper struct { + t TokenReader + f func(Token) Token +} + +func (m mapper) Token() (Token, error) { + tok, err := m.t.Token() + if err != nil { + return nil, err + } + return m.f(tok), nil +} + +func TestNewTokenDecoderIdempotent(t *testing.T) { + d := NewDecoder(strings.NewReader(`<br>`)) + d2 := NewTokenDecoder(d) + if d != d2 { + t.Error("NewTokenDecoder did not detect underlying Decoder") + } +} + +func TestWrapDecoder(t *testing.T) { + d := NewDecoder(strings.NewReader(`<quote>[Re-enter Clown with a letter, and FABIAN]</quote>`)) + m := tokenMap(func(t Token) Token { + switch tok := t.(type) { + case StartElement: + if tok.Name.Local == "quote" { + tok.Name.Local = "blocking" + return tok + } + case EndElement: + if tok.Name.Local == "quote" { + tok.Name.Local = "blocking" + return tok + } + } + return t + }) + + d = NewTokenDecoder(m(d)) + + o := struct { + XMLName Name `xml:"blocking"` + Chardata string `xml:",chardata"` + }{} + + if err := d.Decode(&o); err != nil { + t.Fatal("Got unexpected error while decoding:", err) + } + + if o.Chardata != "[Re-enter Clown with a letter, and FABIAN]" { + t.Fatalf("Got unexpected chardata: `%s`\n", o.Chardata) + } +} + +type tokReader struct{} + +func (tokReader) Token() (Token, error) { + return StartElement{}, nil +} + +type Failure struct{} + +func (Failure) UnmarshalXML(*Decoder, StartElement) error { + return nil +} + +func TestTokenUnmarshaler(t *testing.T) { + defer func() { + if r := recover(); r != nil { + t.Error("Unexpected panic using custom token unmarshaler") + } + }() + + d := NewTokenDecoder(tokReader{}) + d.Decode(&Failure{}) +} + +func testRoundTrip(t *testing.T, input string) { + d := NewDecoder(strings.NewReader(input)) + var tokens []Token + var buf bytes.Buffer + e := NewEncoder(&buf) + for { + tok, err := d.Token() + if err == io.EOF { + break + } + if err != nil { + t.Fatalf("invalid input: %v", err) + } + if err := e.EncodeToken(tok); err != nil { + t.Fatalf("failed to re-encode input: %v", err) + } + tokens = append(tokens, CopyToken(tok)) + } + if err := e.Flush(); err != nil { + t.Fatal(err) + } + + d = NewDecoder(&buf) + for { + tok, err := d.Token() + if err == io.EOF { + break + } + if err != nil { + t.Fatalf("failed to decode output: %v", err) + } + if len(tokens) == 0 { + t.Fatalf("unexpected token: %#v", tok) + } + a, b := tokens[0], tok + if !reflect.DeepEqual(a, b) { + t.Fatalf("token mismatch: %#v vs %#v", a, b) + } + tokens = tokens[1:] + } + if len(tokens) > 0 { + t.Fatalf("lost tokens: %#v", tokens) + } +} + +func TestRoundTrip(t *testing.T) { + tests := map[string]string{ + "trailing colon": `<foo abc:="x"></foo>`, + "comments in directives": `<!ENTITY x<!<!-- c1 [ " -->--x --> > <e></e> <!DOCTYPE xxx [ x<!-- c2 " -->--x ]>`, + } + for name, input := range tests { + t.Run(name, func(t *testing.T) { testRoundTrip(t, input) }) + } +} + +func TestParseErrors(t *testing.T) { + withDefaultHeader := func(s string) string { + return `<?xml version="1.0" encoding="UTF-8"?>` + s + } + tests := []struct { + src string + err string + }{ + {withDefaultHeader(`</foo>`), `unexpected end element </foo>`}, + {withDefaultHeader(`<x:foo></y:foo>`), `element <foo> in space x closed by </foo> in space y`}, + {withDefaultHeader(`<? not ok ?>`), `expected target name after <?`}, + {withDefaultHeader(`<!- not ok -->`), `invalid sequence <!- not part of <!--`}, + {withDefaultHeader(`<!-? not ok -->`), `invalid sequence <!- not part of <!--`}, + {withDefaultHeader(`<![not ok]>`), `invalid <![ sequence`}, + {withDefaultHeader("\xf1"), `invalid UTF-8`}, + + // Header-related errors. + {`<?xml version="1.1" encoding="UTF-8"?>`, `unsupported version "1.1"; only version 1.0 is supported`}, + + // Cases below are for "no errors". + {withDefaultHeader(`<?ok?>`), ``}, + {withDefaultHeader(`<?ok version="ok"?>`), ``}, + } + + for _, test := range tests { + d := NewDecoder(strings.NewReader(test.src)) + var err error + for { + _, err = d.Token() + if err != nil { + break + } + } + if test.err == "" { + if err != io.EOF { + t.Errorf("parse %s: have %q error, expected none", test.src, err) + } + continue + } + if err == nil || err == io.EOF { + t.Errorf("parse %s: have no error, expected a non-nil error", test.src) + continue + } + if !strings.Contains(err.Error(), test.err) { + t.Errorf("parse %s: can't find %q error sudbstring\nerror: %q", test.src, test.err, err) + continue + } + } +} + +const testInputHTMLAutoClose = `<?xml version="1.0" encoding="UTF-8"?> +<br> +<br/><br/> +<br><br> +<br></br> +<BR> +<BR/><BR/> +<Br></Br> +<BR><span id="test">abc</span><br/><br/>` + +func BenchmarkHTMLAutoClose(b *testing.B) { + b.RunParallel(func(p *testing.PB) { + for p.Next() { + d := NewDecoder(strings.NewReader(testInputHTMLAutoClose)) + d.Strict = false + d.AutoClose = HTMLAutoClose + d.Entity = HTMLEntity + for { + _, err := d.Token() + if err != nil { + if err == io.EOF { + break + } + b.Fatalf("unexpected error: %v", err) + } + } + } + }) +} + +func TestHTMLAutoClose(t *testing.T) { + wantTokens := []Token{ + ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, + CharData("\n"), + StartElement{Name{"", "br"}, []Attr{}}, + EndElement{Name{"", "br"}}, + CharData("\n"), + StartElement{Name{"", "br"}, []Attr{}}, + EndElement{Name{"", "br"}}, + StartElement{Name{"", "br"}, []Attr{}}, + EndElement{Name{"", "br"}}, + CharData("\n"), + StartElement{Name{"", "br"}, []Attr{}}, + EndElement{Name{"", "br"}}, + StartElement{Name{"", "br"}, []Attr{}}, + EndElement{Name{"", "br"}}, + CharData("\n"), + StartElement{Name{"", "br"}, []Attr{}}, + EndElement{Name{"", "br"}}, + CharData("\n"), + StartElement{Name{"", "BR"}, []Attr{}}, + EndElement{Name{"", "BR"}}, + CharData("\n"), + StartElement{Name{"", "BR"}, []Attr{}}, + EndElement{Name{"", "BR"}}, + StartElement{Name{"", "BR"}, []Attr{}}, + EndElement{Name{"", "BR"}}, + CharData("\n"), + StartElement{Name{"", "Br"}, []Attr{}}, + EndElement{Name{"", "Br"}}, + CharData("\n"), + StartElement{Name{"", "BR"}, []Attr{}}, + EndElement{Name{"", "BR"}}, + StartElement{Name{"", "span"}, []Attr{{Name: Name{"", "id"}, Value: "test"}}}, + CharData("abc"), + EndElement{Name{"", "span"}}, + StartElement{Name{"", "br"}, []Attr{}}, + EndElement{Name{"", "br"}}, + StartElement{Name{"", "br"}, []Attr{}}, + EndElement{Name{"", "br"}}, + } + + d := NewDecoder(strings.NewReader(testInputHTMLAutoClose)) + d.Strict = false + d.AutoClose = HTMLAutoClose + d.Entity = HTMLEntity + var haveTokens []Token + for { + tok, err := d.Token() + if err != nil { + if err == io.EOF { + break + } + t.Fatalf("unexpected error: %v", err) + } + haveTokens = append(haveTokens, CopyToken(tok)) + } + if len(haveTokens) != len(wantTokens) { + t.Errorf("tokens count mismatch: have %d, want %d", len(haveTokens), len(wantTokens)) + } + for i, want := range wantTokens { + if i >= len(haveTokens) { + t.Errorf("token[%d] expected %#v, have no token", i, want) + } else { + have := haveTokens[i] + if !reflect.DeepEqual(have, want) { + t.Errorf("token[%d] mismatch:\nhave: %#v\nwant: %#v", i, have, want) + } + } + } +} |