summaryrefslogtreecommitdiffstats
path: root/src/encoding/xml
diff options
context:
space:
mode:
Diffstat (limited to 'src/encoding/xml')
-rw-r--r--src/encoding/xml/atom_test.go56
-rw-r--r--src/encoding/xml/example_marshaling_test.go84
-rw-r--r--src/encoding/xml/example_test.go151
-rw-r--r--src/encoding/xml/example_text_marshaling_test.go79
-rw-r--r--src/encoding/xml/marshal.go1131
-rw-r--r--src/encoding/xml/marshal_test.go2591
-rw-r--r--src/encoding/xml/read.go777
-rw-r--r--src/encoding/xml/read_test.go1128
-rw-r--r--src/encoding/xml/typeinfo.go367
-rw-r--r--src/encoding/xml/xml.go2060
-rw-r--r--src/encoding/xml/xml_test.go1479
11 files changed, 9903 insertions, 0 deletions
diff --git a/src/encoding/xml/atom_test.go b/src/encoding/xml/atom_test.go
new file mode 100644
index 0000000..f394dab
--- /dev/null
+++ b/src/encoding/xml/atom_test.go
@@ -0,0 +1,56 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package xml
+
+import "time"
+
+var atomValue = &Feed{
+ XMLName: Name{"http://www.w3.org/2005/Atom", "feed"},
+ Title: "Example Feed",
+ Link: []Link{{Href: "http://example.org/"}},
+ Updated: ParseTime("2003-12-13T18:30:02Z"),
+ Author: Person{Name: "John Doe"},
+ ID: "urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6",
+
+ Entry: []Entry{
+ {
+ Title: "Atom-Powered Robots Run Amok",
+ Link: []Link{{Href: "http://example.org/2003/12/13/atom03"}},
+ ID: "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a",
+ Updated: ParseTime("2003-12-13T18:30:02Z"),
+ Summary: NewText("Some text."),
+ },
+ },
+}
+
+var atomXML = `` +
+ `<feed xmlns="http://www.w3.org/2005/Atom" updated="2003-12-13T18:30:02Z">` +
+ `<title>Example Feed</title>` +
+ `<id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>` +
+ `<link href="http://example.org/"></link>` +
+ `<author><name>John Doe</name><uri></uri><email></email></author>` +
+ `<entry>` +
+ `<title>Atom-Powered Robots Run Amok</title>` +
+ `<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>` +
+ `<link href="http://example.org/2003/12/13/atom03"></link>` +
+ `<updated>2003-12-13T18:30:02Z</updated>` +
+ `<author><name></name><uri></uri><email></email></author>` +
+ `<summary>Some text.</summary>` +
+ `</entry>` +
+ `</feed>`
+
+func ParseTime(str string) time.Time {
+ t, err := time.Parse(time.RFC3339, str)
+ if err != nil {
+ panic(err)
+ }
+ return t
+}
+
+func NewText(text string) Text {
+ return Text{
+ Body: text,
+ }
+}
diff --git a/src/encoding/xml/example_marshaling_test.go b/src/encoding/xml/example_marshaling_test.go
new file mode 100644
index 0000000..9f9e801
--- /dev/null
+++ b/src/encoding/xml/example_marshaling_test.go
@@ -0,0 +1,84 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package xml_test
+
+import (
+ "encoding/xml"
+ "fmt"
+ "log"
+ "strings"
+)
+
+type Animal int
+
+const (
+ Unknown Animal = iota
+ Gopher
+ Zebra
+)
+
+func (a *Animal) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
+ var s string
+ if err := d.DecodeElement(&s, &start); err != nil {
+ return err
+ }
+ switch strings.ToLower(s) {
+ default:
+ *a = Unknown
+ case "gopher":
+ *a = Gopher
+ case "zebra":
+ *a = Zebra
+ }
+
+ return nil
+}
+
+func (a Animal) MarshalXML(e *xml.Encoder, start xml.StartElement) error {
+ var s string
+ switch a {
+ default:
+ s = "unknown"
+ case Gopher:
+ s = "gopher"
+ case Zebra:
+ s = "zebra"
+ }
+ return e.EncodeElement(s, start)
+}
+
+func Example_customMarshalXML() {
+ blob := `
+ <animals>
+ <animal>gopher</animal>
+ <animal>armadillo</animal>
+ <animal>zebra</animal>
+ <animal>unknown</animal>
+ <animal>gopher</animal>
+ <animal>bee</animal>
+ <animal>gopher</animal>
+ <animal>zebra</animal>
+ </animals>`
+ var zoo struct {
+ Animals []Animal `xml:"animal"`
+ }
+ if err := xml.Unmarshal([]byte(blob), &zoo); err != nil {
+ log.Fatal(err)
+ }
+
+ census := make(map[Animal]int)
+ for _, animal := range zoo.Animals {
+ census[animal] += 1
+ }
+
+ fmt.Printf("Zoo Census:\n* Gophers: %d\n* Zebras: %d\n* Unknown: %d\n",
+ census[Gopher], census[Zebra], census[Unknown])
+
+ // Output:
+ // Zoo Census:
+ // * Gophers: 3
+ // * Zebras: 2
+ // * Unknown: 3
+}
diff --git a/src/encoding/xml/example_test.go b/src/encoding/xml/example_test.go
new file mode 100644
index 0000000..21b48de
--- /dev/null
+++ b/src/encoding/xml/example_test.go
@@ -0,0 +1,151 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package xml_test
+
+import (
+ "encoding/xml"
+ "fmt"
+ "os"
+)
+
+func ExampleMarshalIndent() {
+ type Address struct {
+ City, State string
+ }
+ type Person struct {
+ XMLName xml.Name `xml:"person"`
+ Id int `xml:"id,attr"`
+ FirstName string `xml:"name>first"`
+ LastName string `xml:"name>last"`
+ Age int `xml:"age"`
+ Height float32 `xml:"height,omitempty"`
+ Married bool
+ Address
+ Comment string `xml:",comment"`
+ }
+
+ v := &Person{Id: 13, FirstName: "John", LastName: "Doe", Age: 42}
+ v.Comment = " Need more details. "
+ v.Address = Address{"Hanga Roa", "Easter Island"}
+
+ output, err := xml.MarshalIndent(v, " ", " ")
+ if err != nil {
+ fmt.Printf("error: %v\n", err)
+ }
+
+ os.Stdout.Write(output)
+ // Output:
+ // <person id="13">
+ // <name>
+ // <first>John</first>
+ // <last>Doe</last>
+ // </name>
+ // <age>42</age>
+ // <Married>false</Married>
+ // <City>Hanga Roa</City>
+ // <State>Easter Island</State>
+ // <!-- Need more details. -->
+ // </person>
+}
+
+func ExampleEncoder() {
+ type Address struct {
+ City, State string
+ }
+ type Person struct {
+ XMLName xml.Name `xml:"person"`
+ Id int `xml:"id,attr"`
+ FirstName string `xml:"name>first"`
+ LastName string `xml:"name>last"`
+ Age int `xml:"age"`
+ Height float32 `xml:"height,omitempty"`
+ Married bool
+ Address
+ Comment string `xml:",comment"`
+ }
+
+ v := &Person{Id: 13, FirstName: "John", LastName: "Doe", Age: 42}
+ v.Comment = " Need more details. "
+ v.Address = Address{"Hanga Roa", "Easter Island"}
+
+ enc := xml.NewEncoder(os.Stdout)
+ enc.Indent(" ", " ")
+ if err := enc.Encode(v); err != nil {
+ fmt.Printf("error: %v\n", err)
+ }
+
+ // Output:
+ // <person id="13">
+ // <name>
+ // <first>John</first>
+ // <last>Doe</last>
+ // </name>
+ // <age>42</age>
+ // <Married>false</Married>
+ // <City>Hanga Roa</City>
+ // <State>Easter Island</State>
+ // <!-- Need more details. -->
+ // </person>
+}
+
+// This example demonstrates unmarshaling an XML excerpt into a value with
+// some preset fields. Note that the Phone field isn't modified and that
+// the XML <Company> element is ignored. Also, the Groups field is assigned
+// considering the element path provided in its tag.
+func ExampleUnmarshal() {
+ type Email struct {
+ Where string `xml:"where,attr"`
+ Addr string
+ }
+ type Address struct {
+ City, State string
+ }
+ type Result struct {
+ XMLName xml.Name `xml:"Person"`
+ Name string `xml:"FullName"`
+ Phone string
+ Email []Email
+ Groups []string `xml:"Group>Value"`
+ Address
+ }
+ v := Result{Name: "none", Phone: "none"}
+
+ data := `
+ <Person>
+ <FullName>Grace R. Emlin</FullName>
+ <Company>Example Inc.</Company>
+ <Email where="home">
+ <Addr>gre@example.com</Addr>
+ </Email>
+ <Email where='work'>
+ <Addr>gre@work.com</Addr>
+ </Email>
+ <Group>
+ <Value>Friends</Value>
+ <Value>Squash</Value>
+ </Group>
+ <City>Hanga Roa</City>
+ <State>Easter Island</State>
+ </Person>
+ `
+ err := xml.Unmarshal([]byte(data), &v)
+ if err != nil {
+ fmt.Printf("error: %v", err)
+ return
+ }
+ fmt.Printf("XMLName: %#v\n", v.XMLName)
+ fmt.Printf("Name: %q\n", v.Name)
+ fmt.Printf("Phone: %q\n", v.Phone)
+ fmt.Printf("Email: %v\n", v.Email)
+ fmt.Printf("Groups: %v\n", v.Groups)
+ fmt.Printf("Address: %v\n", v.Address)
+ // Output:
+ // XMLName: xml.Name{Space:"", Local:"Person"}
+ // Name: "Grace R. Emlin"
+ // Phone: "none"
+ // Email: [{home gre@example.com} {work gre@work.com}]
+ // Groups: [Friends Squash]
+ // Address: {Hanga Roa Easter Island}
+}
diff --git a/src/encoding/xml/example_text_marshaling_test.go b/src/encoding/xml/example_text_marshaling_test.go
new file mode 100644
index 0000000..2549cb1
--- /dev/null
+++ b/src/encoding/xml/example_text_marshaling_test.go
@@ -0,0 +1,79 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package xml_test
+
+import (
+ "encoding/xml"
+ "fmt"
+ "log"
+ "strings"
+)
+
+type Size int
+
+const (
+ Unrecognized Size = iota
+ Small
+ Large
+)
+
+func (s *Size) UnmarshalText(text []byte) error {
+ switch strings.ToLower(string(text)) {
+ default:
+ *s = Unrecognized
+ case "small":
+ *s = Small
+ case "large":
+ *s = Large
+ }
+ return nil
+}
+
+func (s Size) MarshalText() ([]byte, error) {
+ var name string
+ switch s {
+ default:
+ name = "unrecognized"
+ case Small:
+ name = "small"
+ case Large:
+ name = "large"
+ }
+ return []byte(name), nil
+}
+
+func Example_textMarshalXML() {
+ blob := `
+ <sizes>
+ <size>small</size>
+ <size>regular</size>
+ <size>large</size>
+ <size>unrecognized</size>
+ <size>small</size>
+ <size>normal</size>
+ <size>small</size>
+ <size>large</size>
+ </sizes>`
+ var inventory struct {
+ Sizes []Size `xml:"size"`
+ }
+ if err := xml.Unmarshal([]byte(blob), &inventory); err != nil {
+ log.Fatal(err)
+ }
+
+ counts := make(map[Size]int)
+ for _, size := range inventory.Sizes {
+ counts[size] += 1
+ }
+
+ fmt.Printf("Inventory Counts:\n* Small: %d\n* Large: %d\n* Unrecognized: %d\n",
+ counts[Small], counts[Large], counts[Unrecognized])
+
+ // Output:
+ // Inventory Counts:
+ // * Small: 3
+ // * Large: 2
+ // * Unrecognized: 3
+}
diff --git a/src/encoding/xml/marshal.go b/src/encoding/xml/marshal.go
new file mode 100644
index 0000000..05b5542
--- /dev/null
+++ b/src/encoding/xml/marshal.go
@@ -0,0 +1,1131 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package xml
+
+import (
+ "bufio"
+ "bytes"
+ "encoding"
+ "errors"
+ "fmt"
+ "io"
+ "reflect"
+ "strconv"
+ "strings"
+)
+
+const (
+ // Header is a generic XML header suitable for use with the output of [Marshal].
+ // This is not automatically added to any output of this package,
+ // it is provided as a convenience.
+ Header = `<?xml version="1.0" encoding="UTF-8"?>` + "\n"
+)
+
+// Marshal returns the XML encoding of v.
+//
+// Marshal handles an array or slice by marshaling each of the elements.
+// Marshal handles a pointer by marshaling the value it points at or, if the
+// pointer is nil, by writing nothing. Marshal handles an interface value by
+// marshaling the value it contains or, if the interface value is nil, by
+// writing nothing. Marshal handles all other data by writing one or more XML
+// elements containing the data.
+//
+// The name for the XML elements is taken from, in order of preference:
+// - the tag on the XMLName field, if the data is a struct
+// - the value of the XMLName field of type [Name]
+// - the tag of the struct field used to obtain the data
+// - the name of the struct field used to obtain the data
+// - the name of the marshaled type
+//
+// The XML element for a struct contains marshaled elements for each of the
+// exported fields of the struct, with these exceptions:
+// - the XMLName field, described above, is omitted.
+// - a field with tag "-" is omitted.
+// - a field with tag "name,attr" becomes an attribute with
+// the given name in the XML element.
+// - a field with tag ",attr" becomes an attribute with the
+// field name in the XML element.
+// - a field with tag ",chardata" is written as character data,
+// not as an XML element.
+// - a field with tag ",cdata" is written as character data
+// wrapped in one or more <![CDATA[ ... ]]> tags, not as an XML element.
+// - a field with tag ",innerxml" is written verbatim, not subject
+// to the usual marshaling procedure.
+// - a field with tag ",comment" is written as an XML comment, not
+// subject to the usual marshaling procedure. It must not contain
+// the "--" string within it.
+// - a field with a tag including the "omitempty" option is omitted
+// if the field value is empty. The empty values are false, 0, any
+// nil pointer or interface value, and any array, slice, map, or
+// string of length zero.
+// - an anonymous struct field is handled as if the fields of its
+// value were part of the outer struct.
+// - a field implementing [Marshaler] is written by calling its MarshalXML
+// method.
+// - a field implementing [encoding.TextMarshaler] is written by encoding the
+// result of its MarshalText method as text.
+//
+// If a field uses a tag "a>b>c", then the element c will be nested inside
+// parent elements a and b. Fields that appear next to each other that name
+// the same parent will be enclosed in one XML element.
+//
+// If the XML name for a struct field is defined by both the field tag and the
+// struct's XMLName field, the names must match.
+//
+// See [MarshalIndent] for an example.
+//
+// Marshal will return an error if asked to marshal a channel, function, or map.
+func Marshal(v any) ([]byte, error) {
+ var b bytes.Buffer
+ enc := NewEncoder(&b)
+ if err := enc.Encode(v); err != nil {
+ return nil, err
+ }
+ if err := enc.Close(); err != nil {
+ return nil, err
+ }
+ return b.Bytes(), nil
+}
+
+// Marshaler is the interface implemented by objects that can marshal
+// themselves into valid XML elements.
+//
+// MarshalXML encodes the receiver as zero or more XML elements.
+// By convention, arrays or slices are typically encoded as a sequence
+// of elements, one per entry.
+// Using start as the element tag is not required, but doing so
+// will enable [Unmarshal] to match the XML elements to the correct
+// struct field.
+// One common implementation strategy is to construct a separate
+// value with a layout corresponding to the desired XML and then
+// to encode it using e.EncodeElement.
+// Another common strategy is to use repeated calls to e.EncodeToken
+// to generate the XML output one token at a time.
+// The sequence of encoded tokens must make up zero or more valid
+// XML elements.
+type Marshaler interface {
+ MarshalXML(e *Encoder, start StartElement) error
+}
+
+// MarshalerAttr is the interface implemented by objects that can marshal
+// themselves into valid XML attributes.
+//
+// MarshalXMLAttr returns an XML attribute with the encoded value of the receiver.
+// Using name as the attribute name is not required, but doing so
+// will enable [Unmarshal] to match the attribute to the correct
+// struct field.
+// If MarshalXMLAttr returns the zero attribute [Attr]{}, no attribute
+// will be generated in the output.
+// MarshalXMLAttr is used only for struct fields with the
+// "attr" option in the field tag.
+type MarshalerAttr interface {
+ MarshalXMLAttr(name Name) (Attr, error)
+}
+
+// MarshalIndent works like [Marshal], but each XML element begins on a new
+// indented line that starts with prefix and is followed by one or more
+// copies of indent according to the nesting depth.
+func MarshalIndent(v any, prefix, indent string) ([]byte, error) {
+ var b bytes.Buffer
+ enc := NewEncoder(&b)
+ enc.Indent(prefix, indent)
+ if err := enc.Encode(v); err != nil {
+ return nil, err
+ }
+ if err := enc.Close(); err != nil {
+ return nil, err
+ }
+ return b.Bytes(), nil
+}
+
+// An Encoder writes XML data to an output stream.
+type Encoder struct {
+ p printer
+}
+
+// NewEncoder returns a new encoder that writes to w.
+func NewEncoder(w io.Writer) *Encoder {
+ e := &Encoder{printer{w: bufio.NewWriter(w)}}
+ e.p.encoder = e
+ return e
+}
+
+// Indent sets the encoder to generate XML in which each element
+// begins on a new indented line that starts with prefix and is followed by
+// one or more copies of indent according to the nesting depth.
+func (enc *Encoder) Indent(prefix, indent string) {
+ enc.p.prefix = prefix
+ enc.p.indent = indent
+}
+
+// Encode writes the XML encoding of v to the stream.
+//
+// See the documentation for [Marshal] for details about the conversion
+// of Go values to XML.
+//
+// Encode calls [Encoder.Flush] before returning.
+func (enc *Encoder) Encode(v any) error {
+ err := enc.p.marshalValue(reflect.ValueOf(v), nil, nil)
+ if err != nil {
+ return err
+ }
+ return enc.p.w.Flush()
+}
+
+// EncodeElement writes the XML encoding of v to the stream,
+// using start as the outermost tag in the encoding.
+//
+// See the documentation for [Marshal] for details about the conversion
+// of Go values to XML.
+//
+// EncodeElement calls [Encoder.Flush] before returning.
+func (enc *Encoder) EncodeElement(v any, start StartElement) error {
+ err := enc.p.marshalValue(reflect.ValueOf(v), nil, &start)
+ if err != nil {
+ return err
+ }
+ return enc.p.w.Flush()
+}
+
+var (
+ begComment = []byte("<!--")
+ endComment = []byte("-->")
+ endProcInst = []byte("?>")
+)
+
+// EncodeToken writes the given XML token to the stream.
+// It returns an error if [StartElement] and [EndElement] tokens are not properly matched.
+//
+// EncodeToken does not call [Encoder.Flush], because usually it is part of a larger operation
+// such as [Encoder.Encode] or [Encoder.EncodeElement] (or a custom [Marshaler]'s MarshalXML invoked
+// during those), and those will call Flush when finished.
+// Callers that create an Encoder and then invoke EncodeToken directly, without
+// using Encode or EncodeElement, need to call Flush when finished to ensure
+// that the XML is written to the underlying writer.
+//
+// EncodeToken allows writing a [ProcInst] with Target set to "xml" only as the first token
+// in the stream.
+func (enc *Encoder) EncodeToken(t Token) error {
+
+ p := &enc.p
+ switch t := t.(type) {
+ case StartElement:
+ if err := p.writeStart(&t); err != nil {
+ return err
+ }
+ case EndElement:
+ if err := p.writeEnd(t.Name); err != nil {
+ return err
+ }
+ case CharData:
+ escapeText(p, t, false)
+ case Comment:
+ if bytes.Contains(t, endComment) {
+ return fmt.Errorf("xml: EncodeToken of Comment containing --> marker")
+ }
+ p.WriteString("<!--")
+ p.Write(t)
+ p.WriteString("-->")
+ return p.cachedWriteError()
+ case ProcInst:
+ // First token to be encoded which is also a ProcInst with target of xml
+ // is the xml declaration. The only ProcInst where target of xml is allowed.
+ if t.Target == "xml" && p.w.Buffered() != 0 {
+ return fmt.Errorf("xml: EncodeToken of ProcInst xml target only valid for xml declaration, first token encoded")
+ }
+ if !isNameString(t.Target) {
+ return fmt.Errorf("xml: EncodeToken of ProcInst with invalid Target")
+ }
+ if bytes.Contains(t.Inst, endProcInst) {
+ return fmt.Errorf("xml: EncodeToken of ProcInst containing ?> marker")
+ }
+ p.WriteString("<?")
+ p.WriteString(t.Target)
+ if len(t.Inst) > 0 {
+ p.WriteByte(' ')
+ p.Write(t.Inst)
+ }
+ p.WriteString("?>")
+ case Directive:
+ if !isValidDirective(t) {
+ return fmt.Errorf("xml: EncodeToken of Directive containing wrong < or > markers")
+ }
+ p.WriteString("<!")
+ p.Write(t)
+ p.WriteString(">")
+ default:
+ return fmt.Errorf("xml: EncodeToken of invalid token type")
+
+ }
+ return p.cachedWriteError()
+}
+
+// isValidDirective reports whether dir is a valid directive text,
+// meaning angle brackets are matched, ignoring comments and strings.
+func isValidDirective(dir Directive) bool {
+ var (
+ depth int
+ inquote uint8
+ incomment bool
+ )
+ for i, c := range dir {
+ switch {
+ case incomment:
+ if c == '>' {
+ if n := 1 + i - len(endComment); n >= 0 && bytes.Equal(dir[n:i+1], endComment) {
+ incomment = false
+ }
+ }
+ // Just ignore anything in comment
+ case inquote != 0:
+ if c == inquote {
+ inquote = 0
+ }
+ // Just ignore anything within quotes
+ case c == '\'' || c == '"':
+ inquote = c
+ case c == '<':
+ if i+len(begComment) < len(dir) && bytes.Equal(dir[i:i+len(begComment)], begComment) {
+ incomment = true
+ } else {
+ depth++
+ }
+ case c == '>':
+ if depth == 0 {
+ return false
+ }
+ depth--
+ }
+ }
+ return depth == 0 && inquote == 0 && !incomment
+}
+
+// Flush flushes any buffered XML to the underlying writer.
+// See the [Encoder.EncodeToken] documentation for details about when it is necessary.
+func (enc *Encoder) Flush() error {
+ return enc.p.w.Flush()
+}
+
+// Close the Encoder, indicating that no more data will be written. It flushes
+// any buffered XML to the underlying writer and returns an error if the
+// written XML is invalid (e.g. by containing unclosed elements).
+func (enc *Encoder) Close() error {
+ return enc.p.Close()
+}
+
+type printer struct {
+ w *bufio.Writer
+ encoder *Encoder
+ seq int
+ indent string
+ prefix string
+ depth int
+ indentedIn bool
+ putNewline bool
+ attrNS map[string]string // map prefix -> name space
+ attrPrefix map[string]string // map name space -> prefix
+ prefixes []string
+ tags []Name
+ closed bool
+ err error
+}
+
+// createAttrPrefix finds the name space prefix attribute to use for the given name space,
+// defining a new prefix if necessary. It returns the prefix.
+func (p *printer) createAttrPrefix(url string) string {
+ if prefix := p.attrPrefix[url]; prefix != "" {
+ return prefix
+ }
+
+ // The "http://www.w3.org/XML/1998/namespace" name space is predefined as "xml"
+ // and must be referred to that way.
+ // (The "http://www.w3.org/2000/xmlns/" name space is also predefined as "xmlns",
+ // but users should not be trying to use that one directly - that's our job.)
+ if url == xmlURL {
+ return xmlPrefix
+ }
+
+ // Need to define a new name space.
+ if p.attrPrefix == nil {
+ p.attrPrefix = make(map[string]string)
+ p.attrNS = make(map[string]string)
+ }
+
+ // Pick a name. We try to use the final element of the path
+ // but fall back to _.
+ prefix := strings.TrimRight(url, "/")
+ if i := strings.LastIndex(prefix, "/"); i >= 0 {
+ prefix = prefix[i+1:]
+ }
+ if prefix == "" || !isName([]byte(prefix)) || strings.Contains(prefix, ":") {
+ prefix = "_"
+ }
+ // xmlanything is reserved and any variant of it regardless of
+ // case should be matched, so:
+ // (('X'|'x') ('M'|'m') ('L'|'l'))
+ // See Section 2.3 of https://www.w3.org/TR/REC-xml/
+ if len(prefix) >= 3 && strings.EqualFold(prefix[:3], "xml") {
+ prefix = "_" + prefix
+ }
+ if p.attrNS[prefix] != "" {
+ // Name is taken. Find a better one.
+ for p.seq++; ; p.seq++ {
+ if id := prefix + "_" + strconv.Itoa(p.seq); p.attrNS[id] == "" {
+ prefix = id
+ break
+ }
+ }
+ }
+
+ p.attrPrefix[url] = prefix
+ p.attrNS[prefix] = url
+
+ p.WriteString(`xmlns:`)
+ p.WriteString(prefix)
+ p.WriteString(`="`)
+ EscapeText(p, []byte(url))
+ p.WriteString(`" `)
+
+ p.prefixes = append(p.prefixes, prefix)
+
+ return prefix
+}
+
+// deleteAttrPrefix removes an attribute name space prefix.
+func (p *printer) deleteAttrPrefix(prefix string) {
+ delete(p.attrPrefix, p.attrNS[prefix])
+ delete(p.attrNS, prefix)
+}
+
+func (p *printer) markPrefix() {
+ p.prefixes = append(p.prefixes, "")
+}
+
+func (p *printer) popPrefix() {
+ for len(p.prefixes) > 0 {
+ prefix := p.prefixes[len(p.prefixes)-1]
+ p.prefixes = p.prefixes[:len(p.prefixes)-1]
+ if prefix == "" {
+ break
+ }
+ p.deleteAttrPrefix(prefix)
+ }
+}
+
+var (
+ marshalerType = reflect.TypeFor[Marshaler]()
+ marshalerAttrType = reflect.TypeFor[MarshalerAttr]()
+ textMarshalerType = reflect.TypeFor[encoding.TextMarshaler]()
+)
+
+// marshalValue writes one or more XML elements representing val.
+// If val was obtained from a struct field, finfo must have its details.
+func (p *printer) marshalValue(val reflect.Value, finfo *fieldInfo, startTemplate *StartElement) error {
+ if startTemplate != nil && startTemplate.Name.Local == "" {
+ return fmt.Errorf("xml: EncodeElement of StartElement with missing name")
+ }
+
+ if !val.IsValid() {
+ return nil
+ }
+ if finfo != nil && finfo.flags&fOmitEmpty != 0 && isEmptyValue(val) {
+ return nil
+ }
+
+ // Drill into interfaces and pointers.
+ // This can turn into an infinite loop given a cyclic chain,
+ // but it matches the Go 1 behavior.
+ for val.Kind() == reflect.Interface || val.Kind() == reflect.Pointer {
+ if val.IsNil() {
+ return nil
+ }
+ val = val.Elem()
+ }
+
+ kind := val.Kind()
+ typ := val.Type()
+
+ // Check for marshaler.
+ if val.CanInterface() && typ.Implements(marshalerType) {
+ return p.marshalInterface(val.Interface().(Marshaler), defaultStart(typ, finfo, startTemplate))
+ }
+ if val.CanAddr() {
+ pv := val.Addr()
+ if pv.CanInterface() && pv.Type().Implements(marshalerType) {
+ return p.marshalInterface(pv.Interface().(Marshaler), defaultStart(pv.Type(), finfo, startTemplate))
+ }
+ }
+
+ // Check for text marshaler.
+ if val.CanInterface() && typ.Implements(textMarshalerType) {
+ return p.marshalTextInterface(val.Interface().(encoding.TextMarshaler), defaultStart(typ, finfo, startTemplate))
+ }
+ if val.CanAddr() {
+ pv := val.Addr()
+ if pv.CanInterface() && pv.Type().Implements(textMarshalerType) {
+ return p.marshalTextInterface(pv.Interface().(encoding.TextMarshaler), defaultStart(pv.Type(), finfo, startTemplate))
+ }
+ }
+
+ // Slices and arrays iterate over the elements. They do not have an enclosing tag.
+ if (kind == reflect.Slice || kind == reflect.Array) && typ.Elem().Kind() != reflect.Uint8 {
+ for i, n := 0, val.Len(); i < n; i++ {
+ if err := p.marshalValue(val.Index(i), finfo, startTemplate); err != nil {
+ return err
+ }
+ }
+ return nil
+ }
+
+ tinfo, err := getTypeInfo(typ)
+ if err != nil {
+ return err
+ }
+
+ // Create start element.
+ // Precedence for the XML element name is:
+ // 0. startTemplate
+ // 1. XMLName field in underlying struct;
+ // 2. field name/tag in the struct field; and
+ // 3. type name
+ var start StartElement
+
+ if startTemplate != nil {
+ start.Name = startTemplate.Name
+ start.Attr = append(start.Attr, startTemplate.Attr...)
+ } else if tinfo.xmlname != nil {
+ xmlname := tinfo.xmlname
+ if xmlname.name != "" {
+ start.Name.Space, start.Name.Local = xmlname.xmlns, xmlname.name
+ } else {
+ fv := xmlname.value(val, dontInitNilPointers)
+ if v, ok := fv.Interface().(Name); ok && v.Local != "" {
+ start.Name = v
+ }
+ }
+ }
+ if start.Name.Local == "" && finfo != nil {
+ start.Name.Space, start.Name.Local = finfo.xmlns, finfo.name
+ }
+ if start.Name.Local == "" {
+ name := typ.Name()
+ if i := strings.IndexByte(name, '['); i >= 0 {
+ // Truncate generic instantiation name. See issue 48318.
+ name = name[:i]
+ }
+ if name == "" {
+ return &UnsupportedTypeError{typ}
+ }
+ start.Name.Local = name
+ }
+
+ // Attributes
+ for i := range tinfo.fields {
+ finfo := &tinfo.fields[i]
+ if finfo.flags&fAttr == 0 {
+ continue
+ }
+ fv := finfo.value(val, dontInitNilPointers)
+
+ if finfo.flags&fOmitEmpty != 0 && (!fv.IsValid() || isEmptyValue(fv)) {
+ continue
+ }
+
+ if fv.Kind() == reflect.Interface && fv.IsNil() {
+ continue
+ }
+
+ name := Name{Space: finfo.xmlns, Local: finfo.name}
+ if err := p.marshalAttr(&start, name, fv); err != nil {
+ return err
+ }
+ }
+
+ // If an empty name was found, namespace is overridden with an empty space
+ if tinfo.xmlname != nil && start.Name.Space == "" &&
+ tinfo.xmlname.xmlns == "" && tinfo.xmlname.name == "" &&
+ len(p.tags) != 0 && p.tags[len(p.tags)-1].Space != "" {
+ start.Attr = append(start.Attr, Attr{Name{"", xmlnsPrefix}, ""})
+ }
+ if err := p.writeStart(&start); err != nil {
+ return err
+ }
+
+ if val.Kind() == reflect.Struct {
+ err = p.marshalStruct(tinfo, val)
+ } else {
+ s, b, err1 := p.marshalSimple(typ, val)
+ if err1 != nil {
+ err = err1
+ } else if b != nil {
+ EscapeText(p, b)
+ } else {
+ p.EscapeString(s)
+ }
+ }
+ if err != nil {
+ return err
+ }
+
+ if err := p.writeEnd(start.Name); err != nil {
+ return err
+ }
+
+ return p.cachedWriteError()
+}
+
+// marshalAttr marshals an attribute with the given name and value, adding to start.Attr.
+func (p *printer) marshalAttr(start *StartElement, name Name, val reflect.Value) error {
+ if val.CanInterface() && val.Type().Implements(marshalerAttrType) {
+ attr, err := val.Interface().(MarshalerAttr).MarshalXMLAttr(name)
+ if err != nil {
+ return err
+ }
+ if attr.Name.Local != "" {
+ start.Attr = append(start.Attr, attr)
+ }
+ return nil
+ }
+
+ if val.CanAddr() {
+ pv := val.Addr()
+ if pv.CanInterface() && pv.Type().Implements(marshalerAttrType) {
+ attr, err := pv.Interface().(MarshalerAttr).MarshalXMLAttr(name)
+ if err != nil {
+ return err
+ }
+ if attr.Name.Local != "" {
+ start.Attr = append(start.Attr, attr)
+ }
+ return nil
+ }
+ }
+
+ if val.CanInterface() && val.Type().Implements(textMarshalerType) {
+ text, err := val.Interface().(encoding.TextMarshaler).MarshalText()
+ if err != nil {
+ return err
+ }
+ start.Attr = append(start.Attr, Attr{name, string(text)})
+ return nil
+ }
+
+ if val.CanAddr() {
+ pv := val.Addr()
+ if pv.CanInterface() && pv.Type().Implements(textMarshalerType) {
+ text, err := pv.Interface().(encoding.TextMarshaler).MarshalText()
+ if err != nil {
+ return err
+ }
+ start.Attr = append(start.Attr, Attr{name, string(text)})
+ return nil
+ }
+ }
+
+ // Dereference or skip nil pointer, interface values.
+ switch val.Kind() {
+ case reflect.Pointer, reflect.Interface:
+ if val.IsNil() {
+ return nil
+ }
+ val = val.Elem()
+ }
+
+ // Walk slices.
+ if val.Kind() == reflect.Slice && val.Type().Elem().Kind() != reflect.Uint8 {
+ n := val.Len()
+ for i := 0; i < n; i++ {
+ if err := p.marshalAttr(start, name, val.Index(i)); err != nil {
+ return err
+ }
+ }
+ return nil
+ }
+
+ if val.Type() == attrType {
+ start.Attr = append(start.Attr, val.Interface().(Attr))
+ return nil
+ }
+
+ s, b, err := p.marshalSimple(val.Type(), val)
+ if err != nil {
+ return err
+ }
+ if b != nil {
+ s = string(b)
+ }
+ start.Attr = append(start.Attr, Attr{name, s})
+ return nil
+}
+
+// defaultStart returns the default start element to use,
+// given the reflect type, field info, and start template.
+func defaultStart(typ reflect.Type, finfo *fieldInfo, startTemplate *StartElement) StartElement {
+ var start StartElement
+ // Precedence for the XML element name is as above,
+ // except that we do not look inside structs for the first field.
+ if startTemplate != nil {
+ start.Name = startTemplate.Name
+ start.Attr = append(start.Attr, startTemplate.Attr...)
+ } else if finfo != nil && finfo.name != "" {
+ start.Name.Local = finfo.name
+ start.Name.Space = finfo.xmlns
+ } else if typ.Name() != "" {
+ start.Name.Local = typ.Name()
+ } else {
+ // Must be a pointer to a named type,
+ // since it has the Marshaler methods.
+ start.Name.Local = typ.Elem().Name()
+ }
+ return start
+}
+
+// marshalInterface marshals a Marshaler interface value.
+func (p *printer) marshalInterface(val Marshaler, start StartElement) error {
+ // Push a marker onto the tag stack so that MarshalXML
+ // cannot close the XML tags that it did not open.
+ p.tags = append(p.tags, Name{})
+ n := len(p.tags)
+
+ err := val.MarshalXML(p.encoder, start)
+ if err != nil {
+ return err
+ }
+
+ // Make sure MarshalXML closed all its tags. p.tags[n-1] is the mark.
+ if len(p.tags) > n {
+ return fmt.Errorf("xml: %s.MarshalXML wrote invalid XML: <%s> not closed", receiverType(val), p.tags[len(p.tags)-1].Local)
+ }
+ p.tags = p.tags[:n-1]
+ return nil
+}
+
+// marshalTextInterface marshals a TextMarshaler interface value.
+func (p *printer) marshalTextInterface(val encoding.TextMarshaler, start StartElement) error {
+ if err := p.writeStart(&start); err != nil {
+ return err
+ }
+ text, err := val.MarshalText()
+ if err != nil {
+ return err
+ }
+ EscapeText(p, text)
+ return p.writeEnd(start.Name)
+}
+
+// writeStart writes the given start element.
+func (p *printer) writeStart(start *StartElement) error {
+ if start.Name.Local == "" {
+ return fmt.Errorf("xml: start tag with no name")
+ }
+
+ p.tags = append(p.tags, start.Name)
+ p.markPrefix()
+
+ p.writeIndent(1)
+ p.WriteByte('<')
+ p.WriteString(start.Name.Local)
+
+ if start.Name.Space != "" {
+ p.WriteString(` xmlns="`)
+ p.EscapeString(start.Name.Space)
+ p.WriteByte('"')
+ }
+
+ // Attributes
+ for _, attr := range start.Attr {
+ name := attr.Name
+ if name.Local == "" {
+ continue
+ }
+ p.WriteByte(' ')
+ if name.Space != "" {
+ p.WriteString(p.createAttrPrefix(name.Space))
+ p.WriteByte(':')
+ }
+ p.WriteString(name.Local)
+ p.WriteString(`="`)
+ p.EscapeString(attr.Value)
+ p.WriteByte('"')
+ }
+ p.WriteByte('>')
+ return nil
+}
+
+func (p *printer) writeEnd(name Name) error {
+ if name.Local == "" {
+ return fmt.Errorf("xml: end tag with no name")
+ }
+ if len(p.tags) == 0 || p.tags[len(p.tags)-1].Local == "" {
+ return fmt.Errorf("xml: end tag </%s> without start tag", name.Local)
+ }
+ if top := p.tags[len(p.tags)-1]; top != name {
+ if top.Local != name.Local {
+ return fmt.Errorf("xml: end tag </%s> does not match start tag <%s>", name.Local, top.Local)
+ }
+ return fmt.Errorf("xml: end tag </%s> in namespace %s does not match start tag <%s> in namespace %s", name.Local, name.Space, top.Local, top.Space)
+ }
+ p.tags = p.tags[:len(p.tags)-1]
+
+ p.writeIndent(-1)
+ p.WriteByte('<')
+ p.WriteByte('/')
+ p.WriteString(name.Local)
+ p.WriteByte('>')
+ p.popPrefix()
+ return nil
+}
+
+func (p *printer) marshalSimple(typ reflect.Type, val reflect.Value) (string, []byte, error) {
+ switch val.Kind() {
+ case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
+ return strconv.FormatInt(val.Int(), 10), nil, nil
+ case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
+ return strconv.FormatUint(val.Uint(), 10), nil, nil
+ case reflect.Float32, reflect.Float64:
+ return strconv.FormatFloat(val.Float(), 'g', -1, val.Type().Bits()), nil, nil
+ case reflect.String:
+ return val.String(), nil, nil
+ case reflect.Bool:
+ return strconv.FormatBool(val.Bool()), nil, nil
+ case reflect.Array:
+ if typ.Elem().Kind() != reflect.Uint8 {
+ break
+ }
+ // [...]byte
+ var bytes []byte
+ if val.CanAddr() {
+ bytes = val.Bytes()
+ } else {
+ bytes = make([]byte, val.Len())
+ reflect.Copy(reflect.ValueOf(bytes), val)
+ }
+ return "", bytes, nil
+ case reflect.Slice:
+ if typ.Elem().Kind() != reflect.Uint8 {
+ break
+ }
+ // []byte
+ return "", val.Bytes(), nil
+ }
+ return "", nil, &UnsupportedTypeError{typ}
+}
+
+var ddBytes = []byte("--")
+
+// indirect drills into interfaces and pointers, returning the pointed-at value.
+// If it encounters a nil interface or pointer, indirect returns that nil value.
+// This can turn into an infinite loop given a cyclic chain,
+// but it matches the Go 1 behavior.
+func indirect(vf reflect.Value) reflect.Value {
+ for vf.Kind() == reflect.Interface || vf.Kind() == reflect.Pointer {
+ if vf.IsNil() {
+ return vf
+ }
+ vf = vf.Elem()
+ }
+ return vf
+}
+
+func (p *printer) marshalStruct(tinfo *typeInfo, val reflect.Value) error {
+ s := parentStack{p: p}
+ for i := range tinfo.fields {
+ finfo := &tinfo.fields[i]
+ if finfo.flags&fAttr != 0 {
+ continue
+ }
+ vf := finfo.value(val, dontInitNilPointers)
+ if !vf.IsValid() {
+ // The field is behind an anonymous struct field that's
+ // nil. Skip it.
+ continue
+ }
+
+ switch finfo.flags & fMode {
+ case fCDATA, fCharData:
+ emit := EscapeText
+ if finfo.flags&fMode == fCDATA {
+ emit = emitCDATA
+ }
+ if err := s.trim(finfo.parents); err != nil {
+ return err
+ }
+ if vf.CanInterface() && vf.Type().Implements(textMarshalerType) {
+ data, err := vf.Interface().(encoding.TextMarshaler).MarshalText()
+ if err != nil {
+ return err
+ }
+ if err := emit(p, data); err != nil {
+ return err
+ }
+ continue
+ }
+ if vf.CanAddr() {
+ pv := vf.Addr()
+ if pv.CanInterface() && pv.Type().Implements(textMarshalerType) {
+ data, err := pv.Interface().(encoding.TextMarshaler).MarshalText()
+ if err != nil {
+ return err
+ }
+ if err := emit(p, data); err != nil {
+ return err
+ }
+ continue
+ }
+ }
+
+ var scratch [64]byte
+ vf = indirect(vf)
+ switch vf.Kind() {
+ case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
+ if err := emit(p, strconv.AppendInt(scratch[:0], vf.Int(), 10)); err != nil {
+ return err
+ }
+ case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
+ if err := emit(p, strconv.AppendUint(scratch[:0], vf.Uint(), 10)); err != nil {
+ return err
+ }
+ case reflect.Float32, reflect.Float64:
+ if err := emit(p, strconv.AppendFloat(scratch[:0], vf.Float(), 'g', -1, vf.Type().Bits())); err != nil {
+ return err
+ }
+ case reflect.Bool:
+ if err := emit(p, strconv.AppendBool(scratch[:0], vf.Bool())); err != nil {
+ return err
+ }
+ case reflect.String:
+ if err := emit(p, []byte(vf.String())); err != nil {
+ return err
+ }
+ case reflect.Slice:
+ if elem, ok := vf.Interface().([]byte); ok {
+ if err := emit(p, elem); err != nil {
+ return err
+ }
+ }
+ }
+ continue
+
+ case fComment:
+ if err := s.trim(finfo.parents); err != nil {
+ return err
+ }
+ vf = indirect(vf)
+ k := vf.Kind()
+ if !(k == reflect.String || k == reflect.Slice && vf.Type().Elem().Kind() == reflect.Uint8) {
+ return fmt.Errorf("xml: bad type for comment field of %s", val.Type())
+ }
+ if vf.Len() == 0 {
+ continue
+ }
+ p.writeIndent(0)
+ p.WriteString("<!--")
+ dashDash := false
+ dashLast := false
+ switch k {
+ case reflect.String:
+ s := vf.String()
+ dashDash = strings.Contains(s, "--")
+ dashLast = s[len(s)-1] == '-'
+ if !dashDash {
+ p.WriteString(s)
+ }
+ case reflect.Slice:
+ b := vf.Bytes()
+ dashDash = bytes.Contains(b, ddBytes)
+ dashLast = b[len(b)-1] == '-'
+ if !dashDash {
+ p.Write(b)
+ }
+ default:
+ panic("can't happen")
+ }
+ if dashDash {
+ return fmt.Errorf(`xml: comments must not contain "--"`)
+ }
+ if dashLast {
+ // "--->" is invalid grammar. Make it "- -->"
+ p.WriteByte(' ')
+ }
+ p.WriteString("-->")
+ continue
+
+ case fInnerXML:
+ vf = indirect(vf)
+ iface := vf.Interface()
+ switch raw := iface.(type) {
+ case []byte:
+ p.Write(raw)
+ continue
+ case string:
+ p.WriteString(raw)
+ continue
+ }
+
+ case fElement, fElement | fAny:
+ if err := s.trim(finfo.parents); err != nil {
+ return err
+ }
+ if len(finfo.parents) > len(s.stack) {
+ if vf.Kind() != reflect.Pointer && vf.Kind() != reflect.Interface || !vf.IsNil() {
+ if err := s.push(finfo.parents[len(s.stack):]); err != nil {
+ return err
+ }
+ }
+ }
+ }
+ if err := p.marshalValue(vf, finfo, nil); err != nil {
+ return err
+ }
+ }
+ s.trim(nil)
+ return p.cachedWriteError()
+}
+
+// Write implements io.Writer
+func (p *printer) Write(b []byte) (n int, err error) {
+ if p.closed && p.err == nil {
+ p.err = errors.New("use of closed Encoder")
+ }
+ if p.err == nil {
+ n, p.err = p.w.Write(b)
+ }
+ return n, p.err
+}
+
+// WriteString implements io.StringWriter
+func (p *printer) WriteString(s string) (n int, err error) {
+ if p.closed && p.err == nil {
+ p.err = errors.New("use of closed Encoder")
+ }
+ if p.err == nil {
+ n, p.err = p.w.WriteString(s)
+ }
+ return n, p.err
+}
+
+// WriteByte implements io.ByteWriter
+func (p *printer) WriteByte(c byte) error {
+ if p.closed && p.err == nil {
+ p.err = errors.New("use of closed Encoder")
+ }
+ if p.err == nil {
+ p.err = p.w.WriteByte(c)
+ }
+ return p.err
+}
+
+// Close the Encoder, indicating that no more data will be written. It flushes
+// any buffered XML to the underlying writer and returns an error if the
+// written XML is invalid (e.g. by containing unclosed elements).
+func (p *printer) Close() error {
+ if p.closed {
+ return nil
+ }
+ p.closed = true
+ if err := p.w.Flush(); err != nil {
+ return err
+ }
+ if len(p.tags) > 0 {
+ return fmt.Errorf("unclosed tag <%s>", p.tags[len(p.tags)-1].Local)
+ }
+ return nil
+}
+
+// return the bufio Writer's cached write error
+func (p *printer) cachedWriteError() error {
+ _, err := p.Write(nil)
+ return err
+}
+
+func (p *printer) writeIndent(depthDelta int) {
+ if len(p.prefix) == 0 && len(p.indent) == 0 {
+ return
+ }
+ if depthDelta < 0 {
+ p.depth--
+ if p.indentedIn {
+ p.indentedIn = false
+ return
+ }
+ p.indentedIn = false
+ }
+ if p.putNewline {
+ p.WriteByte('\n')
+ } else {
+ p.putNewline = true
+ }
+ if len(p.prefix) > 0 {
+ p.WriteString(p.prefix)
+ }
+ if len(p.indent) > 0 {
+ for i := 0; i < p.depth; i++ {
+ p.WriteString(p.indent)
+ }
+ }
+ if depthDelta > 0 {
+ p.depth++
+ p.indentedIn = true
+ }
+}
+
+type parentStack struct {
+ p *printer
+ stack []string
+}
+
+// trim updates the XML context to match the longest common prefix of the stack
+// and the given parents. A closing tag will be written for every parent
+// popped. Passing a zero slice or nil will close all the elements.
+func (s *parentStack) trim(parents []string) error {
+ split := 0
+ for ; split < len(parents) && split < len(s.stack); split++ {
+ if parents[split] != s.stack[split] {
+ break
+ }
+ }
+ for i := len(s.stack) - 1; i >= split; i-- {
+ if err := s.p.writeEnd(Name{Local: s.stack[i]}); err != nil {
+ return err
+ }
+ }
+ s.stack = s.stack[:split]
+ return nil
+}
+
+// push adds parent elements to the stack and writes open tags.
+func (s *parentStack) push(parents []string) error {
+ for i := 0; i < len(parents); i++ {
+ if err := s.p.writeStart(&StartElement{Name: Name{Local: parents[i]}}); err != nil {
+ return err
+ }
+ }
+ s.stack = append(s.stack, parents...)
+ return nil
+}
+
+// UnsupportedTypeError is returned when [Marshal] encounters a type
+// that cannot be converted into XML.
+type UnsupportedTypeError struct {
+ Type reflect.Type
+}
+
+func (e *UnsupportedTypeError) Error() string {
+ return "xml: unsupported type: " + e.Type.String()
+}
+
+func isEmptyValue(v reflect.Value) bool {
+ switch v.Kind() {
+ case reflect.Array, reflect.Map, reflect.Slice, reflect.String:
+ return v.Len() == 0
+ case reflect.Bool,
+ reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
+ reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr,
+ reflect.Float32, reflect.Float64,
+ reflect.Interface, reflect.Pointer:
+ return v.IsZero()
+ }
+ return false
+}
diff --git a/src/encoding/xml/marshal_test.go b/src/encoding/xml/marshal_test.go
new file mode 100644
index 0000000..f6bcc7f
--- /dev/null
+++ b/src/encoding/xml/marshal_test.go
@@ -0,0 +1,2591 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package xml
+
+import (
+ "bytes"
+ "errors"
+ "fmt"
+ "io"
+ "reflect"
+ "strconv"
+ "strings"
+ "sync"
+ "testing"
+ "time"
+)
+
+type DriveType int
+
+const (
+ HyperDrive DriveType = iota
+ ImprobabilityDrive
+)
+
+type Passenger struct {
+ Name []string `xml:"name"`
+ Weight float32 `xml:"weight"`
+}
+
+type Ship struct {
+ XMLName struct{} `xml:"spaceship"`
+
+ Name string `xml:"name,attr"`
+ Pilot string `xml:"pilot,attr"`
+ Drive DriveType `xml:"drive"`
+ Age uint `xml:"age"`
+ Passenger []*Passenger `xml:"passenger"`
+ secret string
+}
+
+type NamedType string
+
+type Port struct {
+ XMLName struct{} `xml:"port"`
+ Type string `xml:"type,attr,omitempty"`
+ Comment string `xml:",comment"`
+ Number string `xml:",chardata"`
+}
+
+type Domain struct {
+ XMLName struct{} `xml:"domain"`
+ Country string `xml:",attr,omitempty"`
+ Name []byte `xml:",chardata"`
+ Comment []byte `xml:",comment"`
+}
+
+type Book struct {
+ XMLName struct{} `xml:"book"`
+ Title string `xml:",chardata"`
+}
+
+type Event struct {
+ XMLName struct{} `xml:"event"`
+ Year int `xml:",chardata"`
+}
+
+type Movie struct {
+ XMLName struct{} `xml:"movie"`
+ Length uint `xml:",chardata"`
+}
+
+type Pi struct {
+ XMLName struct{} `xml:"pi"`
+ Approximation float32 `xml:",chardata"`
+}
+
+type Universe struct {
+ XMLName struct{} `xml:"universe"`
+ Visible float64 `xml:",chardata"`
+}
+
+type Particle struct {
+ XMLName struct{} `xml:"particle"`
+ HasMass bool `xml:",chardata"`
+}
+
+type Departure struct {
+ XMLName struct{} `xml:"departure"`
+ When time.Time `xml:",chardata"`
+}
+
+type SecretAgent struct {
+ XMLName struct{} `xml:"agent"`
+ Handle string `xml:"handle,attr"`
+ Identity string
+ Obfuscate string `xml:",innerxml"`
+}
+
+type NestedItems struct {
+ XMLName struct{} `xml:"result"`
+ Items []string `xml:">item"`
+ Item1 []string `xml:"Items>item1"`
+}
+
+type NestedOrder struct {
+ XMLName struct{} `xml:"result"`
+ Field1 string `xml:"parent>c"`
+ Field2 string `xml:"parent>b"`
+ Field3 string `xml:"parent>a"`
+}
+
+type MixedNested struct {
+ XMLName struct{} `xml:"result"`
+ A string `xml:"parent1>a"`
+ B string `xml:"b"`
+ C string `xml:"parent1>parent2>c"`
+ D string `xml:"parent1>d"`
+}
+
+type NilTest struct {
+ A any `xml:"parent1>parent2>a"`
+ B any `xml:"parent1>b"`
+ C any `xml:"parent1>parent2>c"`
+}
+
+type Service struct {
+ XMLName struct{} `xml:"service"`
+ Domain *Domain `xml:"host>domain"`
+ Port *Port `xml:"host>port"`
+ Extra1 any
+ Extra2 any `xml:"host>extra2"`
+}
+
+var nilStruct *Ship
+
+type EmbedA struct {
+ EmbedC
+ EmbedB EmbedB
+ FieldA string
+ embedD
+}
+
+type EmbedB struct {
+ FieldB string
+ *EmbedC
+}
+
+type EmbedC struct {
+ FieldA1 string `xml:"FieldA>A1"`
+ FieldA2 string `xml:"FieldA>A2"`
+ FieldB string
+ FieldC string
+}
+
+type embedD struct {
+ fieldD string
+ FieldE string // Promoted and visible when embedD is embedded.
+}
+
+type NameCasing struct {
+ XMLName struct{} `xml:"casing"`
+ Xy string
+ XY string
+ XyA string `xml:"Xy,attr"`
+ XYA string `xml:"XY,attr"`
+}
+
+type NamePrecedence struct {
+ XMLName Name `xml:"Parent"`
+ FromTag XMLNameWithoutTag `xml:"InTag"`
+ FromNameVal XMLNameWithoutTag
+ FromNameTag XMLNameWithTag
+ InFieldName string
+}
+
+type XMLNameWithTag struct {
+ XMLName Name `xml:"InXMLNameTag"`
+ Value string `xml:",chardata"`
+}
+
+type XMLNameWithoutTag struct {
+ XMLName Name
+ Value string `xml:",chardata"`
+}
+
+type NameInField struct {
+ Foo Name `xml:"ns foo"`
+}
+
+type AttrTest struct {
+ Int int `xml:",attr"`
+ Named int `xml:"int,attr"`
+ Float float64 `xml:",attr"`
+ Uint8 uint8 `xml:",attr"`
+ Bool bool `xml:",attr"`
+ Str string `xml:",attr"`
+ Bytes []byte `xml:",attr"`
+}
+
+type AttrsTest struct {
+ Attrs []Attr `xml:",any,attr"`
+ Int int `xml:",attr"`
+ Named int `xml:"int,attr"`
+ Float float64 `xml:",attr"`
+ Uint8 uint8 `xml:",attr"`
+ Bool bool `xml:",attr"`
+ Str string `xml:",attr"`
+ Bytes []byte `xml:",attr"`
+}
+
+type OmitAttrTest struct {
+ Int int `xml:",attr,omitempty"`
+ Named int `xml:"int,attr,omitempty"`
+ Float float64 `xml:",attr,omitempty"`
+ Uint8 uint8 `xml:",attr,omitempty"`
+ Bool bool `xml:",attr,omitempty"`
+ Str string `xml:",attr,omitempty"`
+ Bytes []byte `xml:",attr,omitempty"`
+ PStr *string `xml:",attr,omitempty"`
+}
+
+type OmitFieldTest struct {
+ Int int `xml:",omitempty"`
+ Named int `xml:"int,omitempty"`
+ Float float64 `xml:",omitempty"`
+ Uint8 uint8 `xml:",omitempty"`
+ Bool bool `xml:",omitempty"`
+ Str string `xml:",omitempty"`
+ Bytes []byte `xml:",omitempty"`
+ PStr *string `xml:",omitempty"`
+ Ptr *PresenceTest `xml:",omitempty"`
+}
+
+type AnyTest struct {
+ XMLName struct{} `xml:"a"`
+ Nested string `xml:"nested>value"`
+ AnyField AnyHolder `xml:",any"`
+}
+
+type AnyOmitTest struct {
+ XMLName struct{} `xml:"a"`
+ Nested string `xml:"nested>value"`
+ AnyField *AnyHolder `xml:",any,omitempty"`
+}
+
+type AnySliceTest struct {
+ XMLName struct{} `xml:"a"`
+ Nested string `xml:"nested>value"`
+ AnyField []AnyHolder `xml:",any"`
+}
+
+type AnyHolder struct {
+ XMLName Name
+ XML string `xml:",innerxml"`
+}
+
+type RecurseA struct {
+ A string
+ B *RecurseB
+}
+
+type RecurseB struct {
+ A *RecurseA
+ B string
+}
+
+type PresenceTest struct {
+ Exists *struct{}
+}
+
+type IgnoreTest struct {
+ PublicSecret string `xml:"-"`
+}
+
+type MyBytes []byte
+
+type Data struct {
+ Bytes []byte
+ Attr []byte `xml:",attr"`
+ Custom MyBytes
+}
+
+type Plain struct {
+ V any
+}
+
+type MyInt int
+
+type EmbedInt struct {
+ MyInt
+}
+
+type Strings struct {
+ X []string `xml:"A>B,omitempty"`
+}
+
+type PointerFieldsTest struct {
+ XMLName Name `xml:"dummy"`
+ Name *string `xml:"name,attr"`
+ Age *uint `xml:"age,attr"`
+ Empty *string `xml:"empty,attr"`
+ Contents *string `xml:",chardata"`
+}
+
+type ChardataEmptyTest struct {
+ XMLName Name `xml:"test"`
+ Contents *string `xml:",chardata"`
+}
+
+type PointerAnonFields struct {
+ *MyInt
+ *NamedType
+}
+
+type MyMarshalerTest struct {
+}
+
+var _ Marshaler = (*MyMarshalerTest)(nil)
+
+func (m *MyMarshalerTest) MarshalXML(e *Encoder, start StartElement) error {
+ e.EncodeToken(start)
+ e.EncodeToken(CharData([]byte("hello world")))
+ e.EncodeToken(EndElement{start.Name})
+ return nil
+}
+
+type MyMarshalerAttrTest struct {
+}
+
+var _ MarshalerAttr = (*MyMarshalerAttrTest)(nil)
+
+func (m *MyMarshalerAttrTest) MarshalXMLAttr(name Name) (Attr, error) {
+ return Attr{name, "hello world"}, nil
+}
+
+func (m *MyMarshalerAttrTest) UnmarshalXMLAttr(attr Attr) error {
+ return nil
+}
+
+type MarshalerStruct struct {
+ Foo MyMarshalerAttrTest `xml:",attr"`
+}
+
+type InnerStruct struct {
+ XMLName Name `xml:"testns outer"`
+}
+
+type OuterStruct struct {
+ InnerStruct
+ IntAttr int `xml:"int,attr"`
+}
+
+type OuterNamedStruct struct {
+ InnerStruct
+ XMLName Name `xml:"outerns test"`
+ IntAttr int `xml:"int,attr"`
+}
+
+type OuterNamedOrderedStruct struct {
+ XMLName Name `xml:"outerns test"`
+ InnerStruct
+ IntAttr int `xml:"int,attr"`
+}
+
+type OuterOuterStruct struct {
+ OuterStruct
+}
+
+type NestedAndChardata struct {
+ AB []string `xml:"A>B"`
+ Chardata string `xml:",chardata"`
+}
+
+type NestedAndComment struct {
+ AB []string `xml:"A>B"`
+ Comment string `xml:",comment"`
+}
+
+type CDataTest struct {
+ Chardata string `xml:",cdata"`
+}
+
+type NestedAndCData struct {
+ AB []string `xml:"A>B"`
+ CDATA string `xml:",cdata"`
+}
+
+func ifaceptr(x any) any {
+ return &x
+}
+
+func stringptr(x string) *string {
+ return &x
+}
+
+type T1 struct{}
+type T2 struct{}
+
+type IndirComment struct {
+ T1 T1
+ Comment *string `xml:",comment"`
+ T2 T2
+}
+
+type DirectComment struct {
+ T1 T1
+ Comment string `xml:",comment"`
+ T2 T2
+}
+
+type IfaceComment struct {
+ T1 T1
+ Comment any `xml:",comment"`
+ T2 T2
+}
+
+type IndirChardata struct {
+ T1 T1
+ Chardata *string `xml:",chardata"`
+ T2 T2
+}
+
+type DirectChardata struct {
+ T1 T1
+ Chardata string `xml:",chardata"`
+ T2 T2
+}
+
+type IfaceChardata struct {
+ T1 T1
+ Chardata any `xml:",chardata"`
+ T2 T2
+}
+
+type IndirCDATA struct {
+ T1 T1
+ CDATA *string `xml:",cdata"`
+ T2 T2
+}
+
+type DirectCDATA struct {
+ T1 T1
+ CDATA string `xml:",cdata"`
+ T2 T2
+}
+
+type IfaceCDATA struct {
+ T1 T1
+ CDATA any `xml:",cdata"`
+ T2 T2
+}
+
+type IndirInnerXML struct {
+ T1 T1
+ InnerXML *string `xml:",innerxml"`
+ T2 T2
+}
+
+type DirectInnerXML struct {
+ T1 T1
+ InnerXML string `xml:",innerxml"`
+ T2 T2
+}
+
+type IfaceInnerXML struct {
+ T1 T1
+ InnerXML any `xml:",innerxml"`
+ T2 T2
+}
+
+type IndirElement struct {
+ T1 T1
+ Element *string
+ T2 T2
+}
+
+type DirectElement struct {
+ T1 T1
+ Element string
+ T2 T2
+}
+
+type IfaceElement struct {
+ T1 T1
+ Element any
+ T2 T2
+}
+
+type IndirOmitEmpty struct {
+ T1 T1
+ OmitEmpty *string `xml:",omitempty"`
+ T2 T2
+}
+
+type DirectOmitEmpty struct {
+ T1 T1
+ OmitEmpty string `xml:",omitempty"`
+ T2 T2
+}
+
+type IfaceOmitEmpty struct {
+ T1 T1
+ OmitEmpty any `xml:",omitempty"`
+ T2 T2
+}
+
+type IndirAny struct {
+ T1 T1
+ Any *string `xml:",any"`
+ T2 T2
+}
+
+type DirectAny struct {
+ T1 T1
+ Any string `xml:",any"`
+ T2 T2
+}
+
+type IfaceAny struct {
+ T1 T1
+ Any any `xml:",any"`
+ T2 T2
+}
+
+type Generic[T any] struct {
+ X T
+}
+
+var (
+ nameAttr = "Sarah"
+ ageAttr = uint(12)
+ contentsAttr = "lorem ipsum"
+ empty = ""
+)
+
+// Unless explicitly stated as such (or *Plain), all of the
+// tests below are two-way tests. When introducing new tests,
+// please try to make them two-way as well to ensure that
+// marshaling and unmarshaling are as symmetrical as feasible.
+var marshalTests = []struct {
+ Value any
+ ExpectXML string
+ MarshalOnly bool
+ MarshalError string
+ UnmarshalOnly bool
+ UnmarshalError string
+}{
+ // Test nil marshals to nothing
+ {Value: nil, ExpectXML: ``, MarshalOnly: true},
+ {Value: nilStruct, ExpectXML: ``, MarshalOnly: true},
+
+ // Test value types
+ {Value: &Plain{true}, ExpectXML: `<Plain><V>true</V></Plain>`},
+ {Value: &Plain{false}, ExpectXML: `<Plain><V>false</V></Plain>`},
+ {Value: &Plain{int(42)}, ExpectXML: `<Plain><V>42</V></Plain>`},
+ {Value: &Plain{int8(42)}, ExpectXML: `<Plain><V>42</V></Plain>`},
+ {Value: &Plain{int16(42)}, ExpectXML: `<Plain><V>42</V></Plain>`},
+ {Value: &Plain{int32(42)}, ExpectXML: `<Plain><V>42</V></Plain>`},
+ {Value: &Plain{uint(42)}, ExpectXML: `<Plain><V>42</V></Plain>`},
+ {Value: &Plain{uint8(42)}, ExpectXML: `<Plain><V>42</V></Plain>`},
+ {Value: &Plain{uint16(42)}, ExpectXML: `<Plain><V>42</V></Plain>`},
+ {Value: &Plain{uint32(42)}, ExpectXML: `<Plain><V>42</V></Plain>`},
+ {Value: &Plain{float32(1.25)}, ExpectXML: `<Plain><V>1.25</V></Plain>`},
+ {Value: &Plain{float64(1.25)}, ExpectXML: `<Plain><V>1.25</V></Plain>`},
+ {Value: &Plain{uintptr(0xFFDD)}, ExpectXML: `<Plain><V>65501</V></Plain>`},
+ {Value: &Plain{"gopher"}, ExpectXML: `<Plain><V>gopher</V></Plain>`},
+ {Value: &Plain{[]byte("gopher")}, ExpectXML: `<Plain><V>gopher</V></Plain>`},
+ {Value: &Plain{"</>"}, ExpectXML: `<Plain><V>&lt;/&gt;</V></Plain>`},
+ {Value: &Plain{[]byte("</>")}, ExpectXML: `<Plain><V>&lt;/&gt;</V></Plain>`},
+ {Value: &Plain{[3]byte{'<', '/', '>'}}, ExpectXML: `<Plain><V>&lt;/&gt;</V></Plain>`},
+ {Value: &Plain{NamedType("potato")}, ExpectXML: `<Plain><V>potato</V></Plain>`},
+ {Value: &Plain{[]int{1, 2, 3}}, ExpectXML: `<Plain><V>1</V><V>2</V><V>3</V></Plain>`},
+ {Value: &Plain{[3]int{1, 2, 3}}, ExpectXML: `<Plain><V>1</V><V>2</V><V>3</V></Plain>`},
+ {Value: ifaceptr(true), MarshalOnly: true, ExpectXML: `<bool>true</bool>`},
+
+ // Test time.
+ {
+ Value: &Plain{time.Unix(1e9, 123456789).UTC()},
+ ExpectXML: `<Plain><V>2001-09-09T01:46:40.123456789Z</V></Plain>`,
+ },
+
+ // A pointer to struct{} may be used to test for an element's presence.
+ {
+ Value: &PresenceTest{new(struct{})},
+ ExpectXML: `<PresenceTest><Exists></Exists></PresenceTest>`,
+ },
+ {
+ Value: &PresenceTest{},
+ ExpectXML: `<PresenceTest></PresenceTest>`,
+ },
+
+ // A []byte field is only nil if the element was not found.
+ {
+ Value: &Data{},
+ ExpectXML: `<Data></Data>`,
+ UnmarshalOnly: true,
+ },
+ {
+ Value: &Data{Bytes: []byte{}, Custom: MyBytes{}, Attr: []byte{}},
+ ExpectXML: `<Data Attr=""><Bytes></Bytes><Custom></Custom></Data>`,
+ UnmarshalOnly: true,
+ },
+
+ // Check that []byte works, including named []byte types.
+ {
+ Value: &Data{Bytes: []byte("ab"), Custom: MyBytes("cd"), Attr: []byte{'v'}},
+ ExpectXML: `<Data Attr="v"><Bytes>ab</Bytes><Custom>cd</Custom></Data>`,
+ },
+
+ // Test innerxml
+ {
+ Value: &SecretAgent{
+ Handle: "007",
+ Identity: "James Bond",
+ Obfuscate: "<redacted/>",
+ },
+ ExpectXML: `<agent handle="007"><Identity>James Bond</Identity><redacted/></agent>`,
+ MarshalOnly: true,
+ },
+ {
+ Value: &SecretAgent{
+ Handle: "007",
+ Identity: "James Bond",
+ Obfuscate: "<Identity>James Bond</Identity><redacted/>",
+ },
+ ExpectXML: `<agent handle="007"><Identity>James Bond</Identity><redacted/></agent>`,
+ UnmarshalOnly: true,
+ },
+
+ // Test structs
+ {Value: &Port{Type: "ssl", Number: "443"}, ExpectXML: `<port type="ssl">443</port>`},
+ {Value: &Port{Number: "443"}, ExpectXML: `<port>443</port>`},
+ {Value: &Port{Type: "<unix>"}, ExpectXML: `<port type="&lt;unix&gt;"></port>`},
+ {Value: &Port{Number: "443", Comment: "https"}, ExpectXML: `<port><!--https-->443</port>`},
+ {Value: &Port{Number: "443", Comment: "add space-"}, ExpectXML: `<port><!--add space- -->443</port>`, MarshalOnly: true},
+ {Value: &Domain{Name: []byte("google.com&friends")}, ExpectXML: `<domain>google.com&amp;friends</domain>`},
+ {Value: &Domain{Name: []byte("google.com"), Comment: []byte(" &friends ")}, ExpectXML: `<domain>google.com<!-- &friends --></domain>`},
+ {Value: &Book{Title: "Pride & Prejudice"}, ExpectXML: `<book>Pride &amp; Prejudice</book>`},
+ {Value: &Event{Year: -3114}, ExpectXML: `<event>-3114</event>`},
+ {Value: &Movie{Length: 13440}, ExpectXML: `<movie>13440</movie>`},
+ {Value: &Pi{Approximation: 3.14159265}, ExpectXML: `<pi>3.1415927</pi>`},
+ {Value: &Universe{Visible: 9.3e13}, ExpectXML: `<universe>9.3e+13</universe>`},
+ {Value: &Particle{HasMass: true}, ExpectXML: `<particle>true</particle>`},
+ {Value: &Departure{When: ParseTime("2013-01-09T00:15:00-09:00")}, ExpectXML: `<departure>2013-01-09T00:15:00-09:00</departure>`},
+ {Value: atomValue, ExpectXML: atomXML},
+ {Value: &Generic[int]{1}, ExpectXML: `<Generic><X>1</X></Generic>`},
+ {
+ Value: &Ship{
+ Name: "Heart of Gold",
+ Pilot: "Computer",
+ Age: 1,
+ Drive: ImprobabilityDrive,
+ Passenger: []*Passenger{
+ {
+ Name: []string{"Zaphod", "Beeblebrox"},
+ Weight: 7.25,
+ },
+ {
+ Name: []string{"Trisha", "McMillen"},
+ Weight: 5.5,
+ },
+ {
+ Name: []string{"Ford", "Prefect"},
+ Weight: 7,
+ },
+ {
+ Name: []string{"Arthur", "Dent"},
+ Weight: 6.75,
+ },
+ },
+ },
+ ExpectXML: `<spaceship name="Heart of Gold" pilot="Computer">` +
+ `<drive>` + strconv.Itoa(int(ImprobabilityDrive)) + `</drive>` +
+ `<age>1</age>` +
+ `<passenger>` +
+ `<name>Zaphod</name>` +
+ `<name>Beeblebrox</name>` +
+ `<weight>7.25</weight>` +
+ `</passenger>` +
+ `<passenger>` +
+ `<name>Trisha</name>` +
+ `<name>McMillen</name>` +
+ `<weight>5.5</weight>` +
+ `</passenger>` +
+ `<passenger>` +
+ `<name>Ford</name>` +
+ `<name>Prefect</name>` +
+ `<weight>7</weight>` +
+ `</passenger>` +
+ `<passenger>` +
+ `<name>Arthur</name>` +
+ `<name>Dent</name>` +
+ `<weight>6.75</weight>` +
+ `</passenger>` +
+ `</spaceship>`,
+ },
+
+ // Test a>b
+ {
+ Value: &NestedItems{Items: nil, Item1: nil},
+ ExpectXML: `<result>` +
+ `<Items>` +
+ `</Items>` +
+ `</result>`,
+ },
+ {
+ Value: &NestedItems{Items: []string{}, Item1: []string{}},
+ ExpectXML: `<result>` +
+ `<Items>` +
+ `</Items>` +
+ `</result>`,
+ MarshalOnly: true,
+ },
+ {
+ Value: &NestedItems{Items: nil, Item1: []string{"A"}},
+ ExpectXML: `<result>` +
+ `<Items>` +
+ `<item1>A</item1>` +
+ `</Items>` +
+ `</result>`,
+ },
+ {
+ Value: &NestedItems{Items: []string{"A", "B"}, Item1: nil},
+ ExpectXML: `<result>` +
+ `<Items>` +
+ `<item>A</item>` +
+ `<item>B</item>` +
+ `</Items>` +
+ `</result>`,
+ },
+ {
+ Value: &NestedItems{Items: []string{"A", "B"}, Item1: []string{"C"}},
+ ExpectXML: `<result>` +
+ `<Items>` +
+ `<item>A</item>` +
+ `<item>B</item>` +
+ `<item1>C</item1>` +
+ `</Items>` +
+ `</result>`,
+ },
+ {
+ Value: &NestedOrder{Field1: "C", Field2: "B", Field3: "A"},
+ ExpectXML: `<result>` +
+ `<parent>` +
+ `<c>C</c>` +
+ `<b>B</b>` +
+ `<a>A</a>` +
+ `</parent>` +
+ `</result>`,
+ },
+ {
+ Value: &NilTest{A: "A", B: nil, C: "C"},
+ ExpectXML: `<NilTest>` +
+ `<parent1>` +
+ `<parent2><a>A</a></parent2>` +
+ `<parent2><c>C</c></parent2>` +
+ `</parent1>` +
+ `</NilTest>`,
+ MarshalOnly: true, // Uses interface{}
+ },
+ {
+ Value: &MixedNested{A: "A", B: "B", C: "C", D: "D"},
+ ExpectXML: `<result>` +
+ `<parent1><a>A</a></parent1>` +
+ `<b>B</b>` +
+ `<parent1>` +
+ `<parent2><c>C</c></parent2>` +
+ `<d>D</d>` +
+ `</parent1>` +
+ `</result>`,
+ },
+ {
+ Value: &Service{Port: &Port{Number: "80"}},
+ ExpectXML: `<service><host><port>80</port></host></service>`,
+ },
+ {
+ Value: &Service{},
+ ExpectXML: `<service></service>`,
+ },
+ {
+ Value: &Service{Port: &Port{Number: "80"}, Extra1: "A", Extra2: "B"},
+ ExpectXML: `<service>` +
+ `<host><port>80</port></host>` +
+ `<Extra1>A</Extra1>` +
+ `<host><extra2>B</extra2></host>` +
+ `</service>`,
+ MarshalOnly: true,
+ },
+ {
+ Value: &Service{Port: &Port{Number: "80"}, Extra2: "example"},
+ ExpectXML: `<service>` +
+ `<host><port>80</port></host>` +
+ `<host><extra2>example</extra2></host>` +
+ `</service>`,
+ MarshalOnly: true,
+ },
+ {
+ Value: &struct {
+ XMLName struct{} `xml:"space top"`
+ A string `xml:"x>a"`
+ B string `xml:"x>b"`
+ C string `xml:"space x>c"`
+ C1 string `xml:"space1 x>c"`
+ D1 string `xml:"space1 x>d"`
+ }{
+ A: "a",
+ B: "b",
+ C: "c",
+ C1: "c1",
+ D1: "d1",
+ },
+ ExpectXML: `<top xmlns="space">` +
+ `<x><a>a</a><b>b</b><c xmlns="space">c</c>` +
+ `<c xmlns="space1">c1</c>` +
+ `<d xmlns="space1">d1</d>` +
+ `</x>` +
+ `</top>`,
+ },
+ {
+ Value: &struct {
+ XMLName Name
+ A string `xml:"x>a"`
+ B string `xml:"x>b"`
+ C string `xml:"space x>c"`
+ C1 string `xml:"space1 x>c"`
+ D1 string `xml:"space1 x>d"`
+ }{
+ XMLName: Name{
+ Space: "space0",
+ Local: "top",
+ },
+ A: "a",
+ B: "b",
+ C: "c",
+ C1: "c1",
+ D1: "d1",
+ },
+ ExpectXML: `<top xmlns="space0">` +
+ `<x><a>a</a><b>b</b>` +
+ `<c xmlns="space">c</c>` +
+ `<c xmlns="space1">c1</c>` +
+ `<d xmlns="space1">d1</d>` +
+ `</x>` +
+ `</top>`,
+ },
+ {
+ Value: &struct {
+ XMLName struct{} `xml:"top"`
+ B string `xml:"space x>b"`
+ B1 string `xml:"space1 x>b"`
+ }{
+ B: "b",
+ B1: "b1",
+ },
+ ExpectXML: `<top>` +
+ `<x><b xmlns="space">b</b>` +
+ `<b xmlns="space1">b1</b></x>` +
+ `</top>`,
+ },
+
+ // Test struct embedding
+ {
+ Value: &EmbedA{
+ EmbedC: EmbedC{
+ FieldA1: "", // Shadowed by A.A
+ FieldA2: "", // Shadowed by A.A
+ FieldB: "A.C.B",
+ FieldC: "A.C.C",
+ },
+ EmbedB: EmbedB{
+ FieldB: "A.B.B",
+ EmbedC: &EmbedC{
+ FieldA1: "A.B.C.A1",
+ FieldA2: "A.B.C.A2",
+ FieldB: "", // Shadowed by A.B.B
+ FieldC: "A.B.C.C",
+ },
+ },
+ FieldA: "A.A",
+ embedD: embedD{
+ FieldE: "A.D.E",
+ },
+ },
+ ExpectXML: `<EmbedA>` +
+ `<FieldB>A.C.B</FieldB>` +
+ `<FieldC>A.C.C</FieldC>` +
+ `<EmbedB>` +
+ `<FieldB>A.B.B</FieldB>` +
+ `<FieldA>` +
+ `<A1>A.B.C.A1</A1>` +
+ `<A2>A.B.C.A2</A2>` +
+ `</FieldA>` +
+ `<FieldC>A.B.C.C</FieldC>` +
+ `</EmbedB>` +
+ `<FieldA>A.A</FieldA>` +
+ `<FieldE>A.D.E</FieldE>` +
+ `</EmbedA>`,
+ },
+
+ // Anonymous struct pointer field which is nil
+ {
+ Value: &EmbedB{},
+ ExpectXML: `<EmbedB><FieldB></FieldB></EmbedB>`,
+ },
+
+ // Other kinds of nil anonymous fields
+ {
+ Value: &PointerAnonFields{},
+ ExpectXML: `<PointerAnonFields></PointerAnonFields>`,
+ },
+
+ // Test that name casing matters
+ {
+ Value: &NameCasing{Xy: "mixed", XY: "upper", XyA: "mixedA", XYA: "upperA"},
+ ExpectXML: `<casing Xy="mixedA" XY="upperA"><Xy>mixed</Xy><XY>upper</XY></casing>`,
+ },
+
+ // Test the order in which the XML element name is chosen
+ {
+ Value: &NamePrecedence{
+ FromTag: XMLNameWithoutTag{Value: "A"},
+ FromNameVal: XMLNameWithoutTag{XMLName: Name{Local: "InXMLName"}, Value: "B"},
+ FromNameTag: XMLNameWithTag{Value: "C"},
+ InFieldName: "D",
+ },
+ ExpectXML: `<Parent>` +
+ `<InTag>A</InTag>` +
+ `<InXMLName>B</InXMLName>` +
+ `<InXMLNameTag>C</InXMLNameTag>` +
+ `<InFieldName>D</InFieldName>` +
+ `</Parent>`,
+ MarshalOnly: true,
+ },
+ {
+ Value: &NamePrecedence{
+ XMLName: Name{Local: "Parent"},
+ FromTag: XMLNameWithoutTag{XMLName: Name{Local: "InTag"}, Value: "A"},
+ FromNameVal: XMLNameWithoutTag{XMLName: Name{Local: "FromNameVal"}, Value: "B"},
+ FromNameTag: XMLNameWithTag{XMLName: Name{Local: "InXMLNameTag"}, Value: "C"},
+ InFieldName: "D",
+ },
+ ExpectXML: `<Parent>` +
+ `<InTag>A</InTag>` +
+ `<FromNameVal>B</FromNameVal>` +
+ `<InXMLNameTag>C</InXMLNameTag>` +
+ `<InFieldName>D</InFieldName>` +
+ `</Parent>`,
+ UnmarshalOnly: true,
+ },
+
+ // xml.Name works in a plain field as well.
+ {
+ Value: &NameInField{Name{Space: "ns", Local: "foo"}},
+ ExpectXML: `<NameInField><foo xmlns="ns"></foo></NameInField>`,
+ },
+ {
+ Value: &NameInField{Name{Space: "ns", Local: "foo"}},
+ ExpectXML: `<NameInField><foo xmlns="ns"><ignore></ignore></foo></NameInField>`,
+ UnmarshalOnly: true,
+ },
+
+ // Marshaling zero xml.Name uses the tag or field name.
+ {
+ Value: &NameInField{},
+ ExpectXML: `<NameInField><foo xmlns="ns"></foo></NameInField>`,
+ MarshalOnly: true,
+ },
+
+ // Test attributes
+ {
+ Value: &AttrTest{
+ Int: 8,
+ Named: 9,
+ Float: 23.5,
+ Uint8: 255,
+ Bool: true,
+ Str: "str",
+ Bytes: []byte("byt"),
+ },
+ ExpectXML: `<AttrTest Int="8" int="9" Float="23.5" Uint8="255"` +
+ ` Bool="true" Str="str" Bytes="byt"></AttrTest>`,
+ },
+ {
+ Value: &AttrTest{Bytes: []byte{}},
+ ExpectXML: `<AttrTest Int="0" int="0" Float="0" Uint8="0"` +
+ ` Bool="false" Str="" Bytes=""></AttrTest>`,
+ },
+ {
+ Value: &AttrsTest{
+ Attrs: []Attr{
+ {Name: Name{Local: "Answer"}, Value: "42"},
+ {Name: Name{Local: "Int"}, Value: "8"},
+ {Name: Name{Local: "int"}, Value: "9"},
+ {Name: Name{Local: "Float"}, Value: "23.5"},
+ {Name: Name{Local: "Uint8"}, Value: "255"},
+ {Name: Name{Local: "Bool"}, Value: "true"},
+ {Name: Name{Local: "Str"}, Value: "str"},
+ {Name: Name{Local: "Bytes"}, Value: "byt"},
+ },
+ },
+ ExpectXML: `<AttrsTest Answer="42" Int="8" int="9" Float="23.5" Uint8="255" Bool="true" Str="str" Bytes="byt" Int="0" int="0" Float="0" Uint8="0" Bool="false" Str="" Bytes=""></AttrsTest>`,
+ MarshalOnly: true,
+ },
+ {
+ Value: &AttrsTest{
+ Attrs: []Attr{
+ {Name: Name{Local: "Answer"}, Value: "42"},
+ },
+ Int: 8,
+ Named: 9,
+ Float: 23.5,
+ Uint8: 255,
+ Bool: true,
+ Str: "str",
+ Bytes: []byte("byt"),
+ },
+ ExpectXML: `<AttrsTest Answer="42" Int="8" int="9" Float="23.5" Uint8="255" Bool="true" Str="str" Bytes="byt"></AttrsTest>`,
+ },
+ {
+ Value: &AttrsTest{
+ Attrs: []Attr{
+ {Name: Name{Local: "Int"}, Value: "0"},
+ {Name: Name{Local: "int"}, Value: "0"},
+ {Name: Name{Local: "Float"}, Value: "0"},
+ {Name: Name{Local: "Uint8"}, Value: "0"},
+ {Name: Name{Local: "Bool"}, Value: "false"},
+ {Name: Name{Local: "Str"}},
+ {Name: Name{Local: "Bytes"}},
+ },
+ Bytes: []byte{},
+ },
+ ExpectXML: `<AttrsTest Int="0" int="0" Float="0" Uint8="0" Bool="false" Str="" Bytes="" Int="0" int="0" Float="0" Uint8="0" Bool="false" Str="" Bytes=""></AttrsTest>`,
+ MarshalOnly: true,
+ },
+ {
+ Value: &OmitAttrTest{
+ Int: 8,
+ Named: 9,
+ Float: 23.5,
+ Uint8: 255,
+ Bool: true,
+ Str: "str",
+ Bytes: []byte("byt"),
+ PStr: &empty,
+ },
+ ExpectXML: `<OmitAttrTest Int="8" int="9" Float="23.5" Uint8="255"` +
+ ` Bool="true" Str="str" Bytes="byt" PStr=""></OmitAttrTest>`,
+ },
+ {
+ Value: &OmitAttrTest{},
+ ExpectXML: `<OmitAttrTest></OmitAttrTest>`,
+ },
+
+ // pointer fields
+ {
+ Value: &PointerFieldsTest{Name: &nameAttr, Age: &ageAttr, Contents: &contentsAttr},
+ ExpectXML: `<dummy name="Sarah" age="12">lorem ipsum</dummy>`,
+ MarshalOnly: true,
+ },
+
+ // empty chardata pointer field
+ {
+ Value: &ChardataEmptyTest{},
+ ExpectXML: `<test></test>`,
+ MarshalOnly: true,
+ },
+
+ // omitempty on fields
+ {
+ Value: &OmitFieldTest{
+ Int: 8,
+ Named: 9,
+ Float: 23.5,
+ Uint8: 255,
+ Bool: true,
+ Str: "str",
+ Bytes: []byte("byt"),
+ PStr: &empty,
+ Ptr: &PresenceTest{},
+ },
+ ExpectXML: `<OmitFieldTest>` +
+ `<Int>8</Int>` +
+ `<int>9</int>` +
+ `<Float>23.5</Float>` +
+ `<Uint8>255</Uint8>` +
+ `<Bool>true</Bool>` +
+ `<Str>str</Str>` +
+ `<Bytes>byt</Bytes>` +
+ `<PStr></PStr>` +
+ `<Ptr></Ptr>` +
+ `</OmitFieldTest>`,
+ },
+ {
+ Value: &OmitFieldTest{},
+ ExpectXML: `<OmitFieldTest></OmitFieldTest>`,
+ },
+
+ // Test ",any"
+ {
+ ExpectXML: `<a><nested><value>known</value></nested><other><sub>unknown</sub></other></a>`,
+ Value: &AnyTest{
+ Nested: "known",
+ AnyField: AnyHolder{
+ XMLName: Name{Local: "other"},
+ XML: "<sub>unknown</sub>",
+ },
+ },
+ },
+ {
+ Value: &AnyTest{Nested: "known",
+ AnyField: AnyHolder{
+ XML: "<unknown/>",
+ XMLName: Name{Local: "AnyField"},
+ },
+ },
+ ExpectXML: `<a><nested><value>known</value></nested><AnyField><unknown/></AnyField></a>`,
+ },
+ {
+ ExpectXML: `<a><nested><value>b</value></nested></a>`,
+ Value: &AnyOmitTest{
+ Nested: "b",
+ },
+ },
+ {
+ ExpectXML: `<a><nested><value>b</value></nested><c><d>e</d></c><g xmlns="f"><h>i</h></g></a>`,
+ Value: &AnySliceTest{
+ Nested: "b",
+ AnyField: []AnyHolder{
+ {
+ XMLName: Name{Local: "c"},
+ XML: "<d>e</d>",
+ },
+ {
+ XMLName: Name{Space: "f", Local: "g"},
+ XML: "<h>i</h>",
+ },
+ },
+ },
+ },
+ {
+ ExpectXML: `<a><nested><value>b</value></nested></a>`,
+ Value: &AnySliceTest{
+ Nested: "b",
+ },
+ },
+
+ // Test recursive types.
+ {
+ Value: &RecurseA{
+ A: "a1",
+ B: &RecurseB{
+ A: &RecurseA{"a2", nil},
+ B: "b1",
+ },
+ },
+ ExpectXML: `<RecurseA><A>a1</A><B><A><A>a2</A></A><B>b1</B></B></RecurseA>`,
+ },
+
+ // Test ignoring fields via "-" tag
+ {
+ ExpectXML: `<IgnoreTest></IgnoreTest>`,
+ Value: &IgnoreTest{},
+ },
+ {
+ ExpectXML: `<IgnoreTest></IgnoreTest>`,
+ Value: &IgnoreTest{PublicSecret: "can't tell"},
+ MarshalOnly: true,
+ },
+ {
+ ExpectXML: `<IgnoreTest><PublicSecret>ignore me</PublicSecret></IgnoreTest>`,
+ Value: &IgnoreTest{},
+ UnmarshalOnly: true,
+ },
+
+ // Test escaping.
+ {
+ ExpectXML: `<a><nested><value>dquote: &#34;; squote: &#39;; ampersand: &amp;; less: &lt;; greater: &gt;;</value></nested><empty></empty></a>`,
+ Value: &AnyTest{
+ Nested: `dquote: "; squote: '; ampersand: &; less: <; greater: >;`,
+ AnyField: AnyHolder{XMLName: Name{Local: "empty"}},
+ },
+ },
+ {
+ ExpectXML: `<a><nested><value>newline: &#xA;; cr: &#xD;; tab: &#x9;;</value></nested><AnyField></AnyField></a>`,
+ Value: &AnyTest{
+ Nested: "newline: \n; cr: \r; tab: \t;",
+ AnyField: AnyHolder{XMLName: Name{Local: "AnyField"}},
+ },
+ },
+ {
+ ExpectXML: "<a><nested><value>1\r2\r\n3\n\r4\n5</value></nested></a>",
+ Value: &AnyTest{
+ Nested: "1\n2\n3\n\n4\n5",
+ },
+ UnmarshalOnly: true,
+ },
+ {
+ ExpectXML: `<EmbedInt><MyInt>42</MyInt></EmbedInt>`,
+ Value: &EmbedInt{
+ MyInt: 42,
+ },
+ },
+ // Test outputting CDATA-wrapped text.
+ {
+ ExpectXML: `<CDataTest></CDataTest>`,
+ Value: &CDataTest{},
+ },
+ {
+ ExpectXML: `<CDataTest><![CDATA[http://example.com/tests/1?foo=1&bar=baz]]></CDataTest>`,
+ Value: &CDataTest{
+ Chardata: "http://example.com/tests/1?foo=1&bar=baz",
+ },
+ },
+ {
+ ExpectXML: `<CDataTest><![CDATA[Literal <![CDATA[Nested]]]]><![CDATA[>!]]></CDataTest>`,
+ Value: &CDataTest{
+ Chardata: "Literal <![CDATA[Nested]]>!",
+ },
+ },
+ {
+ ExpectXML: `<CDataTest><![CDATA[<![CDATA[Nested]]]]><![CDATA[> Literal!]]></CDataTest>`,
+ Value: &CDataTest{
+ Chardata: "<![CDATA[Nested]]> Literal!",
+ },
+ },
+ {
+ ExpectXML: `<CDataTest><![CDATA[<![CDATA[Nested]]]]><![CDATA[> Literal! <![CDATA[Nested]]]]><![CDATA[> Literal!]]></CDataTest>`,
+ Value: &CDataTest{
+ Chardata: "<![CDATA[Nested]]> Literal! <![CDATA[Nested]]> Literal!",
+ },
+ },
+ {
+ ExpectXML: `<CDataTest><![CDATA[<![CDATA[<![CDATA[Nested]]]]><![CDATA[>]]]]><![CDATA[>]]></CDataTest>`,
+ Value: &CDataTest{
+ Chardata: "<![CDATA[<![CDATA[Nested]]>]]>",
+ },
+ },
+
+ // Test omitempty with parent chain; see golang.org/issue/4168.
+ {
+ ExpectXML: `<Strings><A></A></Strings>`,
+ Value: &Strings{},
+ },
+ // Custom marshalers.
+ {
+ ExpectXML: `<MyMarshalerTest>hello world</MyMarshalerTest>`,
+ Value: &MyMarshalerTest{},
+ },
+ {
+ ExpectXML: `<MarshalerStruct Foo="hello world"></MarshalerStruct>`,
+ Value: &MarshalerStruct{},
+ },
+ {
+ ExpectXML: `<outer xmlns="testns" int="10"></outer>`,
+ Value: &OuterStruct{IntAttr: 10},
+ },
+ {
+ ExpectXML: `<test xmlns="outerns" int="10"></test>`,
+ Value: &OuterNamedStruct{XMLName: Name{Space: "outerns", Local: "test"}, IntAttr: 10},
+ },
+ {
+ ExpectXML: `<test xmlns="outerns" int="10"></test>`,
+ Value: &OuterNamedOrderedStruct{XMLName: Name{Space: "outerns", Local: "test"}, IntAttr: 10},
+ },
+ {
+ ExpectXML: `<outer xmlns="testns" int="10"></outer>`,
+ Value: &OuterOuterStruct{OuterStruct{IntAttr: 10}},
+ },
+ {
+ ExpectXML: `<NestedAndChardata><A><B></B><B></B></A>test</NestedAndChardata>`,
+ Value: &NestedAndChardata{AB: make([]string, 2), Chardata: "test"},
+ },
+ {
+ ExpectXML: `<NestedAndComment><A><B></B><B></B></A><!--test--></NestedAndComment>`,
+ Value: &NestedAndComment{AB: make([]string, 2), Comment: "test"},
+ },
+ {
+ ExpectXML: `<NestedAndCData><A><B></B><B></B></A><![CDATA[test]]></NestedAndCData>`,
+ Value: &NestedAndCData{AB: make([]string, 2), CDATA: "test"},
+ },
+ // Test pointer indirection in various kinds of fields.
+ // https://golang.org/issue/19063
+ {
+ ExpectXML: `<IndirComment><T1></T1><!--hi--><T2></T2></IndirComment>`,
+ Value: &IndirComment{Comment: stringptr("hi")},
+ MarshalOnly: true,
+ },
+ {
+ ExpectXML: `<IndirComment><T1></T1><T2></T2></IndirComment>`,
+ Value: &IndirComment{Comment: stringptr("")},
+ MarshalOnly: true,
+ },
+ {
+ ExpectXML: `<IndirComment><T1></T1><T2></T2></IndirComment>`,
+ Value: &IndirComment{Comment: nil},
+ MarshalError: "xml: bad type for comment field of xml.IndirComment",
+ },
+ {
+ ExpectXML: `<IndirComment><T1></T1><!--hi--><T2></T2></IndirComment>`,
+ Value: &IndirComment{Comment: nil},
+ UnmarshalOnly: true,
+ },
+ {
+ ExpectXML: `<IfaceComment><T1></T1><!--hi--><T2></T2></IfaceComment>`,
+ Value: &IfaceComment{Comment: "hi"},
+ MarshalOnly: true,
+ },
+ {
+ ExpectXML: `<IfaceComment><T1></T1><!--hi--><T2></T2></IfaceComment>`,
+ Value: &IfaceComment{Comment: nil},
+ UnmarshalOnly: true,
+ },
+ {
+ ExpectXML: `<IfaceComment><T1></T1><T2></T2></IfaceComment>`,
+ Value: &IfaceComment{Comment: nil},
+ MarshalError: "xml: bad type for comment field of xml.IfaceComment",
+ },
+ {
+ ExpectXML: `<IfaceComment><T1></T1><T2></T2></IfaceComment>`,
+ Value: &IfaceComment{Comment: nil},
+ UnmarshalOnly: true,
+ },
+ {
+ ExpectXML: `<DirectComment><T1></T1><!--hi--><T2></T2></DirectComment>`,
+ Value: &DirectComment{Comment: string("hi")},
+ },
+ {
+ ExpectXML: `<DirectComment><T1></T1><T2></T2></DirectComment>`,
+ Value: &DirectComment{Comment: string("")},
+ },
+ {
+ ExpectXML: `<IndirChardata><T1></T1>hi<T2></T2></IndirChardata>`,
+ Value: &IndirChardata{Chardata: stringptr("hi")},
+ },
+ {
+ ExpectXML: `<IndirChardata><T1></T1><![CDATA[hi]]><T2></T2></IndirChardata>`,
+ Value: &IndirChardata{Chardata: stringptr("hi")},
+ UnmarshalOnly: true, // marshals without CDATA
+ },
+ {
+ ExpectXML: `<IndirChardata><T1></T1><T2></T2></IndirChardata>`,
+ Value: &IndirChardata{Chardata: stringptr("")},
+ },
+ {
+ ExpectXML: `<IndirChardata><T1></T1><T2></T2></IndirChardata>`,
+ Value: &IndirChardata{Chardata: nil},
+ MarshalOnly: true, // unmarshal leaves Chardata=stringptr("")
+ },
+ {
+ ExpectXML: `<IfaceChardata><T1></T1>hi<T2></T2></IfaceChardata>`,
+ Value: &IfaceChardata{Chardata: string("hi")},
+ UnmarshalError: "cannot unmarshal into interface {}",
+ },
+ {
+ ExpectXML: `<IfaceChardata><T1></T1><![CDATA[hi]]><T2></T2></IfaceChardata>`,
+ Value: &IfaceChardata{Chardata: string("hi")},
+ UnmarshalOnly: true, // marshals without CDATA
+ UnmarshalError: "cannot unmarshal into interface {}",
+ },
+ {
+ ExpectXML: `<IfaceChardata><T1></T1><T2></T2></IfaceChardata>`,
+ Value: &IfaceChardata{Chardata: string("")},
+ UnmarshalError: "cannot unmarshal into interface {}",
+ },
+ {
+ ExpectXML: `<IfaceChardata><T1></T1><T2></T2></IfaceChardata>`,
+ Value: &IfaceChardata{Chardata: nil},
+ UnmarshalError: "cannot unmarshal into interface {}",
+ },
+ {
+ ExpectXML: `<DirectChardata><T1></T1>hi<T2></T2></DirectChardata>`,
+ Value: &DirectChardata{Chardata: string("hi")},
+ },
+ {
+ ExpectXML: `<DirectChardata><T1></T1><![CDATA[hi]]><T2></T2></DirectChardata>`,
+ Value: &DirectChardata{Chardata: string("hi")},
+ UnmarshalOnly: true, // marshals without CDATA
+ },
+ {
+ ExpectXML: `<DirectChardata><T1></T1><T2></T2></DirectChardata>`,
+ Value: &DirectChardata{Chardata: string("")},
+ },
+ {
+ ExpectXML: `<IndirCDATA><T1></T1><![CDATA[hi]]><T2></T2></IndirCDATA>`,
+ Value: &IndirCDATA{CDATA: stringptr("hi")},
+ },
+ {
+ ExpectXML: `<IndirCDATA><T1></T1>hi<T2></T2></IndirCDATA>`,
+ Value: &IndirCDATA{CDATA: stringptr("hi")},
+ UnmarshalOnly: true, // marshals with CDATA
+ },
+ {
+ ExpectXML: `<IndirCDATA><T1></T1><T2></T2></IndirCDATA>`,
+ Value: &IndirCDATA{CDATA: stringptr("")},
+ },
+ {
+ ExpectXML: `<IndirCDATA><T1></T1><T2></T2></IndirCDATA>`,
+ Value: &IndirCDATA{CDATA: nil},
+ MarshalOnly: true, // unmarshal leaves CDATA=stringptr("")
+ },
+ {
+ ExpectXML: `<IfaceCDATA><T1></T1><![CDATA[hi]]><T2></T2></IfaceCDATA>`,
+ Value: &IfaceCDATA{CDATA: string("hi")},
+ UnmarshalError: "cannot unmarshal into interface {}",
+ },
+ {
+ ExpectXML: `<IfaceCDATA><T1></T1>hi<T2></T2></IfaceCDATA>`,
+ Value: &IfaceCDATA{CDATA: string("hi")},
+ UnmarshalOnly: true, // marshals with CDATA
+ UnmarshalError: "cannot unmarshal into interface {}",
+ },
+ {
+ ExpectXML: `<IfaceCDATA><T1></T1><T2></T2></IfaceCDATA>`,
+ Value: &IfaceCDATA{CDATA: string("")},
+ UnmarshalError: "cannot unmarshal into interface {}",
+ },
+ {
+ ExpectXML: `<IfaceCDATA><T1></T1><T2></T2></IfaceCDATA>`,
+ Value: &IfaceCDATA{CDATA: nil},
+ UnmarshalError: "cannot unmarshal into interface {}",
+ },
+ {
+ ExpectXML: `<DirectCDATA><T1></T1><![CDATA[hi]]><T2></T2></DirectCDATA>`,
+ Value: &DirectCDATA{CDATA: string("hi")},
+ },
+ {
+ ExpectXML: `<DirectCDATA><T1></T1>hi<T2></T2></DirectCDATA>`,
+ Value: &DirectCDATA{CDATA: string("hi")},
+ UnmarshalOnly: true, // marshals with CDATA
+ },
+ {
+ ExpectXML: `<DirectCDATA><T1></T1><T2></T2></DirectCDATA>`,
+ Value: &DirectCDATA{CDATA: string("")},
+ },
+ {
+ ExpectXML: `<IndirInnerXML><T1></T1><hi/><T2></T2></IndirInnerXML>`,
+ Value: &IndirInnerXML{InnerXML: stringptr("<hi/>")},
+ MarshalOnly: true,
+ },
+ {
+ ExpectXML: `<IndirInnerXML><T1></T1><T2></T2></IndirInnerXML>`,
+ Value: &IndirInnerXML{InnerXML: stringptr("")},
+ MarshalOnly: true,
+ },
+ {
+ ExpectXML: `<IndirInnerXML><T1></T1><T2></T2></IndirInnerXML>`,
+ Value: &IndirInnerXML{InnerXML: nil},
+ },
+ {
+ ExpectXML: `<IndirInnerXML><T1></T1><hi/><T2></T2></IndirInnerXML>`,
+ Value: &IndirInnerXML{InnerXML: nil},
+ UnmarshalOnly: true,
+ },
+ {
+ ExpectXML: `<IfaceInnerXML><T1></T1><hi/><T2></T2></IfaceInnerXML>`,
+ Value: &IfaceInnerXML{InnerXML: "<hi/>"},
+ MarshalOnly: true,
+ },
+ {
+ ExpectXML: `<IfaceInnerXML><T1></T1><hi/><T2></T2></IfaceInnerXML>`,
+ Value: &IfaceInnerXML{InnerXML: nil},
+ UnmarshalOnly: true,
+ },
+ {
+ ExpectXML: `<IfaceInnerXML><T1></T1><T2></T2></IfaceInnerXML>`,
+ Value: &IfaceInnerXML{InnerXML: nil},
+ },
+ {
+ ExpectXML: `<IfaceInnerXML><T1></T1><T2></T2></IfaceInnerXML>`,
+ Value: &IfaceInnerXML{InnerXML: nil},
+ UnmarshalOnly: true,
+ },
+ {
+ ExpectXML: `<DirectInnerXML><T1></T1><hi/><T2></T2></DirectInnerXML>`,
+ Value: &DirectInnerXML{InnerXML: string("<hi/>")},
+ MarshalOnly: true,
+ },
+ {
+ ExpectXML: `<DirectInnerXML><T1></T1><hi/><T2></T2></DirectInnerXML>`,
+ Value: &DirectInnerXML{InnerXML: string("<T1></T1><hi/><T2></T2>")},
+ UnmarshalOnly: true,
+ },
+ {
+ ExpectXML: `<DirectInnerXML><T1></T1><T2></T2></DirectInnerXML>`,
+ Value: &DirectInnerXML{InnerXML: string("")},
+ MarshalOnly: true,
+ },
+ {
+ ExpectXML: `<DirectInnerXML><T1></T1><T2></T2></DirectInnerXML>`,
+ Value: &DirectInnerXML{InnerXML: string("<T1></T1><T2></T2>")},
+ UnmarshalOnly: true,
+ },
+ {
+ ExpectXML: `<IndirElement><T1></T1><Element>hi</Element><T2></T2></IndirElement>`,
+ Value: &IndirElement{Element: stringptr("hi")},
+ },
+ {
+ ExpectXML: `<IndirElement><T1></T1><Element></Element><T2></T2></IndirElement>`,
+ Value: &IndirElement{Element: stringptr("")},
+ },
+ {
+ ExpectXML: `<IndirElement><T1></T1><T2></T2></IndirElement>`,
+ Value: &IndirElement{Element: nil},
+ },
+ {
+ ExpectXML: `<IfaceElement><T1></T1><Element>hi</Element><T2></T2></IfaceElement>`,
+ Value: &IfaceElement{Element: "hi"},
+ MarshalOnly: true,
+ },
+ {
+ ExpectXML: `<IfaceElement><T1></T1><Element>hi</Element><T2></T2></IfaceElement>`,
+ Value: &IfaceElement{Element: nil},
+ UnmarshalOnly: true,
+ },
+ {
+ ExpectXML: `<IfaceElement><T1></T1><T2></T2></IfaceElement>`,
+ Value: &IfaceElement{Element: nil},
+ },
+ {
+ ExpectXML: `<IfaceElement><T1></T1><T2></T2></IfaceElement>`,
+ Value: &IfaceElement{Element: nil},
+ UnmarshalOnly: true,
+ },
+ {
+ ExpectXML: `<DirectElement><T1></T1><Element>hi</Element><T2></T2></DirectElement>`,
+ Value: &DirectElement{Element: string("hi")},
+ },
+ {
+ ExpectXML: `<DirectElement><T1></T1><Element></Element><T2></T2></DirectElement>`,
+ Value: &DirectElement{Element: string("")},
+ },
+ {
+ ExpectXML: `<IndirOmitEmpty><T1></T1><OmitEmpty>hi</OmitEmpty><T2></T2></IndirOmitEmpty>`,
+ Value: &IndirOmitEmpty{OmitEmpty: stringptr("hi")},
+ },
+ {
+ // Note: Changed in Go 1.8 to include <OmitEmpty> element (because x.OmitEmpty != nil).
+ ExpectXML: `<IndirOmitEmpty><T1></T1><OmitEmpty></OmitEmpty><T2></T2></IndirOmitEmpty>`,
+ Value: &IndirOmitEmpty{OmitEmpty: stringptr("")},
+ MarshalOnly: true,
+ },
+ {
+ ExpectXML: `<IndirOmitEmpty><T1></T1><OmitEmpty></OmitEmpty><T2></T2></IndirOmitEmpty>`,
+ Value: &IndirOmitEmpty{OmitEmpty: stringptr("")},
+ UnmarshalOnly: true,
+ },
+ {
+ ExpectXML: `<IndirOmitEmpty><T1></T1><T2></T2></IndirOmitEmpty>`,
+ Value: &IndirOmitEmpty{OmitEmpty: nil},
+ },
+ {
+ ExpectXML: `<IfaceOmitEmpty><T1></T1><OmitEmpty>hi</OmitEmpty><T2></T2></IfaceOmitEmpty>`,
+ Value: &IfaceOmitEmpty{OmitEmpty: "hi"},
+ MarshalOnly: true,
+ },
+ {
+ ExpectXML: `<IfaceOmitEmpty><T1></T1><OmitEmpty>hi</OmitEmpty><T2></T2></IfaceOmitEmpty>`,
+ Value: &IfaceOmitEmpty{OmitEmpty: nil},
+ UnmarshalOnly: true,
+ },
+ {
+ ExpectXML: `<IfaceOmitEmpty><T1></T1><T2></T2></IfaceOmitEmpty>`,
+ Value: &IfaceOmitEmpty{OmitEmpty: nil},
+ },
+ {
+ ExpectXML: `<IfaceOmitEmpty><T1></T1><T2></T2></IfaceOmitEmpty>`,
+ Value: &IfaceOmitEmpty{OmitEmpty: nil},
+ UnmarshalOnly: true,
+ },
+ {
+ ExpectXML: `<DirectOmitEmpty><T1></T1><OmitEmpty>hi</OmitEmpty><T2></T2></DirectOmitEmpty>`,
+ Value: &DirectOmitEmpty{OmitEmpty: string("hi")},
+ },
+ {
+ ExpectXML: `<DirectOmitEmpty><T1></T1><T2></T2></DirectOmitEmpty>`,
+ Value: &DirectOmitEmpty{OmitEmpty: string("")},
+ },
+ {
+ ExpectXML: `<IndirAny><T1></T1><Any>hi</Any><T2></T2></IndirAny>`,
+ Value: &IndirAny{Any: stringptr("hi")},
+ },
+ {
+ ExpectXML: `<IndirAny><T1></T1><Any></Any><T2></T2></IndirAny>`,
+ Value: &IndirAny{Any: stringptr("")},
+ },
+ {
+ ExpectXML: `<IndirAny><T1></T1><T2></T2></IndirAny>`,
+ Value: &IndirAny{Any: nil},
+ },
+ {
+ ExpectXML: `<IfaceAny><T1></T1><Any>hi</Any><T2></T2></IfaceAny>`,
+ Value: &IfaceAny{Any: "hi"},
+ MarshalOnly: true,
+ },
+ {
+ ExpectXML: `<IfaceAny><T1></T1><Any>hi</Any><T2></T2></IfaceAny>`,
+ Value: &IfaceAny{Any: nil},
+ UnmarshalOnly: true,
+ },
+ {
+ ExpectXML: `<IfaceAny><T1></T1><T2></T2></IfaceAny>`,
+ Value: &IfaceAny{Any: nil},
+ },
+ {
+ ExpectXML: `<IfaceAny><T1></T1><T2></T2></IfaceAny>`,
+ Value: &IfaceAny{Any: nil},
+ UnmarshalOnly: true,
+ },
+ {
+ ExpectXML: `<DirectAny><T1></T1><Any>hi</Any><T2></T2></DirectAny>`,
+ Value: &DirectAny{Any: string("hi")},
+ },
+ {
+ ExpectXML: `<DirectAny><T1></T1><Any></Any><T2></T2></DirectAny>`,
+ Value: &DirectAny{Any: string("")},
+ },
+ {
+ ExpectXML: `<IndirFoo><T1></T1><Foo>hi</Foo><T2></T2></IndirFoo>`,
+ Value: &IndirAny{Any: stringptr("hi")},
+ UnmarshalOnly: true,
+ },
+ {
+ ExpectXML: `<IndirFoo><T1></T1><Foo></Foo><T2></T2></IndirFoo>`,
+ Value: &IndirAny{Any: stringptr("")},
+ UnmarshalOnly: true,
+ },
+ {
+ ExpectXML: `<IndirFoo><T1></T1><T2></T2></IndirFoo>`,
+ Value: &IndirAny{Any: nil},
+ UnmarshalOnly: true,
+ },
+ {
+ ExpectXML: `<IfaceFoo><T1></T1><Foo>hi</Foo><T2></T2></IfaceFoo>`,
+ Value: &IfaceAny{Any: nil},
+ UnmarshalOnly: true,
+ },
+ {
+ ExpectXML: `<IfaceFoo><T1></T1><T2></T2></IfaceFoo>`,
+ Value: &IfaceAny{Any: nil},
+ UnmarshalOnly: true,
+ },
+ {
+ ExpectXML: `<IfaceFoo><T1></T1><T2></T2></IfaceFoo>`,
+ Value: &IfaceAny{Any: nil},
+ UnmarshalOnly: true,
+ },
+ {
+ ExpectXML: `<DirectFoo><T1></T1><Foo>hi</Foo><T2></T2></DirectFoo>`,
+ Value: &DirectAny{Any: string("hi")},
+ UnmarshalOnly: true,
+ },
+ {
+ ExpectXML: `<DirectFoo><T1></T1><Foo></Foo><T2></T2></DirectFoo>`,
+ Value: &DirectAny{Any: string("")},
+ UnmarshalOnly: true,
+ },
+}
+
+func TestMarshal(t *testing.T) {
+ for idx, test := range marshalTests {
+ if test.UnmarshalOnly {
+ continue
+ }
+
+ t.Run(fmt.Sprintf("%d", idx), func(t *testing.T) {
+ data, err := Marshal(test.Value)
+ if err != nil {
+ if test.MarshalError == "" {
+ t.Errorf("marshal(%#v): %s", test.Value, err)
+ return
+ }
+ if !strings.Contains(err.Error(), test.MarshalError) {
+ t.Errorf("marshal(%#v): %s, want %q", test.Value, err, test.MarshalError)
+ }
+ return
+ }
+ if test.MarshalError != "" {
+ t.Errorf("Marshal succeeded, want error %q", test.MarshalError)
+ return
+ }
+ if got, want := string(data), test.ExpectXML; got != want {
+ if strings.Contains(want, "\n") {
+ t.Errorf("marshal(%#v):\nHAVE:\n%s\nWANT:\n%s", test.Value, got, want)
+ } else {
+ t.Errorf("marshal(%#v):\nhave %#q\nwant %#q", test.Value, got, want)
+ }
+ }
+ })
+ }
+}
+
+type AttrParent struct {
+ X string `xml:"X>Y,attr"`
+}
+
+type BadAttr struct {
+ Name map[string]string `xml:"name,attr"`
+}
+
+var marshalErrorTests = []struct {
+ Value any
+ Err string
+ Kind reflect.Kind
+}{
+ {
+ Value: make(chan bool),
+ Err: "xml: unsupported type: chan bool",
+ Kind: reflect.Chan,
+ },
+ {
+ Value: map[string]string{
+ "question": "What do you get when you multiply six by nine?",
+ "answer": "42",
+ },
+ Err: "xml: unsupported type: map[string]string",
+ Kind: reflect.Map,
+ },
+ {
+ Value: map[*Ship]bool{nil: false},
+ Err: "xml: unsupported type: map[*xml.Ship]bool",
+ Kind: reflect.Map,
+ },
+ {
+ Value: &Domain{Comment: []byte("f--bar")},
+ Err: `xml: comments must not contain "--"`,
+ },
+ // Reject parent chain with attr, never worked; see golang.org/issue/5033.
+ {
+ Value: &AttrParent{},
+ Err: `xml: X>Y chain not valid with attr flag`,
+ },
+ {
+ Value: BadAttr{map[string]string{"X": "Y"}},
+ Err: `xml: unsupported type: map[string]string`,
+ },
+}
+
+var marshalIndentTests = []struct {
+ Value any
+ Prefix string
+ Indent string
+ ExpectXML string
+}{
+ {
+ Value: &SecretAgent{
+ Handle: "007",
+ Identity: "James Bond",
+ Obfuscate: "<redacted/>",
+ },
+ Prefix: "",
+ Indent: "\t",
+ ExpectXML: fmt.Sprintf("<agent handle=\"007\">\n\t<Identity>James Bond</Identity><redacted/>\n</agent>"),
+ },
+}
+
+func TestMarshalErrors(t *testing.T) {
+ for idx, test := range marshalErrorTests {
+ data, err := Marshal(test.Value)
+ if err == nil {
+ t.Errorf("#%d: marshal(%#v) = [success] %q, want error %v", idx, test.Value, data, test.Err)
+ continue
+ }
+ if err.Error() != test.Err {
+ t.Errorf("#%d: marshal(%#v) = [error] %v, want %v", idx, test.Value, err, test.Err)
+ }
+ if test.Kind != reflect.Invalid {
+ if kind := err.(*UnsupportedTypeError).Type.Kind(); kind != test.Kind {
+ t.Errorf("#%d: marshal(%#v) = [error kind] %s, want %s", idx, test.Value, kind, test.Kind)
+ }
+ }
+ }
+}
+
+// Do invertibility testing on the various structures that we test
+func TestUnmarshal(t *testing.T) {
+ for i, test := range marshalTests {
+ if test.MarshalOnly {
+ continue
+ }
+ if _, ok := test.Value.(*Plain); ok {
+ continue
+ }
+ if test.ExpectXML == `<top>`+
+ `<x><b xmlns="space">b</b>`+
+ `<b xmlns="space1">b1</b></x>`+
+ `</top>` {
+ // TODO(rogpeppe): re-enable this test in
+ // https://go-review.googlesource.com/#/c/5910/
+ continue
+ }
+
+ vt := reflect.TypeOf(test.Value)
+ dest := reflect.New(vt.Elem()).Interface()
+ err := Unmarshal([]byte(test.ExpectXML), dest)
+
+ t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
+ switch fix := dest.(type) {
+ case *Feed:
+ fix.Author.InnerXML = ""
+ for i := range fix.Entry {
+ fix.Entry[i].Author.InnerXML = ""
+ }
+ }
+
+ if err != nil {
+ if test.UnmarshalError == "" {
+ t.Errorf("unmarshal(%#v): %s", test.ExpectXML, err)
+ return
+ }
+ if !strings.Contains(err.Error(), test.UnmarshalError) {
+ t.Errorf("unmarshal(%#v): %s, want %q", test.ExpectXML, err, test.UnmarshalError)
+ }
+ return
+ }
+ if got, want := dest, test.Value; !reflect.DeepEqual(got, want) {
+ t.Errorf("unmarshal(%q):\nhave %#v\nwant %#v", test.ExpectXML, got, want)
+ }
+ })
+ }
+}
+
+func TestMarshalIndent(t *testing.T) {
+ for i, test := range marshalIndentTests {
+ data, err := MarshalIndent(test.Value, test.Prefix, test.Indent)
+ if err != nil {
+ t.Errorf("#%d: Error: %s", i, err)
+ continue
+ }
+ if got, want := string(data), test.ExpectXML; got != want {
+ t.Errorf("#%d: MarshalIndent:\nGot:%s\nWant:\n%s", i, got, want)
+ }
+ }
+}
+
+type limitedBytesWriter struct {
+ w io.Writer
+ remain int // until writes fail
+}
+
+func (lw *limitedBytesWriter) Write(p []byte) (n int, err error) {
+ if lw.remain <= 0 {
+ println("error")
+ return 0, errors.New("write limit hit")
+ }
+ if len(p) > lw.remain {
+ p = p[:lw.remain]
+ n, _ = lw.w.Write(p)
+ lw.remain = 0
+ return n, errors.New("write limit hit")
+ }
+ n, err = lw.w.Write(p)
+ lw.remain -= n
+ return n, err
+}
+
+func TestMarshalWriteErrors(t *testing.T) {
+ var buf bytes.Buffer
+ const writeCap = 1024
+ w := &limitedBytesWriter{&buf, writeCap}
+ enc := NewEncoder(w)
+ var err error
+ var i int
+ const n = 4000
+ for i = 1; i <= n; i++ {
+ err = enc.Encode(&Passenger{
+ Name: []string{"Alice", "Bob"},
+ Weight: 5,
+ })
+ if err != nil {
+ break
+ }
+ }
+ if err == nil {
+ t.Error("expected an error")
+ }
+ if i == n {
+ t.Errorf("expected to fail before the end")
+ }
+ if buf.Len() != writeCap {
+ t.Errorf("buf.Len() = %d; want %d", buf.Len(), writeCap)
+ }
+}
+
+func TestMarshalWriteIOErrors(t *testing.T) {
+ enc := NewEncoder(errWriter{})
+
+ expectErr := "unwritable"
+ err := enc.Encode(&Passenger{})
+ if err == nil || err.Error() != expectErr {
+ t.Errorf("EscapeTest = [error] %v, want %v", err, expectErr)
+ }
+}
+
+func TestMarshalFlush(t *testing.T) {
+ var buf strings.Builder
+ enc := NewEncoder(&buf)
+ if err := enc.EncodeToken(CharData("hello world")); err != nil {
+ t.Fatalf("enc.EncodeToken: %v", err)
+ }
+ if buf.Len() > 0 {
+ t.Fatalf("enc.EncodeToken caused actual write: %q", buf.String())
+ }
+ if err := enc.Flush(); err != nil {
+ t.Fatalf("enc.Flush: %v", err)
+ }
+ if buf.String() != "hello world" {
+ t.Fatalf("after enc.Flush, buf.String() = %q, want %q", buf.String(), "hello world")
+ }
+}
+
+func BenchmarkMarshal(b *testing.B) {
+ b.ReportAllocs()
+ b.RunParallel(func(pb *testing.PB) {
+ for pb.Next() {
+ Marshal(atomValue)
+ }
+ })
+}
+
+func BenchmarkUnmarshal(b *testing.B) {
+ b.ReportAllocs()
+ xml := []byte(atomXML)
+ b.RunParallel(func(pb *testing.PB) {
+ for pb.Next() {
+ Unmarshal(xml, &Feed{})
+ }
+ })
+}
+
+// golang.org/issue/6556
+func TestStructPointerMarshal(t *testing.T) {
+ type A struct {
+ XMLName string `xml:"a"`
+ B []any
+ }
+ type C struct {
+ XMLName Name
+ Value string `xml:"value"`
+ }
+
+ a := new(A)
+ a.B = append(a.B, &C{
+ XMLName: Name{Local: "c"},
+ Value: "x",
+ })
+
+ b, err := Marshal(a)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if x := string(b); x != "<a><c><value>x</value></c></a>" {
+ t.Fatal(x)
+ }
+ var v A
+ err = Unmarshal(b, &v)
+ if err != nil {
+ t.Fatal(err)
+ }
+}
+
+var encodeTokenTests = []struct {
+ desc string
+ toks []Token
+ want string
+ err string
+}{{
+ desc: "start element with name space",
+ toks: []Token{
+ StartElement{Name{"space", "local"}, nil},
+ },
+ want: `<local xmlns="space">`,
+}, {
+ desc: "start element with no name",
+ toks: []Token{
+ StartElement{Name{"space", ""}, nil},
+ },
+ err: "xml: start tag with no name",
+}, {
+ desc: "end element with no name",
+ toks: []Token{
+ EndElement{Name{"space", ""}},
+ },
+ err: "xml: end tag with no name",
+}, {
+ desc: "char data",
+ toks: []Token{
+ CharData("foo"),
+ },
+ want: `foo`,
+}, {
+ desc: "char data with escaped chars",
+ toks: []Token{
+ CharData(" \t\n"),
+ },
+ want: " &#x9;\n",
+}, {
+ desc: "comment",
+ toks: []Token{
+ Comment("foo"),
+ },
+ want: `<!--foo-->`,
+}, {
+ desc: "comment with invalid content",
+ toks: []Token{
+ Comment("foo-->"),
+ },
+ err: "xml: EncodeToken of Comment containing --> marker",
+}, {
+ desc: "proc instruction",
+ toks: []Token{
+ ProcInst{"Target", []byte("Instruction")},
+ },
+ want: `<?Target Instruction?>`,
+}, {
+ desc: "proc instruction with empty target",
+ toks: []Token{
+ ProcInst{"", []byte("Instruction")},
+ },
+ err: "xml: EncodeToken of ProcInst with invalid Target",
+}, {
+ desc: "proc instruction with bad content",
+ toks: []Token{
+ ProcInst{"", []byte("Instruction?>")},
+ },
+ err: "xml: EncodeToken of ProcInst with invalid Target",
+}, {
+ desc: "directive",
+ toks: []Token{
+ Directive("foo"),
+ },
+ want: `<!foo>`,
+}, {
+ desc: "more complex directive",
+ toks: []Token{
+ Directive("DOCTYPE doc [ <!ELEMENT doc '>'> <!-- com>ment --> ]"),
+ },
+ want: `<!DOCTYPE doc [ <!ELEMENT doc '>'> <!-- com>ment --> ]>`,
+}, {
+ desc: "directive instruction with bad name",
+ toks: []Token{
+ Directive("foo>"),
+ },
+ err: "xml: EncodeToken of Directive containing wrong < or > markers",
+}, {
+ desc: "end tag without start tag",
+ toks: []Token{
+ EndElement{Name{"foo", "bar"}},
+ },
+ err: "xml: end tag </bar> without start tag",
+}, {
+ desc: "mismatching end tag local name",
+ toks: []Token{
+ StartElement{Name{"", "foo"}, nil},
+ EndElement{Name{"", "bar"}},
+ },
+ err: "xml: end tag </bar> does not match start tag <foo>",
+ want: `<foo>`,
+}, {
+ desc: "mismatching end tag namespace",
+ toks: []Token{
+ StartElement{Name{"space", "foo"}, nil},
+ EndElement{Name{"another", "foo"}},
+ },
+ err: "xml: end tag </foo> in namespace another does not match start tag <foo> in namespace space",
+ want: `<foo xmlns="space">`,
+}, {
+ desc: "start element with explicit namespace",
+ toks: []Token{
+ StartElement{Name{"space", "local"}, []Attr{
+ {Name{"xmlns", "x"}, "space"},
+ {Name{"space", "foo"}, "value"},
+ }},
+ },
+ want: `<local xmlns="space" xmlns:_xmlns="xmlns" _xmlns:x="space" xmlns:space="space" space:foo="value">`,
+}, {
+ desc: "start element with explicit namespace and colliding prefix",
+ toks: []Token{
+ StartElement{Name{"space", "local"}, []Attr{
+ {Name{"xmlns", "x"}, "space"},
+ {Name{"space", "foo"}, "value"},
+ {Name{"x", "bar"}, "other"},
+ }},
+ },
+ want: `<local xmlns="space" xmlns:_xmlns="xmlns" _xmlns:x="space" xmlns:space="space" space:foo="value" xmlns:x="x" x:bar="other">`,
+}, {
+ desc: "start element using previously defined namespace",
+ toks: []Token{
+ StartElement{Name{"", "local"}, []Attr{
+ {Name{"xmlns", "x"}, "space"},
+ }},
+ StartElement{Name{"space", "foo"}, []Attr{
+ {Name{"space", "x"}, "y"},
+ }},
+ },
+ want: `<local xmlns:_xmlns="xmlns" _xmlns:x="space"><foo xmlns="space" xmlns:space="space" space:x="y">`,
+}, {
+ desc: "nested name space with same prefix",
+ toks: []Token{
+ StartElement{Name{"", "foo"}, []Attr{
+ {Name{"xmlns", "x"}, "space1"},
+ }},
+ StartElement{Name{"", "foo"}, []Attr{
+ {Name{"xmlns", "x"}, "space2"},
+ }},
+ StartElement{Name{"", "foo"}, []Attr{
+ {Name{"space1", "a"}, "space1 value"},
+ {Name{"space2", "b"}, "space2 value"},
+ }},
+ EndElement{Name{"", "foo"}},
+ EndElement{Name{"", "foo"}},
+ StartElement{Name{"", "foo"}, []Attr{
+ {Name{"space1", "a"}, "space1 value"},
+ {Name{"space2", "b"}, "space2 value"},
+ }},
+ },
+ want: `<foo xmlns:_xmlns="xmlns" _xmlns:x="space1"><foo _xmlns:x="space2"><foo xmlns:space1="space1" space1:a="space1 value" xmlns:space2="space2" space2:b="space2 value"></foo></foo><foo xmlns:space1="space1" space1:a="space1 value" xmlns:space2="space2" space2:b="space2 value">`,
+}, {
+ desc: "start element defining several prefixes for the same name space",
+ toks: []Token{
+ StartElement{Name{"space", "foo"}, []Attr{
+ {Name{"xmlns", "a"}, "space"},
+ {Name{"xmlns", "b"}, "space"},
+ {Name{"space", "x"}, "value"},
+ }},
+ },
+ want: `<foo xmlns="space" xmlns:_xmlns="xmlns" _xmlns:a="space" _xmlns:b="space" xmlns:space="space" space:x="value">`,
+}, {
+ desc: "nested element redefines name space",
+ toks: []Token{
+ StartElement{Name{"", "foo"}, []Attr{
+ {Name{"xmlns", "x"}, "space"},
+ }},
+ StartElement{Name{"space", "foo"}, []Attr{
+ {Name{"xmlns", "y"}, "space"},
+ {Name{"space", "a"}, "value"},
+ }},
+ },
+ want: `<foo xmlns:_xmlns="xmlns" _xmlns:x="space"><foo xmlns="space" _xmlns:y="space" xmlns:space="space" space:a="value">`,
+}, {
+ desc: "nested element creates alias for default name space",
+ toks: []Token{
+ StartElement{Name{"space", "foo"}, []Attr{
+ {Name{"", "xmlns"}, "space"},
+ }},
+ StartElement{Name{"space", "foo"}, []Attr{
+ {Name{"xmlns", "y"}, "space"},
+ {Name{"space", "a"}, "value"},
+ }},
+ },
+ want: `<foo xmlns="space" xmlns="space"><foo xmlns="space" xmlns:_xmlns="xmlns" _xmlns:y="space" xmlns:space="space" space:a="value">`,
+}, {
+ desc: "nested element defines default name space with existing prefix",
+ toks: []Token{
+ StartElement{Name{"", "foo"}, []Attr{
+ {Name{"xmlns", "x"}, "space"},
+ }},
+ StartElement{Name{"space", "foo"}, []Attr{
+ {Name{"", "xmlns"}, "space"},
+ {Name{"space", "a"}, "value"},
+ }},
+ },
+ want: `<foo xmlns:_xmlns="xmlns" _xmlns:x="space"><foo xmlns="space" xmlns="space" xmlns:space="space" space:a="value">`,
+}, {
+ desc: "nested element uses empty attribute name space when default ns defined",
+ toks: []Token{
+ StartElement{Name{"space", "foo"}, []Attr{
+ {Name{"", "xmlns"}, "space"},
+ }},
+ StartElement{Name{"space", "foo"}, []Attr{
+ {Name{"", "attr"}, "value"},
+ }},
+ },
+ want: `<foo xmlns="space" xmlns="space"><foo xmlns="space" attr="value">`,
+}, {
+ desc: "redefine xmlns",
+ toks: []Token{
+ StartElement{Name{"", "foo"}, []Attr{
+ {Name{"foo", "xmlns"}, "space"},
+ }},
+ },
+ want: `<foo xmlns:foo="foo" foo:xmlns="space">`,
+}, {
+ desc: "xmlns with explicit name space #1",
+ toks: []Token{
+ StartElement{Name{"space", "foo"}, []Attr{
+ {Name{"xml", "xmlns"}, "space"},
+ }},
+ },
+ want: `<foo xmlns="space" xmlns:_xml="xml" _xml:xmlns="space">`,
+}, {
+ desc: "xmlns with explicit name space #2",
+ toks: []Token{
+ StartElement{Name{"space", "foo"}, []Attr{
+ {Name{xmlURL, "xmlns"}, "space"},
+ }},
+ },
+ want: `<foo xmlns="space" xml:xmlns="space">`,
+}, {
+ desc: "empty name space declaration is ignored",
+ toks: []Token{
+ StartElement{Name{"", "foo"}, []Attr{
+ {Name{"xmlns", "foo"}, ""},
+ }},
+ },
+ want: `<foo xmlns:_xmlns="xmlns" _xmlns:foo="">`,
+}, {
+ desc: "attribute with no name is ignored",
+ toks: []Token{
+ StartElement{Name{"", "foo"}, []Attr{
+ {Name{"", ""}, "value"},
+ }},
+ },
+ want: `<foo>`,
+}, {
+ desc: "namespace URL with non-valid name",
+ toks: []Token{
+ StartElement{Name{"/34", "foo"}, []Attr{
+ {Name{"/34", "x"}, "value"},
+ }},
+ },
+ want: `<foo xmlns="/34" xmlns:_="/34" _:x="value">`,
+}, {
+ desc: "nested element resets default namespace to empty",
+ toks: []Token{
+ StartElement{Name{"space", "foo"}, []Attr{
+ {Name{"", "xmlns"}, "space"},
+ }},
+ StartElement{Name{"", "foo"}, []Attr{
+ {Name{"", "xmlns"}, ""},
+ {Name{"", "x"}, "value"},
+ {Name{"space", "x"}, "value"},
+ }},
+ },
+ want: `<foo xmlns="space" xmlns="space"><foo xmlns="" x="value" xmlns:space="space" space:x="value">`,
+}, {
+ desc: "nested element requires empty default name space",
+ toks: []Token{
+ StartElement{Name{"space", "foo"}, []Attr{
+ {Name{"", "xmlns"}, "space"},
+ }},
+ StartElement{Name{"", "foo"}, nil},
+ },
+ want: `<foo xmlns="space" xmlns="space"><foo>`,
+}, {
+ desc: "attribute uses name space from xmlns",
+ toks: []Token{
+ StartElement{Name{"some/space", "foo"}, []Attr{
+ {Name{"", "attr"}, "value"},
+ {Name{"some/space", "other"}, "other value"},
+ }},
+ },
+ want: `<foo xmlns="some/space" attr="value" xmlns:space="some/space" space:other="other value">`,
+}, {
+ desc: "default name space should not be used by attributes",
+ toks: []Token{
+ StartElement{Name{"space", "foo"}, []Attr{
+ {Name{"", "xmlns"}, "space"},
+ {Name{"xmlns", "bar"}, "space"},
+ {Name{"space", "baz"}, "foo"},
+ }},
+ StartElement{Name{"space", "baz"}, nil},
+ EndElement{Name{"space", "baz"}},
+ EndElement{Name{"space", "foo"}},
+ },
+ want: `<foo xmlns="space" xmlns="space" xmlns:_xmlns="xmlns" _xmlns:bar="space" xmlns:space="space" space:baz="foo"><baz xmlns="space"></baz></foo>`,
+}, {
+ desc: "default name space not used by attributes, not explicitly defined",
+ toks: []Token{
+ StartElement{Name{"space", "foo"}, []Attr{
+ {Name{"", "xmlns"}, "space"},
+ {Name{"space", "baz"}, "foo"},
+ }},
+ StartElement{Name{"space", "baz"}, nil},
+ EndElement{Name{"space", "baz"}},
+ EndElement{Name{"space", "foo"}},
+ },
+ want: `<foo xmlns="space" xmlns="space" xmlns:space="space" space:baz="foo"><baz xmlns="space"></baz></foo>`,
+}, {
+ desc: "impossible xmlns declaration",
+ toks: []Token{
+ StartElement{Name{"", "foo"}, []Attr{
+ {Name{"", "xmlns"}, "space"},
+ }},
+ StartElement{Name{"space", "bar"}, []Attr{
+ {Name{"space", "attr"}, "value"},
+ }},
+ },
+ want: `<foo xmlns="space"><bar xmlns="space" xmlns:space="space" space:attr="value">`,
+}, {
+ desc: "reserved namespace prefix -- all lower case",
+ toks: []Token{
+ StartElement{Name{"", "foo"}, []Attr{
+ {Name{"http://www.w3.org/2001/xmlSchema-instance", "nil"}, "true"},
+ }},
+ },
+ want: `<foo xmlns:_xmlSchema-instance="http://www.w3.org/2001/xmlSchema-instance" _xmlSchema-instance:nil="true">`,
+}, {
+ desc: "reserved namespace prefix -- all upper case",
+ toks: []Token{
+ StartElement{Name{"", "foo"}, []Attr{
+ {Name{"http://www.w3.org/2001/XMLSchema-instance", "nil"}, "true"},
+ }},
+ },
+ want: `<foo xmlns:_XMLSchema-instance="http://www.w3.org/2001/XMLSchema-instance" _XMLSchema-instance:nil="true">`,
+}, {
+ desc: "reserved namespace prefix -- all mixed case",
+ toks: []Token{
+ StartElement{Name{"", "foo"}, []Attr{
+ {Name{"http://www.w3.org/2001/XmLSchema-instance", "nil"}, "true"},
+ }},
+ },
+ want: `<foo xmlns:_XmLSchema-instance="http://www.w3.org/2001/XmLSchema-instance" _XmLSchema-instance:nil="true">`,
+}}
+
+func TestEncodeToken(t *testing.T) {
+loop:
+ for i, tt := range encodeTokenTests {
+ var buf strings.Builder
+ enc := NewEncoder(&buf)
+ var err error
+ for j, tok := range tt.toks {
+ err = enc.EncodeToken(tok)
+ if err != nil && j < len(tt.toks)-1 {
+ t.Errorf("#%d %s token #%d: %v", i, tt.desc, j, err)
+ continue loop
+ }
+ }
+ errorf := func(f string, a ...any) {
+ t.Errorf("#%d %s token #%d:%s", i, tt.desc, len(tt.toks)-1, fmt.Sprintf(f, a...))
+ }
+ switch {
+ case tt.err != "" && err == nil:
+ errorf(" expected error; got none")
+ continue
+ case tt.err == "" && err != nil:
+ errorf(" got error: %v", err)
+ continue
+ case tt.err != "" && err != nil && tt.err != err.Error():
+ errorf(" error mismatch; got %v, want %v", err, tt.err)
+ continue
+ }
+ if err := enc.Flush(); err != nil {
+ errorf(" %v", err)
+ continue
+ }
+ if got := buf.String(); got != tt.want {
+ errorf("\ngot %v\nwant %v", got, tt.want)
+ continue
+ }
+ }
+}
+
+func TestProcInstEncodeToken(t *testing.T) {
+ var buf bytes.Buffer
+ enc := NewEncoder(&buf)
+
+ if err := enc.EncodeToken(ProcInst{"xml", []byte("Instruction")}); err != nil {
+ t.Fatalf("enc.EncodeToken: expected to be able to encode xml target ProcInst as first token, %s", err)
+ }
+
+ if err := enc.EncodeToken(ProcInst{"Target", []byte("Instruction")}); err != nil {
+ t.Fatalf("enc.EncodeToken: expected to be able to add non-xml target ProcInst")
+ }
+
+ if err := enc.EncodeToken(ProcInst{"xml", []byte("Instruction")}); err == nil {
+ t.Fatalf("enc.EncodeToken: expected to not be allowed to encode xml target ProcInst when not first token")
+ }
+}
+
+func TestDecodeEncode(t *testing.T) {
+ var in, out bytes.Buffer
+ in.WriteString(`<?xml version="1.0" encoding="UTF-8"?>
+<?Target Instruction?>
+<root>
+</root>
+`)
+ dec := NewDecoder(&in)
+ enc := NewEncoder(&out)
+ for tok, err := dec.Token(); err == nil; tok, err = dec.Token() {
+ err = enc.EncodeToken(tok)
+ if err != nil {
+ t.Fatalf("enc.EncodeToken: Unable to encode token (%#v), %v", tok, err)
+ }
+ }
+}
+
+// Issue 9796. Used to fail with GORACE="halt_on_error=1" -race.
+func TestRace9796(t *testing.T) {
+ type A struct{}
+ type B struct {
+ C []A `xml:"X>Y"`
+ }
+ var wg sync.WaitGroup
+ for i := 0; i < 2; i++ {
+ wg.Add(1)
+ go func() {
+ Marshal(B{[]A{{}}})
+ wg.Done()
+ }()
+ }
+ wg.Wait()
+}
+
+func TestIsValidDirective(t *testing.T) {
+ testOK := []string{
+ "<>",
+ "< < > >",
+ "<!DOCTYPE '<' '>' '>' <!--nothing-->>",
+ "<!DOCTYPE doc [ <!ELEMENT doc ANY> <!ELEMENT doc ANY> ]>",
+ "<!DOCTYPE doc [ <!ELEMENT doc \"ANY> '<' <!E\" LEMENT '>' doc ANY> ]>",
+ "<!DOCTYPE doc <!-- just>>>> a < comment --> [ <!ITEM anything> ] >",
+ }
+ testKO := []string{
+ "<",
+ ">",
+ "<!--",
+ "-->",
+ "< > > < < >",
+ "<!dummy <!-- > -->",
+ "<!DOCTYPE doc '>",
+ "<!DOCTYPE doc '>'",
+ "<!DOCTYPE doc <!--comment>",
+ }
+ for _, s := range testOK {
+ if !isValidDirective(Directive(s)) {
+ t.Errorf("Directive %q is expected to be valid", s)
+ }
+ }
+ for _, s := range testKO {
+ if isValidDirective(Directive(s)) {
+ t.Errorf("Directive %q is expected to be invalid", s)
+ }
+ }
+}
+
+// Issue 11719. EncodeToken used to silently eat tokens with an invalid type.
+func TestSimpleUseOfEncodeToken(t *testing.T) {
+ var buf strings.Builder
+ enc := NewEncoder(&buf)
+ if err := enc.EncodeToken(&StartElement{Name: Name{"", "object1"}}); err == nil {
+ t.Errorf("enc.EncodeToken: pointer type should be rejected")
+ }
+ if err := enc.EncodeToken(&EndElement{Name: Name{"", "object1"}}); err == nil {
+ t.Errorf("enc.EncodeToken: pointer type should be rejected")
+ }
+ if err := enc.EncodeToken(StartElement{Name: Name{"", "object2"}}); err != nil {
+ t.Errorf("enc.EncodeToken: StartElement %s", err)
+ }
+ if err := enc.EncodeToken(EndElement{Name: Name{"", "object2"}}); err != nil {
+ t.Errorf("enc.EncodeToken: EndElement %s", err)
+ }
+ if err := enc.EncodeToken(Universe{}); err == nil {
+ t.Errorf("enc.EncodeToken: invalid type not caught")
+ }
+ if err := enc.Flush(); err != nil {
+ t.Errorf("enc.Flush: %s", err)
+ }
+ if buf.Len() == 0 {
+ t.Errorf("enc.EncodeToken: empty buffer")
+ }
+ want := "<object2></object2>"
+ if buf.String() != want {
+ t.Errorf("enc.EncodeToken: expected %q; got %q", want, buf.String())
+ }
+}
+
+// Issue 16158. Decoder.unmarshalAttr ignores the return value of copyValue.
+func TestIssue16158(t *testing.T) {
+ const data = `<foo b="HELLOWORLD"></foo>`
+ err := Unmarshal([]byte(data), &struct {
+ B byte `xml:"b,attr,omitempty"`
+ }{})
+ if err == nil {
+ t.Errorf("Unmarshal: expected error, got nil")
+ }
+}
+
+// Issue 20953. Crash on invalid XMLName attribute.
+
+type InvalidXMLName struct {
+ XMLName Name `xml:"error"`
+ Type struct {
+ XMLName Name `xml:"type,attr"`
+ }
+}
+
+func TestInvalidXMLName(t *testing.T) {
+ var buf bytes.Buffer
+ enc := NewEncoder(&buf)
+ if err := enc.Encode(InvalidXMLName{}); err == nil {
+ t.Error("unexpected success")
+ } else if want := "invalid tag"; !strings.Contains(err.Error(), want) {
+ t.Errorf("error %q does not contain %q", err, want)
+ }
+}
+
+// Issue 50164. Crash on zero value XML attribute.
+type LayerOne struct {
+ XMLName Name `xml:"l1"`
+
+ Value *float64 `xml:"value,omitempty"`
+ *LayerTwo `xml:",omitempty"`
+}
+
+type LayerTwo struct {
+ ValueTwo *int `xml:"value_two,attr,omitempty"`
+}
+
+func TestMarshalZeroValue(t *testing.T) {
+ proofXml := `<l1><value>1.2345</value></l1>`
+ var l1 LayerOne
+ err := Unmarshal([]byte(proofXml), &l1)
+ if err != nil {
+ t.Fatalf("unmarshal XML error: %v", err)
+ }
+ want := float64(1.2345)
+ got := *l1.Value
+ if got != want {
+ t.Fatalf("unexpected unmarshal result, want %f but got %f", want, got)
+ }
+
+ // Marshal again (or Encode again)
+ // In issue 50164, here `Marshal(l1)` will panic because of the zero value of xml attribute ValueTwo `value_two`.
+ anotherXML, err := Marshal(l1)
+ if err != nil {
+ t.Fatalf("marshal XML error: %v", err)
+ }
+ if string(anotherXML) != proofXml {
+ t.Fatalf("unexpected unmarshal result, want %q but got %q", proofXml, anotherXML)
+ }
+}
+
+var closeTests = []struct {
+ desc string
+ toks []Token
+ want string
+ err string
+}{{
+ desc: "unclosed start element",
+ toks: []Token{
+ StartElement{Name{"", "foo"}, nil},
+ },
+ want: `<foo>`,
+ err: "unclosed tag <foo>",
+}, {
+ desc: "closed element",
+ toks: []Token{
+ StartElement{Name{"", "foo"}, nil},
+ EndElement{Name{"", "foo"}},
+ },
+ want: `<foo></foo>`,
+}, {
+ desc: "directive",
+ toks: []Token{
+ Directive("foo"),
+ },
+ want: `<!foo>`,
+}}
+
+func TestClose(t *testing.T) {
+ for _, tt := range closeTests {
+ tt := tt
+ t.Run(tt.desc, func(t *testing.T) {
+ var out strings.Builder
+ enc := NewEncoder(&out)
+ for j, tok := range tt.toks {
+ if err := enc.EncodeToken(tok); err != nil {
+ t.Fatalf("token #%d: %v", j, err)
+ }
+ }
+ err := enc.Close()
+ switch {
+ case tt.err != "" && err == nil:
+ t.Error(" expected error; got none")
+ case tt.err == "" && err != nil:
+ t.Errorf(" got error: %v", err)
+ case tt.err != "" && err != nil && tt.err != err.Error():
+ t.Errorf(" error mismatch; got %v, want %v", err, tt.err)
+ }
+ if got := out.String(); got != tt.want {
+ t.Errorf("\ngot %v\nwant %v", got, tt.want)
+ }
+ t.Log(enc.p.closed)
+ if err := enc.EncodeToken(Directive("foo")); err == nil {
+ t.Errorf("unexpected success when encoding after Close")
+ }
+ })
+ }
+}
diff --git a/src/encoding/xml/read.go b/src/encoding/xml/read.go
new file mode 100644
index 0000000..3cc4968
--- /dev/null
+++ b/src/encoding/xml/read.go
@@ -0,0 +1,777 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package xml
+
+import (
+ "bytes"
+ "encoding"
+ "errors"
+ "fmt"
+ "reflect"
+ "runtime"
+ "strconv"
+ "strings"
+)
+
+// BUG(rsc): Mapping between XML elements and data structures is inherently flawed:
+// an XML element is an order-dependent collection of anonymous
+// values, while a data structure is an order-independent collection
+// of named values.
+// See [encoding/json] for a textual representation more suitable
+// to data structures.
+
+// Unmarshal parses the XML-encoded data and stores the result in
+// the value pointed to by v, which must be an arbitrary struct,
+// slice, or string. Well-formed data that does not fit into v is
+// discarded.
+//
+// Because Unmarshal uses the reflect package, it can only assign
+// to exported (upper case) fields. Unmarshal uses a case-sensitive
+// comparison to match XML element names to tag values and struct
+// field names.
+//
+// Unmarshal maps an XML element to a struct using the following rules.
+// In the rules, the tag of a field refers to the value associated with the
+// key 'xml' in the struct field's tag (see the example above).
+//
+// - If the struct has a field of type []byte or string with tag
+// ",innerxml", Unmarshal accumulates the raw XML nested inside the
+// element in that field. The rest of the rules still apply.
+//
+// - If the struct has a field named XMLName of type Name,
+// Unmarshal records the element name in that field.
+//
+// - If the XMLName field has an associated tag of the form
+// "name" or "namespace-URL name", the XML element must have
+// the given name (and, optionally, name space) or else Unmarshal
+// returns an error.
+//
+// - If the XML element has an attribute whose name matches a
+// struct field name with an associated tag containing ",attr" or
+// the explicit name in a struct field tag of the form "name,attr",
+// Unmarshal records the attribute value in that field.
+//
+// - If the XML element has an attribute not handled by the previous
+// rule and the struct has a field with an associated tag containing
+// ",any,attr", Unmarshal records the attribute value in the first
+// such field.
+//
+// - If the XML element contains character data, that data is
+// accumulated in the first struct field that has tag ",chardata".
+// The struct field may have type []byte or string.
+// If there is no such field, the character data is discarded.
+//
+// - If the XML element contains comments, they are accumulated in
+// the first struct field that has tag ",comment". The struct
+// field may have type []byte or string. If there is no such
+// field, the comments are discarded.
+//
+// - If the XML element contains a sub-element whose name matches
+// the prefix of a tag formatted as "a" or "a>b>c", unmarshal
+// will descend into the XML structure looking for elements with the
+// given names, and will map the innermost elements to that struct
+// field. A tag starting with ">" is equivalent to one starting
+// with the field name followed by ">".
+//
+// - If the XML element contains a sub-element whose name matches
+// a struct field's XMLName tag and the struct field has no
+// explicit name tag as per the previous rule, unmarshal maps
+// the sub-element to that struct field.
+//
+// - If the XML element contains a sub-element whose name matches a
+// field without any mode flags (",attr", ",chardata", etc), Unmarshal
+// maps the sub-element to that struct field.
+//
+// - If the XML element contains a sub-element that hasn't matched any
+// of the above rules and the struct has a field with tag ",any",
+// unmarshal maps the sub-element to that struct field.
+//
+// - An anonymous struct field is handled as if the fields of its
+// value were part of the outer struct.
+//
+// - A struct field with tag "-" is never unmarshaled into.
+//
+// If Unmarshal encounters a field type that implements the Unmarshaler
+// interface, Unmarshal calls its UnmarshalXML method to produce the value from
+// the XML element. Otherwise, if the value implements
+// [encoding.TextUnmarshaler], Unmarshal calls that value's UnmarshalText method.
+//
+// Unmarshal maps an XML element to a string or []byte by saving the
+// concatenation of that element's character data in the string or
+// []byte. The saved []byte is never nil.
+//
+// Unmarshal maps an attribute value to a string or []byte by saving
+// the value in the string or slice.
+//
+// Unmarshal maps an attribute value to an [Attr] by saving the attribute,
+// including its name, in the Attr.
+//
+// Unmarshal maps an XML element or attribute value to a slice by
+// extending the length of the slice and mapping the element or attribute
+// to the newly created value.
+//
+// Unmarshal maps an XML element or attribute value to a bool by
+// setting it to the boolean value represented by the string. Whitespace
+// is trimmed and ignored.
+//
+// Unmarshal maps an XML element or attribute value to an integer or
+// floating-point field by setting the field to the result of
+// interpreting the string value in decimal. There is no check for
+// overflow. Whitespace is trimmed and ignored.
+//
+// Unmarshal maps an XML element to a Name by recording the element
+// name.
+//
+// Unmarshal maps an XML element to a pointer by setting the pointer
+// to a freshly allocated value and then mapping the element to that value.
+//
+// A missing element or empty attribute value will be unmarshaled as a zero value.
+// If the field is a slice, a zero value will be appended to the field. Otherwise, the
+// field will be set to its zero value.
+func Unmarshal(data []byte, v any) error {
+ return NewDecoder(bytes.NewReader(data)).Decode(v)
+}
+
+// Decode works like [Unmarshal], except it reads the decoder
+// stream to find the start element.
+func (d *Decoder) Decode(v any) error {
+ return d.DecodeElement(v, nil)
+}
+
+// DecodeElement works like [Unmarshal] except that it takes
+// a pointer to the start XML element to decode into v.
+// It is useful when a client reads some raw XML tokens itself
+// but also wants to defer to [Unmarshal] for some elements.
+func (d *Decoder) DecodeElement(v any, start *StartElement) error {
+ val := reflect.ValueOf(v)
+ if val.Kind() != reflect.Pointer {
+ return errors.New("non-pointer passed to Unmarshal")
+ }
+
+ if val.IsNil() {
+ return errors.New("nil pointer passed to Unmarshal")
+ }
+ return d.unmarshal(val.Elem(), start, 0)
+}
+
+// An UnmarshalError represents an error in the unmarshaling process.
+type UnmarshalError string
+
+func (e UnmarshalError) Error() string { return string(e) }
+
+// Unmarshaler is the interface implemented by objects that can unmarshal
+// an XML element description of themselves.
+//
+// UnmarshalXML decodes a single XML element
+// beginning with the given start element.
+// If it returns an error, the outer call to Unmarshal stops and
+// returns that error.
+// UnmarshalXML must consume exactly one XML element.
+// One common implementation strategy is to unmarshal into
+// a separate value with a layout matching the expected XML
+// using d.DecodeElement, and then to copy the data from
+// that value into the receiver.
+// Another common strategy is to use d.Token to process the
+// XML object one token at a time.
+// UnmarshalXML may not use d.RawToken.
+type Unmarshaler interface {
+ UnmarshalXML(d *Decoder, start StartElement) error
+}
+
+// UnmarshalerAttr is the interface implemented by objects that can unmarshal
+// an XML attribute description of themselves.
+//
+// UnmarshalXMLAttr decodes a single XML attribute.
+// If it returns an error, the outer call to [Unmarshal] stops and
+// returns that error.
+// UnmarshalXMLAttr is used only for struct fields with the
+// "attr" option in the field tag.
+type UnmarshalerAttr interface {
+ UnmarshalXMLAttr(attr Attr) error
+}
+
+// receiverType returns the receiver type to use in an expression like "%s.MethodName".
+func receiverType(val any) string {
+ t := reflect.TypeOf(val)
+ if t.Name() != "" {
+ return t.String()
+ }
+ return "(" + t.String() + ")"
+}
+
+// unmarshalInterface unmarshals a single XML element into val.
+// start is the opening tag of the element.
+func (d *Decoder) unmarshalInterface(val Unmarshaler, start *StartElement) error {
+ // Record that decoder must stop at end tag corresponding to start.
+ d.pushEOF()
+
+ d.unmarshalDepth++
+ err := val.UnmarshalXML(d, *start)
+ d.unmarshalDepth--
+ if err != nil {
+ d.popEOF()
+ return err
+ }
+
+ if !d.popEOF() {
+ return fmt.Errorf("xml: %s.UnmarshalXML did not consume entire <%s> element", receiverType(val), start.Name.Local)
+ }
+
+ return nil
+}
+
+// unmarshalTextInterface unmarshals a single XML element into val.
+// The chardata contained in the element (but not its children)
+// is passed to the text unmarshaler.
+func (d *Decoder) unmarshalTextInterface(val encoding.TextUnmarshaler) error {
+ var buf []byte
+ depth := 1
+ for depth > 0 {
+ t, err := d.Token()
+ if err != nil {
+ return err
+ }
+ switch t := t.(type) {
+ case CharData:
+ if depth == 1 {
+ buf = append(buf, t...)
+ }
+ case StartElement:
+ depth++
+ case EndElement:
+ depth--
+ }
+ }
+ return val.UnmarshalText(buf)
+}
+
+// unmarshalAttr unmarshals a single XML attribute into val.
+func (d *Decoder) unmarshalAttr(val reflect.Value, attr Attr) error {
+ if val.Kind() == reflect.Pointer {
+ if val.IsNil() {
+ val.Set(reflect.New(val.Type().Elem()))
+ }
+ val = val.Elem()
+ }
+ if val.CanInterface() && val.Type().Implements(unmarshalerAttrType) {
+ // This is an unmarshaler with a non-pointer receiver,
+ // so it's likely to be incorrect, but we do what we're told.
+ return val.Interface().(UnmarshalerAttr).UnmarshalXMLAttr(attr)
+ }
+ if val.CanAddr() {
+ pv := val.Addr()
+ if pv.CanInterface() && pv.Type().Implements(unmarshalerAttrType) {
+ return pv.Interface().(UnmarshalerAttr).UnmarshalXMLAttr(attr)
+ }
+ }
+
+ // Not an UnmarshalerAttr; try encoding.TextUnmarshaler.
+ if val.CanInterface() && val.Type().Implements(textUnmarshalerType) {
+ // This is an unmarshaler with a non-pointer receiver,
+ // so it's likely to be incorrect, but we do what we're told.
+ return val.Interface().(encoding.TextUnmarshaler).UnmarshalText([]byte(attr.Value))
+ }
+ if val.CanAddr() {
+ pv := val.Addr()
+ if pv.CanInterface() && pv.Type().Implements(textUnmarshalerType) {
+ return pv.Interface().(encoding.TextUnmarshaler).UnmarshalText([]byte(attr.Value))
+ }
+ }
+
+ if val.Type().Kind() == reflect.Slice && val.Type().Elem().Kind() != reflect.Uint8 {
+ // Slice of element values.
+ // Grow slice.
+ n := val.Len()
+ val.Grow(1)
+ val.SetLen(n + 1)
+
+ // Recur to read element into slice.
+ if err := d.unmarshalAttr(val.Index(n), attr); err != nil {
+ val.SetLen(n)
+ return err
+ }
+ return nil
+ }
+
+ if val.Type() == attrType {
+ val.Set(reflect.ValueOf(attr))
+ return nil
+ }
+
+ return copyValue(val, []byte(attr.Value))
+}
+
+var (
+ attrType = reflect.TypeFor[Attr]()
+ unmarshalerType = reflect.TypeFor[Unmarshaler]()
+ unmarshalerAttrType = reflect.TypeFor[UnmarshalerAttr]()
+ textUnmarshalerType = reflect.TypeFor[encoding.TextUnmarshaler]()
+)
+
+const (
+ maxUnmarshalDepth = 10000
+ maxUnmarshalDepthWasm = 5000 // go.dev/issue/56498
+)
+
+var errUnmarshalDepth = errors.New("exceeded max depth")
+
+// Unmarshal a single XML element into val.
+func (d *Decoder) unmarshal(val reflect.Value, start *StartElement, depth int) error {
+ if depth >= maxUnmarshalDepth || runtime.GOARCH == "wasm" && depth >= maxUnmarshalDepthWasm {
+ return errUnmarshalDepth
+ }
+ // Find start element if we need it.
+ if start == nil {
+ for {
+ tok, err := d.Token()
+ if err != nil {
+ return err
+ }
+ if t, ok := tok.(StartElement); ok {
+ start = &t
+ break
+ }
+ }
+ }
+
+ // Load value from interface, but only if the result will be
+ // usefully addressable.
+ if val.Kind() == reflect.Interface && !val.IsNil() {
+ e := val.Elem()
+ if e.Kind() == reflect.Pointer && !e.IsNil() {
+ val = e
+ }
+ }
+
+ if val.Kind() == reflect.Pointer {
+ if val.IsNil() {
+ val.Set(reflect.New(val.Type().Elem()))
+ }
+ val = val.Elem()
+ }
+
+ if val.CanInterface() && val.Type().Implements(unmarshalerType) {
+ // This is an unmarshaler with a non-pointer receiver,
+ // so it's likely to be incorrect, but we do what we're told.
+ return d.unmarshalInterface(val.Interface().(Unmarshaler), start)
+ }
+
+ if val.CanAddr() {
+ pv := val.Addr()
+ if pv.CanInterface() && pv.Type().Implements(unmarshalerType) {
+ return d.unmarshalInterface(pv.Interface().(Unmarshaler), start)
+ }
+ }
+
+ if val.CanInterface() && val.Type().Implements(textUnmarshalerType) {
+ return d.unmarshalTextInterface(val.Interface().(encoding.TextUnmarshaler))
+ }
+
+ if val.CanAddr() {
+ pv := val.Addr()
+ if pv.CanInterface() && pv.Type().Implements(textUnmarshalerType) {
+ return d.unmarshalTextInterface(pv.Interface().(encoding.TextUnmarshaler))
+ }
+ }
+
+ var (
+ data []byte
+ saveData reflect.Value
+ comment []byte
+ saveComment reflect.Value
+ saveXML reflect.Value
+ saveXMLIndex int
+ saveXMLData []byte
+ saveAny reflect.Value
+ sv reflect.Value
+ tinfo *typeInfo
+ err error
+ )
+
+ switch v := val; v.Kind() {
+ default:
+ return errors.New("unknown type " + v.Type().String())
+
+ case reflect.Interface:
+ // TODO: For now, simply ignore the field. In the near
+ // future we may choose to unmarshal the start
+ // element on it, if not nil.
+ return d.Skip()
+
+ case reflect.Slice:
+ typ := v.Type()
+ if typ.Elem().Kind() == reflect.Uint8 {
+ // []byte
+ saveData = v
+ break
+ }
+
+ // Slice of element values.
+ // Grow slice.
+ n := v.Len()
+ v.Grow(1)
+ v.SetLen(n + 1)
+
+ // Recur to read element into slice.
+ if err := d.unmarshal(v.Index(n), start, depth+1); err != nil {
+ v.SetLen(n)
+ return err
+ }
+ return nil
+
+ case reflect.Bool, reflect.Float32, reflect.Float64, reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, reflect.String:
+ saveData = v
+
+ case reflect.Struct:
+ typ := v.Type()
+ if typ == nameType {
+ v.Set(reflect.ValueOf(start.Name))
+ break
+ }
+
+ sv = v
+ tinfo, err = getTypeInfo(typ)
+ if err != nil {
+ return err
+ }
+
+ // Validate and assign element name.
+ if tinfo.xmlname != nil {
+ finfo := tinfo.xmlname
+ if finfo.name != "" && finfo.name != start.Name.Local {
+ return UnmarshalError("expected element type <" + finfo.name + "> but have <" + start.Name.Local + ">")
+ }
+ if finfo.xmlns != "" && finfo.xmlns != start.Name.Space {
+ e := "expected element <" + finfo.name + "> in name space " + finfo.xmlns + " but have "
+ if start.Name.Space == "" {
+ e += "no name space"
+ } else {
+ e += start.Name.Space
+ }
+ return UnmarshalError(e)
+ }
+ fv := finfo.value(sv, initNilPointers)
+ if _, ok := fv.Interface().(Name); ok {
+ fv.Set(reflect.ValueOf(start.Name))
+ }
+ }
+
+ // Assign attributes.
+ for _, a := range start.Attr {
+ handled := false
+ any := -1
+ for i := range tinfo.fields {
+ finfo := &tinfo.fields[i]
+ switch finfo.flags & fMode {
+ case fAttr:
+ strv := finfo.value(sv, initNilPointers)
+ if a.Name.Local == finfo.name && (finfo.xmlns == "" || finfo.xmlns == a.Name.Space) {
+ if err := d.unmarshalAttr(strv, a); err != nil {
+ return err
+ }
+ handled = true
+ }
+
+ case fAny | fAttr:
+ if any == -1 {
+ any = i
+ }
+ }
+ }
+ if !handled && any >= 0 {
+ finfo := &tinfo.fields[any]
+ strv := finfo.value(sv, initNilPointers)
+ if err := d.unmarshalAttr(strv, a); err != nil {
+ return err
+ }
+ }
+ }
+
+ // Determine whether we need to save character data or comments.
+ for i := range tinfo.fields {
+ finfo := &tinfo.fields[i]
+ switch finfo.flags & fMode {
+ case fCDATA, fCharData:
+ if !saveData.IsValid() {
+ saveData = finfo.value(sv, initNilPointers)
+ }
+
+ case fComment:
+ if !saveComment.IsValid() {
+ saveComment = finfo.value(sv, initNilPointers)
+ }
+
+ case fAny, fAny | fElement:
+ if !saveAny.IsValid() {
+ saveAny = finfo.value(sv, initNilPointers)
+ }
+
+ case fInnerXML:
+ if !saveXML.IsValid() {
+ saveXML = finfo.value(sv, initNilPointers)
+ if d.saved == nil {
+ saveXMLIndex = 0
+ d.saved = new(bytes.Buffer)
+ } else {
+ saveXMLIndex = d.savedOffset()
+ }
+ }
+ }
+ }
+ }
+
+ // Find end element.
+ // Process sub-elements along the way.
+Loop:
+ for {
+ var savedOffset int
+ if saveXML.IsValid() {
+ savedOffset = d.savedOffset()
+ }
+ tok, err := d.Token()
+ if err != nil {
+ return err
+ }
+ switch t := tok.(type) {
+ case StartElement:
+ consumed := false
+ if sv.IsValid() {
+ // unmarshalPath can call unmarshal, so we need to pass the depth through so that
+ // we can continue to enforce the maximum recursion limit.
+ consumed, err = d.unmarshalPath(tinfo, sv, nil, &t, depth)
+ if err != nil {
+ return err
+ }
+ if !consumed && saveAny.IsValid() {
+ consumed = true
+ if err := d.unmarshal(saveAny, &t, depth+1); err != nil {
+ return err
+ }
+ }
+ }
+ if !consumed {
+ if err := d.Skip(); err != nil {
+ return err
+ }
+ }
+
+ case EndElement:
+ if saveXML.IsValid() {
+ saveXMLData = d.saved.Bytes()[saveXMLIndex:savedOffset]
+ if saveXMLIndex == 0 {
+ d.saved = nil
+ }
+ }
+ break Loop
+
+ case CharData:
+ if saveData.IsValid() {
+ data = append(data, t...)
+ }
+
+ case Comment:
+ if saveComment.IsValid() {
+ comment = append(comment, t...)
+ }
+ }
+ }
+
+ if saveData.IsValid() && saveData.CanInterface() && saveData.Type().Implements(textUnmarshalerType) {
+ if err := saveData.Interface().(encoding.TextUnmarshaler).UnmarshalText(data); err != nil {
+ return err
+ }
+ saveData = reflect.Value{}
+ }
+
+ if saveData.IsValid() && saveData.CanAddr() {
+ pv := saveData.Addr()
+ if pv.CanInterface() && pv.Type().Implements(textUnmarshalerType) {
+ if err := pv.Interface().(encoding.TextUnmarshaler).UnmarshalText(data); err != nil {
+ return err
+ }
+ saveData = reflect.Value{}
+ }
+ }
+
+ if err := copyValue(saveData, data); err != nil {
+ return err
+ }
+
+ switch t := saveComment; t.Kind() {
+ case reflect.String:
+ t.SetString(string(comment))
+ case reflect.Slice:
+ t.Set(reflect.ValueOf(comment))
+ }
+
+ switch t := saveXML; t.Kind() {
+ case reflect.String:
+ t.SetString(string(saveXMLData))
+ case reflect.Slice:
+ if t.Type().Elem().Kind() == reflect.Uint8 {
+ t.Set(reflect.ValueOf(saveXMLData))
+ }
+ }
+
+ return nil
+}
+
+func copyValue(dst reflect.Value, src []byte) (err error) {
+ dst0 := dst
+
+ if dst.Kind() == reflect.Pointer {
+ if dst.IsNil() {
+ dst.Set(reflect.New(dst.Type().Elem()))
+ }
+ dst = dst.Elem()
+ }
+
+ // Save accumulated data.
+ switch dst.Kind() {
+ case reflect.Invalid:
+ // Probably a comment.
+ default:
+ return errors.New("cannot unmarshal into " + dst0.Type().String())
+ case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
+ if len(src) == 0 {
+ dst.SetInt(0)
+ return nil
+ }
+ itmp, err := strconv.ParseInt(strings.TrimSpace(string(src)), 10, dst.Type().Bits())
+ if err != nil {
+ return err
+ }
+ dst.SetInt(itmp)
+ case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
+ if len(src) == 0 {
+ dst.SetUint(0)
+ return nil
+ }
+ utmp, err := strconv.ParseUint(strings.TrimSpace(string(src)), 10, dst.Type().Bits())
+ if err != nil {
+ return err
+ }
+ dst.SetUint(utmp)
+ case reflect.Float32, reflect.Float64:
+ if len(src) == 0 {
+ dst.SetFloat(0)
+ return nil
+ }
+ ftmp, err := strconv.ParseFloat(strings.TrimSpace(string(src)), dst.Type().Bits())
+ if err != nil {
+ return err
+ }
+ dst.SetFloat(ftmp)
+ case reflect.Bool:
+ if len(src) == 0 {
+ dst.SetBool(false)
+ return nil
+ }
+ value, err := strconv.ParseBool(strings.TrimSpace(string(src)))
+ if err != nil {
+ return err
+ }
+ dst.SetBool(value)
+ case reflect.String:
+ dst.SetString(string(src))
+ case reflect.Slice:
+ if len(src) == 0 {
+ // non-nil to flag presence
+ src = []byte{}
+ }
+ dst.SetBytes(src)
+ }
+ return nil
+}
+
+// unmarshalPath walks down an XML structure looking for wanted
+// paths, and calls unmarshal on them.
+// The consumed result tells whether XML elements have been consumed
+// from the Decoder until start's matching end element, or if it's
+// still untouched because start is uninteresting for sv's fields.
+func (d *Decoder) unmarshalPath(tinfo *typeInfo, sv reflect.Value, parents []string, start *StartElement, depth int) (consumed bool, err error) {
+ recurse := false
+Loop:
+ for i := range tinfo.fields {
+ finfo := &tinfo.fields[i]
+ if finfo.flags&fElement == 0 || len(finfo.parents) < len(parents) || finfo.xmlns != "" && finfo.xmlns != start.Name.Space {
+ continue
+ }
+ for j := range parents {
+ if parents[j] != finfo.parents[j] {
+ continue Loop
+ }
+ }
+ if len(finfo.parents) == len(parents) && finfo.name == start.Name.Local {
+ // It's a perfect match, unmarshal the field.
+ return true, d.unmarshal(finfo.value(sv, initNilPointers), start, depth+1)
+ }
+ if len(finfo.parents) > len(parents) && finfo.parents[len(parents)] == start.Name.Local {
+ // It's a prefix for the field. Break and recurse
+ // since it's not ok for one field path to be itself
+ // the prefix for another field path.
+ recurse = true
+
+ // We can reuse the same slice as long as we
+ // don't try to append to it.
+ parents = finfo.parents[:len(parents)+1]
+ break
+ }
+ }
+ if !recurse {
+ // We have no business with this element.
+ return false, nil
+ }
+ // The element is not a perfect match for any field, but one
+ // or more fields have the path to this element as a parent
+ // prefix. Recurse and attempt to match these.
+ for {
+ var tok Token
+ tok, err = d.Token()
+ if err != nil {
+ return true, err
+ }
+ switch t := tok.(type) {
+ case StartElement:
+ // the recursion depth of unmarshalPath is limited to the path length specified
+ // by the struct field tag, so we don't increment the depth here.
+ consumed2, err := d.unmarshalPath(tinfo, sv, parents, &t, depth)
+ if err != nil {
+ return true, err
+ }
+ if !consumed2 {
+ if err := d.Skip(); err != nil {
+ return true, err
+ }
+ }
+ case EndElement:
+ return true, nil
+ }
+ }
+}
+
+// Skip reads tokens until it has consumed the end element
+// matching the most recent start element already consumed,
+// skipping nested structures.
+// It returns nil if it finds an end element matching the start
+// element; otherwise it returns an error describing the problem.
+func (d *Decoder) Skip() error {
+ var depth int64
+ for {
+ tok, err := d.Token()
+ if err != nil {
+ return err
+ }
+ switch tok.(type) {
+ case StartElement:
+ depth++
+ case EndElement:
+ if depth == 0 {
+ return nil
+ }
+ depth--
+ }
+ }
+}
diff --git a/src/encoding/xml/read_test.go b/src/encoding/xml/read_test.go
new file mode 100644
index 0000000..ce99894
--- /dev/null
+++ b/src/encoding/xml/read_test.go
@@ -0,0 +1,1128 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package xml
+
+import (
+ "bytes"
+ "errors"
+ "io"
+ "reflect"
+ "runtime"
+ "strings"
+ "testing"
+ "time"
+)
+
+// Stripped down Atom feed data structures.
+
+func TestUnmarshalFeed(t *testing.T) {
+ var f Feed
+ if err := Unmarshal([]byte(atomFeedString), &f); err != nil {
+ t.Fatalf("Unmarshal: %s", err)
+ }
+ if !reflect.DeepEqual(f, atomFeed) {
+ t.Fatalf("have %#v\nwant %#v", f, atomFeed)
+ }
+}
+
+// hget http://codereview.appspot.com/rss/mine/rsc
+const atomFeedString = `
+<?xml version="1.0" encoding="utf-8"?>
+<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en-us" updated="2009-10-04T01:35:58+00:00"><title>Code Review - My issues</title><link href="http://codereview.appspot.com/" rel="alternate"></link><link href="http://codereview.appspot.com/rss/mine/rsc" rel="self"></link><id>http://codereview.appspot.com/</id><author><name>rietveld&lt;&gt;</name></author><entry><title>rietveld: an attempt at pubsubhubbub
+</title><link href="http://codereview.appspot.com/126085" rel="alternate"></link><updated>2009-10-04T01:35:58+00:00</updated><author><name>email-address-removed</name></author><id>urn:md5:134d9179c41f806be79b3a5f7877d19a</id><summary type="html">
+ An attempt at adding pubsubhubbub support to Rietveld.
+http://code.google.com/p/pubsubhubbub
+http://code.google.com/p/rietveld/issues/detail?id=155
+
+The server side of the protocol is trivial:
+ 1. add a &amp;lt;link rel=&amp;quot;hub&amp;quot; href=&amp;quot;hub-server&amp;quot;&amp;gt; tag to all
+ feeds that will be pubsubhubbubbed.
+ 2. every time one of those feeds changes, tell the hub
+ with a simple POST request.
+
+I have tested this by adding debug prints to a local hub
+server and checking that the server got the right publish
+requests.
+
+I can&amp;#39;t quite get the server to work, but I think the bug
+is not in my code. I think that the server expects to be
+able to grab the feed and see the feed&amp;#39;s actual URL in
+the link rel=&amp;quot;self&amp;quot;, but the default value for that drops
+the :port from the URL, and I cannot for the life of me
+figure out how to get the Atom generator deep inside
+django not to do that, or even where it is doing that,
+or even what code is running to generate the Atom feed.
+(I thought I knew but I added some assert False statements
+and it kept running!)
+
+Ignoring that particular problem, I would appreciate
+feedback on the right way to get the two values at
+the top of feeds.py marked NOTE(rsc).
+
+
+</summary></entry><entry><title>rietveld: correct tab handling
+</title><link href="http://codereview.appspot.com/124106" rel="alternate"></link><updated>2009-10-03T23:02:17+00:00</updated><author><name>email-address-removed</name></author><id>urn:md5:0a2a4f19bb815101f0ba2904aed7c35a</id><summary type="html">
+ This fixes the buggy tab rendering that can be seen at
+http://codereview.appspot.com/116075/diff/1/2
+
+The fundamental problem was that the tab code was
+not being told what column the text began in, so it
+didn&amp;#39;t know where to put the tab stops. Another problem
+was that some of the code assumed that string byte
+offsets were the same as column offsets, which is only
+true if there are no tabs.
+
+In the process of fixing this, I cleaned up the arguments
+to Fold and ExpandTabs and renamed them Break and
+_ExpandTabs so that I could be sure that I found all the
+call sites. I also wanted to verify that ExpandTabs was
+not being used from outside intra_region_diff.py.
+
+
+</summary></entry></feed> `
+
+type Feed struct {
+ XMLName Name `xml:"http://www.w3.org/2005/Atom feed"`
+ Title string `xml:"title"`
+ ID string `xml:"id"`
+ Link []Link `xml:"link"`
+ Updated time.Time `xml:"updated,attr"`
+ Author Person `xml:"author"`
+ Entry []Entry `xml:"entry"`
+}
+
+type Entry struct {
+ Title string `xml:"title"`
+ ID string `xml:"id"`
+ Link []Link `xml:"link"`
+ Updated time.Time `xml:"updated"`
+ Author Person `xml:"author"`
+ Summary Text `xml:"summary"`
+}
+
+type Link struct {
+ Rel string `xml:"rel,attr,omitempty"`
+ Href string `xml:"href,attr"`
+}
+
+type Person struct {
+ Name string `xml:"name"`
+ URI string `xml:"uri"`
+ Email string `xml:"email"`
+ InnerXML string `xml:",innerxml"`
+}
+
+type Text struct {
+ Type string `xml:"type,attr,omitempty"`
+ Body string `xml:",chardata"`
+}
+
+var atomFeed = Feed{
+ XMLName: Name{"http://www.w3.org/2005/Atom", "feed"},
+ Title: "Code Review - My issues",
+ Link: []Link{
+ {Rel: "alternate", Href: "http://codereview.appspot.com/"},
+ {Rel: "self", Href: "http://codereview.appspot.com/rss/mine/rsc"},
+ },
+ ID: "http://codereview.appspot.com/",
+ Updated: ParseTime("2009-10-04T01:35:58+00:00"),
+ Author: Person{
+ Name: "rietveld<>",
+ InnerXML: "<name>rietveld&lt;&gt;</name>",
+ },
+ Entry: []Entry{
+ {
+ Title: "rietveld: an attempt at pubsubhubbub\n",
+ Link: []Link{
+ {Rel: "alternate", Href: "http://codereview.appspot.com/126085"},
+ },
+ Updated: ParseTime("2009-10-04T01:35:58+00:00"),
+ Author: Person{
+ Name: "email-address-removed",
+ InnerXML: "<name>email-address-removed</name>",
+ },
+ ID: "urn:md5:134d9179c41f806be79b3a5f7877d19a",
+ Summary: Text{
+ Type: "html",
+ Body: `
+ An attempt at adding pubsubhubbub support to Rietveld.
+http://code.google.com/p/pubsubhubbub
+http://code.google.com/p/rietveld/issues/detail?id=155
+
+The server side of the protocol is trivial:
+ 1. add a &lt;link rel=&quot;hub&quot; href=&quot;hub-server&quot;&gt; tag to all
+ feeds that will be pubsubhubbubbed.
+ 2. every time one of those feeds changes, tell the hub
+ with a simple POST request.
+
+I have tested this by adding debug prints to a local hub
+server and checking that the server got the right publish
+requests.
+
+I can&#39;t quite get the server to work, but I think the bug
+is not in my code. I think that the server expects to be
+able to grab the feed and see the feed&#39;s actual URL in
+the link rel=&quot;self&quot;, but the default value for that drops
+the :port from the URL, and I cannot for the life of me
+figure out how to get the Atom generator deep inside
+django not to do that, or even where it is doing that,
+or even what code is running to generate the Atom feed.
+(I thought I knew but I added some assert False statements
+and it kept running!)
+
+Ignoring that particular problem, I would appreciate
+feedback on the right way to get the two values at
+the top of feeds.py marked NOTE(rsc).
+
+
+`,
+ },
+ },
+ {
+ Title: "rietveld: correct tab handling\n",
+ Link: []Link{
+ {Rel: "alternate", Href: "http://codereview.appspot.com/124106"},
+ },
+ Updated: ParseTime("2009-10-03T23:02:17+00:00"),
+ Author: Person{
+ Name: "email-address-removed",
+ InnerXML: "<name>email-address-removed</name>",
+ },
+ ID: "urn:md5:0a2a4f19bb815101f0ba2904aed7c35a",
+ Summary: Text{
+ Type: "html",
+ Body: `
+ This fixes the buggy tab rendering that can be seen at
+http://codereview.appspot.com/116075/diff/1/2
+
+The fundamental problem was that the tab code was
+not being told what column the text began in, so it
+didn&#39;t know where to put the tab stops. Another problem
+was that some of the code assumed that string byte
+offsets were the same as column offsets, which is only
+true if there are no tabs.
+
+In the process of fixing this, I cleaned up the arguments
+to Fold and ExpandTabs and renamed them Break and
+_ExpandTabs so that I could be sure that I found all the
+call sites. I also wanted to verify that ExpandTabs was
+not being used from outside intra_region_diff.py.
+
+
+`,
+ },
+ },
+ },
+}
+
+const pathTestString = `
+<Result>
+ <Before>1</Before>
+ <Items>
+ <Item1>
+ <Value>A</Value>
+ </Item1>
+ <Item2>
+ <Value>B</Value>
+ </Item2>
+ <Item1>
+ <Value>C</Value>
+ <Value>D</Value>
+ </Item1>
+ <_>
+ <Value>E</Value>
+ </_>
+ </Items>
+ <After>2</After>
+</Result>
+`
+
+type PathTestItem struct {
+ Value string
+}
+
+type PathTestA struct {
+ Items []PathTestItem `xml:">Item1"`
+ Before, After string
+}
+
+type PathTestB struct {
+ Other []PathTestItem `xml:"Items>Item1"`
+ Before, After string
+}
+
+type PathTestC struct {
+ Values1 []string `xml:"Items>Item1>Value"`
+ Values2 []string `xml:"Items>Item2>Value"`
+ Before, After string
+}
+
+type PathTestSet struct {
+ Item1 []PathTestItem
+}
+
+type PathTestD struct {
+ Other PathTestSet `xml:"Items"`
+ Before, After string
+}
+
+type PathTestE struct {
+ Underline string `xml:"Items>_>Value"`
+ Before, After string
+}
+
+var pathTests = []any{
+ &PathTestA{Items: []PathTestItem{{"A"}, {"D"}}, Before: "1", After: "2"},
+ &PathTestB{Other: []PathTestItem{{"A"}, {"D"}}, Before: "1", After: "2"},
+ &PathTestC{Values1: []string{"A", "C", "D"}, Values2: []string{"B"}, Before: "1", After: "2"},
+ &PathTestD{Other: PathTestSet{Item1: []PathTestItem{{"A"}, {"D"}}}, Before: "1", After: "2"},
+ &PathTestE{Underline: "E", Before: "1", After: "2"},
+}
+
+func TestUnmarshalPaths(t *testing.T) {
+ for _, pt := range pathTests {
+ v := reflect.New(reflect.TypeOf(pt).Elem()).Interface()
+ if err := Unmarshal([]byte(pathTestString), v); err != nil {
+ t.Fatalf("Unmarshal: %s", err)
+ }
+ if !reflect.DeepEqual(v, pt) {
+ t.Fatalf("have %#v\nwant %#v", v, pt)
+ }
+ }
+}
+
+type BadPathTestA struct {
+ First string `xml:"items>item1"`
+ Other string `xml:"items>item2"`
+ Second string `xml:"items"`
+}
+
+type BadPathTestB struct {
+ Other string `xml:"items>item2>value"`
+ First string `xml:"items>item1"`
+ Second string `xml:"items>item1>value"`
+}
+
+type BadPathTestC struct {
+ First string
+ Second string `xml:"First"`
+}
+
+type BadPathTestD struct {
+ BadPathEmbeddedA
+ BadPathEmbeddedB
+}
+
+type BadPathEmbeddedA struct {
+ First string
+}
+
+type BadPathEmbeddedB struct {
+ Second string `xml:"First"`
+}
+
+var badPathTests = []struct {
+ v, e any
+}{
+ {&BadPathTestA{}, &TagPathError{reflect.TypeFor[BadPathTestA](), "First", "items>item1", "Second", "items"}},
+ {&BadPathTestB{}, &TagPathError{reflect.TypeFor[BadPathTestB](), "First", "items>item1", "Second", "items>item1>value"}},
+ {&BadPathTestC{}, &TagPathError{reflect.TypeFor[BadPathTestC](), "First", "", "Second", "First"}},
+ {&BadPathTestD{}, &TagPathError{reflect.TypeFor[BadPathTestD](), "First", "", "Second", "First"}},
+}
+
+func TestUnmarshalBadPaths(t *testing.T) {
+ for _, tt := range badPathTests {
+ err := Unmarshal([]byte(pathTestString), tt.v)
+ if !reflect.DeepEqual(err, tt.e) {
+ t.Fatalf("Unmarshal with %#v didn't fail properly:\nhave %#v,\nwant %#v", tt.v, err, tt.e)
+ }
+ }
+}
+
+const OK = "OK"
+const withoutNameTypeData = `
+<?xml version="1.0" charset="utf-8"?>
+<Test3 Attr="OK" />`
+
+type TestThree struct {
+ XMLName Name `xml:"Test3"`
+ Attr string `xml:",attr"`
+}
+
+func TestUnmarshalWithoutNameType(t *testing.T) {
+ var x TestThree
+ if err := Unmarshal([]byte(withoutNameTypeData), &x); err != nil {
+ t.Fatalf("Unmarshal: %s", err)
+ }
+ if x.Attr != OK {
+ t.Fatalf("have %v\nwant %v", x.Attr, OK)
+ }
+}
+
+func TestUnmarshalAttr(t *testing.T) {
+ type ParamVal struct {
+ Int int `xml:"int,attr"`
+ }
+
+ type ParamPtr struct {
+ Int *int `xml:"int,attr"`
+ }
+
+ type ParamStringPtr struct {
+ Int *string `xml:"int,attr"`
+ }
+
+ x := []byte(`<Param int="1" />`)
+
+ p1 := &ParamPtr{}
+ if err := Unmarshal(x, p1); err != nil {
+ t.Fatalf("Unmarshal: %s", err)
+ }
+ if p1.Int == nil {
+ t.Fatalf("Unmarshal failed in to *int field")
+ } else if *p1.Int != 1 {
+ t.Fatalf("Unmarshal with %s failed:\nhave %#v,\n want %#v", x, p1.Int, 1)
+ }
+
+ p2 := &ParamVal{}
+ if err := Unmarshal(x, p2); err != nil {
+ t.Fatalf("Unmarshal: %s", err)
+ }
+ if p2.Int != 1 {
+ t.Fatalf("Unmarshal with %s failed:\nhave %#v,\n want %#v", x, p2.Int, 1)
+ }
+
+ p3 := &ParamStringPtr{}
+ if err := Unmarshal(x, p3); err != nil {
+ t.Fatalf("Unmarshal: %s", err)
+ }
+ if p3.Int == nil {
+ t.Fatalf("Unmarshal failed in to *string field")
+ } else if *p3.Int != "1" {
+ t.Fatalf("Unmarshal with %s failed:\nhave %#v,\n want %#v", x, p3.Int, 1)
+ }
+}
+
+type Tables struct {
+ HTable string `xml:"http://www.w3.org/TR/html4/ table"`
+ FTable string `xml:"http://www.w3schools.com/furniture table"`
+}
+
+var tables = []struct {
+ xml string
+ tab Tables
+ ns string
+}{
+ {
+ xml: `<Tables>` +
+ `<table xmlns="http://www.w3.org/TR/html4/">hello</table>` +
+ `<table xmlns="http://www.w3schools.com/furniture">world</table>` +
+ `</Tables>`,
+ tab: Tables{"hello", "world"},
+ },
+ {
+ xml: `<Tables>` +
+ `<table xmlns="http://www.w3schools.com/furniture">world</table>` +
+ `<table xmlns="http://www.w3.org/TR/html4/">hello</table>` +
+ `</Tables>`,
+ tab: Tables{"hello", "world"},
+ },
+ {
+ xml: `<Tables xmlns:f="http://www.w3schools.com/furniture" xmlns:h="http://www.w3.org/TR/html4/">` +
+ `<f:table>world</f:table>` +
+ `<h:table>hello</h:table>` +
+ `</Tables>`,
+ tab: Tables{"hello", "world"},
+ },
+ {
+ xml: `<Tables>` +
+ `<table>bogus</table>` +
+ `</Tables>`,
+ tab: Tables{},
+ },
+ {
+ xml: `<Tables>` +
+ `<table>only</table>` +
+ `</Tables>`,
+ tab: Tables{HTable: "only"},
+ ns: "http://www.w3.org/TR/html4/",
+ },
+ {
+ xml: `<Tables>` +
+ `<table>only</table>` +
+ `</Tables>`,
+ tab: Tables{FTable: "only"},
+ ns: "http://www.w3schools.com/furniture",
+ },
+ {
+ xml: `<Tables>` +
+ `<table>only</table>` +
+ `</Tables>`,
+ tab: Tables{},
+ ns: "something else entirely",
+ },
+}
+
+func TestUnmarshalNS(t *testing.T) {
+ for i, tt := range tables {
+ var dst Tables
+ var err error
+ if tt.ns != "" {
+ d := NewDecoder(strings.NewReader(tt.xml))
+ d.DefaultSpace = tt.ns
+ err = d.Decode(&dst)
+ } else {
+ err = Unmarshal([]byte(tt.xml), &dst)
+ }
+ if err != nil {
+ t.Errorf("#%d: Unmarshal: %v", i, err)
+ continue
+ }
+ want := tt.tab
+ if dst != want {
+ t.Errorf("#%d: dst=%+v, want %+v", i, dst, want)
+ }
+ }
+}
+
+func TestMarshalNS(t *testing.T) {
+ dst := Tables{"hello", "world"}
+ data, err := Marshal(&dst)
+ if err != nil {
+ t.Fatalf("Marshal: %v", err)
+ }
+ want := `<Tables><table xmlns="http://www.w3.org/TR/html4/">hello</table><table xmlns="http://www.w3schools.com/furniture">world</table></Tables>`
+ str := string(data)
+ if str != want {
+ t.Errorf("have: %q\nwant: %q\n", str, want)
+ }
+}
+
+type TableAttrs struct {
+ TAttr TAttr
+}
+
+type TAttr struct {
+ HTable string `xml:"http://www.w3.org/TR/html4/ table,attr"`
+ FTable string `xml:"http://www.w3schools.com/furniture table,attr"`
+ Lang string `xml:"http://www.w3.org/XML/1998/namespace lang,attr,omitempty"`
+ Other1 string `xml:"http://golang.org/xml/ other,attr,omitempty"`
+ Other2 string `xml:"http://golang.org/xmlfoo/ other,attr,omitempty"`
+ Other3 string `xml:"http://golang.org/json/ other,attr,omitempty"`
+ Other4 string `xml:"http://golang.org/2/json/ other,attr,omitempty"`
+}
+
+var tableAttrs = []struct {
+ xml string
+ tab TableAttrs
+ ns string
+}{
+ {
+ xml: `<TableAttrs xmlns:f="http://www.w3schools.com/furniture" xmlns:h="http://www.w3.org/TR/html4/"><TAttr ` +
+ `h:table="hello" f:table="world" ` +
+ `/></TableAttrs>`,
+ tab: TableAttrs{TAttr{HTable: "hello", FTable: "world"}},
+ },
+ {
+ xml: `<TableAttrs><TAttr xmlns:f="http://www.w3schools.com/furniture" xmlns:h="http://www.w3.org/TR/html4/" ` +
+ `h:table="hello" f:table="world" ` +
+ `/></TableAttrs>`,
+ tab: TableAttrs{TAttr{HTable: "hello", FTable: "world"}},
+ },
+ {
+ xml: `<TableAttrs><TAttr ` +
+ `h:table="hello" f:table="world" xmlns:f="http://www.w3schools.com/furniture" xmlns:h="http://www.w3.org/TR/html4/" ` +
+ `/></TableAttrs>`,
+ tab: TableAttrs{TAttr{HTable: "hello", FTable: "world"}},
+ },
+ {
+ // Default space does not apply to attribute names.
+ xml: `<TableAttrs xmlns="http://www.w3schools.com/furniture" xmlns:h="http://www.w3.org/TR/html4/"><TAttr ` +
+ `h:table="hello" table="world" ` +
+ `/></TableAttrs>`,
+ tab: TableAttrs{TAttr{HTable: "hello", FTable: ""}},
+ },
+ {
+ // Default space does not apply to attribute names.
+ xml: `<TableAttrs xmlns:f="http://www.w3schools.com/furniture"><TAttr xmlns="http://www.w3.org/TR/html4/" ` +
+ `table="hello" f:table="world" ` +
+ `/></TableAttrs>`,
+ tab: TableAttrs{TAttr{HTable: "", FTable: "world"}},
+ },
+ {
+ xml: `<TableAttrs><TAttr ` +
+ `table="bogus" ` +
+ `/></TableAttrs>`,
+ tab: TableAttrs{},
+ },
+ {
+ // Default space does not apply to attribute names.
+ xml: `<TableAttrs xmlns:h="http://www.w3.org/TR/html4/"><TAttr ` +
+ `h:table="hello" table="world" ` +
+ `/></TableAttrs>`,
+ tab: TableAttrs{TAttr{HTable: "hello", FTable: ""}},
+ ns: "http://www.w3schools.com/furniture",
+ },
+ {
+ // Default space does not apply to attribute names.
+ xml: `<TableAttrs xmlns:f="http://www.w3schools.com/furniture"><TAttr ` +
+ `table="hello" f:table="world" ` +
+ `/></TableAttrs>`,
+ tab: TableAttrs{TAttr{HTable: "", FTable: "world"}},
+ ns: "http://www.w3.org/TR/html4/",
+ },
+ {
+ xml: `<TableAttrs><TAttr ` +
+ `table="bogus" ` +
+ `/></TableAttrs>`,
+ tab: TableAttrs{},
+ ns: "something else entirely",
+ },
+}
+
+func TestUnmarshalNSAttr(t *testing.T) {
+ for i, tt := range tableAttrs {
+ var dst TableAttrs
+ var err error
+ if tt.ns != "" {
+ d := NewDecoder(strings.NewReader(tt.xml))
+ d.DefaultSpace = tt.ns
+ err = d.Decode(&dst)
+ } else {
+ err = Unmarshal([]byte(tt.xml), &dst)
+ }
+ if err != nil {
+ t.Errorf("#%d: Unmarshal: %v", i, err)
+ continue
+ }
+ want := tt.tab
+ if dst != want {
+ t.Errorf("#%d: dst=%+v, want %+v", i, dst, want)
+ }
+ }
+}
+
+func TestMarshalNSAttr(t *testing.T) {
+ src := TableAttrs{TAttr{"hello", "world", "en_US", "other1", "other2", "other3", "other4"}}
+ data, err := Marshal(&src)
+ if err != nil {
+ t.Fatalf("Marshal: %v", err)
+ }
+ want := `<TableAttrs><TAttr xmlns:html4="http://www.w3.org/TR/html4/" html4:table="hello" xmlns:furniture="http://www.w3schools.com/furniture" furniture:table="world" xml:lang="en_US" xmlns:_xml="http://golang.org/xml/" _xml:other="other1" xmlns:_xmlfoo="http://golang.org/xmlfoo/" _xmlfoo:other="other2" xmlns:json="http://golang.org/json/" json:other="other3" xmlns:json_1="http://golang.org/2/json/" json_1:other="other4"></TAttr></TableAttrs>`
+ str := string(data)
+ if str != want {
+ t.Errorf("Marshal:\nhave: %#q\nwant: %#q\n", str, want)
+ }
+
+ var dst TableAttrs
+ if err := Unmarshal(data, &dst); err != nil {
+ t.Errorf("Unmarshal: %v", err)
+ }
+
+ if dst != src {
+ t.Errorf("Unmarshal = %q, want %q", dst, src)
+ }
+}
+
+type MyCharData struct {
+ body string
+}
+
+func (m *MyCharData) UnmarshalXML(d *Decoder, start StartElement) error {
+ for {
+ t, err := d.Token()
+ if err == io.EOF { // found end of element
+ break
+ }
+ if err != nil {
+ return err
+ }
+ if char, ok := t.(CharData); ok {
+ m.body += string(char)
+ }
+ }
+ return nil
+}
+
+var _ Unmarshaler = (*MyCharData)(nil)
+
+func (m *MyCharData) UnmarshalXMLAttr(attr Attr) error {
+ panic("must not call")
+}
+
+type MyAttr struct {
+ attr string
+}
+
+func (m *MyAttr) UnmarshalXMLAttr(attr Attr) error {
+ m.attr = attr.Value
+ return nil
+}
+
+var _ UnmarshalerAttr = (*MyAttr)(nil)
+
+type MyStruct struct {
+ Data *MyCharData
+ Attr *MyAttr `xml:",attr"`
+
+ Data2 MyCharData
+ Attr2 MyAttr `xml:",attr"`
+}
+
+func TestUnmarshaler(t *testing.T) {
+ xml := `<?xml version="1.0" encoding="utf-8"?>
+ <MyStruct Attr="attr1" Attr2="attr2">
+ <Data>hello <!-- comment -->world</Data>
+ <Data2>howdy <!-- comment -->world</Data2>
+ </MyStruct>
+ `
+
+ var m MyStruct
+ if err := Unmarshal([]byte(xml), &m); err != nil {
+ t.Fatal(err)
+ }
+
+ if m.Data == nil || m.Attr == nil || m.Data.body != "hello world" || m.Attr.attr != "attr1" || m.Data2.body != "howdy world" || m.Attr2.attr != "attr2" {
+ t.Errorf("m=%#+v\n", m)
+ }
+}
+
+type Pea struct {
+ Cotelydon string
+}
+
+type Pod struct {
+ Pea any `xml:"Pea"`
+}
+
+// https://golang.org/issue/6836
+func TestUnmarshalIntoInterface(t *testing.T) {
+ pod := new(Pod)
+ pod.Pea = new(Pea)
+ xml := `<Pod><Pea><Cotelydon>Green stuff</Cotelydon></Pea></Pod>`
+ err := Unmarshal([]byte(xml), pod)
+ if err != nil {
+ t.Fatalf("failed to unmarshal %q: %v", xml, err)
+ }
+ pea, ok := pod.Pea.(*Pea)
+ if !ok {
+ t.Fatalf("unmarshaled into wrong type: have %T want *Pea", pod.Pea)
+ }
+ have, want := pea.Cotelydon, "Green stuff"
+ if have != want {
+ t.Errorf("failed to unmarshal into interface, have %q want %q", have, want)
+ }
+}
+
+type X struct {
+ D string `xml:",comment"`
+}
+
+// Issue 11112. Unmarshal must reject invalid comments.
+func TestMalformedComment(t *testing.T) {
+ testData := []string{
+ "<X><!-- a---></X>",
+ "<X><!-- -- --></X>",
+ "<X><!-- a--b --></X>",
+ "<X><!------></X>",
+ }
+ for i, test := range testData {
+ data := []byte(test)
+ v := new(X)
+ if err := Unmarshal(data, v); err == nil {
+ t.Errorf("%d: unmarshal should reject invalid comments", i)
+ }
+ }
+}
+
+type IXField struct {
+ Five int `xml:"five"`
+ NotInnerXML []string `xml:",innerxml"`
+}
+
+// Issue 15600. ",innerxml" on a field that can't hold it.
+func TestInvalidInnerXMLType(t *testing.T) {
+ v := new(IXField)
+ if err := Unmarshal([]byte(`<tag><five>5</five><innertag/></tag>`), v); err != nil {
+ t.Errorf("Unmarshal failed: got %v", err)
+ }
+ if v.Five != 5 {
+ t.Errorf("Five = %v, want 5", v.Five)
+ }
+ if v.NotInnerXML != nil {
+ t.Errorf("NotInnerXML = %v, want nil", v.NotInnerXML)
+ }
+}
+
+type Child struct {
+ G struct {
+ I int
+ }
+}
+
+type ChildToEmbed struct {
+ X bool
+}
+
+type Parent struct {
+ I int
+ IPtr *int
+ Is []int
+ IPtrs []*int
+ F float32
+ FPtr *float32
+ Fs []float32
+ FPtrs []*float32
+ B bool
+ BPtr *bool
+ Bs []bool
+ BPtrs []*bool
+ Bytes []byte
+ BytesPtr *[]byte
+ S string
+ SPtr *string
+ Ss []string
+ SPtrs []*string
+ MyI MyInt
+ Child Child
+ Children []Child
+ ChildPtr *Child
+ ChildToEmbed
+}
+
+const (
+ emptyXML = `
+<Parent>
+ <I></I>
+ <IPtr></IPtr>
+ <Is></Is>
+ <IPtrs></IPtrs>
+ <F></F>
+ <FPtr></FPtr>
+ <Fs></Fs>
+ <FPtrs></FPtrs>
+ <B></B>
+ <BPtr></BPtr>
+ <Bs></Bs>
+ <BPtrs></BPtrs>
+ <Bytes></Bytes>
+ <BytesPtr></BytesPtr>
+ <S></S>
+ <SPtr></SPtr>
+ <Ss></Ss>
+ <SPtrs></SPtrs>
+ <MyI></MyI>
+ <Child></Child>
+ <Children></Children>
+ <ChildPtr></ChildPtr>
+ <X></X>
+</Parent>
+`
+)
+
+// golang.org/issues/13417
+func TestUnmarshalEmptyValues(t *testing.T) {
+ // Test first with a zero-valued dst.
+ v := new(Parent)
+ if err := Unmarshal([]byte(emptyXML), v); err != nil {
+ t.Fatalf("zero: Unmarshal failed: got %v", err)
+ }
+
+ zBytes, zInt, zStr, zFloat, zBool := []byte{}, 0, "", float32(0), false
+ want := &Parent{
+ IPtr: &zInt,
+ Is: []int{zInt},
+ IPtrs: []*int{&zInt},
+ FPtr: &zFloat,
+ Fs: []float32{zFloat},
+ FPtrs: []*float32{&zFloat},
+ BPtr: &zBool,
+ Bs: []bool{zBool},
+ BPtrs: []*bool{&zBool},
+ Bytes: []byte{},
+ BytesPtr: &zBytes,
+ SPtr: &zStr,
+ Ss: []string{zStr},
+ SPtrs: []*string{&zStr},
+ Children: []Child{{}},
+ ChildPtr: new(Child),
+ ChildToEmbed: ChildToEmbed{},
+ }
+ if !reflect.DeepEqual(v, want) {
+ t.Fatalf("zero: Unmarshal:\nhave: %#+v\nwant: %#+v", v, want)
+ }
+
+ // Test with a pre-populated dst.
+ // Multiple addressable copies, as pointer-to fields will replace value during unmarshal.
+ vBytes0, vInt0, vStr0, vFloat0, vBool0 := []byte("x"), 1, "x", float32(1), true
+ vBytes1, vInt1, vStr1, vFloat1, vBool1 := []byte("x"), 1, "x", float32(1), true
+ vInt2, vStr2, vFloat2, vBool2 := 1, "x", float32(1), true
+ v = &Parent{
+ I: vInt0,
+ IPtr: &vInt1,
+ Is: []int{vInt0},
+ IPtrs: []*int{&vInt2},
+ F: vFloat0,
+ FPtr: &vFloat1,
+ Fs: []float32{vFloat0},
+ FPtrs: []*float32{&vFloat2},
+ B: vBool0,
+ BPtr: &vBool1,
+ Bs: []bool{vBool0},
+ BPtrs: []*bool{&vBool2},
+ Bytes: vBytes0,
+ BytesPtr: &vBytes1,
+ S: vStr0,
+ SPtr: &vStr1,
+ Ss: []string{vStr0},
+ SPtrs: []*string{&vStr2},
+ MyI: MyInt(vInt0),
+ Child: Child{G: struct{ I int }{I: vInt0}},
+ Children: []Child{{G: struct{ I int }{I: vInt0}}},
+ ChildPtr: &Child{G: struct{ I int }{I: vInt0}},
+ ChildToEmbed: ChildToEmbed{X: vBool0},
+ }
+ if err := Unmarshal([]byte(emptyXML), v); err != nil {
+ t.Fatalf("populated: Unmarshal failed: got %v", err)
+ }
+
+ want = &Parent{
+ IPtr: &zInt,
+ Is: []int{vInt0, zInt},
+ IPtrs: []*int{&vInt0, &zInt},
+ FPtr: &zFloat,
+ Fs: []float32{vFloat0, zFloat},
+ FPtrs: []*float32{&vFloat0, &zFloat},
+ BPtr: &zBool,
+ Bs: []bool{vBool0, zBool},
+ BPtrs: []*bool{&vBool0, &zBool},
+ Bytes: []byte{},
+ BytesPtr: &zBytes,
+ SPtr: &zStr,
+ Ss: []string{vStr0, zStr},
+ SPtrs: []*string{&vStr0, &zStr},
+ Child: Child{G: struct{ I int }{I: vInt0}}, // I should == zInt0? (zero value)
+ Children: []Child{{G: struct{ I int }{I: vInt0}}, {}},
+ ChildPtr: &Child{G: struct{ I int }{I: vInt0}}, // I should == zInt0? (zero value)
+ }
+ if !reflect.DeepEqual(v, want) {
+ t.Fatalf("populated: Unmarshal:\nhave: %#+v\nwant: %#+v", v, want)
+ }
+}
+
+type WhitespaceValuesParent struct {
+ BFalse bool
+ BTrue bool
+ I int
+ INeg int
+ I8 int8
+ I8Neg int8
+ I16 int16
+ I16Neg int16
+ I32 int32
+ I32Neg int32
+ I64 int64
+ I64Neg int64
+ UI uint
+ UI8 uint8
+ UI16 uint16
+ UI32 uint32
+ UI64 uint64
+ F32 float32
+ F32Neg float32
+ F64 float64
+ F64Neg float64
+}
+
+const whitespaceValuesXML = `
+<WhitespaceValuesParent>
+ <BFalse> false </BFalse>
+ <BTrue> true </BTrue>
+ <I> 266703 </I>
+ <INeg> -266703 </INeg>
+ <I8> 112 </I8>
+ <I8Neg> -112 </I8Neg>
+ <I16> 6703 </I16>
+ <I16Neg> -6703 </I16Neg>
+ <I32> 266703 </I32>
+ <I32Neg> -266703 </I32Neg>
+ <I64> 266703 </I64>
+ <I64Neg> -266703 </I64Neg>
+ <UI> 266703 </UI>
+ <UI8> 112 </UI8>
+ <UI16> 6703 </UI16>
+ <UI32> 266703 </UI32>
+ <UI64> 266703 </UI64>
+ <F32> 266.703 </F32>
+ <F32Neg> -266.703 </F32Neg>
+ <F64> 266.703 </F64>
+ <F64Neg> -266.703 </F64Neg>
+</WhitespaceValuesParent>
+`
+
+// golang.org/issues/22146
+func TestUnmarshalWhitespaceValues(t *testing.T) {
+ v := WhitespaceValuesParent{}
+ if err := Unmarshal([]byte(whitespaceValuesXML), &v); err != nil {
+ t.Fatalf("whitespace values: Unmarshal failed: got %v", err)
+ }
+
+ want := WhitespaceValuesParent{
+ BFalse: false,
+ BTrue: true,
+ I: 266703,
+ INeg: -266703,
+ I8: 112,
+ I8Neg: -112,
+ I16: 6703,
+ I16Neg: -6703,
+ I32: 266703,
+ I32Neg: -266703,
+ I64: 266703,
+ I64Neg: -266703,
+ UI: 266703,
+ UI8: 112,
+ UI16: 6703,
+ UI32: 266703,
+ UI64: 266703,
+ F32: 266.703,
+ F32Neg: -266.703,
+ F64: 266.703,
+ F64Neg: -266.703,
+ }
+ if v != want {
+ t.Fatalf("whitespace values: Unmarshal:\nhave: %#+v\nwant: %#+v", v, want)
+ }
+}
+
+type WhitespaceAttrsParent struct {
+ BFalse bool `xml:",attr"`
+ BTrue bool `xml:",attr"`
+ I int `xml:",attr"`
+ INeg int `xml:",attr"`
+ I8 int8 `xml:",attr"`
+ I8Neg int8 `xml:",attr"`
+ I16 int16 `xml:",attr"`
+ I16Neg int16 `xml:",attr"`
+ I32 int32 `xml:",attr"`
+ I32Neg int32 `xml:",attr"`
+ I64 int64 `xml:",attr"`
+ I64Neg int64 `xml:",attr"`
+ UI uint `xml:",attr"`
+ UI8 uint8 `xml:",attr"`
+ UI16 uint16 `xml:",attr"`
+ UI32 uint32 `xml:",attr"`
+ UI64 uint64 `xml:",attr"`
+ F32 float32 `xml:",attr"`
+ F32Neg float32 `xml:",attr"`
+ F64 float64 `xml:",attr"`
+ F64Neg float64 `xml:",attr"`
+}
+
+const whitespaceAttrsXML = `
+<WhitespaceAttrsParent
+ BFalse=" false "
+ BTrue=" true "
+ I=" 266703 "
+ INeg=" -266703 "
+ I8=" 112 "
+ I8Neg=" -112 "
+ I16=" 6703 "
+ I16Neg=" -6703 "
+ I32=" 266703 "
+ I32Neg=" -266703 "
+ I64=" 266703 "
+ I64Neg=" -266703 "
+ UI=" 266703 "
+ UI8=" 112 "
+ UI16=" 6703 "
+ UI32=" 266703 "
+ UI64=" 266703 "
+ F32=" 266.703 "
+ F32Neg=" -266.703 "
+ F64=" 266.703 "
+ F64Neg=" -266.703 "
+>
+</WhitespaceAttrsParent>
+`
+
+// golang.org/issues/22146
+func TestUnmarshalWhitespaceAttrs(t *testing.T) {
+ v := WhitespaceAttrsParent{}
+ if err := Unmarshal([]byte(whitespaceAttrsXML), &v); err != nil {
+ t.Fatalf("whitespace attrs: Unmarshal failed: got %v", err)
+ }
+
+ want := WhitespaceAttrsParent{
+ BFalse: false,
+ BTrue: true,
+ I: 266703,
+ INeg: -266703,
+ I8: 112,
+ I8Neg: -112,
+ I16: 6703,
+ I16Neg: -6703,
+ I32: 266703,
+ I32Neg: -266703,
+ I64: 266703,
+ I64Neg: -266703,
+ UI: 266703,
+ UI8: 112,
+ UI16: 6703,
+ UI32: 266703,
+ UI64: 266703,
+ F32: 266.703,
+ F32Neg: -266.703,
+ F64: 266.703,
+ F64Neg: -266.703,
+ }
+ if v != want {
+ t.Fatalf("whitespace attrs: Unmarshal:\nhave: %#+v\nwant: %#+v", v, want)
+ }
+}
+
+// golang.org/issues/53350
+func TestUnmarshalIntoNil(t *testing.T) {
+ type T struct {
+ A int `xml:"A"`
+ }
+
+ var nilPointer *T
+ err := Unmarshal([]byte("<T><A>1</A></T>"), nilPointer)
+
+ if err == nil {
+ t.Fatalf("no error in unmarshalling")
+ }
+
+}
+
+func TestCVE202228131(t *testing.T) {
+ type nested struct {
+ Parent *nested `xml:",any"`
+ }
+ var n nested
+ err := Unmarshal(bytes.Repeat([]byte("<a>"), maxUnmarshalDepth+1), &n)
+ if err == nil {
+ t.Fatal("Unmarshal did not fail")
+ } else if !errors.Is(err, errUnmarshalDepth) {
+ t.Fatalf("Unmarshal unexpected error: got %q, want %q", err, errUnmarshalDepth)
+ }
+}
+
+func TestCVE202230633(t *testing.T) {
+ if testing.Short() || runtime.GOARCH == "wasm" {
+ t.Skip("test requires significant memory")
+ }
+ defer func() {
+ p := recover()
+ if p != nil {
+ t.Fatal("Unmarshal panicked")
+ }
+ }()
+ var example struct {
+ Things []string
+ }
+ Unmarshal(bytes.Repeat([]byte("<a>"), 17_000_000), &example)
+}
diff --git a/src/encoding/xml/typeinfo.go b/src/encoding/xml/typeinfo.go
new file mode 100644
index 0000000..b18ed28
--- /dev/null
+++ b/src/encoding/xml/typeinfo.go
@@ -0,0 +1,367 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package xml
+
+import (
+ "fmt"
+ "reflect"
+ "strings"
+ "sync"
+)
+
+// typeInfo holds details for the xml representation of a type.
+type typeInfo struct {
+ xmlname *fieldInfo
+ fields []fieldInfo
+}
+
+// fieldInfo holds details for the xml representation of a single field.
+type fieldInfo struct {
+ idx []int
+ name string
+ xmlns string
+ flags fieldFlags
+ parents []string
+}
+
+type fieldFlags int
+
+const (
+ fElement fieldFlags = 1 << iota
+ fAttr
+ fCDATA
+ fCharData
+ fInnerXML
+ fComment
+ fAny
+
+ fOmitEmpty
+
+ fMode = fElement | fAttr | fCDATA | fCharData | fInnerXML | fComment | fAny
+
+ xmlName = "XMLName"
+)
+
+var tinfoMap sync.Map // map[reflect.Type]*typeInfo
+
+var nameType = reflect.TypeFor[Name]()
+
+// getTypeInfo returns the typeInfo structure with details necessary
+// for marshaling and unmarshaling typ.
+func getTypeInfo(typ reflect.Type) (*typeInfo, error) {
+ if ti, ok := tinfoMap.Load(typ); ok {
+ return ti.(*typeInfo), nil
+ }
+
+ tinfo := &typeInfo{}
+ if typ.Kind() == reflect.Struct && typ != nameType {
+ n := typ.NumField()
+ for i := 0; i < n; i++ {
+ f := typ.Field(i)
+ if (!f.IsExported() && !f.Anonymous) || f.Tag.Get("xml") == "-" {
+ continue // Private field
+ }
+
+ // For embedded structs, embed its fields.
+ if f.Anonymous {
+ t := f.Type
+ if t.Kind() == reflect.Pointer {
+ t = t.Elem()
+ }
+ if t.Kind() == reflect.Struct {
+ inner, err := getTypeInfo(t)
+ if err != nil {
+ return nil, err
+ }
+ if tinfo.xmlname == nil {
+ tinfo.xmlname = inner.xmlname
+ }
+ for _, finfo := range inner.fields {
+ finfo.idx = append([]int{i}, finfo.idx...)
+ if err := addFieldInfo(typ, tinfo, &finfo); err != nil {
+ return nil, err
+ }
+ }
+ continue
+ }
+ }
+
+ finfo, err := structFieldInfo(typ, &f)
+ if err != nil {
+ return nil, err
+ }
+
+ if f.Name == xmlName {
+ tinfo.xmlname = finfo
+ continue
+ }
+
+ // Add the field if it doesn't conflict with other fields.
+ if err := addFieldInfo(typ, tinfo, finfo); err != nil {
+ return nil, err
+ }
+ }
+ }
+
+ ti, _ := tinfoMap.LoadOrStore(typ, tinfo)
+ return ti.(*typeInfo), nil
+}
+
+// structFieldInfo builds and returns a fieldInfo for f.
+func structFieldInfo(typ reflect.Type, f *reflect.StructField) (*fieldInfo, error) {
+ finfo := &fieldInfo{idx: f.Index}
+
+ // Split the tag from the xml namespace if necessary.
+ tag := f.Tag.Get("xml")
+ if ns, t, ok := strings.Cut(tag, " "); ok {
+ finfo.xmlns, tag = ns, t
+ }
+
+ // Parse flags.
+ tokens := strings.Split(tag, ",")
+ if len(tokens) == 1 {
+ finfo.flags = fElement
+ } else {
+ tag = tokens[0]
+ for _, flag := range tokens[1:] {
+ switch flag {
+ case "attr":
+ finfo.flags |= fAttr
+ case "cdata":
+ finfo.flags |= fCDATA
+ case "chardata":
+ finfo.flags |= fCharData
+ case "innerxml":
+ finfo.flags |= fInnerXML
+ case "comment":
+ finfo.flags |= fComment
+ case "any":
+ finfo.flags |= fAny
+ case "omitempty":
+ finfo.flags |= fOmitEmpty
+ }
+ }
+
+ // Validate the flags used.
+ valid := true
+ switch mode := finfo.flags & fMode; mode {
+ case 0:
+ finfo.flags |= fElement
+ case fAttr, fCDATA, fCharData, fInnerXML, fComment, fAny, fAny | fAttr:
+ if f.Name == xmlName || tag != "" && mode != fAttr {
+ valid = false
+ }
+ default:
+ // This will also catch multiple modes in a single field.
+ valid = false
+ }
+ if finfo.flags&fMode == fAny {
+ finfo.flags |= fElement
+ }
+ if finfo.flags&fOmitEmpty != 0 && finfo.flags&(fElement|fAttr) == 0 {
+ valid = false
+ }
+ if !valid {
+ return nil, fmt.Errorf("xml: invalid tag in field %s of type %s: %q",
+ f.Name, typ, f.Tag.Get("xml"))
+ }
+ }
+
+ // Use of xmlns without a name is not allowed.
+ if finfo.xmlns != "" && tag == "" {
+ return nil, fmt.Errorf("xml: namespace without name in field %s of type %s: %q",
+ f.Name, typ, f.Tag.Get("xml"))
+ }
+
+ if f.Name == xmlName {
+ // The XMLName field records the XML element name. Don't
+ // process it as usual because its name should default to
+ // empty rather than to the field name.
+ finfo.name = tag
+ return finfo, nil
+ }
+
+ if tag == "" {
+ // If the name part of the tag is completely empty, get
+ // default from XMLName of underlying struct if feasible,
+ // or field name otherwise.
+ if xmlname := lookupXMLName(f.Type); xmlname != nil {
+ finfo.xmlns, finfo.name = xmlname.xmlns, xmlname.name
+ } else {
+ finfo.name = f.Name
+ }
+ return finfo, nil
+ }
+
+ // Prepare field name and parents.
+ parents := strings.Split(tag, ">")
+ if parents[0] == "" {
+ parents[0] = f.Name
+ }
+ if parents[len(parents)-1] == "" {
+ return nil, fmt.Errorf("xml: trailing '>' in field %s of type %s", f.Name, typ)
+ }
+ finfo.name = parents[len(parents)-1]
+ if len(parents) > 1 {
+ if (finfo.flags & fElement) == 0 {
+ return nil, fmt.Errorf("xml: %s chain not valid with %s flag", tag, strings.Join(tokens[1:], ","))
+ }
+ finfo.parents = parents[:len(parents)-1]
+ }
+
+ // If the field type has an XMLName field, the names must match
+ // so that the behavior of both marshaling and unmarshaling
+ // is straightforward and unambiguous.
+ if finfo.flags&fElement != 0 {
+ ftyp := f.Type
+ xmlname := lookupXMLName(ftyp)
+ if xmlname != nil && xmlname.name != finfo.name {
+ return nil, fmt.Errorf("xml: name %q in tag of %s.%s conflicts with name %q in %s.XMLName",
+ finfo.name, typ, f.Name, xmlname.name, ftyp)
+ }
+ }
+ return finfo, nil
+}
+
+// lookupXMLName returns the fieldInfo for typ's XMLName field
+// in case it exists and has a valid xml field tag, otherwise
+// it returns nil.
+func lookupXMLName(typ reflect.Type) (xmlname *fieldInfo) {
+ for typ.Kind() == reflect.Pointer {
+ typ = typ.Elem()
+ }
+ if typ.Kind() != reflect.Struct {
+ return nil
+ }
+ for i, n := 0, typ.NumField(); i < n; i++ {
+ f := typ.Field(i)
+ if f.Name != xmlName {
+ continue
+ }
+ finfo, err := structFieldInfo(typ, &f)
+ if err == nil && finfo.name != "" {
+ return finfo
+ }
+ // Also consider errors as a non-existent field tag
+ // and let getTypeInfo itself report the error.
+ break
+ }
+ return nil
+}
+
+// addFieldInfo adds finfo to tinfo.fields if there are no
+// conflicts, or if conflicts arise from previous fields that were
+// obtained from deeper embedded structures than finfo. In the latter
+// case, the conflicting entries are dropped.
+// A conflict occurs when the path (parent + name) to a field is
+// itself a prefix of another path, or when two paths match exactly.
+// It is okay for field paths to share a common, shorter prefix.
+func addFieldInfo(typ reflect.Type, tinfo *typeInfo, newf *fieldInfo) error {
+ var conflicts []int
+Loop:
+ // First, figure all conflicts. Most working code will have none.
+ for i := range tinfo.fields {
+ oldf := &tinfo.fields[i]
+ if oldf.flags&fMode != newf.flags&fMode {
+ continue
+ }
+ if oldf.xmlns != "" && newf.xmlns != "" && oldf.xmlns != newf.xmlns {
+ continue
+ }
+ minl := min(len(newf.parents), len(oldf.parents))
+ for p := 0; p < minl; p++ {
+ if oldf.parents[p] != newf.parents[p] {
+ continue Loop
+ }
+ }
+ if len(oldf.parents) > len(newf.parents) {
+ if oldf.parents[len(newf.parents)] == newf.name {
+ conflicts = append(conflicts, i)
+ }
+ } else if len(oldf.parents) < len(newf.parents) {
+ if newf.parents[len(oldf.parents)] == oldf.name {
+ conflicts = append(conflicts, i)
+ }
+ } else {
+ if newf.name == oldf.name && newf.xmlns == oldf.xmlns {
+ conflicts = append(conflicts, i)
+ }
+ }
+ }
+ // Without conflicts, add the new field and return.
+ if conflicts == nil {
+ tinfo.fields = append(tinfo.fields, *newf)
+ return nil
+ }
+
+ // If any conflict is shallower, ignore the new field.
+ // This matches the Go field resolution on embedding.
+ for _, i := range conflicts {
+ if len(tinfo.fields[i].idx) < len(newf.idx) {
+ return nil
+ }
+ }
+
+ // Otherwise, if any of them is at the same depth level, it's an error.
+ for _, i := range conflicts {
+ oldf := &tinfo.fields[i]
+ if len(oldf.idx) == len(newf.idx) {
+ f1 := typ.FieldByIndex(oldf.idx)
+ f2 := typ.FieldByIndex(newf.idx)
+ return &TagPathError{typ, f1.Name, f1.Tag.Get("xml"), f2.Name, f2.Tag.Get("xml")}
+ }
+ }
+
+ // Otherwise, the new field is shallower, and thus takes precedence,
+ // so drop the conflicting fields from tinfo and append the new one.
+ for c := len(conflicts) - 1; c >= 0; c-- {
+ i := conflicts[c]
+ copy(tinfo.fields[i:], tinfo.fields[i+1:])
+ tinfo.fields = tinfo.fields[:len(tinfo.fields)-1]
+ }
+ tinfo.fields = append(tinfo.fields, *newf)
+ return nil
+}
+
+// A TagPathError represents an error in the unmarshaling process
+// caused by the use of field tags with conflicting paths.
+type TagPathError struct {
+ Struct reflect.Type
+ Field1, Tag1 string
+ Field2, Tag2 string
+}
+
+func (e *TagPathError) Error() string {
+ return fmt.Sprintf("%s field %q with tag %q conflicts with field %q with tag %q", e.Struct, e.Field1, e.Tag1, e.Field2, e.Tag2)
+}
+
+const (
+ initNilPointers = true
+ dontInitNilPointers = false
+)
+
+// value returns v's field value corresponding to finfo.
+// It's equivalent to v.FieldByIndex(finfo.idx), but when passed
+// initNilPointers, it initializes and dereferences pointers as necessary.
+// When passed dontInitNilPointers and a nil pointer is reached, the function
+// returns a zero reflect.Value.
+func (finfo *fieldInfo) value(v reflect.Value, shouldInitNilPointers bool) reflect.Value {
+ for i, x := range finfo.idx {
+ if i > 0 {
+ t := v.Type()
+ if t.Kind() == reflect.Pointer && t.Elem().Kind() == reflect.Struct {
+ if v.IsNil() {
+ if !shouldInitNilPointers {
+ return reflect.Value{}
+ }
+ v.Set(reflect.New(v.Type().Elem()))
+ }
+ v = v.Elem()
+ }
+ }
+ v = v.Field(x)
+ }
+ return v
+}
diff --git a/src/encoding/xml/xml.go b/src/encoding/xml/xml.go
new file mode 100644
index 0000000..73eedad
--- /dev/null
+++ b/src/encoding/xml/xml.go
@@ -0,0 +1,2060 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package xml implements a simple XML 1.0 parser that
+// understands XML name spaces.
+package xml
+
+// References:
+// Annotated XML spec: https://www.xml.com/axml/testaxml.htm
+// XML name spaces: https://www.w3.org/TR/REC-xml-names/
+
+import (
+ "bufio"
+ "bytes"
+ "errors"
+ "fmt"
+ "io"
+ "strconv"
+ "strings"
+ "unicode"
+ "unicode/utf8"
+)
+
+// A SyntaxError represents a syntax error in the XML input stream.
+type SyntaxError struct {
+ Msg string
+ Line int
+}
+
+func (e *SyntaxError) Error() string {
+ return "XML syntax error on line " + strconv.Itoa(e.Line) + ": " + e.Msg
+}
+
+// A Name represents an XML name (Local) annotated
+// with a name space identifier (Space).
+// In tokens returned by [Decoder.Token], the Space identifier
+// is given as a canonical URL, not the short prefix used
+// in the document being parsed.
+type Name struct {
+ Space, Local string
+}
+
+// An Attr represents an attribute in an XML element (Name=Value).
+type Attr struct {
+ Name Name
+ Value string
+}
+
+// A Token is an interface holding one of the token types:
+// [StartElement], [EndElement], [CharData], [Comment], [ProcInst], or [Directive].
+type Token any
+
+// A StartElement represents an XML start element.
+type StartElement struct {
+ Name Name
+ Attr []Attr
+}
+
+// Copy creates a new copy of StartElement.
+func (e StartElement) Copy() StartElement {
+ attrs := make([]Attr, len(e.Attr))
+ copy(attrs, e.Attr)
+ e.Attr = attrs
+ return e
+}
+
+// End returns the corresponding XML end element.
+func (e StartElement) End() EndElement {
+ return EndElement{e.Name}
+}
+
+// An EndElement represents an XML end element.
+type EndElement struct {
+ Name Name
+}
+
+// A CharData represents XML character data (raw text),
+// in which XML escape sequences have been replaced by
+// the characters they represent.
+type CharData []byte
+
+// Copy creates a new copy of CharData.
+func (c CharData) Copy() CharData { return CharData(bytes.Clone(c)) }
+
+// A Comment represents an XML comment of the form <!--comment-->.
+// The bytes do not include the <!-- and --> comment markers.
+type Comment []byte
+
+// Copy creates a new copy of Comment.
+func (c Comment) Copy() Comment { return Comment(bytes.Clone(c)) }
+
+// A ProcInst represents an XML processing instruction of the form <?target inst?>
+type ProcInst struct {
+ Target string
+ Inst []byte
+}
+
+// Copy creates a new copy of ProcInst.
+func (p ProcInst) Copy() ProcInst {
+ p.Inst = bytes.Clone(p.Inst)
+ return p
+}
+
+// A Directive represents an XML directive of the form <!text>.
+// The bytes do not include the <! and > markers.
+type Directive []byte
+
+// Copy creates a new copy of Directive.
+func (d Directive) Copy() Directive { return Directive(bytes.Clone(d)) }
+
+// CopyToken returns a copy of a Token.
+func CopyToken(t Token) Token {
+ switch v := t.(type) {
+ case CharData:
+ return v.Copy()
+ case Comment:
+ return v.Copy()
+ case Directive:
+ return v.Copy()
+ case ProcInst:
+ return v.Copy()
+ case StartElement:
+ return v.Copy()
+ }
+ return t
+}
+
+// A TokenReader is anything that can decode a stream of XML tokens, including a
+// [Decoder].
+//
+// When Token encounters an error or end-of-file condition after successfully
+// reading a token, it returns the token. It may return the (non-nil) error from
+// the same call or return the error (and a nil token) from a subsequent call.
+// An instance of this general case is that a TokenReader returning a non-nil
+// token at the end of the token stream may return either io.EOF or a nil error.
+// The next Read should return nil, [io.EOF].
+//
+// Implementations of Token are discouraged from returning a nil token with a
+// nil error. Callers should treat a return of nil, nil as indicating that
+// nothing happened; in particular it does not indicate EOF.
+type TokenReader interface {
+ Token() (Token, error)
+}
+
+// A Decoder represents an XML parser reading a particular input stream.
+// The parser assumes that its input is encoded in UTF-8.
+type Decoder struct {
+ // Strict defaults to true, enforcing the requirements
+ // of the XML specification.
+ // If set to false, the parser allows input containing common
+ // mistakes:
+ // * If an element is missing an end tag, the parser invents
+ // end tags as necessary to keep the return values from Token
+ // properly balanced.
+ // * In attribute values and character data, unknown or malformed
+ // character entities (sequences beginning with &) are left alone.
+ //
+ // Setting:
+ //
+ // d.Strict = false
+ // d.AutoClose = xml.HTMLAutoClose
+ // d.Entity = xml.HTMLEntity
+ //
+ // creates a parser that can handle typical HTML.
+ //
+ // Strict mode does not enforce the requirements of the XML name spaces TR.
+ // In particular it does not reject name space tags using undefined prefixes.
+ // Such tags are recorded with the unknown prefix as the name space URL.
+ Strict bool
+
+ // When Strict == false, AutoClose indicates a set of elements to
+ // consider closed immediately after they are opened, regardless
+ // of whether an end element is present.
+ AutoClose []string
+
+ // Entity can be used to map non-standard entity names to string replacements.
+ // The parser behaves as if these standard mappings are present in the map,
+ // regardless of the actual map content:
+ //
+ // "lt": "<",
+ // "gt": ">",
+ // "amp": "&",
+ // "apos": "'",
+ // "quot": `"`,
+ Entity map[string]string
+
+ // CharsetReader, if non-nil, defines a function to generate
+ // charset-conversion readers, converting from the provided
+ // non-UTF-8 charset into UTF-8. If CharsetReader is nil or
+ // returns an error, parsing stops with an error. One of the
+ // CharsetReader's result values must be non-nil.
+ CharsetReader func(charset string, input io.Reader) (io.Reader, error)
+
+ // DefaultSpace sets the default name space used for unadorned tags,
+ // as if the entire XML stream were wrapped in an element containing
+ // the attribute xmlns="DefaultSpace".
+ DefaultSpace string
+
+ r io.ByteReader
+ t TokenReader
+ buf bytes.Buffer
+ saved *bytes.Buffer
+ stk *stack
+ free *stack
+ needClose bool
+ toClose Name
+ nextToken Token
+ nextByte int
+ ns map[string]string
+ err error
+ line int
+ linestart int64
+ offset int64
+ unmarshalDepth int
+}
+
+// NewDecoder creates a new XML parser reading from r.
+// If r does not implement [io.ByteReader], NewDecoder will
+// do its own buffering.
+func NewDecoder(r io.Reader) *Decoder {
+ d := &Decoder{
+ ns: make(map[string]string),
+ nextByte: -1,
+ line: 1,
+ Strict: true,
+ }
+ d.switchToReader(r)
+ return d
+}
+
+// NewTokenDecoder creates a new XML parser using an underlying token stream.
+func NewTokenDecoder(t TokenReader) *Decoder {
+ // Is it already a Decoder?
+ if d, ok := t.(*Decoder); ok {
+ return d
+ }
+ d := &Decoder{
+ ns: make(map[string]string),
+ t: t,
+ nextByte: -1,
+ line: 1,
+ Strict: true,
+ }
+ return d
+}
+
+// Token returns the next XML token in the input stream.
+// At the end of the input stream, Token returns nil, [io.EOF].
+//
+// Slices of bytes in the returned token data refer to the
+// parser's internal buffer and remain valid only until the next
+// call to Token. To acquire a copy of the bytes, call [CopyToken]
+// or the token's Copy method.
+//
+// Token expands self-closing elements such as <br>
+// into separate start and end elements returned by successive calls.
+//
+// Token guarantees that the [StartElement] and [EndElement]
+// tokens it returns are properly nested and matched:
+// if Token encounters an unexpected end element
+// or EOF before all expected end elements,
+// it will return an error.
+//
+// If [Decoder.CharsetReader] is called and returns an error,
+// the error is wrapped and returned.
+//
+// Token implements XML name spaces as described by
+// https://www.w3.org/TR/REC-xml-names/. Each of the
+// [Name] structures contained in the Token has the Space
+// set to the URL identifying its name space when known.
+// If Token encounters an unrecognized name space prefix,
+// it uses the prefix as the Space rather than report an error.
+func (d *Decoder) Token() (Token, error) {
+ var t Token
+ var err error
+ if d.stk != nil && d.stk.kind == stkEOF {
+ return nil, io.EOF
+ }
+ if d.nextToken != nil {
+ t = d.nextToken
+ d.nextToken = nil
+ } else {
+ if t, err = d.rawToken(); t == nil && err != nil {
+ if err == io.EOF && d.stk != nil && d.stk.kind != stkEOF {
+ err = d.syntaxError("unexpected EOF")
+ }
+ return nil, err
+ }
+ // We still have a token to process, so clear any
+ // errors (e.g. EOF) and proceed.
+ err = nil
+ }
+ if !d.Strict {
+ if t1, ok := d.autoClose(t); ok {
+ d.nextToken = t
+ t = t1
+ }
+ }
+ switch t1 := t.(type) {
+ case StartElement:
+ // In XML name spaces, the translations listed in the
+ // attributes apply to the element name and
+ // to the other attribute names, so process
+ // the translations first.
+ for _, a := range t1.Attr {
+ if a.Name.Space == xmlnsPrefix {
+ v, ok := d.ns[a.Name.Local]
+ d.pushNs(a.Name.Local, v, ok)
+ d.ns[a.Name.Local] = a.Value
+ }
+ if a.Name.Space == "" && a.Name.Local == xmlnsPrefix {
+ // Default space for untagged names
+ v, ok := d.ns[""]
+ d.pushNs("", v, ok)
+ d.ns[""] = a.Value
+ }
+ }
+
+ d.pushElement(t1.Name)
+ d.translate(&t1.Name, true)
+ for i := range t1.Attr {
+ d.translate(&t1.Attr[i].Name, false)
+ }
+ t = t1
+
+ case EndElement:
+ if !d.popElement(&t1) {
+ return nil, d.err
+ }
+ t = t1
+ }
+ return t, err
+}
+
+const (
+ xmlURL = "http://www.w3.org/XML/1998/namespace"
+ xmlnsPrefix = "xmlns"
+ xmlPrefix = "xml"
+)
+
+// Apply name space translation to name n.
+// The default name space (for Space=="")
+// applies only to element names, not to attribute names.
+func (d *Decoder) translate(n *Name, isElementName bool) {
+ switch {
+ case n.Space == xmlnsPrefix:
+ return
+ case n.Space == "" && !isElementName:
+ return
+ case n.Space == xmlPrefix:
+ n.Space = xmlURL
+ case n.Space == "" && n.Local == xmlnsPrefix:
+ return
+ }
+ if v, ok := d.ns[n.Space]; ok {
+ n.Space = v
+ } else if n.Space == "" {
+ n.Space = d.DefaultSpace
+ }
+}
+
+func (d *Decoder) switchToReader(r io.Reader) {
+ // Get efficient byte at a time reader.
+ // Assume that if reader has its own
+ // ReadByte, it's efficient enough.
+ // Otherwise, use bufio.
+ if rb, ok := r.(io.ByteReader); ok {
+ d.r = rb
+ } else {
+ d.r = bufio.NewReader(r)
+ }
+}
+
+// Parsing state - stack holds old name space translations
+// and the current set of open elements. The translations to pop when
+// ending a given tag are *below* it on the stack, which is
+// more work but forced on us by XML.
+type stack struct {
+ next *stack
+ kind int
+ name Name
+ ok bool
+}
+
+const (
+ stkStart = iota
+ stkNs
+ stkEOF
+)
+
+func (d *Decoder) push(kind int) *stack {
+ s := d.free
+ if s != nil {
+ d.free = s.next
+ } else {
+ s = new(stack)
+ }
+ s.next = d.stk
+ s.kind = kind
+ d.stk = s
+ return s
+}
+
+func (d *Decoder) pop() *stack {
+ s := d.stk
+ if s != nil {
+ d.stk = s.next
+ s.next = d.free
+ d.free = s
+ }
+ return s
+}
+
+// Record that after the current element is finished
+// (that element is already pushed on the stack)
+// Token should return EOF until popEOF is called.
+func (d *Decoder) pushEOF() {
+ // Walk down stack to find Start.
+ // It might not be the top, because there might be stkNs
+ // entries above it.
+ start := d.stk
+ for start.kind != stkStart {
+ start = start.next
+ }
+ // The stkNs entries below a start are associated with that
+ // element too; skip over them.
+ for start.next != nil && start.next.kind == stkNs {
+ start = start.next
+ }
+ s := d.free
+ if s != nil {
+ d.free = s.next
+ } else {
+ s = new(stack)
+ }
+ s.kind = stkEOF
+ s.next = start.next
+ start.next = s
+}
+
+// Undo a pushEOF.
+// The element must have been finished, so the EOF should be at the top of the stack.
+func (d *Decoder) popEOF() bool {
+ if d.stk == nil || d.stk.kind != stkEOF {
+ return false
+ }
+ d.pop()
+ return true
+}
+
+// Record that we are starting an element with the given name.
+func (d *Decoder) pushElement(name Name) {
+ s := d.push(stkStart)
+ s.name = name
+}
+
+// Record that we are changing the value of ns[local].
+// The old value is url, ok.
+func (d *Decoder) pushNs(local string, url string, ok bool) {
+ s := d.push(stkNs)
+ s.name.Local = local
+ s.name.Space = url
+ s.ok = ok
+}
+
+// Creates a SyntaxError with the current line number.
+func (d *Decoder) syntaxError(msg string) error {
+ return &SyntaxError{Msg: msg, Line: d.line}
+}
+
+// Record that we are ending an element with the given name.
+// The name must match the record at the top of the stack,
+// which must be a pushElement record.
+// After popping the element, apply any undo records from
+// the stack to restore the name translations that existed
+// before we saw this element.
+func (d *Decoder) popElement(t *EndElement) bool {
+ s := d.pop()
+ name := t.Name
+ switch {
+ case s == nil || s.kind != stkStart:
+ d.err = d.syntaxError("unexpected end element </" + name.Local + ">")
+ return false
+ case s.name.Local != name.Local:
+ if !d.Strict {
+ d.needClose = true
+ d.toClose = t.Name
+ t.Name = s.name
+ return true
+ }
+ d.err = d.syntaxError("element <" + s.name.Local + "> closed by </" + name.Local + ">")
+ return false
+ case s.name.Space != name.Space:
+ d.err = d.syntaxError("element <" + s.name.Local + "> in space " + s.name.Space +
+ " closed by </" + name.Local + "> in space " + name.Space)
+ return false
+ }
+
+ d.translate(&t.Name, true)
+
+ // Pop stack until a Start or EOF is on the top, undoing the
+ // translations that were associated with the element we just closed.
+ for d.stk != nil && d.stk.kind != stkStart && d.stk.kind != stkEOF {
+ s := d.pop()
+ if s.ok {
+ d.ns[s.name.Local] = s.name.Space
+ } else {
+ delete(d.ns, s.name.Local)
+ }
+ }
+
+ return true
+}
+
+// If the top element on the stack is autoclosing and
+// t is not the end tag, invent the end tag.
+func (d *Decoder) autoClose(t Token) (Token, bool) {
+ if d.stk == nil || d.stk.kind != stkStart {
+ return nil, false
+ }
+ for _, s := range d.AutoClose {
+ if strings.EqualFold(s, d.stk.name.Local) {
+ // This one should be auto closed if t doesn't close it.
+ et, ok := t.(EndElement)
+ if !ok || !strings.EqualFold(et.Name.Local, d.stk.name.Local) {
+ return EndElement{d.stk.name}, true
+ }
+ break
+ }
+ }
+ return nil, false
+}
+
+var errRawToken = errors.New("xml: cannot use RawToken from UnmarshalXML method")
+
+// RawToken is like [Decoder.Token] but does not verify that
+// start and end elements match and does not translate
+// name space prefixes to their corresponding URLs.
+func (d *Decoder) RawToken() (Token, error) {
+ if d.unmarshalDepth > 0 {
+ return nil, errRawToken
+ }
+ return d.rawToken()
+}
+
+func (d *Decoder) rawToken() (Token, error) {
+ if d.t != nil {
+ return d.t.Token()
+ }
+ if d.err != nil {
+ return nil, d.err
+ }
+ if d.needClose {
+ // The last element we read was self-closing and
+ // we returned just the StartElement half.
+ // Return the EndElement half now.
+ d.needClose = false
+ return EndElement{d.toClose}, nil
+ }
+
+ b, ok := d.getc()
+ if !ok {
+ return nil, d.err
+ }
+
+ if b != '<' {
+ // Text section.
+ d.ungetc(b)
+ data := d.text(-1, false)
+ if data == nil {
+ return nil, d.err
+ }
+ return CharData(data), nil
+ }
+
+ if b, ok = d.mustgetc(); !ok {
+ return nil, d.err
+ }
+ switch b {
+ case '/':
+ // </: End element
+ var name Name
+ if name, ok = d.nsname(); !ok {
+ if d.err == nil {
+ d.err = d.syntaxError("expected element name after </")
+ }
+ return nil, d.err
+ }
+ d.space()
+ if b, ok = d.mustgetc(); !ok {
+ return nil, d.err
+ }
+ if b != '>' {
+ d.err = d.syntaxError("invalid characters between </" + name.Local + " and >")
+ return nil, d.err
+ }
+ return EndElement{name}, nil
+
+ case '?':
+ // <?: Processing instruction.
+ var target string
+ if target, ok = d.name(); !ok {
+ if d.err == nil {
+ d.err = d.syntaxError("expected target name after <?")
+ }
+ return nil, d.err
+ }
+ d.space()
+ d.buf.Reset()
+ var b0 byte
+ for {
+ if b, ok = d.mustgetc(); !ok {
+ return nil, d.err
+ }
+ d.buf.WriteByte(b)
+ if b0 == '?' && b == '>' {
+ break
+ }
+ b0 = b
+ }
+ data := d.buf.Bytes()
+ data = data[0 : len(data)-2] // chop ?>
+
+ if target == "xml" {
+ content := string(data)
+ ver := procInst("version", content)
+ if ver != "" && ver != "1.0" {
+ d.err = fmt.Errorf("xml: unsupported version %q; only version 1.0 is supported", ver)
+ return nil, d.err
+ }
+ enc := procInst("encoding", content)
+ if enc != "" && enc != "utf-8" && enc != "UTF-8" && !strings.EqualFold(enc, "utf-8") {
+ if d.CharsetReader == nil {
+ d.err = fmt.Errorf("xml: encoding %q declared but Decoder.CharsetReader is nil", enc)
+ return nil, d.err
+ }
+ newr, err := d.CharsetReader(enc, d.r.(io.Reader))
+ if err != nil {
+ d.err = fmt.Errorf("xml: opening charset %q: %w", enc, err)
+ return nil, d.err
+ }
+ if newr == nil {
+ panic("CharsetReader returned a nil Reader for charset " + enc)
+ }
+ d.switchToReader(newr)
+ }
+ }
+ return ProcInst{target, data}, nil
+
+ case '!':
+ // <!: Maybe comment, maybe CDATA.
+ if b, ok = d.mustgetc(); !ok {
+ return nil, d.err
+ }
+ switch b {
+ case '-': // <!-
+ // Probably <!-- for a comment.
+ if b, ok = d.mustgetc(); !ok {
+ return nil, d.err
+ }
+ if b != '-' {
+ d.err = d.syntaxError("invalid sequence <!- not part of <!--")
+ return nil, d.err
+ }
+ // Look for terminator.
+ d.buf.Reset()
+ var b0, b1 byte
+ for {
+ if b, ok = d.mustgetc(); !ok {
+ return nil, d.err
+ }
+ d.buf.WriteByte(b)
+ if b0 == '-' && b1 == '-' {
+ if b != '>' {
+ d.err = d.syntaxError(
+ `invalid sequence "--" not allowed in comments`)
+ return nil, d.err
+ }
+ break
+ }
+ b0, b1 = b1, b
+ }
+ data := d.buf.Bytes()
+ data = data[0 : len(data)-3] // chop -->
+ return Comment(data), nil
+
+ case '[': // <![
+ // Probably <![CDATA[.
+ for i := 0; i < 6; i++ {
+ if b, ok = d.mustgetc(); !ok {
+ return nil, d.err
+ }
+ if b != "CDATA["[i] {
+ d.err = d.syntaxError("invalid <![ sequence")
+ return nil, d.err
+ }
+ }
+ // Have <![CDATA[. Read text until ]]>.
+ data := d.text(-1, true)
+ if data == nil {
+ return nil, d.err
+ }
+ return CharData(data), nil
+ }
+
+ // Probably a directive: <!DOCTYPE ...>, <!ENTITY ...>, etc.
+ // We don't care, but accumulate for caller. Quoted angle
+ // brackets do not count for nesting.
+ d.buf.Reset()
+ d.buf.WriteByte(b)
+ inquote := uint8(0)
+ depth := 0
+ for {
+ if b, ok = d.mustgetc(); !ok {
+ return nil, d.err
+ }
+ if inquote == 0 && b == '>' && depth == 0 {
+ break
+ }
+ HandleB:
+ d.buf.WriteByte(b)
+ switch {
+ case b == inquote:
+ inquote = 0
+
+ case inquote != 0:
+ // in quotes, no special action
+
+ case b == '\'' || b == '"':
+ inquote = b
+
+ case b == '>' && inquote == 0:
+ depth--
+
+ case b == '<' && inquote == 0:
+ // Look for <!-- to begin comment.
+ s := "!--"
+ for i := 0; i < len(s); i++ {
+ if b, ok = d.mustgetc(); !ok {
+ return nil, d.err
+ }
+ if b != s[i] {
+ for j := 0; j < i; j++ {
+ d.buf.WriteByte(s[j])
+ }
+ depth++
+ goto HandleB
+ }
+ }
+
+ // Remove < that was written above.
+ d.buf.Truncate(d.buf.Len() - 1)
+
+ // Look for terminator.
+ var b0, b1 byte
+ for {
+ if b, ok = d.mustgetc(); !ok {
+ return nil, d.err
+ }
+ if b0 == '-' && b1 == '-' && b == '>' {
+ break
+ }
+ b0, b1 = b1, b
+ }
+
+ // Replace the comment with a space in the returned Directive
+ // body, so that markup parts that were separated by the comment
+ // (like a "<" and a "!") don't get joined when re-encoding the
+ // Directive, taking new semantic meaning.
+ d.buf.WriteByte(' ')
+ }
+ }
+ return Directive(d.buf.Bytes()), nil
+ }
+
+ // Must be an open element like <a href="foo">
+ d.ungetc(b)
+
+ var (
+ name Name
+ empty bool
+ attr []Attr
+ )
+ if name, ok = d.nsname(); !ok {
+ if d.err == nil {
+ d.err = d.syntaxError("expected element name after <")
+ }
+ return nil, d.err
+ }
+
+ attr = []Attr{}
+ for {
+ d.space()
+ if b, ok = d.mustgetc(); !ok {
+ return nil, d.err
+ }
+ if b == '/' {
+ empty = true
+ if b, ok = d.mustgetc(); !ok {
+ return nil, d.err
+ }
+ if b != '>' {
+ d.err = d.syntaxError("expected /> in element")
+ return nil, d.err
+ }
+ break
+ }
+ if b == '>' {
+ break
+ }
+ d.ungetc(b)
+
+ a := Attr{}
+ if a.Name, ok = d.nsname(); !ok {
+ if d.err == nil {
+ d.err = d.syntaxError("expected attribute name in element")
+ }
+ return nil, d.err
+ }
+ d.space()
+ if b, ok = d.mustgetc(); !ok {
+ return nil, d.err
+ }
+ if b != '=' {
+ if d.Strict {
+ d.err = d.syntaxError("attribute name without = in element")
+ return nil, d.err
+ }
+ d.ungetc(b)
+ a.Value = a.Name.Local
+ } else {
+ d.space()
+ data := d.attrval()
+ if data == nil {
+ return nil, d.err
+ }
+ a.Value = string(data)
+ }
+ attr = append(attr, a)
+ }
+ if empty {
+ d.needClose = true
+ d.toClose = name
+ }
+ return StartElement{name, attr}, nil
+}
+
+func (d *Decoder) attrval() []byte {
+ b, ok := d.mustgetc()
+ if !ok {
+ return nil
+ }
+ // Handle quoted attribute values
+ if b == '"' || b == '\'' {
+ return d.text(int(b), false)
+ }
+ // Handle unquoted attribute values for strict parsers
+ if d.Strict {
+ d.err = d.syntaxError("unquoted or missing attribute value in element")
+ return nil
+ }
+ // Handle unquoted attribute values for unstrict parsers
+ d.ungetc(b)
+ d.buf.Reset()
+ for {
+ b, ok = d.mustgetc()
+ if !ok {
+ return nil
+ }
+ // https://www.w3.org/TR/REC-html40/intro/sgmltut.html#h-3.2.2
+ if 'a' <= b && b <= 'z' || 'A' <= b && b <= 'Z' ||
+ '0' <= b && b <= '9' || b == '_' || b == ':' || b == '-' {
+ d.buf.WriteByte(b)
+ } else {
+ d.ungetc(b)
+ break
+ }
+ }
+ return d.buf.Bytes()
+}
+
+// Skip spaces if any
+func (d *Decoder) space() {
+ for {
+ b, ok := d.getc()
+ if !ok {
+ return
+ }
+ switch b {
+ case ' ', '\r', '\n', '\t':
+ default:
+ d.ungetc(b)
+ return
+ }
+ }
+}
+
+// Read a single byte.
+// If there is no byte to read, return ok==false
+// and leave the error in d.err.
+// Maintain line number.
+func (d *Decoder) getc() (b byte, ok bool) {
+ if d.err != nil {
+ return 0, false
+ }
+ if d.nextByte >= 0 {
+ b = byte(d.nextByte)
+ d.nextByte = -1
+ } else {
+ b, d.err = d.r.ReadByte()
+ if d.err != nil {
+ return 0, false
+ }
+ if d.saved != nil {
+ d.saved.WriteByte(b)
+ }
+ }
+ if b == '\n' {
+ d.line++
+ d.linestart = d.offset + 1
+ }
+ d.offset++
+ return b, true
+}
+
+// InputOffset returns the input stream byte offset of the current decoder position.
+// The offset gives the location of the end of the most recently returned token
+// and the beginning of the next token.
+func (d *Decoder) InputOffset() int64 {
+ return d.offset
+}
+
+// InputPos returns the line of the current decoder position and the 1 based
+// input position of the line. The position gives the location of the end of the
+// most recently returned token.
+func (d *Decoder) InputPos() (line, column int) {
+ return d.line, int(d.offset-d.linestart) + 1
+}
+
+// Return saved offset.
+// If we did ungetc (nextByte >= 0), have to back up one.
+func (d *Decoder) savedOffset() int {
+ n := d.saved.Len()
+ if d.nextByte >= 0 {
+ n--
+ }
+ return n
+}
+
+// Must read a single byte.
+// If there is no byte to read,
+// set d.err to SyntaxError("unexpected EOF")
+// and return ok==false
+func (d *Decoder) mustgetc() (b byte, ok bool) {
+ if b, ok = d.getc(); !ok {
+ if d.err == io.EOF {
+ d.err = d.syntaxError("unexpected EOF")
+ }
+ }
+ return
+}
+
+// Unread a single byte.
+func (d *Decoder) ungetc(b byte) {
+ if b == '\n' {
+ d.line--
+ }
+ d.nextByte = int(b)
+ d.offset--
+}
+
+var entity = map[string]rune{
+ "lt": '<',
+ "gt": '>',
+ "amp": '&',
+ "apos": '\'',
+ "quot": '"',
+}
+
+// Read plain text section (XML calls it character data).
+// If quote >= 0, we are in a quoted string and need to find the matching quote.
+// If cdata == true, we are in a <![CDATA[ section and need to find ]]>.
+// On failure return nil and leave the error in d.err.
+func (d *Decoder) text(quote int, cdata bool) []byte {
+ var b0, b1 byte
+ var trunc int
+ d.buf.Reset()
+Input:
+ for {
+ b, ok := d.getc()
+ if !ok {
+ if cdata {
+ if d.err == io.EOF {
+ d.err = d.syntaxError("unexpected EOF in CDATA section")
+ }
+ return nil
+ }
+ break Input
+ }
+
+ // <![CDATA[ section ends with ]]>.
+ // It is an error for ]]> to appear in ordinary text.
+ if b0 == ']' && b1 == ']' && b == '>' {
+ if cdata {
+ trunc = 2
+ break Input
+ }
+ d.err = d.syntaxError("unescaped ]]> not in CDATA section")
+ return nil
+ }
+
+ // Stop reading text if we see a <.
+ if b == '<' && !cdata {
+ if quote >= 0 {
+ d.err = d.syntaxError("unescaped < inside quoted string")
+ return nil
+ }
+ d.ungetc('<')
+ break Input
+ }
+ if quote >= 0 && b == byte(quote) {
+ break Input
+ }
+ if b == '&' && !cdata {
+ // Read escaped character expression up to semicolon.
+ // XML in all its glory allows a document to define and use
+ // its own character names with <!ENTITY ...> directives.
+ // Parsers are required to recognize lt, gt, amp, apos, and quot
+ // even if they have not been declared.
+ before := d.buf.Len()
+ d.buf.WriteByte('&')
+ var ok bool
+ var text string
+ var haveText bool
+ if b, ok = d.mustgetc(); !ok {
+ return nil
+ }
+ if b == '#' {
+ d.buf.WriteByte(b)
+ if b, ok = d.mustgetc(); !ok {
+ return nil
+ }
+ base := 10
+ if b == 'x' {
+ base = 16
+ d.buf.WriteByte(b)
+ if b, ok = d.mustgetc(); !ok {
+ return nil
+ }
+ }
+ start := d.buf.Len()
+ for '0' <= b && b <= '9' ||
+ base == 16 && 'a' <= b && b <= 'f' ||
+ base == 16 && 'A' <= b && b <= 'F' {
+ d.buf.WriteByte(b)
+ if b, ok = d.mustgetc(); !ok {
+ return nil
+ }
+ }
+ if b != ';' {
+ d.ungetc(b)
+ } else {
+ s := string(d.buf.Bytes()[start:])
+ d.buf.WriteByte(';')
+ n, err := strconv.ParseUint(s, base, 64)
+ if err == nil && n <= unicode.MaxRune {
+ text = string(rune(n))
+ haveText = true
+ }
+ }
+ } else {
+ d.ungetc(b)
+ if !d.readName() {
+ if d.err != nil {
+ return nil
+ }
+ }
+ if b, ok = d.mustgetc(); !ok {
+ return nil
+ }
+ if b != ';' {
+ d.ungetc(b)
+ } else {
+ name := d.buf.Bytes()[before+1:]
+ d.buf.WriteByte(';')
+ if isName(name) {
+ s := string(name)
+ if r, ok := entity[s]; ok {
+ text = string(r)
+ haveText = true
+ } else if d.Entity != nil {
+ text, haveText = d.Entity[s]
+ }
+ }
+ }
+ }
+
+ if haveText {
+ d.buf.Truncate(before)
+ d.buf.WriteString(text)
+ b0, b1 = 0, 0
+ continue Input
+ }
+ if !d.Strict {
+ b0, b1 = 0, 0
+ continue Input
+ }
+ ent := string(d.buf.Bytes()[before:])
+ if ent[len(ent)-1] != ';' {
+ ent += " (no semicolon)"
+ }
+ d.err = d.syntaxError("invalid character entity " + ent)
+ return nil
+ }
+
+ // We must rewrite unescaped \r and \r\n into \n.
+ if b == '\r' {
+ d.buf.WriteByte('\n')
+ } else if b1 == '\r' && b == '\n' {
+ // Skip \r\n--we already wrote \n.
+ } else {
+ d.buf.WriteByte(b)
+ }
+
+ b0, b1 = b1, b
+ }
+ data := d.buf.Bytes()
+ data = data[0 : len(data)-trunc]
+
+ // Inspect each rune for being a disallowed character.
+ buf := data
+ for len(buf) > 0 {
+ r, size := utf8.DecodeRune(buf)
+ if r == utf8.RuneError && size == 1 {
+ d.err = d.syntaxError("invalid UTF-8")
+ return nil
+ }
+ buf = buf[size:]
+ if !isInCharacterRange(r) {
+ d.err = d.syntaxError(fmt.Sprintf("illegal character code %U", r))
+ return nil
+ }
+ }
+
+ return data
+}
+
+// Decide whether the given rune is in the XML Character Range, per
+// the Char production of https://www.xml.com/axml/testaxml.htm,
+// Section 2.2 Characters.
+func isInCharacterRange(r rune) (inrange bool) {
+ return r == 0x09 ||
+ r == 0x0A ||
+ r == 0x0D ||
+ r >= 0x20 && r <= 0xD7FF ||
+ r >= 0xE000 && r <= 0xFFFD ||
+ r >= 0x10000 && r <= 0x10FFFF
+}
+
+// Get name space name: name with a : stuck in the middle.
+// The part before the : is the name space identifier.
+func (d *Decoder) nsname() (name Name, ok bool) {
+ s, ok := d.name()
+ if !ok {
+ return
+ }
+ if strings.Count(s, ":") > 1 {
+ return name, false
+ } else if space, local, ok := strings.Cut(s, ":"); !ok || space == "" || local == "" {
+ name.Local = s
+ } else {
+ name.Space = space
+ name.Local = local
+ }
+ return name, true
+}
+
+// Get name: /first(first|second)*/
+// Do not set d.err if the name is missing (unless unexpected EOF is received):
+// let the caller provide better context.
+func (d *Decoder) name() (s string, ok bool) {
+ d.buf.Reset()
+ if !d.readName() {
+ return "", false
+ }
+
+ // Now we check the characters.
+ b := d.buf.Bytes()
+ if !isName(b) {
+ d.err = d.syntaxError("invalid XML name: " + string(b))
+ return "", false
+ }
+ return string(b), true
+}
+
+// Read a name and append its bytes to d.buf.
+// The name is delimited by any single-byte character not valid in names.
+// All multi-byte characters are accepted; the caller must check their validity.
+func (d *Decoder) readName() (ok bool) {
+ var b byte
+ if b, ok = d.mustgetc(); !ok {
+ return
+ }
+ if b < utf8.RuneSelf && !isNameByte(b) {
+ d.ungetc(b)
+ return false
+ }
+ d.buf.WriteByte(b)
+
+ for {
+ if b, ok = d.mustgetc(); !ok {
+ return
+ }
+ if b < utf8.RuneSelf && !isNameByte(b) {
+ d.ungetc(b)
+ break
+ }
+ d.buf.WriteByte(b)
+ }
+ return true
+}
+
+func isNameByte(c byte) bool {
+ return 'A' <= c && c <= 'Z' ||
+ 'a' <= c && c <= 'z' ||
+ '0' <= c && c <= '9' ||
+ c == '_' || c == ':' || c == '.' || c == '-'
+}
+
+func isName(s []byte) bool {
+ if len(s) == 0 {
+ return false
+ }
+ c, n := utf8.DecodeRune(s)
+ if c == utf8.RuneError && n == 1 {
+ return false
+ }
+ if !unicode.Is(first, c) {
+ return false
+ }
+ for n < len(s) {
+ s = s[n:]
+ c, n = utf8.DecodeRune(s)
+ if c == utf8.RuneError && n == 1 {
+ return false
+ }
+ if !unicode.Is(first, c) && !unicode.Is(second, c) {
+ return false
+ }
+ }
+ return true
+}
+
+func isNameString(s string) bool {
+ if len(s) == 0 {
+ return false
+ }
+ c, n := utf8.DecodeRuneInString(s)
+ if c == utf8.RuneError && n == 1 {
+ return false
+ }
+ if !unicode.Is(first, c) {
+ return false
+ }
+ for n < len(s) {
+ s = s[n:]
+ c, n = utf8.DecodeRuneInString(s)
+ if c == utf8.RuneError && n == 1 {
+ return false
+ }
+ if !unicode.Is(first, c) && !unicode.Is(second, c) {
+ return false
+ }
+ }
+ return true
+}
+
+// These tables were generated by cut and paste from Appendix B of
+// the XML spec at https://www.xml.com/axml/testaxml.htm
+// and then reformatting. First corresponds to (Letter | '_' | ':')
+// and second corresponds to NameChar.
+
+var first = &unicode.RangeTable{
+ R16: []unicode.Range16{
+ {0x003A, 0x003A, 1},
+ {0x0041, 0x005A, 1},
+ {0x005F, 0x005F, 1},
+ {0x0061, 0x007A, 1},
+ {0x00C0, 0x00D6, 1},
+ {0x00D8, 0x00F6, 1},
+ {0x00F8, 0x00FF, 1},
+ {0x0100, 0x0131, 1},
+ {0x0134, 0x013E, 1},
+ {0x0141, 0x0148, 1},
+ {0x014A, 0x017E, 1},
+ {0x0180, 0x01C3, 1},
+ {0x01CD, 0x01F0, 1},
+ {0x01F4, 0x01F5, 1},
+ {0x01FA, 0x0217, 1},
+ {0x0250, 0x02A8, 1},
+ {0x02BB, 0x02C1, 1},
+ {0x0386, 0x0386, 1},
+ {0x0388, 0x038A, 1},
+ {0x038C, 0x038C, 1},
+ {0x038E, 0x03A1, 1},
+ {0x03A3, 0x03CE, 1},
+ {0x03D0, 0x03D6, 1},
+ {0x03DA, 0x03E0, 2},
+ {0x03E2, 0x03F3, 1},
+ {0x0401, 0x040C, 1},
+ {0x040E, 0x044F, 1},
+ {0x0451, 0x045C, 1},
+ {0x045E, 0x0481, 1},
+ {0x0490, 0x04C4, 1},
+ {0x04C7, 0x04C8, 1},
+ {0x04CB, 0x04CC, 1},
+ {0x04D0, 0x04EB, 1},
+ {0x04EE, 0x04F5, 1},
+ {0x04F8, 0x04F9, 1},
+ {0x0531, 0x0556, 1},
+ {0x0559, 0x0559, 1},
+ {0x0561, 0x0586, 1},
+ {0x05D0, 0x05EA, 1},
+ {0x05F0, 0x05F2, 1},
+ {0x0621, 0x063A, 1},
+ {0x0641, 0x064A, 1},
+ {0x0671, 0x06B7, 1},
+ {0x06BA, 0x06BE, 1},
+ {0x06C0, 0x06CE, 1},
+ {0x06D0, 0x06D3, 1},
+ {0x06D5, 0x06D5, 1},
+ {0x06E5, 0x06E6, 1},
+ {0x0905, 0x0939, 1},
+ {0x093D, 0x093D, 1},
+ {0x0958, 0x0961, 1},
+ {0x0985, 0x098C, 1},
+ {0x098F, 0x0990, 1},
+ {0x0993, 0x09A8, 1},
+ {0x09AA, 0x09B0, 1},
+ {0x09B2, 0x09B2, 1},
+ {0x09B6, 0x09B9, 1},
+ {0x09DC, 0x09DD, 1},
+ {0x09DF, 0x09E1, 1},
+ {0x09F0, 0x09F1, 1},
+ {0x0A05, 0x0A0A, 1},
+ {0x0A0F, 0x0A10, 1},
+ {0x0A13, 0x0A28, 1},
+ {0x0A2A, 0x0A30, 1},
+ {0x0A32, 0x0A33, 1},
+ {0x0A35, 0x0A36, 1},
+ {0x0A38, 0x0A39, 1},
+ {0x0A59, 0x0A5C, 1},
+ {0x0A5E, 0x0A5E, 1},
+ {0x0A72, 0x0A74, 1},
+ {0x0A85, 0x0A8B, 1},
+ {0x0A8D, 0x0A8D, 1},
+ {0x0A8F, 0x0A91, 1},
+ {0x0A93, 0x0AA8, 1},
+ {0x0AAA, 0x0AB0, 1},
+ {0x0AB2, 0x0AB3, 1},
+ {0x0AB5, 0x0AB9, 1},
+ {0x0ABD, 0x0AE0, 0x23},
+ {0x0B05, 0x0B0C, 1},
+ {0x0B0F, 0x0B10, 1},
+ {0x0B13, 0x0B28, 1},
+ {0x0B2A, 0x0B30, 1},
+ {0x0B32, 0x0B33, 1},
+ {0x0B36, 0x0B39, 1},
+ {0x0B3D, 0x0B3D, 1},
+ {0x0B5C, 0x0B5D, 1},
+ {0x0B5F, 0x0B61, 1},
+ {0x0B85, 0x0B8A, 1},
+ {0x0B8E, 0x0B90, 1},
+ {0x0B92, 0x0B95, 1},
+ {0x0B99, 0x0B9A, 1},
+ {0x0B9C, 0x0B9C, 1},
+ {0x0B9E, 0x0B9F, 1},
+ {0x0BA3, 0x0BA4, 1},
+ {0x0BA8, 0x0BAA, 1},
+ {0x0BAE, 0x0BB5, 1},
+ {0x0BB7, 0x0BB9, 1},
+ {0x0C05, 0x0C0C, 1},
+ {0x0C0E, 0x0C10, 1},
+ {0x0C12, 0x0C28, 1},
+ {0x0C2A, 0x0C33, 1},
+ {0x0C35, 0x0C39, 1},
+ {0x0C60, 0x0C61, 1},
+ {0x0C85, 0x0C8C, 1},
+ {0x0C8E, 0x0C90, 1},
+ {0x0C92, 0x0CA8, 1},
+ {0x0CAA, 0x0CB3, 1},
+ {0x0CB5, 0x0CB9, 1},
+ {0x0CDE, 0x0CDE, 1},
+ {0x0CE0, 0x0CE1, 1},
+ {0x0D05, 0x0D0C, 1},
+ {0x0D0E, 0x0D10, 1},
+ {0x0D12, 0x0D28, 1},
+ {0x0D2A, 0x0D39, 1},
+ {0x0D60, 0x0D61, 1},
+ {0x0E01, 0x0E2E, 1},
+ {0x0E30, 0x0E30, 1},
+ {0x0E32, 0x0E33, 1},
+ {0x0E40, 0x0E45, 1},
+ {0x0E81, 0x0E82, 1},
+ {0x0E84, 0x0E84, 1},
+ {0x0E87, 0x0E88, 1},
+ {0x0E8A, 0x0E8D, 3},
+ {0x0E94, 0x0E97, 1},
+ {0x0E99, 0x0E9F, 1},
+ {0x0EA1, 0x0EA3, 1},
+ {0x0EA5, 0x0EA7, 2},
+ {0x0EAA, 0x0EAB, 1},
+ {0x0EAD, 0x0EAE, 1},
+ {0x0EB0, 0x0EB0, 1},
+ {0x0EB2, 0x0EB3, 1},
+ {0x0EBD, 0x0EBD, 1},
+ {0x0EC0, 0x0EC4, 1},
+ {0x0F40, 0x0F47, 1},
+ {0x0F49, 0x0F69, 1},
+ {0x10A0, 0x10C5, 1},
+ {0x10D0, 0x10F6, 1},
+ {0x1100, 0x1100, 1},
+ {0x1102, 0x1103, 1},
+ {0x1105, 0x1107, 1},
+ {0x1109, 0x1109, 1},
+ {0x110B, 0x110C, 1},
+ {0x110E, 0x1112, 1},
+ {0x113C, 0x1140, 2},
+ {0x114C, 0x1150, 2},
+ {0x1154, 0x1155, 1},
+ {0x1159, 0x1159, 1},
+ {0x115F, 0x1161, 1},
+ {0x1163, 0x1169, 2},
+ {0x116D, 0x116E, 1},
+ {0x1172, 0x1173, 1},
+ {0x1175, 0x119E, 0x119E - 0x1175},
+ {0x11A8, 0x11AB, 0x11AB - 0x11A8},
+ {0x11AE, 0x11AF, 1},
+ {0x11B7, 0x11B8, 1},
+ {0x11BA, 0x11BA, 1},
+ {0x11BC, 0x11C2, 1},
+ {0x11EB, 0x11F0, 0x11F0 - 0x11EB},
+ {0x11F9, 0x11F9, 1},
+ {0x1E00, 0x1E9B, 1},
+ {0x1EA0, 0x1EF9, 1},
+ {0x1F00, 0x1F15, 1},
+ {0x1F18, 0x1F1D, 1},
+ {0x1F20, 0x1F45, 1},
+ {0x1F48, 0x1F4D, 1},
+ {0x1F50, 0x1F57, 1},
+ {0x1F59, 0x1F5B, 0x1F5B - 0x1F59},
+ {0x1F5D, 0x1F5D, 1},
+ {0x1F5F, 0x1F7D, 1},
+ {0x1F80, 0x1FB4, 1},
+ {0x1FB6, 0x1FBC, 1},
+ {0x1FBE, 0x1FBE, 1},
+ {0x1FC2, 0x1FC4, 1},
+ {0x1FC6, 0x1FCC, 1},
+ {0x1FD0, 0x1FD3, 1},
+ {0x1FD6, 0x1FDB, 1},
+ {0x1FE0, 0x1FEC, 1},
+ {0x1FF2, 0x1FF4, 1},
+ {0x1FF6, 0x1FFC, 1},
+ {0x2126, 0x2126, 1},
+ {0x212A, 0x212B, 1},
+ {0x212E, 0x212E, 1},
+ {0x2180, 0x2182, 1},
+ {0x3007, 0x3007, 1},
+ {0x3021, 0x3029, 1},
+ {0x3041, 0x3094, 1},
+ {0x30A1, 0x30FA, 1},
+ {0x3105, 0x312C, 1},
+ {0x4E00, 0x9FA5, 1},
+ {0xAC00, 0xD7A3, 1},
+ },
+}
+
+var second = &unicode.RangeTable{
+ R16: []unicode.Range16{
+ {0x002D, 0x002E, 1},
+ {0x0030, 0x0039, 1},
+ {0x00B7, 0x00B7, 1},
+ {0x02D0, 0x02D1, 1},
+ {0x0300, 0x0345, 1},
+ {0x0360, 0x0361, 1},
+ {0x0387, 0x0387, 1},
+ {0x0483, 0x0486, 1},
+ {0x0591, 0x05A1, 1},
+ {0x05A3, 0x05B9, 1},
+ {0x05BB, 0x05BD, 1},
+ {0x05BF, 0x05BF, 1},
+ {0x05C1, 0x05C2, 1},
+ {0x05C4, 0x0640, 0x0640 - 0x05C4},
+ {0x064B, 0x0652, 1},
+ {0x0660, 0x0669, 1},
+ {0x0670, 0x0670, 1},
+ {0x06D6, 0x06DC, 1},
+ {0x06DD, 0x06DF, 1},
+ {0x06E0, 0x06E4, 1},
+ {0x06E7, 0x06E8, 1},
+ {0x06EA, 0x06ED, 1},
+ {0x06F0, 0x06F9, 1},
+ {0x0901, 0x0903, 1},
+ {0x093C, 0x093C, 1},
+ {0x093E, 0x094C, 1},
+ {0x094D, 0x094D, 1},
+ {0x0951, 0x0954, 1},
+ {0x0962, 0x0963, 1},
+ {0x0966, 0x096F, 1},
+ {0x0981, 0x0983, 1},
+ {0x09BC, 0x09BC, 1},
+ {0x09BE, 0x09BF, 1},
+ {0x09C0, 0x09C4, 1},
+ {0x09C7, 0x09C8, 1},
+ {0x09CB, 0x09CD, 1},
+ {0x09D7, 0x09D7, 1},
+ {0x09E2, 0x09E3, 1},
+ {0x09E6, 0x09EF, 1},
+ {0x0A02, 0x0A3C, 0x3A},
+ {0x0A3E, 0x0A3F, 1},
+ {0x0A40, 0x0A42, 1},
+ {0x0A47, 0x0A48, 1},
+ {0x0A4B, 0x0A4D, 1},
+ {0x0A66, 0x0A6F, 1},
+ {0x0A70, 0x0A71, 1},
+ {0x0A81, 0x0A83, 1},
+ {0x0ABC, 0x0ABC, 1},
+ {0x0ABE, 0x0AC5, 1},
+ {0x0AC7, 0x0AC9, 1},
+ {0x0ACB, 0x0ACD, 1},
+ {0x0AE6, 0x0AEF, 1},
+ {0x0B01, 0x0B03, 1},
+ {0x0B3C, 0x0B3C, 1},
+ {0x0B3E, 0x0B43, 1},
+ {0x0B47, 0x0B48, 1},
+ {0x0B4B, 0x0B4D, 1},
+ {0x0B56, 0x0B57, 1},
+ {0x0B66, 0x0B6F, 1},
+ {0x0B82, 0x0B83, 1},
+ {0x0BBE, 0x0BC2, 1},
+ {0x0BC6, 0x0BC8, 1},
+ {0x0BCA, 0x0BCD, 1},
+ {0x0BD7, 0x0BD7, 1},
+ {0x0BE7, 0x0BEF, 1},
+ {0x0C01, 0x0C03, 1},
+ {0x0C3E, 0x0C44, 1},
+ {0x0C46, 0x0C48, 1},
+ {0x0C4A, 0x0C4D, 1},
+ {0x0C55, 0x0C56, 1},
+ {0x0C66, 0x0C6F, 1},
+ {0x0C82, 0x0C83, 1},
+ {0x0CBE, 0x0CC4, 1},
+ {0x0CC6, 0x0CC8, 1},
+ {0x0CCA, 0x0CCD, 1},
+ {0x0CD5, 0x0CD6, 1},
+ {0x0CE6, 0x0CEF, 1},
+ {0x0D02, 0x0D03, 1},
+ {0x0D3E, 0x0D43, 1},
+ {0x0D46, 0x0D48, 1},
+ {0x0D4A, 0x0D4D, 1},
+ {0x0D57, 0x0D57, 1},
+ {0x0D66, 0x0D6F, 1},
+ {0x0E31, 0x0E31, 1},
+ {0x0E34, 0x0E3A, 1},
+ {0x0E46, 0x0E46, 1},
+ {0x0E47, 0x0E4E, 1},
+ {0x0E50, 0x0E59, 1},
+ {0x0EB1, 0x0EB1, 1},
+ {0x0EB4, 0x0EB9, 1},
+ {0x0EBB, 0x0EBC, 1},
+ {0x0EC6, 0x0EC6, 1},
+ {0x0EC8, 0x0ECD, 1},
+ {0x0ED0, 0x0ED9, 1},
+ {0x0F18, 0x0F19, 1},
+ {0x0F20, 0x0F29, 1},
+ {0x0F35, 0x0F39, 2},
+ {0x0F3E, 0x0F3F, 1},
+ {0x0F71, 0x0F84, 1},
+ {0x0F86, 0x0F8B, 1},
+ {0x0F90, 0x0F95, 1},
+ {0x0F97, 0x0F97, 1},
+ {0x0F99, 0x0FAD, 1},
+ {0x0FB1, 0x0FB7, 1},
+ {0x0FB9, 0x0FB9, 1},
+ {0x20D0, 0x20DC, 1},
+ {0x20E1, 0x3005, 0x3005 - 0x20E1},
+ {0x302A, 0x302F, 1},
+ {0x3031, 0x3035, 1},
+ {0x3099, 0x309A, 1},
+ {0x309D, 0x309E, 1},
+ {0x30FC, 0x30FE, 1},
+ },
+}
+
+// HTMLEntity is an entity map containing translations for the
+// standard HTML entity characters.
+//
+// See the [Decoder.Strict] and [Decoder.Entity] fields' documentation.
+var HTMLEntity map[string]string = htmlEntity
+
+var htmlEntity = map[string]string{
+ /*
+ hget http://www.w3.org/TR/html4/sgml/entities.html |
+ ssam '
+ ,y /\&gt;/ x/\&lt;(.|\n)+/ s/\n/ /g
+ ,x v/^\&lt;!ENTITY/d
+ ,s/\&lt;!ENTITY ([^ ]+) .*U\+([0-9A-F][0-9A-F][0-9A-F][0-9A-F]) .+/ "\1": "\\u\2",/g
+ '
+ */
+ "nbsp": "\u00A0",
+ "iexcl": "\u00A1",
+ "cent": "\u00A2",
+ "pound": "\u00A3",
+ "curren": "\u00A4",
+ "yen": "\u00A5",
+ "brvbar": "\u00A6",
+ "sect": "\u00A7",
+ "uml": "\u00A8",
+ "copy": "\u00A9",
+ "ordf": "\u00AA",
+ "laquo": "\u00AB",
+ "not": "\u00AC",
+ "shy": "\u00AD",
+ "reg": "\u00AE",
+ "macr": "\u00AF",
+ "deg": "\u00B0",
+ "plusmn": "\u00B1",
+ "sup2": "\u00B2",
+ "sup3": "\u00B3",
+ "acute": "\u00B4",
+ "micro": "\u00B5",
+ "para": "\u00B6",
+ "middot": "\u00B7",
+ "cedil": "\u00B8",
+ "sup1": "\u00B9",
+ "ordm": "\u00BA",
+ "raquo": "\u00BB",
+ "frac14": "\u00BC",
+ "frac12": "\u00BD",
+ "frac34": "\u00BE",
+ "iquest": "\u00BF",
+ "Agrave": "\u00C0",
+ "Aacute": "\u00C1",
+ "Acirc": "\u00C2",
+ "Atilde": "\u00C3",
+ "Auml": "\u00C4",
+ "Aring": "\u00C5",
+ "AElig": "\u00C6",
+ "Ccedil": "\u00C7",
+ "Egrave": "\u00C8",
+ "Eacute": "\u00C9",
+ "Ecirc": "\u00CA",
+ "Euml": "\u00CB",
+ "Igrave": "\u00CC",
+ "Iacute": "\u00CD",
+ "Icirc": "\u00CE",
+ "Iuml": "\u00CF",
+ "ETH": "\u00D0",
+ "Ntilde": "\u00D1",
+ "Ograve": "\u00D2",
+ "Oacute": "\u00D3",
+ "Ocirc": "\u00D4",
+ "Otilde": "\u00D5",
+ "Ouml": "\u00D6",
+ "times": "\u00D7",
+ "Oslash": "\u00D8",
+ "Ugrave": "\u00D9",
+ "Uacute": "\u00DA",
+ "Ucirc": "\u00DB",
+ "Uuml": "\u00DC",
+ "Yacute": "\u00DD",
+ "THORN": "\u00DE",
+ "szlig": "\u00DF",
+ "agrave": "\u00E0",
+ "aacute": "\u00E1",
+ "acirc": "\u00E2",
+ "atilde": "\u00E3",
+ "auml": "\u00E4",
+ "aring": "\u00E5",
+ "aelig": "\u00E6",
+ "ccedil": "\u00E7",
+ "egrave": "\u00E8",
+ "eacute": "\u00E9",
+ "ecirc": "\u00EA",
+ "euml": "\u00EB",
+ "igrave": "\u00EC",
+ "iacute": "\u00ED",
+ "icirc": "\u00EE",
+ "iuml": "\u00EF",
+ "eth": "\u00F0",
+ "ntilde": "\u00F1",
+ "ograve": "\u00F2",
+ "oacute": "\u00F3",
+ "ocirc": "\u00F4",
+ "otilde": "\u00F5",
+ "ouml": "\u00F6",
+ "divide": "\u00F7",
+ "oslash": "\u00F8",
+ "ugrave": "\u00F9",
+ "uacute": "\u00FA",
+ "ucirc": "\u00FB",
+ "uuml": "\u00FC",
+ "yacute": "\u00FD",
+ "thorn": "\u00FE",
+ "yuml": "\u00FF",
+ "fnof": "\u0192",
+ "Alpha": "\u0391",
+ "Beta": "\u0392",
+ "Gamma": "\u0393",
+ "Delta": "\u0394",
+ "Epsilon": "\u0395",
+ "Zeta": "\u0396",
+ "Eta": "\u0397",
+ "Theta": "\u0398",
+ "Iota": "\u0399",
+ "Kappa": "\u039A",
+ "Lambda": "\u039B",
+ "Mu": "\u039C",
+ "Nu": "\u039D",
+ "Xi": "\u039E",
+ "Omicron": "\u039F",
+ "Pi": "\u03A0",
+ "Rho": "\u03A1",
+ "Sigma": "\u03A3",
+ "Tau": "\u03A4",
+ "Upsilon": "\u03A5",
+ "Phi": "\u03A6",
+ "Chi": "\u03A7",
+ "Psi": "\u03A8",
+ "Omega": "\u03A9",
+ "alpha": "\u03B1",
+ "beta": "\u03B2",
+ "gamma": "\u03B3",
+ "delta": "\u03B4",
+ "epsilon": "\u03B5",
+ "zeta": "\u03B6",
+ "eta": "\u03B7",
+ "theta": "\u03B8",
+ "iota": "\u03B9",
+ "kappa": "\u03BA",
+ "lambda": "\u03BB",
+ "mu": "\u03BC",
+ "nu": "\u03BD",
+ "xi": "\u03BE",
+ "omicron": "\u03BF",
+ "pi": "\u03C0",
+ "rho": "\u03C1",
+ "sigmaf": "\u03C2",
+ "sigma": "\u03C3",
+ "tau": "\u03C4",
+ "upsilon": "\u03C5",
+ "phi": "\u03C6",
+ "chi": "\u03C7",
+ "psi": "\u03C8",
+ "omega": "\u03C9",
+ "thetasym": "\u03D1",
+ "upsih": "\u03D2",
+ "piv": "\u03D6",
+ "bull": "\u2022",
+ "hellip": "\u2026",
+ "prime": "\u2032",
+ "Prime": "\u2033",
+ "oline": "\u203E",
+ "frasl": "\u2044",
+ "weierp": "\u2118",
+ "image": "\u2111",
+ "real": "\u211C",
+ "trade": "\u2122",
+ "alefsym": "\u2135",
+ "larr": "\u2190",
+ "uarr": "\u2191",
+ "rarr": "\u2192",
+ "darr": "\u2193",
+ "harr": "\u2194",
+ "crarr": "\u21B5",
+ "lArr": "\u21D0",
+ "uArr": "\u21D1",
+ "rArr": "\u21D2",
+ "dArr": "\u21D3",
+ "hArr": "\u21D4",
+ "forall": "\u2200",
+ "part": "\u2202",
+ "exist": "\u2203",
+ "empty": "\u2205",
+ "nabla": "\u2207",
+ "isin": "\u2208",
+ "notin": "\u2209",
+ "ni": "\u220B",
+ "prod": "\u220F",
+ "sum": "\u2211",
+ "minus": "\u2212",
+ "lowast": "\u2217",
+ "radic": "\u221A",
+ "prop": "\u221D",
+ "infin": "\u221E",
+ "ang": "\u2220",
+ "and": "\u2227",
+ "or": "\u2228",
+ "cap": "\u2229",
+ "cup": "\u222A",
+ "int": "\u222B",
+ "there4": "\u2234",
+ "sim": "\u223C",
+ "cong": "\u2245",
+ "asymp": "\u2248",
+ "ne": "\u2260",
+ "equiv": "\u2261",
+ "le": "\u2264",
+ "ge": "\u2265",
+ "sub": "\u2282",
+ "sup": "\u2283",
+ "nsub": "\u2284",
+ "sube": "\u2286",
+ "supe": "\u2287",
+ "oplus": "\u2295",
+ "otimes": "\u2297",
+ "perp": "\u22A5",
+ "sdot": "\u22C5",
+ "lceil": "\u2308",
+ "rceil": "\u2309",
+ "lfloor": "\u230A",
+ "rfloor": "\u230B",
+ "lang": "\u2329",
+ "rang": "\u232A",
+ "loz": "\u25CA",
+ "spades": "\u2660",
+ "clubs": "\u2663",
+ "hearts": "\u2665",
+ "diams": "\u2666",
+ "quot": "\u0022",
+ "amp": "\u0026",
+ "lt": "\u003C",
+ "gt": "\u003E",
+ "OElig": "\u0152",
+ "oelig": "\u0153",
+ "Scaron": "\u0160",
+ "scaron": "\u0161",
+ "Yuml": "\u0178",
+ "circ": "\u02C6",
+ "tilde": "\u02DC",
+ "ensp": "\u2002",
+ "emsp": "\u2003",
+ "thinsp": "\u2009",
+ "zwnj": "\u200C",
+ "zwj": "\u200D",
+ "lrm": "\u200E",
+ "rlm": "\u200F",
+ "ndash": "\u2013",
+ "mdash": "\u2014",
+ "lsquo": "\u2018",
+ "rsquo": "\u2019",
+ "sbquo": "\u201A",
+ "ldquo": "\u201C",
+ "rdquo": "\u201D",
+ "bdquo": "\u201E",
+ "dagger": "\u2020",
+ "Dagger": "\u2021",
+ "permil": "\u2030",
+ "lsaquo": "\u2039",
+ "rsaquo": "\u203A",
+ "euro": "\u20AC",
+}
+
+// HTMLAutoClose is the set of HTML elements that
+// should be considered to close automatically.
+//
+// See the [Decoder.Strict] and [Decoder.Entity] fields' documentation.
+var HTMLAutoClose []string = htmlAutoClose
+
+var htmlAutoClose = []string{
+ /*
+ hget http://www.w3.org/TR/html4/loose.dtd |
+ 9 sed -n 's/<!ELEMENT ([^ ]*) +- O EMPTY.+/ "\1",/p' | tr A-Z a-z
+ */
+ "basefont",
+ "br",
+ "area",
+ "link",
+ "img",
+ "param",
+ "hr",
+ "input",
+ "col",
+ "frame",
+ "isindex",
+ "base",
+ "meta",
+}
+
+var (
+ escQuot = []byte("&#34;") // shorter than "&quot;"
+ escApos = []byte("&#39;") // shorter than "&apos;"
+ escAmp = []byte("&amp;")
+ escLT = []byte("&lt;")
+ escGT = []byte("&gt;")
+ escTab = []byte("&#x9;")
+ escNL = []byte("&#xA;")
+ escCR = []byte("&#xD;")
+ escFFFD = []byte("\uFFFD") // Unicode replacement character
+)
+
+// EscapeText writes to w the properly escaped XML equivalent
+// of the plain text data s.
+func EscapeText(w io.Writer, s []byte) error {
+ return escapeText(w, s, true)
+}
+
+// escapeText writes to w the properly escaped XML equivalent
+// of the plain text data s. If escapeNewline is true, newline
+// characters will be escaped.
+func escapeText(w io.Writer, s []byte, escapeNewline bool) error {
+ var esc []byte
+ last := 0
+ for i := 0; i < len(s); {
+ r, width := utf8.DecodeRune(s[i:])
+ i += width
+ switch r {
+ case '"':
+ esc = escQuot
+ case '\'':
+ esc = escApos
+ case '&':
+ esc = escAmp
+ case '<':
+ esc = escLT
+ case '>':
+ esc = escGT
+ case '\t':
+ esc = escTab
+ case '\n':
+ if !escapeNewline {
+ continue
+ }
+ esc = escNL
+ case '\r':
+ esc = escCR
+ default:
+ if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) {
+ esc = escFFFD
+ break
+ }
+ continue
+ }
+ if _, err := w.Write(s[last : i-width]); err != nil {
+ return err
+ }
+ if _, err := w.Write(esc); err != nil {
+ return err
+ }
+ last = i
+ }
+ _, err := w.Write(s[last:])
+ return err
+}
+
+// EscapeString writes to p the properly escaped XML equivalent
+// of the plain text data s.
+func (p *printer) EscapeString(s string) {
+ var esc []byte
+ last := 0
+ for i := 0; i < len(s); {
+ r, width := utf8.DecodeRuneInString(s[i:])
+ i += width
+ switch r {
+ case '"':
+ esc = escQuot
+ case '\'':
+ esc = escApos
+ case '&':
+ esc = escAmp
+ case '<':
+ esc = escLT
+ case '>':
+ esc = escGT
+ case '\t':
+ esc = escTab
+ case '\n':
+ esc = escNL
+ case '\r':
+ esc = escCR
+ default:
+ if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) {
+ esc = escFFFD
+ break
+ }
+ continue
+ }
+ p.WriteString(s[last : i-width])
+ p.Write(esc)
+ last = i
+ }
+ p.WriteString(s[last:])
+}
+
+// Escape is like [EscapeText] but omits the error return value.
+// It is provided for backwards compatibility with Go 1.0.
+// Code targeting Go 1.1 or later should use [EscapeText].
+func Escape(w io.Writer, s []byte) {
+ EscapeText(w, s)
+}
+
+var (
+ cdataStart = []byte("<![CDATA[")
+ cdataEnd = []byte("]]>")
+ cdataEscape = []byte("]]]]><![CDATA[>")
+)
+
+// emitCDATA writes to w the CDATA-wrapped plain text data s.
+// It escapes CDATA directives nested in s.
+func emitCDATA(w io.Writer, s []byte) error {
+ if len(s) == 0 {
+ return nil
+ }
+ if _, err := w.Write(cdataStart); err != nil {
+ return err
+ }
+
+ for {
+ before, after, ok := bytes.Cut(s, cdataEnd)
+ if !ok {
+ break
+ }
+ // Found a nested CDATA directive end.
+ if _, err := w.Write(before); err != nil {
+ return err
+ }
+ if _, err := w.Write(cdataEscape); err != nil {
+ return err
+ }
+ s = after
+ }
+
+ if _, err := w.Write(s); err != nil {
+ return err
+ }
+
+ _, err := w.Write(cdataEnd)
+ return err
+}
+
+// procInst parses the `param="..."` or `param='...'`
+// value out of the provided string, returning "" if not found.
+func procInst(param, s string) string {
+ // TODO: this parsing is somewhat lame and not exact.
+ // It works for all actual cases, though.
+ param = param + "="
+ _, v, _ := strings.Cut(s, param)
+ if v == "" {
+ return ""
+ }
+ if v[0] != '\'' && v[0] != '"' {
+ return ""
+ }
+ unquote, _, ok := strings.Cut(v[1:], v[:1])
+ if !ok {
+ return ""
+ }
+ return unquote
+}
diff --git a/src/encoding/xml/xml_test.go b/src/encoding/xml/xml_test.go
new file mode 100644
index 0000000..42f5f5f
--- /dev/null
+++ b/src/encoding/xml/xml_test.go
@@ -0,0 +1,1479 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package xml
+
+import (
+ "bytes"
+ "fmt"
+ "io"
+ "reflect"
+ "strings"
+ "testing"
+ "unicode/utf8"
+)
+
+type toks struct {
+ earlyEOF bool
+ t []Token
+}
+
+func (t *toks) Token() (Token, error) {
+ if len(t.t) == 0 {
+ return nil, io.EOF
+ }
+ var tok Token
+ tok, t.t = t.t[0], t.t[1:]
+ if t.earlyEOF && len(t.t) == 0 {
+ return tok, io.EOF
+ }
+ return tok, nil
+}
+
+func TestDecodeEOF(t *testing.T) {
+ start := StartElement{Name: Name{Local: "test"}}
+ tests := []struct {
+ name string
+ tokens []Token
+ ok bool
+ }{
+ {
+ name: "OK",
+ tokens: []Token{
+ start,
+ start.End(),
+ },
+ ok: true,
+ },
+ {
+ name: "Malformed",
+ tokens: []Token{
+ start,
+ StartElement{Name: Name{Local: "bad"}},
+ start.End(),
+ },
+ ok: false,
+ },
+ }
+ for _, tc := range tests {
+ for _, eof := range []bool{true, false} {
+ name := fmt.Sprintf("%s/earlyEOF=%v", tc.name, eof)
+ t.Run(name, func(t *testing.T) {
+ d := NewTokenDecoder(&toks{
+ earlyEOF: eof,
+ t: tc.tokens,
+ })
+ err := d.Decode(&struct {
+ XMLName Name `xml:"test"`
+ }{})
+ if tc.ok && err != nil {
+ t.Fatalf("d.Decode: expected nil error, got %v", err)
+ }
+ if _, ok := err.(*SyntaxError); !tc.ok && !ok {
+ t.Errorf("d.Decode: expected syntax error, got %v", err)
+ }
+ })
+ }
+ }
+}
+
+type toksNil struct {
+ returnEOF bool
+ t []Token
+}
+
+func (t *toksNil) Token() (Token, error) {
+ if len(t.t) == 0 {
+ if !t.returnEOF {
+ // Return nil, nil before returning an EOF. It's legal, but
+ // discouraged.
+ t.returnEOF = true
+ return nil, nil
+ }
+ return nil, io.EOF
+ }
+ var tok Token
+ tok, t.t = t.t[0], t.t[1:]
+ return tok, nil
+}
+
+func TestDecodeNilToken(t *testing.T) {
+ for _, strict := range []bool{true, false} {
+ name := fmt.Sprintf("Strict=%v", strict)
+ t.Run(name, func(t *testing.T) {
+ start := StartElement{Name: Name{Local: "test"}}
+ bad := StartElement{Name: Name{Local: "bad"}}
+ d := NewTokenDecoder(&toksNil{
+ // Malformed
+ t: []Token{start, bad, start.End()},
+ })
+ d.Strict = strict
+ err := d.Decode(&struct {
+ XMLName Name `xml:"test"`
+ }{})
+ if _, ok := err.(*SyntaxError); !ok {
+ t.Errorf("d.Decode: expected syntax error, got %v", err)
+ }
+ })
+ }
+}
+
+const testInput = `
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` +
+ "\r\n\t" + ` >
+ <hello lang="en">World &lt;&gt;&apos;&quot; &#x767d;&#40300;翔</hello>
+ <query>&何; &is-it;</query>
+ <goodbye />
+ <outer foo:attr="value" xmlns:tag="ns4">
+ <inner/>
+ </outer>
+ <tag:name>
+ <![CDATA[Some text here.]]>
+ </tag:name>
+</body><!-- missing final newline -->`
+
+var testEntity = map[string]string{"何": "What", "is-it": "is it?"}
+
+var rawTokens = []Token{
+ CharData("\n"),
+ ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
+ CharData("\n"),
+ Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
+ CharData("\n"),
+ StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
+ CharData("\n "),
+ StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
+ CharData("World <>'\" 白鵬翔"),
+ EndElement{Name{"", "hello"}},
+ CharData("\n "),
+ StartElement{Name{"", "query"}, []Attr{}},
+ CharData("What is it?"),
+ EndElement{Name{"", "query"}},
+ CharData("\n "),
+ StartElement{Name{"", "goodbye"}, []Attr{}},
+ EndElement{Name{"", "goodbye"}},
+ CharData("\n "),
+ StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
+ CharData("\n "),
+ StartElement{Name{"", "inner"}, []Attr{}},
+ EndElement{Name{"", "inner"}},
+ CharData("\n "),
+ EndElement{Name{"", "outer"}},
+ CharData("\n "),
+ StartElement{Name{"tag", "name"}, []Attr{}},
+ CharData("\n "),
+ CharData("Some text here."),
+ CharData("\n "),
+ EndElement{Name{"tag", "name"}},
+ CharData("\n"),
+ EndElement{Name{"", "body"}},
+ Comment(" missing final newline "),
+}
+
+var cookedTokens = []Token{
+ CharData("\n"),
+ ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
+ CharData("\n"),
+ Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
+ CharData("\n"),
+ StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
+ CharData("\n "),
+ StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
+ CharData("World <>'\" 白鵬翔"),
+ EndElement{Name{"ns2", "hello"}},
+ CharData("\n "),
+ StartElement{Name{"ns2", "query"}, []Attr{}},
+ CharData("What is it?"),
+ EndElement{Name{"ns2", "query"}},
+ CharData("\n "),
+ StartElement{Name{"ns2", "goodbye"}, []Attr{}},
+ EndElement{Name{"ns2", "goodbye"}},
+ CharData("\n "),
+ StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
+ CharData("\n "),
+ StartElement{Name{"ns2", "inner"}, []Attr{}},
+ EndElement{Name{"ns2", "inner"}},
+ CharData("\n "),
+ EndElement{Name{"ns2", "outer"}},
+ CharData("\n "),
+ StartElement{Name{"ns3", "name"}, []Attr{}},
+ CharData("\n "),
+ CharData("Some text here."),
+ CharData("\n "),
+ EndElement{Name{"ns3", "name"}},
+ CharData("\n"),
+ EndElement{Name{"ns2", "body"}},
+ Comment(" missing final newline "),
+}
+
+const testInputAltEncoding = `
+<?xml version="1.0" encoding="x-testing-uppercase"?>
+<TAG>VALUE</TAG>`
+
+var rawTokensAltEncoding = []Token{
+ CharData("\n"),
+ ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)},
+ CharData("\n"),
+ StartElement{Name{"", "tag"}, []Attr{}},
+ CharData("value"),
+ EndElement{Name{"", "tag"}},
+}
+
+var xmlInput = []string{
+ // unexpected EOF cases
+ "<",
+ "<t",
+ "<t ",
+ "<t/",
+ "<!",
+ "<!-",
+ "<!--",
+ "<!--c-",
+ "<!--c--",
+ "<!d",
+ "<t></",
+ "<t></t",
+ "<?",
+ "<?p",
+ "<t a",
+ "<t a=",
+ "<t a='",
+ "<t a=''",
+ "<t/><![",
+ "<t/><![C",
+ "<t/><![CDATA[d",
+ "<t/><![CDATA[d]",
+ "<t/><![CDATA[d]]",
+
+ // other Syntax errors
+ "<>",
+ "<t/a",
+ "<0 />",
+ "<?0 >",
+ // "<!0 >", // let the Token() caller handle
+ "</0>",
+ "<t 0=''>",
+ "<t a='&'>",
+ "<t a='<'>",
+ "<t>&nbspc;</t>",
+ "<t a>",
+ "<t a=>",
+ "<t a=v>",
+ // "<![CDATA[d]]>", // let the Token() caller handle
+ "<t></e>",
+ "<t></>",
+ "<t></t!",
+ "<t>cdata]]></t>",
+}
+
+func TestRawToken(t *testing.T) {
+ d := NewDecoder(strings.NewReader(testInput))
+ d.Entity = testEntity
+ testRawToken(t, d, testInput, rawTokens)
+}
+
+const nonStrictInput = `
+<tag>non&entity</tag>
+<tag>&unknown;entity</tag>
+<tag>&#123</tag>
+<tag>&#zzz;</tag>
+<tag>&なまえ3;</tag>
+<tag>&lt-gt;</tag>
+<tag>&;</tag>
+<tag>&0a;</tag>
+`
+
+var nonStrictTokens = []Token{
+ CharData("\n"),
+ StartElement{Name{"", "tag"}, []Attr{}},
+ CharData("non&entity"),
+ EndElement{Name{"", "tag"}},
+ CharData("\n"),
+ StartElement{Name{"", "tag"}, []Attr{}},
+ CharData("&unknown;entity"),
+ EndElement{Name{"", "tag"}},
+ CharData("\n"),
+ StartElement{Name{"", "tag"}, []Attr{}},
+ CharData("&#123"),
+ EndElement{Name{"", "tag"}},
+ CharData("\n"),
+ StartElement{Name{"", "tag"}, []Attr{}},
+ CharData("&#zzz;"),
+ EndElement{Name{"", "tag"}},
+ CharData("\n"),
+ StartElement{Name{"", "tag"}, []Attr{}},
+ CharData("&なまえ3;"),
+ EndElement{Name{"", "tag"}},
+ CharData("\n"),
+ StartElement{Name{"", "tag"}, []Attr{}},
+ CharData("&lt-gt;"),
+ EndElement{Name{"", "tag"}},
+ CharData("\n"),
+ StartElement{Name{"", "tag"}, []Attr{}},
+ CharData("&;"),
+ EndElement{Name{"", "tag"}},
+ CharData("\n"),
+ StartElement{Name{"", "tag"}, []Attr{}},
+ CharData("&0a;"),
+ EndElement{Name{"", "tag"}},
+ CharData("\n"),
+}
+
+func TestNonStrictRawToken(t *testing.T) {
+ d := NewDecoder(strings.NewReader(nonStrictInput))
+ d.Strict = false
+ testRawToken(t, d, nonStrictInput, nonStrictTokens)
+}
+
+type downCaser struct {
+ t *testing.T
+ r io.ByteReader
+}
+
+func (d *downCaser) ReadByte() (c byte, err error) {
+ c, err = d.r.ReadByte()
+ if c >= 'A' && c <= 'Z' {
+ c += 'a' - 'A'
+ }
+ return
+}
+
+func (d *downCaser) Read(p []byte) (int, error) {
+ d.t.Fatalf("unexpected Read call on downCaser reader")
+ panic("unreachable")
+}
+
+func TestRawTokenAltEncoding(t *testing.T) {
+ d := NewDecoder(strings.NewReader(testInputAltEncoding))
+ d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
+ if charset != "x-testing-uppercase" {
+ t.Fatalf("unexpected charset %q", charset)
+ }
+ return &downCaser{t, input.(io.ByteReader)}, nil
+ }
+ testRawToken(t, d, testInputAltEncoding, rawTokensAltEncoding)
+}
+
+func TestRawTokenAltEncodingNoConverter(t *testing.T) {
+ d := NewDecoder(strings.NewReader(testInputAltEncoding))
+ token, err := d.RawToken()
+ if token == nil {
+ t.Fatalf("expected a token on first RawToken call")
+ }
+ if err != nil {
+ t.Fatal(err)
+ }
+ token, err = d.RawToken()
+ if token != nil {
+ t.Errorf("expected a nil token; got %#v", token)
+ }
+ if err == nil {
+ t.Fatalf("expected an error on second RawToken call")
+ }
+ const encoding = "x-testing-uppercase"
+ if !strings.Contains(err.Error(), encoding) {
+ t.Errorf("expected error to contain %q; got error: %v",
+ encoding, err)
+ }
+}
+
+func testRawToken(t *testing.T, d *Decoder, raw string, rawTokens []Token) {
+ lastEnd := int64(0)
+ for i, want := range rawTokens {
+ start := d.InputOffset()
+ have, err := d.RawToken()
+ end := d.InputOffset()
+ if err != nil {
+ t.Fatalf("token %d: unexpected error: %s", i, err)
+ }
+ if !reflect.DeepEqual(have, want) {
+ var shave, swant string
+ if _, ok := have.(CharData); ok {
+ shave = fmt.Sprintf("CharData(%q)", have)
+ } else {
+ shave = fmt.Sprintf("%#v", have)
+ }
+ if _, ok := want.(CharData); ok {
+ swant = fmt.Sprintf("CharData(%q)", want)
+ } else {
+ swant = fmt.Sprintf("%#v", want)
+ }
+ t.Errorf("token %d = %s, want %s", i, shave, swant)
+ }
+
+ // Check that InputOffset returned actual token.
+ switch {
+ case start < lastEnd:
+ t.Errorf("token %d: position [%d,%d) for %T is before previous token", i, start, end, have)
+ case start >= end:
+ // Special case: EndElement can be synthesized.
+ if start == end && end == lastEnd {
+ break
+ }
+ t.Errorf("token %d: position [%d,%d) for %T is empty", i, start, end, have)
+ case end > int64(len(raw)):
+ t.Errorf("token %d: position [%d,%d) for %T extends beyond input", i, start, end, have)
+ default:
+ text := raw[start:end]
+ if strings.ContainsAny(text, "<>") && (!strings.HasPrefix(text, "<") || !strings.HasSuffix(text, ">")) {
+ t.Errorf("token %d: misaligned raw token %#q for %T", i, text, have)
+ }
+ }
+ lastEnd = end
+ }
+}
+
+// Ensure that directives (specifically !DOCTYPE) include the complete
+// text of any nested directives, noting that < and > do not change
+// nesting depth if they are in single or double quotes.
+
+var nestedDirectivesInput = `
+<!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
+<!DOCTYPE [<!ENTITY xlt ">">]>
+<!DOCTYPE [<!ENTITY xlt "<">]>
+<!DOCTYPE [<!ENTITY xlt '>'>]>
+<!DOCTYPE [<!ENTITY xlt '<'>]>
+<!DOCTYPE [<!ENTITY xlt '">'>]>
+<!DOCTYPE [<!ENTITY xlt "'<">]>
+`
+
+var nestedDirectivesTokens = []Token{
+ CharData("\n"),
+ Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
+ CharData("\n"),
+ Directive(`DOCTYPE [<!ENTITY xlt ">">]`),
+ CharData("\n"),
+ Directive(`DOCTYPE [<!ENTITY xlt "<">]`),
+ CharData("\n"),
+ Directive(`DOCTYPE [<!ENTITY xlt '>'>]`),
+ CharData("\n"),
+ Directive(`DOCTYPE [<!ENTITY xlt '<'>]`),
+ CharData("\n"),
+ Directive(`DOCTYPE [<!ENTITY xlt '">'>]`),
+ CharData("\n"),
+ Directive(`DOCTYPE [<!ENTITY xlt "'<">]`),
+ CharData("\n"),
+}
+
+func TestNestedDirectives(t *testing.T) {
+ d := NewDecoder(strings.NewReader(nestedDirectivesInput))
+
+ for i, want := range nestedDirectivesTokens {
+ have, err := d.Token()
+ if err != nil {
+ t.Fatalf("token %d: unexpected error: %s", i, err)
+ }
+ if !reflect.DeepEqual(have, want) {
+ t.Errorf("token %d = %#v want %#v", i, have, want)
+ }
+ }
+}
+
+func TestToken(t *testing.T) {
+ d := NewDecoder(strings.NewReader(testInput))
+ d.Entity = testEntity
+
+ for i, want := range cookedTokens {
+ have, err := d.Token()
+ if err != nil {
+ t.Fatalf("token %d: unexpected error: %s", i, err)
+ }
+ if !reflect.DeepEqual(have, want) {
+ t.Errorf("token %d = %#v want %#v", i, have, want)
+ }
+ }
+}
+
+func TestSyntax(t *testing.T) {
+ for i := range xmlInput {
+ d := NewDecoder(strings.NewReader(xmlInput[i]))
+ var err error
+ for _, err = d.Token(); err == nil; _, err = d.Token() {
+ }
+ if _, ok := err.(*SyntaxError); !ok {
+ t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i])
+ }
+ }
+}
+
+func TestInputLinePos(t *testing.T) {
+ testInput := `<root>
+<?pi
+ ?> <elt
+att
+=
+"val">
+<![CDATA[
+]]><!--
+
+--></elt>
+</root>`
+ linePos := [][]int{
+ {1, 7},
+ {2, 1},
+ {3, 4},
+ {3, 6},
+ {6, 7},
+ {7, 1},
+ {8, 4},
+ {10, 4},
+ {10, 10},
+ {11, 1},
+ {11, 8},
+ }
+ dec := NewDecoder(strings.NewReader(testInput))
+ for _, want := range linePos {
+ if _, err := dec.Token(); err != nil {
+ t.Errorf("Unexpected error: %v", err)
+ continue
+ }
+
+ gotLine, gotCol := dec.InputPos()
+ if gotLine != want[0] || gotCol != want[1] {
+ t.Errorf("dec.InputPos() = %d,%d, want %d,%d", gotLine, gotCol, want[0], want[1])
+ }
+ }
+}
+
+type allScalars struct {
+ True1 bool
+ True2 bool
+ False1 bool
+ False2 bool
+ Int int
+ Int8 int8
+ Int16 int16
+ Int32 int32
+ Int64 int64
+ Uint int
+ Uint8 uint8
+ Uint16 uint16
+ Uint32 uint32
+ Uint64 uint64
+ Uintptr uintptr
+ Float32 float32
+ Float64 float64
+ String string
+ PtrString *string
+}
+
+var all = allScalars{
+ True1: true,
+ True2: true,
+ False1: false,
+ False2: false,
+ Int: 1,
+ Int8: -2,
+ Int16: 3,
+ Int32: -4,
+ Int64: 5,
+ Uint: 6,
+ Uint8: 7,
+ Uint16: 8,
+ Uint32: 9,
+ Uint64: 10,
+ Uintptr: 11,
+ Float32: 13.0,
+ Float64: 14.0,
+ String: "15",
+ PtrString: &sixteen,
+}
+
+var sixteen = "16"
+
+const testScalarsInput = `<allscalars>
+ <True1>true</True1>
+ <True2>1</True2>
+ <False1>false</False1>
+ <False2>0</False2>
+ <Int>1</Int>
+ <Int8>-2</Int8>
+ <Int16>3</Int16>
+ <Int32>-4</Int32>
+ <Int64>5</Int64>
+ <Uint>6</Uint>
+ <Uint8>7</Uint8>
+ <Uint16>8</Uint16>
+ <Uint32>9</Uint32>
+ <Uint64>10</Uint64>
+ <Uintptr>11</Uintptr>
+ <Float>12.0</Float>
+ <Float32>13.0</Float32>
+ <Float64>14.0</Float64>
+ <String>15</String>
+ <PtrString>16</PtrString>
+</allscalars>`
+
+func TestAllScalars(t *testing.T) {
+ var a allScalars
+ err := Unmarshal([]byte(testScalarsInput), &a)
+
+ if err != nil {
+ t.Fatal(err)
+ }
+ if !reflect.DeepEqual(a, all) {
+ t.Errorf("have %+v want %+v", a, all)
+ }
+}
+
+type item struct {
+ FieldA string
+}
+
+func TestIssue569(t *testing.T) {
+ data := `<item><FieldA>abcd</FieldA></item>`
+ var i item
+ err := Unmarshal([]byte(data), &i)
+
+ if err != nil || i.FieldA != "abcd" {
+ t.Fatal("Expecting abcd")
+ }
+}
+
+func TestUnquotedAttrs(t *testing.T) {
+ data := "<tag attr=azAZ09:-_\t>"
+ d := NewDecoder(strings.NewReader(data))
+ d.Strict = false
+ token, err := d.Token()
+ if _, ok := err.(*SyntaxError); ok {
+ t.Errorf("Unexpected error: %v", err)
+ }
+ if token.(StartElement).Name.Local != "tag" {
+ t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
+ }
+ attr := token.(StartElement).Attr[0]
+ if attr.Value != "azAZ09:-_" {
+ t.Errorf("Unexpected attribute value: %v", attr.Value)
+ }
+ if attr.Name.Local != "attr" {
+ t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
+ }
+}
+
+func TestValuelessAttrs(t *testing.T) {
+ tests := [][3]string{
+ {"<p nowrap>", "p", "nowrap"},
+ {"<p nowrap >", "p", "nowrap"},
+ {"<input checked/>", "input", "checked"},
+ {"<input checked />", "input", "checked"},
+ }
+ for _, test := range tests {
+ d := NewDecoder(strings.NewReader(test[0]))
+ d.Strict = false
+ token, err := d.Token()
+ if _, ok := err.(*SyntaxError); ok {
+ t.Errorf("Unexpected error: %v", err)
+ }
+ if token.(StartElement).Name.Local != test[1] {
+ t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
+ }
+ attr := token.(StartElement).Attr[0]
+ if attr.Value != test[2] {
+ t.Errorf("Unexpected attribute value: %v", attr.Value)
+ }
+ if attr.Name.Local != test[2] {
+ t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
+ }
+ }
+}
+
+func TestCopyTokenCharData(t *testing.T) {
+ data := []byte("same data")
+ var tok1 Token = CharData(data)
+ tok2 := CopyToken(tok1)
+ if !reflect.DeepEqual(tok1, tok2) {
+ t.Error("CopyToken(CharData) != CharData")
+ }
+ data[1] = 'o'
+ if reflect.DeepEqual(tok1, tok2) {
+ t.Error("CopyToken(CharData) uses same buffer.")
+ }
+}
+
+func TestCopyTokenStartElement(t *testing.T) {
+ elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}
+ var tok1 Token = elt
+ tok2 := CopyToken(tok1)
+ if tok1.(StartElement).Attr[0].Value != "en" {
+ t.Error("CopyToken overwrote Attr[0]")
+ }
+ if !reflect.DeepEqual(tok1, tok2) {
+ t.Error("CopyToken(StartElement) != StartElement")
+ }
+ tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"}
+ if reflect.DeepEqual(tok1, tok2) {
+ t.Error("CopyToken(CharData) uses same buffer.")
+ }
+}
+
+func TestCopyTokenComment(t *testing.T) {
+ data := []byte("<!-- some comment -->")
+ var tok1 Token = Comment(data)
+ tok2 := CopyToken(tok1)
+ if !reflect.DeepEqual(tok1, tok2) {
+ t.Error("CopyToken(Comment) != Comment")
+ }
+ data[1] = 'o'
+ if reflect.DeepEqual(tok1, tok2) {
+ t.Error("CopyToken(Comment) uses same buffer.")
+ }
+}
+
+func TestSyntaxErrorLineNum(t *testing.T) {
+ testInput := "<P>Foo<P>\n\n<P>Bar</>\n"
+ d := NewDecoder(strings.NewReader(testInput))
+ var err error
+ for _, err = d.Token(); err == nil; _, err = d.Token() {
+ }
+ synerr, ok := err.(*SyntaxError)
+ if !ok {
+ t.Error("Expected SyntaxError.")
+ }
+ if synerr.Line != 3 {
+ t.Error("SyntaxError didn't have correct line number.")
+ }
+}
+
+func TestTrailingRawToken(t *testing.T) {
+ input := `<FOO></FOO> `
+ d := NewDecoder(strings.NewReader(input))
+ var err error
+ for _, err = d.RawToken(); err == nil; _, err = d.RawToken() {
+ }
+ if err != io.EOF {
+ t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err)
+ }
+}
+
+func TestTrailingToken(t *testing.T) {
+ input := `<FOO></FOO> `
+ d := NewDecoder(strings.NewReader(input))
+ var err error
+ for _, err = d.Token(); err == nil; _, err = d.Token() {
+ }
+ if err != io.EOF {
+ t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
+ }
+}
+
+func TestEntityInsideCDATA(t *testing.T) {
+ input := `<test><![CDATA[ &val=foo ]]></test>`
+ d := NewDecoder(strings.NewReader(input))
+ var err error
+ for _, err = d.Token(); err == nil; _, err = d.Token() {
+ }
+ if err != io.EOF {
+ t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
+ }
+}
+
+var characterTests = []struct {
+ in string
+ err string
+}{
+ {"\x12<doc/>", "illegal character code U+0012"},
+ {"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"},
+ {"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"},
+ {"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"},
+ {"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"},
+ {"<doc>&abc\x01;</doc>", "invalid character entity &abc (no semicolon)"},
+ {"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"},
+ {"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &\uFFFE;"},
+ {"<doc>&hello;</doc>", "invalid character entity &hello;"},
+}
+
+func TestDisallowedCharacters(t *testing.T) {
+
+ for i, tt := range characterTests {
+ d := NewDecoder(strings.NewReader(tt.in))
+ var err error
+
+ for err == nil {
+ _, err = d.Token()
+ }
+ synerr, ok := err.(*SyntaxError)
+ if !ok {
+ t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err)
+ }
+ if synerr.Msg != tt.err {
+ t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg)
+ }
+ }
+}
+
+func TestIsInCharacterRange(t *testing.T) {
+ invalid := []rune{
+ utf8.MaxRune + 1,
+ 0xD800, // surrogate min
+ 0xDFFF, // surrogate max
+ -1,
+ }
+ for _, r := range invalid {
+ if isInCharacterRange(r) {
+ t.Errorf("rune %U considered valid", r)
+ }
+ }
+}
+
+var procInstTests = []struct {
+ input string
+ expect [2]string
+}{
+ {`version="1.0" encoding="utf-8"`, [2]string{"1.0", "utf-8"}},
+ {`version="1.0" encoding='utf-8'`, [2]string{"1.0", "utf-8"}},
+ {`version="1.0" encoding='utf-8' `, [2]string{"1.0", "utf-8"}},
+ {`version="1.0" encoding=utf-8`, [2]string{"1.0", ""}},
+ {`encoding="FOO" `, [2]string{"", "FOO"}},
+}
+
+func TestProcInstEncoding(t *testing.T) {
+ for _, test := range procInstTests {
+ if got := procInst("version", test.input); got != test.expect[0] {
+ t.Errorf("procInst(version, %q) = %q; want %q", test.input, got, test.expect[0])
+ }
+ if got := procInst("encoding", test.input); got != test.expect[1] {
+ t.Errorf("procInst(encoding, %q) = %q; want %q", test.input, got, test.expect[1])
+ }
+ }
+}
+
+// Ensure that directives with comments include the complete
+// text of any nested directives.
+
+var directivesWithCommentsInput = `
+<!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
+<!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]>
+<!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]>
+`
+
+var directivesWithCommentsTokens = []Token{
+ CharData("\n"),
+ Directive(`DOCTYPE [ <!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
+ CharData("\n"),
+ Directive(`DOCTYPE [<!ENTITY go "Golang"> ]`),
+ CharData("\n"),
+ Directive(`DOCTYPE <!-> <!> [<!ENTITY go "Golang"> ]`),
+ CharData("\n"),
+}
+
+func TestDirectivesWithComments(t *testing.T) {
+ d := NewDecoder(strings.NewReader(directivesWithCommentsInput))
+
+ for i, want := range directivesWithCommentsTokens {
+ have, err := d.Token()
+ if err != nil {
+ t.Fatalf("token %d: unexpected error: %s", i, err)
+ }
+ if !reflect.DeepEqual(have, want) {
+ t.Errorf("token %d = %#v want %#v", i, have, want)
+ }
+ }
+}
+
+// Writer whose Write method always returns an error.
+type errWriter struct{}
+
+func (errWriter) Write(p []byte) (n int, err error) { return 0, fmt.Errorf("unwritable") }
+
+func TestEscapeTextIOErrors(t *testing.T) {
+ expectErr := "unwritable"
+ err := EscapeText(errWriter{}, []byte{'A'})
+
+ if err == nil || err.Error() != expectErr {
+ t.Errorf("have %v, want %v", err, expectErr)
+ }
+}
+
+func TestEscapeTextInvalidChar(t *testing.T) {
+ input := []byte("A \x00 terminated string.")
+ expected := "A \uFFFD terminated string."
+
+ buff := new(strings.Builder)
+ if err := EscapeText(buff, input); err != nil {
+ t.Fatalf("have %v, want nil", err)
+ }
+ text := buff.String()
+
+ if text != expected {
+ t.Errorf("have %v, want %v", text, expected)
+ }
+}
+
+func TestIssue5880(t *testing.T) {
+ type T []byte
+ data, err := Marshal(T{192, 168, 0, 1})
+ if err != nil {
+ t.Errorf("Marshal error: %v", err)
+ }
+ if !utf8.Valid(data) {
+ t.Errorf("Marshal generated invalid UTF-8: %x", data)
+ }
+}
+
+func TestIssue8535(t *testing.T) {
+
+ type ExampleConflict struct {
+ XMLName Name `xml:"example"`
+ Link string `xml:"link"`
+ AtomLink string `xml:"http://www.w3.org/2005/Atom link"` // Same name in a different name space
+ }
+ testCase := `<example>
+ <title>Example</title>
+ <link>http://example.com/default</link> <!-- not assigned -->
+ <link>http://example.com/home</link> <!-- not assigned -->
+ <ns:link xmlns:ns="http://www.w3.org/2005/Atom">http://example.com/ns</ns:link>
+ </example>`
+
+ var dest ExampleConflict
+ d := NewDecoder(strings.NewReader(testCase))
+ if err := d.Decode(&dest); err != nil {
+ t.Fatal(err)
+ }
+}
+
+func TestEncodeXMLNS(t *testing.T) {
+ testCases := []struct {
+ f func() ([]byte, error)
+ want string
+ ok bool
+ }{
+ {encodeXMLNS1, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, true},
+ {encodeXMLNS2, `<Test><body xmlns="http://example.com/ns">hello world</body></Test>`, true},
+ {encodeXMLNS3, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, true},
+ {encodeXMLNS4, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, false},
+ }
+
+ for i, tc := range testCases {
+ if b, err := tc.f(); err == nil {
+ if got, want := string(b), tc.want; got != want {
+ t.Errorf("%d: got %s, want %s \n", i, got, want)
+ }
+ } else {
+ t.Errorf("%d: marshal failed with %s", i, err)
+ }
+ }
+}
+
+func encodeXMLNS1() ([]byte, error) {
+
+ type T struct {
+ XMLName Name `xml:"Test"`
+ Ns string `xml:"xmlns,attr"`
+ Body string
+ }
+
+ s := &T{Ns: "http://example.com/ns", Body: "hello world"}
+ return Marshal(s)
+}
+
+func encodeXMLNS2() ([]byte, error) {
+
+ type Test struct {
+ Body string `xml:"http://example.com/ns body"`
+ }
+
+ s := &Test{Body: "hello world"}
+ return Marshal(s)
+}
+
+func encodeXMLNS3() ([]byte, error) {
+
+ type Test struct {
+ XMLName Name `xml:"http://example.com/ns Test"`
+ Body string
+ }
+
+ //s := &Test{XMLName: Name{"http://example.com/ns",""}, Body: "hello world"} is unusable as the "-" is missing
+ // as documentation states
+ s := &Test{Body: "hello world"}
+ return Marshal(s)
+}
+
+func encodeXMLNS4() ([]byte, error) {
+
+ type Test struct {
+ Ns string `xml:"xmlns,attr"`
+ Body string
+ }
+
+ s := &Test{Ns: "http://example.com/ns", Body: "hello world"}
+ return Marshal(s)
+}
+
+func TestIssue11405(t *testing.T) {
+ testCases := []string{
+ "<root>",
+ "<root><foo>",
+ "<root><foo></foo>",
+ }
+ for _, tc := range testCases {
+ d := NewDecoder(strings.NewReader(tc))
+ var err error
+ for {
+ _, err = d.Token()
+ if err != nil {
+ break
+ }
+ }
+ if _, ok := err.(*SyntaxError); !ok {
+ t.Errorf("%s: Token: Got error %v, want SyntaxError", tc, err)
+ }
+ }
+}
+
+func TestIssue12417(t *testing.T) {
+ testCases := []struct {
+ s string
+ ok bool
+ }{
+ {`<?xml encoding="UtF-8" version="1.0"?><root/>`, true},
+ {`<?xml encoding="UTF-8" version="1.0"?><root/>`, true},
+ {`<?xml encoding="utf-8" version="1.0"?><root/>`, true},
+ {`<?xml encoding="uuu-9" version="1.0"?><root/>`, false},
+ }
+ for _, tc := range testCases {
+ d := NewDecoder(strings.NewReader(tc.s))
+ var err error
+ for {
+ _, err = d.Token()
+ if err != nil {
+ if err == io.EOF {
+ err = nil
+ }
+ break
+ }
+ }
+ if err != nil && tc.ok {
+ t.Errorf("%q: Encoding charset: expected no error, got %s", tc.s, err)
+ continue
+ }
+ if err == nil && !tc.ok {
+ t.Errorf("%q: Encoding charset: expected error, got nil", tc.s)
+ }
+ }
+}
+
+func TestIssue7113(t *testing.T) {
+ type C struct {
+ XMLName Name `xml:""` // Sets empty namespace
+ }
+
+ type D struct {
+ XMLName Name `xml:"d"`
+ }
+
+ type A struct {
+ XMLName Name `xml:""`
+ C C `xml:""`
+ D D
+ }
+
+ var a A
+ structSpace := "b"
+ xmlTest := `<A xmlns="` + structSpace + `"><C xmlns=""></C><d></d></A>`
+ t.Log(xmlTest)
+ err := Unmarshal([]byte(xmlTest), &a)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if a.XMLName.Space != structSpace {
+ t.Errorf("overidding with empty namespace: unmarshalling, got %s, want %s\n", a.XMLName.Space, structSpace)
+ }
+ if len(a.C.XMLName.Space) != 0 {
+ t.Fatalf("overidding with empty namespace: unmarshalling, got %s, want empty\n", a.C.XMLName.Space)
+ }
+
+ var b []byte
+ b, err = Marshal(&a)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if len(a.C.XMLName.Space) != 0 {
+ t.Errorf("overidding with empty namespace: marshaling, got %s in C tag which should be empty\n", a.C.XMLName.Space)
+ }
+ if string(b) != xmlTest {
+ t.Fatalf("overidding with empty namespace: marshalling, got %s, want %s\n", b, xmlTest)
+ }
+ var c A
+ err = Unmarshal(b, &c)
+ if err != nil {
+ t.Fatalf("second Unmarshal failed: %s", err)
+ }
+ if c.XMLName.Space != "b" {
+ t.Errorf("overidding with empty namespace: after marshaling & unmarshaling, XML name space: got %s, want %s\n", a.XMLName.Space, structSpace)
+ }
+ if len(c.C.XMLName.Space) != 0 {
+ t.Errorf("overidding with empty namespace: after marshaling & unmarshaling, got %s, want empty\n", a.C.XMLName.Space)
+ }
+}
+
+func TestIssue20396(t *testing.T) {
+
+ var attrError = UnmarshalError("XML syntax error on line 1: expected attribute name in element")
+
+ testCases := []struct {
+ s string
+ wantErr error
+ }{
+ {`<a:te:st xmlns:a="abcd"/>`, // Issue 20396
+ UnmarshalError("XML syntax error on line 1: expected element name after <")},
+ {`<a:te=st xmlns:a="abcd"/>`, attrError},
+ {`<a:te&st xmlns:a="abcd"/>`, attrError},
+ {`<a:test xmlns:a="abcd"/>`, nil},
+ {`<a:te:st xmlns:a="abcd">1</a:te:st>`,
+ UnmarshalError("XML syntax error on line 1: expected element name after <")},
+ {`<a:te=st xmlns:a="abcd">1</a:te=st>`, attrError},
+ {`<a:te&st xmlns:a="abcd">1</a:te&st>`, attrError},
+ {`<a:test xmlns:a="abcd">1</a:test>`, nil},
+ }
+
+ var dest string
+ for _, tc := range testCases {
+ if got, want := Unmarshal([]byte(tc.s), &dest), tc.wantErr; got != want {
+ if got == nil {
+ t.Errorf("%s: Unexpected success, want %v", tc.s, want)
+ } else if want == nil {
+ t.Errorf("%s: Unexpected error, got %v", tc.s, got)
+ } else if got.Error() != want.Error() {
+ t.Errorf("%s: got %v, want %v", tc.s, got, want)
+ }
+ }
+ }
+}
+
+func TestIssue20685(t *testing.T) {
+ testCases := []struct {
+ s string
+ ok bool
+ }{
+ {`<x:book xmlns:x="abcd" xmlns:y="abcd"><unclosetag>one</x:book>`, false},
+ {`<x:book xmlns:x="abcd" xmlns:y="abcd">one</x:book>`, true},
+ {`<x:book xmlns:x="abcd" xmlns:y="abcd">one</y:book>`, false},
+ {`<x:book xmlns:y="abcd" xmlns:x="abcd">one</y:book>`, false},
+ {`<x:book xmlns:x="abcd">one</y:book>`, false},
+ {`<x:book>one</y:book>`, false},
+ {`<xbook>one</ybook>`, false},
+ }
+ for _, tc := range testCases {
+ d := NewDecoder(strings.NewReader(tc.s))
+ var err error
+ for {
+ _, err = d.Token()
+ if err != nil {
+ if err == io.EOF {
+ err = nil
+ }
+ break
+ }
+ }
+ if err != nil && tc.ok {
+ t.Errorf("%q: Closing tag with namespace : expected no error, got %s", tc.s, err)
+ continue
+ }
+ if err == nil && !tc.ok {
+ t.Errorf("%q: Closing tag with namespace : expected error, got nil", tc.s)
+ }
+ }
+}
+
+func tokenMap(mapping func(t Token) Token) func(TokenReader) TokenReader {
+ return func(src TokenReader) TokenReader {
+ return mapper{
+ t: src,
+ f: mapping,
+ }
+ }
+}
+
+type mapper struct {
+ t TokenReader
+ f func(Token) Token
+}
+
+func (m mapper) Token() (Token, error) {
+ tok, err := m.t.Token()
+ if err != nil {
+ return nil, err
+ }
+ return m.f(tok), nil
+}
+
+func TestNewTokenDecoderIdempotent(t *testing.T) {
+ d := NewDecoder(strings.NewReader(`<br>`))
+ d2 := NewTokenDecoder(d)
+ if d != d2 {
+ t.Error("NewTokenDecoder did not detect underlying Decoder")
+ }
+}
+
+func TestWrapDecoder(t *testing.T) {
+ d := NewDecoder(strings.NewReader(`<quote>[Re-enter Clown with a letter, and FABIAN]</quote>`))
+ m := tokenMap(func(t Token) Token {
+ switch tok := t.(type) {
+ case StartElement:
+ if tok.Name.Local == "quote" {
+ tok.Name.Local = "blocking"
+ return tok
+ }
+ case EndElement:
+ if tok.Name.Local == "quote" {
+ tok.Name.Local = "blocking"
+ return tok
+ }
+ }
+ return t
+ })
+
+ d = NewTokenDecoder(m(d))
+
+ o := struct {
+ XMLName Name `xml:"blocking"`
+ Chardata string `xml:",chardata"`
+ }{}
+
+ if err := d.Decode(&o); err != nil {
+ t.Fatal("Got unexpected error while decoding:", err)
+ }
+
+ if o.Chardata != "[Re-enter Clown with a letter, and FABIAN]" {
+ t.Fatalf("Got unexpected chardata: `%s`\n", o.Chardata)
+ }
+}
+
+type tokReader struct{}
+
+func (tokReader) Token() (Token, error) {
+ return StartElement{}, nil
+}
+
+type Failure struct{}
+
+func (Failure) UnmarshalXML(*Decoder, StartElement) error {
+ return nil
+}
+
+func TestTokenUnmarshaler(t *testing.T) {
+ defer func() {
+ if r := recover(); r != nil {
+ t.Error("Unexpected panic using custom token unmarshaler")
+ }
+ }()
+
+ d := NewTokenDecoder(tokReader{})
+ d.Decode(&Failure{})
+}
+
+func testRoundTrip(t *testing.T, input string) {
+ d := NewDecoder(strings.NewReader(input))
+ var tokens []Token
+ var buf bytes.Buffer
+ e := NewEncoder(&buf)
+ for {
+ tok, err := d.Token()
+ if err == io.EOF {
+ break
+ }
+ if err != nil {
+ t.Fatalf("invalid input: %v", err)
+ }
+ if err := e.EncodeToken(tok); err != nil {
+ t.Fatalf("failed to re-encode input: %v", err)
+ }
+ tokens = append(tokens, CopyToken(tok))
+ }
+ if err := e.Flush(); err != nil {
+ t.Fatal(err)
+ }
+
+ d = NewDecoder(&buf)
+ for {
+ tok, err := d.Token()
+ if err == io.EOF {
+ break
+ }
+ if err != nil {
+ t.Fatalf("failed to decode output: %v", err)
+ }
+ if len(tokens) == 0 {
+ t.Fatalf("unexpected token: %#v", tok)
+ }
+ a, b := tokens[0], tok
+ if !reflect.DeepEqual(a, b) {
+ t.Fatalf("token mismatch: %#v vs %#v", a, b)
+ }
+ tokens = tokens[1:]
+ }
+ if len(tokens) > 0 {
+ t.Fatalf("lost tokens: %#v", tokens)
+ }
+}
+
+func TestRoundTrip(t *testing.T) {
+ tests := map[string]string{
+ "trailing colon": `<foo abc:="x"></foo>`,
+ "comments in directives": `<!ENTITY x<!<!-- c1 [ " -->--x --> > <e></e> <!DOCTYPE xxx [ x<!-- c2 " -->--x ]>`,
+ }
+ for name, input := range tests {
+ t.Run(name, func(t *testing.T) { testRoundTrip(t, input) })
+ }
+}
+
+func TestParseErrors(t *testing.T) {
+ withDefaultHeader := func(s string) string {
+ return `<?xml version="1.0" encoding="UTF-8"?>` + s
+ }
+ tests := []struct {
+ src string
+ err string
+ }{
+ {withDefaultHeader(`</foo>`), `unexpected end element </foo>`},
+ {withDefaultHeader(`<x:foo></y:foo>`), `element <foo> in space x closed by </foo> in space y`},
+ {withDefaultHeader(`<? not ok ?>`), `expected target name after <?`},
+ {withDefaultHeader(`<!- not ok -->`), `invalid sequence <!- not part of <!--`},
+ {withDefaultHeader(`<!-? not ok -->`), `invalid sequence <!- not part of <!--`},
+ {withDefaultHeader(`<![not ok]>`), `invalid <![ sequence`},
+ {withDefaultHeader("\xf1"), `invalid UTF-8`},
+
+ // Header-related errors.
+ {`<?xml version="1.1" encoding="UTF-8"?>`, `unsupported version "1.1"; only version 1.0 is supported`},
+
+ // Cases below are for "no errors".
+ {withDefaultHeader(`<?ok?>`), ``},
+ {withDefaultHeader(`<?ok version="ok"?>`), ``},
+ }
+
+ for _, test := range tests {
+ d := NewDecoder(strings.NewReader(test.src))
+ var err error
+ for {
+ _, err = d.Token()
+ if err != nil {
+ break
+ }
+ }
+ if test.err == "" {
+ if err != io.EOF {
+ t.Errorf("parse %s: have %q error, expected none", test.src, err)
+ }
+ continue
+ }
+ // Inv: err != nil
+ if err == io.EOF {
+ t.Errorf("parse %s: unexpected EOF", test.src)
+ continue
+ }
+ if !strings.Contains(err.Error(), test.err) {
+ t.Errorf("parse %s: can't find %q error sudbstring\nerror: %q", test.src, test.err, err)
+ continue
+ }
+ }
+}
+
+const testInputHTMLAutoClose = `<?xml version="1.0" encoding="UTF-8"?>
+<br>
+<br/><br/>
+<br><br>
+<br></br>
+<BR>
+<BR/><BR/>
+<Br></Br>
+<BR><span id="test">abc</span><br/><br/>`
+
+func BenchmarkHTMLAutoClose(b *testing.B) {
+ b.RunParallel(func(p *testing.PB) {
+ for p.Next() {
+ d := NewDecoder(strings.NewReader(testInputHTMLAutoClose))
+ d.Strict = false
+ d.AutoClose = HTMLAutoClose
+ d.Entity = HTMLEntity
+ for {
+ _, err := d.Token()
+ if err != nil {
+ if err == io.EOF {
+ break
+ }
+ b.Fatalf("unexpected error: %v", err)
+ }
+ }
+ }
+ })
+}
+
+func TestHTMLAutoClose(t *testing.T) {
+ wantTokens := []Token{
+ ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
+ CharData("\n"),
+ StartElement{Name{"", "br"}, []Attr{}},
+ EndElement{Name{"", "br"}},
+ CharData("\n"),
+ StartElement{Name{"", "br"}, []Attr{}},
+ EndElement{Name{"", "br"}},
+ StartElement{Name{"", "br"}, []Attr{}},
+ EndElement{Name{"", "br"}},
+ CharData("\n"),
+ StartElement{Name{"", "br"}, []Attr{}},
+ EndElement{Name{"", "br"}},
+ StartElement{Name{"", "br"}, []Attr{}},
+ EndElement{Name{"", "br"}},
+ CharData("\n"),
+ StartElement{Name{"", "br"}, []Attr{}},
+ EndElement{Name{"", "br"}},
+ CharData("\n"),
+ StartElement{Name{"", "BR"}, []Attr{}},
+ EndElement{Name{"", "BR"}},
+ CharData("\n"),
+ StartElement{Name{"", "BR"}, []Attr{}},
+ EndElement{Name{"", "BR"}},
+ StartElement{Name{"", "BR"}, []Attr{}},
+ EndElement{Name{"", "BR"}},
+ CharData("\n"),
+ StartElement{Name{"", "Br"}, []Attr{}},
+ EndElement{Name{"", "Br"}},
+ CharData("\n"),
+ StartElement{Name{"", "BR"}, []Attr{}},
+ EndElement{Name{"", "BR"}},
+ StartElement{Name{"", "span"}, []Attr{{Name: Name{"", "id"}, Value: "test"}}},
+ CharData("abc"),
+ EndElement{Name{"", "span"}},
+ StartElement{Name{"", "br"}, []Attr{}},
+ EndElement{Name{"", "br"}},
+ StartElement{Name{"", "br"}, []Attr{}},
+ EndElement{Name{"", "br"}},
+ }
+
+ d := NewDecoder(strings.NewReader(testInputHTMLAutoClose))
+ d.Strict = false
+ d.AutoClose = HTMLAutoClose
+ d.Entity = HTMLEntity
+ var haveTokens []Token
+ for {
+ tok, err := d.Token()
+ if err != nil {
+ if err == io.EOF {
+ break
+ }
+ t.Fatalf("unexpected error: %v", err)
+ }
+ haveTokens = append(haveTokens, CopyToken(tok))
+ }
+ if len(haveTokens) != len(wantTokens) {
+ t.Errorf("tokens count mismatch: have %d, want %d", len(haveTokens), len(wantTokens))
+ }
+ for i, want := range wantTokens {
+ if i >= len(haveTokens) {
+ t.Errorf("token[%d] expected %#v, have no token", i, want)
+ } else {
+ have := haveTokens[i]
+ if !reflect.DeepEqual(have, want) {
+ t.Errorf("token[%d] mismatch:\nhave: %#v\nwant: %#v", i, have, want)
+ }
+ }
+ }
+}