diff options
Diffstat (limited to 'src/encoding/xml')
-rw-r--r-- | src/encoding/xml/atom_test.go | 56 | ||||
-rw-r--r-- | src/encoding/xml/example_marshaling_test.go | 84 | ||||
-rw-r--r-- | src/encoding/xml/example_test.go | 151 | ||||
-rw-r--r-- | src/encoding/xml/example_text_marshaling_test.go | 79 | ||||
-rw-r--r-- | src/encoding/xml/marshal.go | 1131 | ||||
-rw-r--r-- | src/encoding/xml/marshal_test.go | 2591 | ||||
-rw-r--r-- | src/encoding/xml/read.go | 777 | ||||
-rw-r--r-- | src/encoding/xml/read_test.go | 1128 | ||||
-rw-r--r-- | src/encoding/xml/typeinfo.go | 367 | ||||
-rw-r--r-- | src/encoding/xml/xml.go | 2060 | ||||
-rw-r--r-- | src/encoding/xml/xml_test.go | 1479 |
11 files changed, 9903 insertions, 0 deletions
diff --git a/src/encoding/xml/atom_test.go b/src/encoding/xml/atom_test.go new file mode 100644 index 0000000..f394dab --- /dev/null +++ b/src/encoding/xml/atom_test.go @@ -0,0 +1,56 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package xml + +import "time" + +var atomValue = &Feed{ + XMLName: Name{"http://www.w3.org/2005/Atom", "feed"}, + Title: "Example Feed", + Link: []Link{{Href: "http://example.org/"}}, + Updated: ParseTime("2003-12-13T18:30:02Z"), + Author: Person{Name: "John Doe"}, + ID: "urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6", + + Entry: []Entry{ + { + Title: "Atom-Powered Robots Run Amok", + Link: []Link{{Href: "http://example.org/2003/12/13/atom03"}}, + ID: "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a", + Updated: ParseTime("2003-12-13T18:30:02Z"), + Summary: NewText("Some text."), + }, + }, +} + +var atomXML = `` + + `<feed xmlns="http://www.w3.org/2005/Atom" updated="2003-12-13T18:30:02Z">` + + `<title>Example Feed</title>` + + `<id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>` + + `<link href="http://example.org/"></link>` + + `<author><name>John Doe</name><uri></uri><email></email></author>` + + `<entry>` + + `<title>Atom-Powered Robots Run Amok</title>` + + `<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>` + + `<link href="http://example.org/2003/12/13/atom03"></link>` + + `<updated>2003-12-13T18:30:02Z</updated>` + + `<author><name></name><uri></uri><email></email></author>` + + `<summary>Some text.</summary>` + + `</entry>` + + `</feed>` + +func ParseTime(str string) time.Time { + t, err := time.Parse(time.RFC3339, str) + if err != nil { + panic(err) + } + return t +} + +func NewText(text string) Text { + return Text{ + Body: text, + } +} diff --git a/src/encoding/xml/example_marshaling_test.go b/src/encoding/xml/example_marshaling_test.go new file mode 100644 index 0000000..9f9e801 --- /dev/null +++ b/src/encoding/xml/example_marshaling_test.go @@ -0,0 +1,84 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package xml_test + +import ( + "encoding/xml" + "fmt" + "log" + "strings" +) + +type Animal int + +const ( + Unknown Animal = iota + Gopher + Zebra +) + +func (a *Animal) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { + var s string + if err := d.DecodeElement(&s, &start); err != nil { + return err + } + switch strings.ToLower(s) { + default: + *a = Unknown + case "gopher": + *a = Gopher + case "zebra": + *a = Zebra + } + + return nil +} + +func (a Animal) MarshalXML(e *xml.Encoder, start xml.StartElement) error { + var s string + switch a { + default: + s = "unknown" + case Gopher: + s = "gopher" + case Zebra: + s = "zebra" + } + return e.EncodeElement(s, start) +} + +func Example_customMarshalXML() { + blob := ` + <animals> + <animal>gopher</animal> + <animal>armadillo</animal> + <animal>zebra</animal> + <animal>unknown</animal> + <animal>gopher</animal> + <animal>bee</animal> + <animal>gopher</animal> + <animal>zebra</animal> + </animals>` + var zoo struct { + Animals []Animal `xml:"animal"` + } + if err := xml.Unmarshal([]byte(blob), &zoo); err != nil { + log.Fatal(err) + } + + census := make(map[Animal]int) + for _, animal := range zoo.Animals { + census[animal] += 1 + } + + fmt.Printf("Zoo Census:\n* Gophers: %d\n* Zebras: %d\n* Unknown: %d\n", + census[Gopher], census[Zebra], census[Unknown]) + + // Output: + // Zoo Census: + // * Gophers: 3 + // * Zebras: 2 + // * Unknown: 3 +} diff --git a/src/encoding/xml/example_test.go b/src/encoding/xml/example_test.go new file mode 100644 index 0000000..21b48de --- /dev/null +++ b/src/encoding/xml/example_test.go @@ -0,0 +1,151 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package xml_test + +import ( + "encoding/xml" + "fmt" + "os" +) + +func ExampleMarshalIndent() { + type Address struct { + City, State string + } + type Person struct { + XMLName xml.Name `xml:"person"` + Id int `xml:"id,attr"` + FirstName string `xml:"name>first"` + LastName string `xml:"name>last"` + Age int `xml:"age"` + Height float32 `xml:"height,omitempty"` + Married bool + Address + Comment string `xml:",comment"` + } + + v := &Person{Id: 13, FirstName: "John", LastName: "Doe", Age: 42} + v.Comment = " Need more details. " + v.Address = Address{"Hanga Roa", "Easter Island"} + + output, err := xml.MarshalIndent(v, " ", " ") + if err != nil { + fmt.Printf("error: %v\n", err) + } + + os.Stdout.Write(output) + // Output: + // <person id="13"> + // <name> + // <first>John</first> + // <last>Doe</last> + // </name> + // <age>42</age> + // <Married>false</Married> + // <City>Hanga Roa</City> + // <State>Easter Island</State> + // <!-- Need more details. --> + // </person> +} + +func ExampleEncoder() { + type Address struct { + City, State string + } + type Person struct { + XMLName xml.Name `xml:"person"` + Id int `xml:"id,attr"` + FirstName string `xml:"name>first"` + LastName string `xml:"name>last"` + Age int `xml:"age"` + Height float32 `xml:"height,omitempty"` + Married bool + Address + Comment string `xml:",comment"` + } + + v := &Person{Id: 13, FirstName: "John", LastName: "Doe", Age: 42} + v.Comment = " Need more details. " + v.Address = Address{"Hanga Roa", "Easter Island"} + + enc := xml.NewEncoder(os.Stdout) + enc.Indent(" ", " ") + if err := enc.Encode(v); err != nil { + fmt.Printf("error: %v\n", err) + } + + // Output: + // <person id="13"> + // <name> + // <first>John</first> + // <last>Doe</last> + // </name> + // <age>42</age> + // <Married>false</Married> + // <City>Hanga Roa</City> + // <State>Easter Island</State> + // <!-- Need more details. --> + // </person> +} + +// This example demonstrates unmarshaling an XML excerpt into a value with +// some preset fields. Note that the Phone field isn't modified and that +// the XML <Company> element is ignored. Also, the Groups field is assigned +// considering the element path provided in its tag. +func ExampleUnmarshal() { + type Email struct { + Where string `xml:"where,attr"` + Addr string + } + type Address struct { + City, State string + } + type Result struct { + XMLName xml.Name `xml:"Person"` + Name string `xml:"FullName"` + Phone string + Email []Email + Groups []string `xml:"Group>Value"` + Address + } + v := Result{Name: "none", Phone: "none"} + + data := ` + <Person> + <FullName>Grace R. Emlin</FullName> + <Company>Example Inc.</Company> + <Email where="home"> + <Addr>gre@example.com</Addr> + </Email> + <Email where='work'> + <Addr>gre@work.com</Addr> + </Email> + <Group> + <Value>Friends</Value> + <Value>Squash</Value> + </Group> + <City>Hanga Roa</City> + <State>Easter Island</State> + </Person> + ` + err := xml.Unmarshal([]byte(data), &v) + if err != nil { + fmt.Printf("error: %v", err) + return + } + fmt.Printf("XMLName: %#v\n", v.XMLName) + fmt.Printf("Name: %q\n", v.Name) + fmt.Printf("Phone: %q\n", v.Phone) + fmt.Printf("Email: %v\n", v.Email) + fmt.Printf("Groups: %v\n", v.Groups) + fmt.Printf("Address: %v\n", v.Address) + // Output: + // XMLName: xml.Name{Space:"", Local:"Person"} + // Name: "Grace R. Emlin" + // Phone: "none" + // Email: [{home gre@example.com} {work gre@work.com}] + // Groups: [Friends Squash] + // Address: {Hanga Roa Easter Island} +} diff --git a/src/encoding/xml/example_text_marshaling_test.go b/src/encoding/xml/example_text_marshaling_test.go new file mode 100644 index 0000000..2549cb1 --- /dev/null +++ b/src/encoding/xml/example_text_marshaling_test.go @@ -0,0 +1,79 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package xml_test + +import ( + "encoding/xml" + "fmt" + "log" + "strings" +) + +type Size int + +const ( + Unrecognized Size = iota + Small + Large +) + +func (s *Size) UnmarshalText(text []byte) error { + switch strings.ToLower(string(text)) { + default: + *s = Unrecognized + case "small": + *s = Small + case "large": + *s = Large + } + return nil +} + +func (s Size) MarshalText() ([]byte, error) { + var name string + switch s { + default: + name = "unrecognized" + case Small: + name = "small" + case Large: + name = "large" + } + return []byte(name), nil +} + +func Example_textMarshalXML() { + blob := ` + <sizes> + <size>small</size> + <size>regular</size> + <size>large</size> + <size>unrecognized</size> + <size>small</size> + <size>normal</size> + <size>small</size> + <size>large</size> + </sizes>` + var inventory struct { + Sizes []Size `xml:"size"` + } + if err := xml.Unmarshal([]byte(blob), &inventory); err != nil { + log.Fatal(err) + } + + counts := make(map[Size]int) + for _, size := range inventory.Sizes { + counts[size] += 1 + } + + fmt.Printf("Inventory Counts:\n* Small: %d\n* Large: %d\n* Unrecognized: %d\n", + counts[Small], counts[Large], counts[Unrecognized]) + + // Output: + // Inventory Counts: + // * Small: 3 + // * Large: 2 + // * Unrecognized: 3 +} diff --git a/src/encoding/xml/marshal.go b/src/encoding/xml/marshal.go new file mode 100644 index 0000000..05b5542 --- /dev/null +++ b/src/encoding/xml/marshal.go @@ -0,0 +1,1131 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package xml + +import ( + "bufio" + "bytes" + "encoding" + "errors" + "fmt" + "io" + "reflect" + "strconv" + "strings" +) + +const ( + // Header is a generic XML header suitable for use with the output of [Marshal]. + // This is not automatically added to any output of this package, + // it is provided as a convenience. + Header = `<?xml version="1.0" encoding="UTF-8"?>` + "\n" +) + +// Marshal returns the XML encoding of v. +// +// Marshal handles an array or slice by marshaling each of the elements. +// Marshal handles a pointer by marshaling the value it points at or, if the +// pointer is nil, by writing nothing. Marshal handles an interface value by +// marshaling the value it contains or, if the interface value is nil, by +// writing nothing. Marshal handles all other data by writing one or more XML +// elements containing the data. +// +// The name for the XML elements is taken from, in order of preference: +// - the tag on the XMLName field, if the data is a struct +// - the value of the XMLName field of type [Name] +// - the tag of the struct field used to obtain the data +// - the name of the struct field used to obtain the data +// - the name of the marshaled type +// +// The XML element for a struct contains marshaled elements for each of the +// exported fields of the struct, with these exceptions: +// - the XMLName field, described above, is omitted. +// - a field with tag "-" is omitted. +// - a field with tag "name,attr" becomes an attribute with +// the given name in the XML element. +// - a field with tag ",attr" becomes an attribute with the +// field name in the XML element. +// - a field with tag ",chardata" is written as character data, +// not as an XML element. +// - a field with tag ",cdata" is written as character data +// wrapped in one or more <![CDATA[ ... ]]> tags, not as an XML element. +// - a field with tag ",innerxml" is written verbatim, not subject +// to the usual marshaling procedure. +// - a field with tag ",comment" is written as an XML comment, not +// subject to the usual marshaling procedure. It must not contain +// the "--" string within it. +// - a field with a tag including the "omitempty" option is omitted +// if the field value is empty. The empty values are false, 0, any +// nil pointer or interface value, and any array, slice, map, or +// string of length zero. +// - an anonymous struct field is handled as if the fields of its +// value were part of the outer struct. +// - a field implementing [Marshaler] is written by calling its MarshalXML +// method. +// - a field implementing [encoding.TextMarshaler] is written by encoding the +// result of its MarshalText method as text. +// +// If a field uses a tag "a>b>c", then the element c will be nested inside +// parent elements a and b. Fields that appear next to each other that name +// the same parent will be enclosed in one XML element. +// +// If the XML name for a struct field is defined by both the field tag and the +// struct's XMLName field, the names must match. +// +// See [MarshalIndent] for an example. +// +// Marshal will return an error if asked to marshal a channel, function, or map. +func Marshal(v any) ([]byte, error) { + var b bytes.Buffer + enc := NewEncoder(&b) + if err := enc.Encode(v); err != nil { + return nil, err + } + if err := enc.Close(); err != nil { + return nil, err + } + return b.Bytes(), nil +} + +// Marshaler is the interface implemented by objects that can marshal +// themselves into valid XML elements. +// +// MarshalXML encodes the receiver as zero or more XML elements. +// By convention, arrays or slices are typically encoded as a sequence +// of elements, one per entry. +// Using start as the element tag is not required, but doing so +// will enable [Unmarshal] to match the XML elements to the correct +// struct field. +// One common implementation strategy is to construct a separate +// value with a layout corresponding to the desired XML and then +// to encode it using e.EncodeElement. +// Another common strategy is to use repeated calls to e.EncodeToken +// to generate the XML output one token at a time. +// The sequence of encoded tokens must make up zero or more valid +// XML elements. +type Marshaler interface { + MarshalXML(e *Encoder, start StartElement) error +} + +// MarshalerAttr is the interface implemented by objects that can marshal +// themselves into valid XML attributes. +// +// MarshalXMLAttr returns an XML attribute with the encoded value of the receiver. +// Using name as the attribute name is not required, but doing so +// will enable [Unmarshal] to match the attribute to the correct +// struct field. +// If MarshalXMLAttr returns the zero attribute [Attr]{}, no attribute +// will be generated in the output. +// MarshalXMLAttr is used only for struct fields with the +// "attr" option in the field tag. +type MarshalerAttr interface { + MarshalXMLAttr(name Name) (Attr, error) +} + +// MarshalIndent works like [Marshal], but each XML element begins on a new +// indented line that starts with prefix and is followed by one or more +// copies of indent according to the nesting depth. +func MarshalIndent(v any, prefix, indent string) ([]byte, error) { + var b bytes.Buffer + enc := NewEncoder(&b) + enc.Indent(prefix, indent) + if err := enc.Encode(v); err != nil { + return nil, err + } + if err := enc.Close(); err != nil { + return nil, err + } + return b.Bytes(), nil +} + +// An Encoder writes XML data to an output stream. +type Encoder struct { + p printer +} + +// NewEncoder returns a new encoder that writes to w. +func NewEncoder(w io.Writer) *Encoder { + e := &Encoder{printer{w: bufio.NewWriter(w)}} + e.p.encoder = e + return e +} + +// Indent sets the encoder to generate XML in which each element +// begins on a new indented line that starts with prefix and is followed by +// one or more copies of indent according to the nesting depth. +func (enc *Encoder) Indent(prefix, indent string) { + enc.p.prefix = prefix + enc.p.indent = indent +} + +// Encode writes the XML encoding of v to the stream. +// +// See the documentation for [Marshal] for details about the conversion +// of Go values to XML. +// +// Encode calls [Encoder.Flush] before returning. +func (enc *Encoder) Encode(v any) error { + err := enc.p.marshalValue(reflect.ValueOf(v), nil, nil) + if err != nil { + return err + } + return enc.p.w.Flush() +} + +// EncodeElement writes the XML encoding of v to the stream, +// using start as the outermost tag in the encoding. +// +// See the documentation for [Marshal] for details about the conversion +// of Go values to XML. +// +// EncodeElement calls [Encoder.Flush] before returning. +func (enc *Encoder) EncodeElement(v any, start StartElement) error { + err := enc.p.marshalValue(reflect.ValueOf(v), nil, &start) + if err != nil { + return err + } + return enc.p.w.Flush() +} + +var ( + begComment = []byte("<!--") + endComment = []byte("-->") + endProcInst = []byte("?>") +) + +// EncodeToken writes the given XML token to the stream. +// It returns an error if [StartElement] and [EndElement] tokens are not properly matched. +// +// EncodeToken does not call [Encoder.Flush], because usually it is part of a larger operation +// such as [Encoder.Encode] or [Encoder.EncodeElement] (or a custom [Marshaler]'s MarshalXML invoked +// during those), and those will call Flush when finished. +// Callers that create an Encoder and then invoke EncodeToken directly, without +// using Encode or EncodeElement, need to call Flush when finished to ensure +// that the XML is written to the underlying writer. +// +// EncodeToken allows writing a [ProcInst] with Target set to "xml" only as the first token +// in the stream. +func (enc *Encoder) EncodeToken(t Token) error { + + p := &enc.p + switch t := t.(type) { + case StartElement: + if err := p.writeStart(&t); err != nil { + return err + } + case EndElement: + if err := p.writeEnd(t.Name); err != nil { + return err + } + case CharData: + escapeText(p, t, false) + case Comment: + if bytes.Contains(t, endComment) { + return fmt.Errorf("xml: EncodeToken of Comment containing --> marker") + } + p.WriteString("<!--") + p.Write(t) + p.WriteString("-->") + return p.cachedWriteError() + case ProcInst: + // First token to be encoded which is also a ProcInst with target of xml + // is the xml declaration. The only ProcInst where target of xml is allowed. + if t.Target == "xml" && p.w.Buffered() != 0 { + return fmt.Errorf("xml: EncodeToken of ProcInst xml target only valid for xml declaration, first token encoded") + } + if !isNameString(t.Target) { + return fmt.Errorf("xml: EncodeToken of ProcInst with invalid Target") + } + if bytes.Contains(t.Inst, endProcInst) { + return fmt.Errorf("xml: EncodeToken of ProcInst containing ?> marker") + } + p.WriteString("<?") + p.WriteString(t.Target) + if len(t.Inst) > 0 { + p.WriteByte(' ') + p.Write(t.Inst) + } + p.WriteString("?>") + case Directive: + if !isValidDirective(t) { + return fmt.Errorf("xml: EncodeToken of Directive containing wrong < or > markers") + } + p.WriteString("<!") + p.Write(t) + p.WriteString(">") + default: + return fmt.Errorf("xml: EncodeToken of invalid token type") + + } + return p.cachedWriteError() +} + +// isValidDirective reports whether dir is a valid directive text, +// meaning angle brackets are matched, ignoring comments and strings. +func isValidDirective(dir Directive) bool { + var ( + depth int + inquote uint8 + incomment bool + ) + for i, c := range dir { + switch { + case incomment: + if c == '>' { + if n := 1 + i - len(endComment); n >= 0 && bytes.Equal(dir[n:i+1], endComment) { + incomment = false + } + } + // Just ignore anything in comment + case inquote != 0: + if c == inquote { + inquote = 0 + } + // Just ignore anything within quotes + case c == '\'' || c == '"': + inquote = c + case c == '<': + if i+len(begComment) < len(dir) && bytes.Equal(dir[i:i+len(begComment)], begComment) { + incomment = true + } else { + depth++ + } + case c == '>': + if depth == 0 { + return false + } + depth-- + } + } + return depth == 0 && inquote == 0 && !incomment +} + +// Flush flushes any buffered XML to the underlying writer. +// See the [Encoder.EncodeToken] documentation for details about when it is necessary. +func (enc *Encoder) Flush() error { + return enc.p.w.Flush() +} + +// Close the Encoder, indicating that no more data will be written. It flushes +// any buffered XML to the underlying writer and returns an error if the +// written XML is invalid (e.g. by containing unclosed elements). +func (enc *Encoder) Close() error { + return enc.p.Close() +} + +type printer struct { + w *bufio.Writer + encoder *Encoder + seq int + indent string + prefix string + depth int + indentedIn bool + putNewline bool + attrNS map[string]string // map prefix -> name space + attrPrefix map[string]string // map name space -> prefix + prefixes []string + tags []Name + closed bool + err error +} + +// createAttrPrefix finds the name space prefix attribute to use for the given name space, +// defining a new prefix if necessary. It returns the prefix. +func (p *printer) createAttrPrefix(url string) string { + if prefix := p.attrPrefix[url]; prefix != "" { + return prefix + } + + // The "http://www.w3.org/XML/1998/namespace" name space is predefined as "xml" + // and must be referred to that way. + // (The "http://www.w3.org/2000/xmlns/" name space is also predefined as "xmlns", + // but users should not be trying to use that one directly - that's our job.) + if url == xmlURL { + return xmlPrefix + } + + // Need to define a new name space. + if p.attrPrefix == nil { + p.attrPrefix = make(map[string]string) + p.attrNS = make(map[string]string) + } + + // Pick a name. We try to use the final element of the path + // but fall back to _. + prefix := strings.TrimRight(url, "/") + if i := strings.LastIndex(prefix, "/"); i >= 0 { + prefix = prefix[i+1:] + } + if prefix == "" || !isName([]byte(prefix)) || strings.Contains(prefix, ":") { + prefix = "_" + } + // xmlanything is reserved and any variant of it regardless of + // case should be matched, so: + // (('X'|'x') ('M'|'m') ('L'|'l')) + // See Section 2.3 of https://www.w3.org/TR/REC-xml/ + if len(prefix) >= 3 && strings.EqualFold(prefix[:3], "xml") { + prefix = "_" + prefix + } + if p.attrNS[prefix] != "" { + // Name is taken. Find a better one. + for p.seq++; ; p.seq++ { + if id := prefix + "_" + strconv.Itoa(p.seq); p.attrNS[id] == "" { + prefix = id + break + } + } + } + + p.attrPrefix[url] = prefix + p.attrNS[prefix] = url + + p.WriteString(`xmlns:`) + p.WriteString(prefix) + p.WriteString(`="`) + EscapeText(p, []byte(url)) + p.WriteString(`" `) + + p.prefixes = append(p.prefixes, prefix) + + return prefix +} + +// deleteAttrPrefix removes an attribute name space prefix. +func (p *printer) deleteAttrPrefix(prefix string) { + delete(p.attrPrefix, p.attrNS[prefix]) + delete(p.attrNS, prefix) +} + +func (p *printer) markPrefix() { + p.prefixes = append(p.prefixes, "") +} + +func (p *printer) popPrefix() { + for len(p.prefixes) > 0 { + prefix := p.prefixes[len(p.prefixes)-1] + p.prefixes = p.prefixes[:len(p.prefixes)-1] + if prefix == "" { + break + } + p.deleteAttrPrefix(prefix) + } +} + +var ( + marshalerType = reflect.TypeFor[Marshaler]() + marshalerAttrType = reflect.TypeFor[MarshalerAttr]() + textMarshalerType = reflect.TypeFor[encoding.TextMarshaler]() +) + +// marshalValue writes one or more XML elements representing val. +// If val was obtained from a struct field, finfo must have its details. +func (p *printer) marshalValue(val reflect.Value, finfo *fieldInfo, startTemplate *StartElement) error { + if startTemplate != nil && startTemplate.Name.Local == "" { + return fmt.Errorf("xml: EncodeElement of StartElement with missing name") + } + + if !val.IsValid() { + return nil + } + if finfo != nil && finfo.flags&fOmitEmpty != 0 && isEmptyValue(val) { + return nil + } + + // Drill into interfaces and pointers. + // This can turn into an infinite loop given a cyclic chain, + // but it matches the Go 1 behavior. + for val.Kind() == reflect.Interface || val.Kind() == reflect.Pointer { + if val.IsNil() { + return nil + } + val = val.Elem() + } + + kind := val.Kind() + typ := val.Type() + + // Check for marshaler. + if val.CanInterface() && typ.Implements(marshalerType) { + return p.marshalInterface(val.Interface().(Marshaler), defaultStart(typ, finfo, startTemplate)) + } + if val.CanAddr() { + pv := val.Addr() + if pv.CanInterface() && pv.Type().Implements(marshalerType) { + return p.marshalInterface(pv.Interface().(Marshaler), defaultStart(pv.Type(), finfo, startTemplate)) + } + } + + // Check for text marshaler. + if val.CanInterface() && typ.Implements(textMarshalerType) { + return p.marshalTextInterface(val.Interface().(encoding.TextMarshaler), defaultStart(typ, finfo, startTemplate)) + } + if val.CanAddr() { + pv := val.Addr() + if pv.CanInterface() && pv.Type().Implements(textMarshalerType) { + return p.marshalTextInterface(pv.Interface().(encoding.TextMarshaler), defaultStart(pv.Type(), finfo, startTemplate)) + } + } + + // Slices and arrays iterate over the elements. They do not have an enclosing tag. + if (kind == reflect.Slice || kind == reflect.Array) && typ.Elem().Kind() != reflect.Uint8 { + for i, n := 0, val.Len(); i < n; i++ { + if err := p.marshalValue(val.Index(i), finfo, startTemplate); err != nil { + return err + } + } + return nil + } + + tinfo, err := getTypeInfo(typ) + if err != nil { + return err + } + + // Create start element. + // Precedence for the XML element name is: + // 0. startTemplate + // 1. XMLName field in underlying struct; + // 2. field name/tag in the struct field; and + // 3. type name + var start StartElement + + if startTemplate != nil { + start.Name = startTemplate.Name + start.Attr = append(start.Attr, startTemplate.Attr...) + } else if tinfo.xmlname != nil { + xmlname := tinfo.xmlname + if xmlname.name != "" { + start.Name.Space, start.Name.Local = xmlname.xmlns, xmlname.name + } else { + fv := xmlname.value(val, dontInitNilPointers) + if v, ok := fv.Interface().(Name); ok && v.Local != "" { + start.Name = v + } + } + } + if start.Name.Local == "" && finfo != nil { + start.Name.Space, start.Name.Local = finfo.xmlns, finfo.name + } + if start.Name.Local == "" { + name := typ.Name() + if i := strings.IndexByte(name, '['); i >= 0 { + // Truncate generic instantiation name. See issue 48318. + name = name[:i] + } + if name == "" { + return &UnsupportedTypeError{typ} + } + start.Name.Local = name + } + + // Attributes + for i := range tinfo.fields { + finfo := &tinfo.fields[i] + if finfo.flags&fAttr == 0 { + continue + } + fv := finfo.value(val, dontInitNilPointers) + + if finfo.flags&fOmitEmpty != 0 && (!fv.IsValid() || isEmptyValue(fv)) { + continue + } + + if fv.Kind() == reflect.Interface && fv.IsNil() { + continue + } + + name := Name{Space: finfo.xmlns, Local: finfo.name} + if err := p.marshalAttr(&start, name, fv); err != nil { + return err + } + } + + // If an empty name was found, namespace is overridden with an empty space + if tinfo.xmlname != nil && start.Name.Space == "" && + tinfo.xmlname.xmlns == "" && tinfo.xmlname.name == "" && + len(p.tags) != 0 && p.tags[len(p.tags)-1].Space != "" { + start.Attr = append(start.Attr, Attr{Name{"", xmlnsPrefix}, ""}) + } + if err := p.writeStart(&start); err != nil { + return err + } + + if val.Kind() == reflect.Struct { + err = p.marshalStruct(tinfo, val) + } else { + s, b, err1 := p.marshalSimple(typ, val) + if err1 != nil { + err = err1 + } else if b != nil { + EscapeText(p, b) + } else { + p.EscapeString(s) + } + } + if err != nil { + return err + } + + if err := p.writeEnd(start.Name); err != nil { + return err + } + + return p.cachedWriteError() +} + +// marshalAttr marshals an attribute with the given name and value, adding to start.Attr. +func (p *printer) marshalAttr(start *StartElement, name Name, val reflect.Value) error { + if val.CanInterface() && val.Type().Implements(marshalerAttrType) { + attr, err := val.Interface().(MarshalerAttr).MarshalXMLAttr(name) + if err != nil { + return err + } + if attr.Name.Local != "" { + start.Attr = append(start.Attr, attr) + } + return nil + } + + if val.CanAddr() { + pv := val.Addr() + if pv.CanInterface() && pv.Type().Implements(marshalerAttrType) { + attr, err := pv.Interface().(MarshalerAttr).MarshalXMLAttr(name) + if err != nil { + return err + } + if attr.Name.Local != "" { + start.Attr = append(start.Attr, attr) + } + return nil + } + } + + if val.CanInterface() && val.Type().Implements(textMarshalerType) { + text, err := val.Interface().(encoding.TextMarshaler).MarshalText() + if err != nil { + return err + } + start.Attr = append(start.Attr, Attr{name, string(text)}) + return nil + } + + if val.CanAddr() { + pv := val.Addr() + if pv.CanInterface() && pv.Type().Implements(textMarshalerType) { + text, err := pv.Interface().(encoding.TextMarshaler).MarshalText() + if err != nil { + return err + } + start.Attr = append(start.Attr, Attr{name, string(text)}) + return nil + } + } + + // Dereference or skip nil pointer, interface values. + switch val.Kind() { + case reflect.Pointer, reflect.Interface: + if val.IsNil() { + return nil + } + val = val.Elem() + } + + // Walk slices. + if val.Kind() == reflect.Slice && val.Type().Elem().Kind() != reflect.Uint8 { + n := val.Len() + for i := 0; i < n; i++ { + if err := p.marshalAttr(start, name, val.Index(i)); err != nil { + return err + } + } + return nil + } + + if val.Type() == attrType { + start.Attr = append(start.Attr, val.Interface().(Attr)) + return nil + } + + s, b, err := p.marshalSimple(val.Type(), val) + if err != nil { + return err + } + if b != nil { + s = string(b) + } + start.Attr = append(start.Attr, Attr{name, s}) + return nil +} + +// defaultStart returns the default start element to use, +// given the reflect type, field info, and start template. +func defaultStart(typ reflect.Type, finfo *fieldInfo, startTemplate *StartElement) StartElement { + var start StartElement + // Precedence for the XML element name is as above, + // except that we do not look inside structs for the first field. + if startTemplate != nil { + start.Name = startTemplate.Name + start.Attr = append(start.Attr, startTemplate.Attr...) + } else if finfo != nil && finfo.name != "" { + start.Name.Local = finfo.name + start.Name.Space = finfo.xmlns + } else if typ.Name() != "" { + start.Name.Local = typ.Name() + } else { + // Must be a pointer to a named type, + // since it has the Marshaler methods. + start.Name.Local = typ.Elem().Name() + } + return start +} + +// marshalInterface marshals a Marshaler interface value. +func (p *printer) marshalInterface(val Marshaler, start StartElement) error { + // Push a marker onto the tag stack so that MarshalXML + // cannot close the XML tags that it did not open. + p.tags = append(p.tags, Name{}) + n := len(p.tags) + + err := val.MarshalXML(p.encoder, start) + if err != nil { + return err + } + + // Make sure MarshalXML closed all its tags. p.tags[n-1] is the mark. + if len(p.tags) > n { + return fmt.Errorf("xml: %s.MarshalXML wrote invalid XML: <%s> not closed", receiverType(val), p.tags[len(p.tags)-1].Local) + } + p.tags = p.tags[:n-1] + return nil +} + +// marshalTextInterface marshals a TextMarshaler interface value. +func (p *printer) marshalTextInterface(val encoding.TextMarshaler, start StartElement) error { + if err := p.writeStart(&start); err != nil { + return err + } + text, err := val.MarshalText() + if err != nil { + return err + } + EscapeText(p, text) + return p.writeEnd(start.Name) +} + +// writeStart writes the given start element. +func (p *printer) writeStart(start *StartElement) error { + if start.Name.Local == "" { + return fmt.Errorf("xml: start tag with no name") + } + + p.tags = append(p.tags, start.Name) + p.markPrefix() + + p.writeIndent(1) + p.WriteByte('<') + p.WriteString(start.Name.Local) + + if start.Name.Space != "" { + p.WriteString(` xmlns="`) + p.EscapeString(start.Name.Space) + p.WriteByte('"') + } + + // Attributes + for _, attr := range start.Attr { + name := attr.Name + if name.Local == "" { + continue + } + p.WriteByte(' ') + if name.Space != "" { + p.WriteString(p.createAttrPrefix(name.Space)) + p.WriteByte(':') + } + p.WriteString(name.Local) + p.WriteString(`="`) + p.EscapeString(attr.Value) + p.WriteByte('"') + } + p.WriteByte('>') + return nil +} + +func (p *printer) writeEnd(name Name) error { + if name.Local == "" { + return fmt.Errorf("xml: end tag with no name") + } + if len(p.tags) == 0 || p.tags[len(p.tags)-1].Local == "" { + return fmt.Errorf("xml: end tag </%s> without start tag", name.Local) + } + if top := p.tags[len(p.tags)-1]; top != name { + if top.Local != name.Local { + return fmt.Errorf("xml: end tag </%s> does not match start tag <%s>", name.Local, top.Local) + } + return fmt.Errorf("xml: end tag </%s> in namespace %s does not match start tag <%s> in namespace %s", name.Local, name.Space, top.Local, top.Space) + } + p.tags = p.tags[:len(p.tags)-1] + + p.writeIndent(-1) + p.WriteByte('<') + p.WriteByte('/') + p.WriteString(name.Local) + p.WriteByte('>') + p.popPrefix() + return nil +} + +func (p *printer) marshalSimple(typ reflect.Type, val reflect.Value) (string, []byte, error) { + switch val.Kind() { + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + return strconv.FormatInt(val.Int(), 10), nil, nil + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + return strconv.FormatUint(val.Uint(), 10), nil, nil + case reflect.Float32, reflect.Float64: + return strconv.FormatFloat(val.Float(), 'g', -1, val.Type().Bits()), nil, nil + case reflect.String: + return val.String(), nil, nil + case reflect.Bool: + return strconv.FormatBool(val.Bool()), nil, nil + case reflect.Array: + if typ.Elem().Kind() != reflect.Uint8 { + break + } + // [...]byte + var bytes []byte + if val.CanAddr() { + bytes = val.Bytes() + } else { + bytes = make([]byte, val.Len()) + reflect.Copy(reflect.ValueOf(bytes), val) + } + return "", bytes, nil + case reflect.Slice: + if typ.Elem().Kind() != reflect.Uint8 { + break + } + // []byte + return "", val.Bytes(), nil + } + return "", nil, &UnsupportedTypeError{typ} +} + +var ddBytes = []byte("--") + +// indirect drills into interfaces and pointers, returning the pointed-at value. +// If it encounters a nil interface or pointer, indirect returns that nil value. +// This can turn into an infinite loop given a cyclic chain, +// but it matches the Go 1 behavior. +func indirect(vf reflect.Value) reflect.Value { + for vf.Kind() == reflect.Interface || vf.Kind() == reflect.Pointer { + if vf.IsNil() { + return vf + } + vf = vf.Elem() + } + return vf +} + +func (p *printer) marshalStruct(tinfo *typeInfo, val reflect.Value) error { + s := parentStack{p: p} + for i := range tinfo.fields { + finfo := &tinfo.fields[i] + if finfo.flags&fAttr != 0 { + continue + } + vf := finfo.value(val, dontInitNilPointers) + if !vf.IsValid() { + // The field is behind an anonymous struct field that's + // nil. Skip it. + continue + } + + switch finfo.flags & fMode { + case fCDATA, fCharData: + emit := EscapeText + if finfo.flags&fMode == fCDATA { + emit = emitCDATA + } + if err := s.trim(finfo.parents); err != nil { + return err + } + if vf.CanInterface() && vf.Type().Implements(textMarshalerType) { + data, err := vf.Interface().(encoding.TextMarshaler).MarshalText() + if err != nil { + return err + } + if err := emit(p, data); err != nil { + return err + } + continue + } + if vf.CanAddr() { + pv := vf.Addr() + if pv.CanInterface() && pv.Type().Implements(textMarshalerType) { + data, err := pv.Interface().(encoding.TextMarshaler).MarshalText() + if err != nil { + return err + } + if err := emit(p, data); err != nil { + return err + } + continue + } + } + + var scratch [64]byte + vf = indirect(vf) + switch vf.Kind() { + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + if err := emit(p, strconv.AppendInt(scratch[:0], vf.Int(), 10)); err != nil { + return err + } + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + if err := emit(p, strconv.AppendUint(scratch[:0], vf.Uint(), 10)); err != nil { + return err + } + case reflect.Float32, reflect.Float64: + if err := emit(p, strconv.AppendFloat(scratch[:0], vf.Float(), 'g', -1, vf.Type().Bits())); err != nil { + return err + } + case reflect.Bool: + if err := emit(p, strconv.AppendBool(scratch[:0], vf.Bool())); err != nil { + return err + } + case reflect.String: + if err := emit(p, []byte(vf.String())); err != nil { + return err + } + case reflect.Slice: + if elem, ok := vf.Interface().([]byte); ok { + if err := emit(p, elem); err != nil { + return err + } + } + } + continue + + case fComment: + if err := s.trim(finfo.parents); err != nil { + return err + } + vf = indirect(vf) + k := vf.Kind() + if !(k == reflect.String || k == reflect.Slice && vf.Type().Elem().Kind() == reflect.Uint8) { + return fmt.Errorf("xml: bad type for comment field of %s", val.Type()) + } + if vf.Len() == 0 { + continue + } + p.writeIndent(0) + p.WriteString("<!--") + dashDash := false + dashLast := false + switch k { + case reflect.String: + s := vf.String() + dashDash = strings.Contains(s, "--") + dashLast = s[len(s)-1] == '-' + if !dashDash { + p.WriteString(s) + } + case reflect.Slice: + b := vf.Bytes() + dashDash = bytes.Contains(b, ddBytes) + dashLast = b[len(b)-1] == '-' + if !dashDash { + p.Write(b) + } + default: + panic("can't happen") + } + if dashDash { + return fmt.Errorf(`xml: comments must not contain "--"`) + } + if dashLast { + // "--->" is invalid grammar. Make it "- -->" + p.WriteByte(' ') + } + p.WriteString("-->") + continue + + case fInnerXML: + vf = indirect(vf) + iface := vf.Interface() + switch raw := iface.(type) { + case []byte: + p.Write(raw) + continue + case string: + p.WriteString(raw) + continue + } + + case fElement, fElement | fAny: + if err := s.trim(finfo.parents); err != nil { + return err + } + if len(finfo.parents) > len(s.stack) { + if vf.Kind() != reflect.Pointer && vf.Kind() != reflect.Interface || !vf.IsNil() { + if err := s.push(finfo.parents[len(s.stack):]); err != nil { + return err + } + } + } + } + if err := p.marshalValue(vf, finfo, nil); err != nil { + return err + } + } + s.trim(nil) + return p.cachedWriteError() +} + +// Write implements io.Writer +func (p *printer) Write(b []byte) (n int, err error) { + if p.closed && p.err == nil { + p.err = errors.New("use of closed Encoder") + } + if p.err == nil { + n, p.err = p.w.Write(b) + } + return n, p.err +} + +// WriteString implements io.StringWriter +func (p *printer) WriteString(s string) (n int, err error) { + if p.closed && p.err == nil { + p.err = errors.New("use of closed Encoder") + } + if p.err == nil { + n, p.err = p.w.WriteString(s) + } + return n, p.err +} + +// WriteByte implements io.ByteWriter +func (p *printer) WriteByte(c byte) error { + if p.closed && p.err == nil { + p.err = errors.New("use of closed Encoder") + } + if p.err == nil { + p.err = p.w.WriteByte(c) + } + return p.err +} + +// Close the Encoder, indicating that no more data will be written. It flushes +// any buffered XML to the underlying writer and returns an error if the +// written XML is invalid (e.g. by containing unclosed elements). +func (p *printer) Close() error { + if p.closed { + return nil + } + p.closed = true + if err := p.w.Flush(); err != nil { + return err + } + if len(p.tags) > 0 { + return fmt.Errorf("unclosed tag <%s>", p.tags[len(p.tags)-1].Local) + } + return nil +} + +// return the bufio Writer's cached write error +func (p *printer) cachedWriteError() error { + _, err := p.Write(nil) + return err +} + +func (p *printer) writeIndent(depthDelta int) { + if len(p.prefix) == 0 && len(p.indent) == 0 { + return + } + if depthDelta < 0 { + p.depth-- + if p.indentedIn { + p.indentedIn = false + return + } + p.indentedIn = false + } + if p.putNewline { + p.WriteByte('\n') + } else { + p.putNewline = true + } + if len(p.prefix) > 0 { + p.WriteString(p.prefix) + } + if len(p.indent) > 0 { + for i := 0; i < p.depth; i++ { + p.WriteString(p.indent) + } + } + if depthDelta > 0 { + p.depth++ + p.indentedIn = true + } +} + +type parentStack struct { + p *printer + stack []string +} + +// trim updates the XML context to match the longest common prefix of the stack +// and the given parents. A closing tag will be written for every parent +// popped. Passing a zero slice or nil will close all the elements. +func (s *parentStack) trim(parents []string) error { + split := 0 + for ; split < len(parents) && split < len(s.stack); split++ { + if parents[split] != s.stack[split] { + break + } + } + for i := len(s.stack) - 1; i >= split; i-- { + if err := s.p.writeEnd(Name{Local: s.stack[i]}); err != nil { + return err + } + } + s.stack = s.stack[:split] + return nil +} + +// push adds parent elements to the stack and writes open tags. +func (s *parentStack) push(parents []string) error { + for i := 0; i < len(parents); i++ { + if err := s.p.writeStart(&StartElement{Name: Name{Local: parents[i]}}); err != nil { + return err + } + } + s.stack = append(s.stack, parents...) + return nil +} + +// UnsupportedTypeError is returned when [Marshal] encounters a type +// that cannot be converted into XML. +type UnsupportedTypeError struct { + Type reflect.Type +} + +func (e *UnsupportedTypeError) Error() string { + return "xml: unsupported type: " + e.Type.String() +} + +func isEmptyValue(v reflect.Value) bool { + switch v.Kind() { + case reflect.Array, reflect.Map, reflect.Slice, reflect.String: + return v.Len() == 0 + case reflect.Bool, + reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, + reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, + reflect.Float32, reflect.Float64, + reflect.Interface, reflect.Pointer: + return v.IsZero() + } + return false +} diff --git a/src/encoding/xml/marshal_test.go b/src/encoding/xml/marshal_test.go new file mode 100644 index 0000000..f6bcc7f --- /dev/null +++ b/src/encoding/xml/marshal_test.go @@ -0,0 +1,2591 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package xml + +import ( + "bytes" + "errors" + "fmt" + "io" + "reflect" + "strconv" + "strings" + "sync" + "testing" + "time" +) + +type DriveType int + +const ( + HyperDrive DriveType = iota + ImprobabilityDrive +) + +type Passenger struct { + Name []string `xml:"name"` + Weight float32 `xml:"weight"` +} + +type Ship struct { + XMLName struct{} `xml:"spaceship"` + + Name string `xml:"name,attr"` + Pilot string `xml:"pilot,attr"` + Drive DriveType `xml:"drive"` + Age uint `xml:"age"` + Passenger []*Passenger `xml:"passenger"` + secret string +} + +type NamedType string + +type Port struct { + XMLName struct{} `xml:"port"` + Type string `xml:"type,attr,omitempty"` + Comment string `xml:",comment"` + Number string `xml:",chardata"` +} + +type Domain struct { + XMLName struct{} `xml:"domain"` + Country string `xml:",attr,omitempty"` + Name []byte `xml:",chardata"` + Comment []byte `xml:",comment"` +} + +type Book struct { + XMLName struct{} `xml:"book"` + Title string `xml:",chardata"` +} + +type Event struct { + XMLName struct{} `xml:"event"` + Year int `xml:",chardata"` +} + +type Movie struct { + XMLName struct{} `xml:"movie"` + Length uint `xml:",chardata"` +} + +type Pi struct { + XMLName struct{} `xml:"pi"` + Approximation float32 `xml:",chardata"` +} + +type Universe struct { + XMLName struct{} `xml:"universe"` + Visible float64 `xml:",chardata"` +} + +type Particle struct { + XMLName struct{} `xml:"particle"` + HasMass bool `xml:",chardata"` +} + +type Departure struct { + XMLName struct{} `xml:"departure"` + When time.Time `xml:",chardata"` +} + +type SecretAgent struct { + XMLName struct{} `xml:"agent"` + Handle string `xml:"handle,attr"` + Identity string + Obfuscate string `xml:",innerxml"` +} + +type NestedItems struct { + XMLName struct{} `xml:"result"` + Items []string `xml:">item"` + Item1 []string `xml:"Items>item1"` +} + +type NestedOrder struct { + XMLName struct{} `xml:"result"` + Field1 string `xml:"parent>c"` + Field2 string `xml:"parent>b"` + Field3 string `xml:"parent>a"` +} + +type MixedNested struct { + XMLName struct{} `xml:"result"` + A string `xml:"parent1>a"` + B string `xml:"b"` + C string `xml:"parent1>parent2>c"` + D string `xml:"parent1>d"` +} + +type NilTest struct { + A any `xml:"parent1>parent2>a"` + B any `xml:"parent1>b"` + C any `xml:"parent1>parent2>c"` +} + +type Service struct { + XMLName struct{} `xml:"service"` + Domain *Domain `xml:"host>domain"` + Port *Port `xml:"host>port"` + Extra1 any + Extra2 any `xml:"host>extra2"` +} + +var nilStruct *Ship + +type EmbedA struct { + EmbedC + EmbedB EmbedB + FieldA string + embedD +} + +type EmbedB struct { + FieldB string + *EmbedC +} + +type EmbedC struct { + FieldA1 string `xml:"FieldA>A1"` + FieldA2 string `xml:"FieldA>A2"` + FieldB string + FieldC string +} + +type embedD struct { + fieldD string + FieldE string // Promoted and visible when embedD is embedded. +} + +type NameCasing struct { + XMLName struct{} `xml:"casing"` + Xy string + XY string + XyA string `xml:"Xy,attr"` + XYA string `xml:"XY,attr"` +} + +type NamePrecedence struct { + XMLName Name `xml:"Parent"` + FromTag XMLNameWithoutTag `xml:"InTag"` + FromNameVal XMLNameWithoutTag + FromNameTag XMLNameWithTag + InFieldName string +} + +type XMLNameWithTag struct { + XMLName Name `xml:"InXMLNameTag"` + Value string `xml:",chardata"` +} + +type XMLNameWithoutTag struct { + XMLName Name + Value string `xml:",chardata"` +} + +type NameInField struct { + Foo Name `xml:"ns foo"` +} + +type AttrTest struct { + Int int `xml:",attr"` + Named int `xml:"int,attr"` + Float float64 `xml:",attr"` + Uint8 uint8 `xml:",attr"` + Bool bool `xml:",attr"` + Str string `xml:",attr"` + Bytes []byte `xml:",attr"` +} + +type AttrsTest struct { + Attrs []Attr `xml:",any,attr"` + Int int `xml:",attr"` + Named int `xml:"int,attr"` + Float float64 `xml:",attr"` + Uint8 uint8 `xml:",attr"` + Bool bool `xml:",attr"` + Str string `xml:",attr"` + Bytes []byte `xml:",attr"` +} + +type OmitAttrTest struct { + Int int `xml:",attr,omitempty"` + Named int `xml:"int,attr,omitempty"` + Float float64 `xml:",attr,omitempty"` + Uint8 uint8 `xml:",attr,omitempty"` + Bool bool `xml:",attr,omitempty"` + Str string `xml:",attr,omitempty"` + Bytes []byte `xml:",attr,omitempty"` + PStr *string `xml:",attr,omitempty"` +} + +type OmitFieldTest struct { + Int int `xml:",omitempty"` + Named int `xml:"int,omitempty"` + Float float64 `xml:",omitempty"` + Uint8 uint8 `xml:",omitempty"` + Bool bool `xml:",omitempty"` + Str string `xml:",omitempty"` + Bytes []byte `xml:",omitempty"` + PStr *string `xml:",omitempty"` + Ptr *PresenceTest `xml:",omitempty"` +} + +type AnyTest struct { + XMLName struct{} `xml:"a"` + Nested string `xml:"nested>value"` + AnyField AnyHolder `xml:",any"` +} + +type AnyOmitTest struct { + XMLName struct{} `xml:"a"` + Nested string `xml:"nested>value"` + AnyField *AnyHolder `xml:",any,omitempty"` +} + +type AnySliceTest struct { + XMLName struct{} `xml:"a"` + Nested string `xml:"nested>value"` + AnyField []AnyHolder `xml:",any"` +} + +type AnyHolder struct { + XMLName Name + XML string `xml:",innerxml"` +} + +type RecurseA struct { + A string + B *RecurseB +} + +type RecurseB struct { + A *RecurseA + B string +} + +type PresenceTest struct { + Exists *struct{} +} + +type IgnoreTest struct { + PublicSecret string `xml:"-"` +} + +type MyBytes []byte + +type Data struct { + Bytes []byte + Attr []byte `xml:",attr"` + Custom MyBytes +} + +type Plain struct { + V any +} + +type MyInt int + +type EmbedInt struct { + MyInt +} + +type Strings struct { + X []string `xml:"A>B,omitempty"` +} + +type PointerFieldsTest struct { + XMLName Name `xml:"dummy"` + Name *string `xml:"name,attr"` + Age *uint `xml:"age,attr"` + Empty *string `xml:"empty,attr"` + Contents *string `xml:",chardata"` +} + +type ChardataEmptyTest struct { + XMLName Name `xml:"test"` + Contents *string `xml:",chardata"` +} + +type PointerAnonFields struct { + *MyInt + *NamedType +} + +type MyMarshalerTest struct { +} + +var _ Marshaler = (*MyMarshalerTest)(nil) + +func (m *MyMarshalerTest) MarshalXML(e *Encoder, start StartElement) error { + e.EncodeToken(start) + e.EncodeToken(CharData([]byte("hello world"))) + e.EncodeToken(EndElement{start.Name}) + return nil +} + +type MyMarshalerAttrTest struct { +} + +var _ MarshalerAttr = (*MyMarshalerAttrTest)(nil) + +func (m *MyMarshalerAttrTest) MarshalXMLAttr(name Name) (Attr, error) { + return Attr{name, "hello world"}, nil +} + +func (m *MyMarshalerAttrTest) UnmarshalXMLAttr(attr Attr) error { + return nil +} + +type MarshalerStruct struct { + Foo MyMarshalerAttrTest `xml:",attr"` +} + +type InnerStruct struct { + XMLName Name `xml:"testns outer"` +} + +type OuterStruct struct { + InnerStruct + IntAttr int `xml:"int,attr"` +} + +type OuterNamedStruct struct { + InnerStruct + XMLName Name `xml:"outerns test"` + IntAttr int `xml:"int,attr"` +} + +type OuterNamedOrderedStruct struct { + XMLName Name `xml:"outerns test"` + InnerStruct + IntAttr int `xml:"int,attr"` +} + +type OuterOuterStruct struct { + OuterStruct +} + +type NestedAndChardata struct { + AB []string `xml:"A>B"` + Chardata string `xml:",chardata"` +} + +type NestedAndComment struct { + AB []string `xml:"A>B"` + Comment string `xml:",comment"` +} + +type CDataTest struct { + Chardata string `xml:",cdata"` +} + +type NestedAndCData struct { + AB []string `xml:"A>B"` + CDATA string `xml:",cdata"` +} + +func ifaceptr(x any) any { + return &x +} + +func stringptr(x string) *string { + return &x +} + +type T1 struct{} +type T2 struct{} + +type IndirComment struct { + T1 T1 + Comment *string `xml:",comment"` + T2 T2 +} + +type DirectComment struct { + T1 T1 + Comment string `xml:",comment"` + T2 T2 +} + +type IfaceComment struct { + T1 T1 + Comment any `xml:",comment"` + T2 T2 +} + +type IndirChardata struct { + T1 T1 + Chardata *string `xml:",chardata"` + T2 T2 +} + +type DirectChardata struct { + T1 T1 + Chardata string `xml:",chardata"` + T2 T2 +} + +type IfaceChardata struct { + T1 T1 + Chardata any `xml:",chardata"` + T2 T2 +} + +type IndirCDATA struct { + T1 T1 + CDATA *string `xml:",cdata"` + T2 T2 +} + +type DirectCDATA struct { + T1 T1 + CDATA string `xml:",cdata"` + T2 T2 +} + +type IfaceCDATA struct { + T1 T1 + CDATA any `xml:",cdata"` + T2 T2 +} + +type IndirInnerXML struct { + T1 T1 + InnerXML *string `xml:",innerxml"` + T2 T2 +} + +type DirectInnerXML struct { + T1 T1 + InnerXML string `xml:",innerxml"` + T2 T2 +} + +type IfaceInnerXML struct { + T1 T1 + InnerXML any `xml:",innerxml"` + T2 T2 +} + +type IndirElement struct { + T1 T1 + Element *string + T2 T2 +} + +type DirectElement struct { + T1 T1 + Element string + T2 T2 +} + +type IfaceElement struct { + T1 T1 + Element any + T2 T2 +} + +type IndirOmitEmpty struct { + T1 T1 + OmitEmpty *string `xml:",omitempty"` + T2 T2 +} + +type DirectOmitEmpty struct { + T1 T1 + OmitEmpty string `xml:",omitempty"` + T2 T2 +} + +type IfaceOmitEmpty struct { + T1 T1 + OmitEmpty any `xml:",omitempty"` + T2 T2 +} + +type IndirAny struct { + T1 T1 + Any *string `xml:",any"` + T2 T2 +} + +type DirectAny struct { + T1 T1 + Any string `xml:",any"` + T2 T2 +} + +type IfaceAny struct { + T1 T1 + Any any `xml:",any"` + T2 T2 +} + +type Generic[T any] struct { + X T +} + +var ( + nameAttr = "Sarah" + ageAttr = uint(12) + contentsAttr = "lorem ipsum" + empty = "" +) + +// Unless explicitly stated as such (or *Plain), all of the +// tests below are two-way tests. When introducing new tests, +// please try to make them two-way as well to ensure that +// marshaling and unmarshaling are as symmetrical as feasible. +var marshalTests = []struct { + Value any + ExpectXML string + MarshalOnly bool + MarshalError string + UnmarshalOnly bool + UnmarshalError string +}{ + // Test nil marshals to nothing + {Value: nil, ExpectXML: ``, MarshalOnly: true}, + {Value: nilStruct, ExpectXML: ``, MarshalOnly: true}, + + // Test value types + {Value: &Plain{true}, ExpectXML: `<Plain><V>true</V></Plain>`}, + {Value: &Plain{false}, ExpectXML: `<Plain><V>false</V></Plain>`}, + {Value: &Plain{int(42)}, ExpectXML: `<Plain><V>42</V></Plain>`}, + {Value: &Plain{int8(42)}, ExpectXML: `<Plain><V>42</V></Plain>`}, + {Value: &Plain{int16(42)}, ExpectXML: `<Plain><V>42</V></Plain>`}, + {Value: &Plain{int32(42)}, ExpectXML: `<Plain><V>42</V></Plain>`}, + {Value: &Plain{uint(42)}, ExpectXML: `<Plain><V>42</V></Plain>`}, + {Value: &Plain{uint8(42)}, ExpectXML: `<Plain><V>42</V></Plain>`}, + {Value: &Plain{uint16(42)}, ExpectXML: `<Plain><V>42</V></Plain>`}, + {Value: &Plain{uint32(42)}, ExpectXML: `<Plain><V>42</V></Plain>`}, + {Value: &Plain{float32(1.25)}, ExpectXML: `<Plain><V>1.25</V></Plain>`}, + {Value: &Plain{float64(1.25)}, ExpectXML: `<Plain><V>1.25</V></Plain>`}, + {Value: &Plain{uintptr(0xFFDD)}, ExpectXML: `<Plain><V>65501</V></Plain>`}, + {Value: &Plain{"gopher"}, ExpectXML: `<Plain><V>gopher</V></Plain>`}, + {Value: &Plain{[]byte("gopher")}, ExpectXML: `<Plain><V>gopher</V></Plain>`}, + {Value: &Plain{"</>"}, ExpectXML: `<Plain><V></></V></Plain>`}, + {Value: &Plain{[]byte("</>")}, ExpectXML: `<Plain><V></></V></Plain>`}, + {Value: &Plain{[3]byte{'<', '/', '>'}}, ExpectXML: `<Plain><V></></V></Plain>`}, + {Value: &Plain{NamedType("potato")}, ExpectXML: `<Plain><V>potato</V></Plain>`}, + {Value: &Plain{[]int{1, 2, 3}}, ExpectXML: `<Plain><V>1</V><V>2</V><V>3</V></Plain>`}, + {Value: &Plain{[3]int{1, 2, 3}}, ExpectXML: `<Plain><V>1</V><V>2</V><V>3</V></Plain>`}, + {Value: ifaceptr(true), MarshalOnly: true, ExpectXML: `<bool>true</bool>`}, + + // Test time. + { + Value: &Plain{time.Unix(1e9, 123456789).UTC()}, + ExpectXML: `<Plain><V>2001-09-09T01:46:40.123456789Z</V></Plain>`, + }, + + // A pointer to struct{} may be used to test for an element's presence. + { + Value: &PresenceTest{new(struct{})}, + ExpectXML: `<PresenceTest><Exists></Exists></PresenceTest>`, + }, + { + Value: &PresenceTest{}, + ExpectXML: `<PresenceTest></PresenceTest>`, + }, + + // A []byte field is only nil if the element was not found. + { + Value: &Data{}, + ExpectXML: `<Data></Data>`, + UnmarshalOnly: true, + }, + { + Value: &Data{Bytes: []byte{}, Custom: MyBytes{}, Attr: []byte{}}, + ExpectXML: `<Data Attr=""><Bytes></Bytes><Custom></Custom></Data>`, + UnmarshalOnly: true, + }, + + // Check that []byte works, including named []byte types. + { + Value: &Data{Bytes: []byte("ab"), Custom: MyBytes("cd"), Attr: []byte{'v'}}, + ExpectXML: `<Data Attr="v"><Bytes>ab</Bytes><Custom>cd</Custom></Data>`, + }, + + // Test innerxml + { + Value: &SecretAgent{ + Handle: "007", + Identity: "James Bond", + Obfuscate: "<redacted/>", + }, + ExpectXML: `<agent handle="007"><Identity>James Bond</Identity><redacted/></agent>`, + MarshalOnly: true, + }, + { + Value: &SecretAgent{ + Handle: "007", + Identity: "James Bond", + Obfuscate: "<Identity>James Bond</Identity><redacted/>", + }, + ExpectXML: `<agent handle="007"><Identity>James Bond</Identity><redacted/></agent>`, + UnmarshalOnly: true, + }, + + // Test structs + {Value: &Port{Type: "ssl", Number: "443"}, ExpectXML: `<port type="ssl">443</port>`}, + {Value: &Port{Number: "443"}, ExpectXML: `<port>443</port>`}, + {Value: &Port{Type: "<unix>"}, ExpectXML: `<port type="<unix>"></port>`}, + {Value: &Port{Number: "443", Comment: "https"}, ExpectXML: `<port><!--https-->443</port>`}, + {Value: &Port{Number: "443", Comment: "add space-"}, ExpectXML: `<port><!--add space- -->443</port>`, MarshalOnly: true}, + {Value: &Domain{Name: []byte("google.com&friends")}, ExpectXML: `<domain>google.com&friends</domain>`}, + {Value: &Domain{Name: []byte("google.com"), Comment: []byte(" &friends ")}, ExpectXML: `<domain>google.com<!-- &friends --></domain>`}, + {Value: &Book{Title: "Pride & Prejudice"}, ExpectXML: `<book>Pride & Prejudice</book>`}, + {Value: &Event{Year: -3114}, ExpectXML: `<event>-3114</event>`}, + {Value: &Movie{Length: 13440}, ExpectXML: `<movie>13440</movie>`}, + {Value: &Pi{Approximation: 3.14159265}, ExpectXML: `<pi>3.1415927</pi>`}, + {Value: &Universe{Visible: 9.3e13}, ExpectXML: `<universe>9.3e+13</universe>`}, + {Value: &Particle{HasMass: true}, ExpectXML: `<particle>true</particle>`}, + {Value: &Departure{When: ParseTime("2013-01-09T00:15:00-09:00")}, ExpectXML: `<departure>2013-01-09T00:15:00-09:00</departure>`}, + {Value: atomValue, ExpectXML: atomXML}, + {Value: &Generic[int]{1}, ExpectXML: `<Generic><X>1</X></Generic>`}, + { + Value: &Ship{ + Name: "Heart of Gold", + Pilot: "Computer", + Age: 1, + Drive: ImprobabilityDrive, + Passenger: []*Passenger{ + { + Name: []string{"Zaphod", "Beeblebrox"}, + Weight: 7.25, + }, + { + Name: []string{"Trisha", "McMillen"}, + Weight: 5.5, + }, + { + Name: []string{"Ford", "Prefect"}, + Weight: 7, + }, + { + Name: []string{"Arthur", "Dent"}, + Weight: 6.75, + }, + }, + }, + ExpectXML: `<spaceship name="Heart of Gold" pilot="Computer">` + + `<drive>` + strconv.Itoa(int(ImprobabilityDrive)) + `</drive>` + + `<age>1</age>` + + `<passenger>` + + `<name>Zaphod</name>` + + `<name>Beeblebrox</name>` + + `<weight>7.25</weight>` + + `</passenger>` + + `<passenger>` + + `<name>Trisha</name>` + + `<name>McMillen</name>` + + `<weight>5.5</weight>` + + `</passenger>` + + `<passenger>` + + `<name>Ford</name>` + + `<name>Prefect</name>` + + `<weight>7</weight>` + + `</passenger>` + + `<passenger>` + + `<name>Arthur</name>` + + `<name>Dent</name>` + + `<weight>6.75</weight>` + + `</passenger>` + + `</spaceship>`, + }, + + // Test a>b + { + Value: &NestedItems{Items: nil, Item1: nil}, + ExpectXML: `<result>` + + `<Items>` + + `</Items>` + + `</result>`, + }, + { + Value: &NestedItems{Items: []string{}, Item1: []string{}}, + ExpectXML: `<result>` + + `<Items>` + + `</Items>` + + `</result>`, + MarshalOnly: true, + }, + { + Value: &NestedItems{Items: nil, Item1: []string{"A"}}, + ExpectXML: `<result>` + + `<Items>` + + `<item1>A</item1>` + + `</Items>` + + `</result>`, + }, + { + Value: &NestedItems{Items: []string{"A", "B"}, Item1: nil}, + ExpectXML: `<result>` + + `<Items>` + + `<item>A</item>` + + `<item>B</item>` + + `</Items>` + + `</result>`, + }, + { + Value: &NestedItems{Items: []string{"A", "B"}, Item1: []string{"C"}}, + ExpectXML: `<result>` + + `<Items>` + + `<item>A</item>` + + `<item>B</item>` + + `<item1>C</item1>` + + `</Items>` + + `</result>`, + }, + { + Value: &NestedOrder{Field1: "C", Field2: "B", Field3: "A"}, + ExpectXML: `<result>` + + `<parent>` + + `<c>C</c>` + + `<b>B</b>` + + `<a>A</a>` + + `</parent>` + + `</result>`, + }, + { + Value: &NilTest{A: "A", B: nil, C: "C"}, + ExpectXML: `<NilTest>` + + `<parent1>` + + `<parent2><a>A</a></parent2>` + + `<parent2><c>C</c></parent2>` + + `</parent1>` + + `</NilTest>`, + MarshalOnly: true, // Uses interface{} + }, + { + Value: &MixedNested{A: "A", B: "B", C: "C", D: "D"}, + ExpectXML: `<result>` + + `<parent1><a>A</a></parent1>` + + `<b>B</b>` + + `<parent1>` + + `<parent2><c>C</c></parent2>` + + `<d>D</d>` + + `</parent1>` + + `</result>`, + }, + { + Value: &Service{Port: &Port{Number: "80"}}, + ExpectXML: `<service><host><port>80</port></host></service>`, + }, + { + Value: &Service{}, + ExpectXML: `<service></service>`, + }, + { + Value: &Service{Port: &Port{Number: "80"}, Extra1: "A", Extra2: "B"}, + ExpectXML: `<service>` + + `<host><port>80</port></host>` + + `<Extra1>A</Extra1>` + + `<host><extra2>B</extra2></host>` + + `</service>`, + MarshalOnly: true, + }, + { + Value: &Service{Port: &Port{Number: "80"}, Extra2: "example"}, + ExpectXML: `<service>` + + `<host><port>80</port></host>` + + `<host><extra2>example</extra2></host>` + + `</service>`, + MarshalOnly: true, + }, + { + Value: &struct { + XMLName struct{} `xml:"space top"` + A string `xml:"x>a"` + B string `xml:"x>b"` + C string `xml:"space x>c"` + C1 string `xml:"space1 x>c"` + D1 string `xml:"space1 x>d"` + }{ + A: "a", + B: "b", + C: "c", + C1: "c1", + D1: "d1", + }, + ExpectXML: `<top xmlns="space">` + + `<x><a>a</a><b>b</b><c xmlns="space">c</c>` + + `<c xmlns="space1">c1</c>` + + `<d xmlns="space1">d1</d>` + + `</x>` + + `</top>`, + }, + { + Value: &struct { + XMLName Name + A string `xml:"x>a"` + B string `xml:"x>b"` + C string `xml:"space x>c"` + C1 string `xml:"space1 x>c"` + D1 string `xml:"space1 x>d"` + }{ + XMLName: Name{ + Space: "space0", + Local: "top", + }, + A: "a", + B: "b", + C: "c", + C1: "c1", + D1: "d1", + }, + ExpectXML: `<top xmlns="space0">` + + `<x><a>a</a><b>b</b>` + + `<c xmlns="space">c</c>` + + `<c xmlns="space1">c1</c>` + + `<d xmlns="space1">d1</d>` + + `</x>` + + `</top>`, + }, + { + Value: &struct { + XMLName struct{} `xml:"top"` + B string `xml:"space x>b"` + B1 string `xml:"space1 x>b"` + }{ + B: "b", + B1: "b1", + }, + ExpectXML: `<top>` + + `<x><b xmlns="space">b</b>` + + `<b xmlns="space1">b1</b></x>` + + `</top>`, + }, + + // Test struct embedding + { + Value: &EmbedA{ + EmbedC: EmbedC{ + FieldA1: "", // Shadowed by A.A + FieldA2: "", // Shadowed by A.A + FieldB: "A.C.B", + FieldC: "A.C.C", + }, + EmbedB: EmbedB{ + FieldB: "A.B.B", + EmbedC: &EmbedC{ + FieldA1: "A.B.C.A1", + FieldA2: "A.B.C.A2", + FieldB: "", // Shadowed by A.B.B + FieldC: "A.B.C.C", + }, + }, + FieldA: "A.A", + embedD: embedD{ + FieldE: "A.D.E", + }, + }, + ExpectXML: `<EmbedA>` + + `<FieldB>A.C.B</FieldB>` + + `<FieldC>A.C.C</FieldC>` + + `<EmbedB>` + + `<FieldB>A.B.B</FieldB>` + + `<FieldA>` + + `<A1>A.B.C.A1</A1>` + + `<A2>A.B.C.A2</A2>` + + `</FieldA>` + + `<FieldC>A.B.C.C</FieldC>` + + `</EmbedB>` + + `<FieldA>A.A</FieldA>` + + `<FieldE>A.D.E</FieldE>` + + `</EmbedA>`, + }, + + // Anonymous struct pointer field which is nil + { + Value: &EmbedB{}, + ExpectXML: `<EmbedB><FieldB></FieldB></EmbedB>`, + }, + + // Other kinds of nil anonymous fields + { + Value: &PointerAnonFields{}, + ExpectXML: `<PointerAnonFields></PointerAnonFields>`, + }, + + // Test that name casing matters + { + Value: &NameCasing{Xy: "mixed", XY: "upper", XyA: "mixedA", XYA: "upperA"}, + ExpectXML: `<casing Xy="mixedA" XY="upperA"><Xy>mixed</Xy><XY>upper</XY></casing>`, + }, + + // Test the order in which the XML element name is chosen + { + Value: &NamePrecedence{ + FromTag: XMLNameWithoutTag{Value: "A"}, + FromNameVal: XMLNameWithoutTag{XMLName: Name{Local: "InXMLName"}, Value: "B"}, + FromNameTag: XMLNameWithTag{Value: "C"}, + InFieldName: "D", + }, + ExpectXML: `<Parent>` + + `<InTag>A</InTag>` + + `<InXMLName>B</InXMLName>` + + `<InXMLNameTag>C</InXMLNameTag>` + + `<InFieldName>D</InFieldName>` + + `</Parent>`, + MarshalOnly: true, + }, + { + Value: &NamePrecedence{ + XMLName: Name{Local: "Parent"}, + FromTag: XMLNameWithoutTag{XMLName: Name{Local: "InTag"}, Value: "A"}, + FromNameVal: XMLNameWithoutTag{XMLName: Name{Local: "FromNameVal"}, Value: "B"}, + FromNameTag: XMLNameWithTag{XMLName: Name{Local: "InXMLNameTag"}, Value: "C"}, + InFieldName: "D", + }, + ExpectXML: `<Parent>` + + `<InTag>A</InTag>` + + `<FromNameVal>B</FromNameVal>` + + `<InXMLNameTag>C</InXMLNameTag>` + + `<InFieldName>D</InFieldName>` + + `</Parent>`, + UnmarshalOnly: true, + }, + + // xml.Name works in a plain field as well. + { + Value: &NameInField{Name{Space: "ns", Local: "foo"}}, + ExpectXML: `<NameInField><foo xmlns="ns"></foo></NameInField>`, + }, + { + Value: &NameInField{Name{Space: "ns", Local: "foo"}}, + ExpectXML: `<NameInField><foo xmlns="ns"><ignore></ignore></foo></NameInField>`, + UnmarshalOnly: true, + }, + + // Marshaling zero xml.Name uses the tag or field name. + { + Value: &NameInField{}, + ExpectXML: `<NameInField><foo xmlns="ns"></foo></NameInField>`, + MarshalOnly: true, + }, + + // Test attributes + { + Value: &AttrTest{ + Int: 8, + Named: 9, + Float: 23.5, + Uint8: 255, + Bool: true, + Str: "str", + Bytes: []byte("byt"), + }, + ExpectXML: `<AttrTest Int="8" int="9" Float="23.5" Uint8="255"` + + ` Bool="true" Str="str" Bytes="byt"></AttrTest>`, + }, + { + Value: &AttrTest{Bytes: []byte{}}, + ExpectXML: `<AttrTest Int="0" int="0" Float="0" Uint8="0"` + + ` Bool="false" Str="" Bytes=""></AttrTest>`, + }, + { + Value: &AttrsTest{ + Attrs: []Attr{ + {Name: Name{Local: "Answer"}, Value: "42"}, + {Name: Name{Local: "Int"}, Value: "8"}, + {Name: Name{Local: "int"}, Value: "9"}, + {Name: Name{Local: "Float"}, Value: "23.5"}, + {Name: Name{Local: "Uint8"}, Value: "255"}, + {Name: Name{Local: "Bool"}, Value: "true"}, + {Name: Name{Local: "Str"}, Value: "str"}, + {Name: Name{Local: "Bytes"}, Value: "byt"}, + }, + }, + ExpectXML: `<AttrsTest Answer="42" Int="8" int="9" Float="23.5" Uint8="255" Bool="true" Str="str" Bytes="byt" Int="0" int="0" Float="0" Uint8="0" Bool="false" Str="" Bytes=""></AttrsTest>`, + MarshalOnly: true, + }, + { + Value: &AttrsTest{ + Attrs: []Attr{ + {Name: Name{Local: "Answer"}, Value: "42"}, + }, + Int: 8, + Named: 9, + Float: 23.5, + Uint8: 255, + Bool: true, + Str: "str", + Bytes: []byte("byt"), + }, + ExpectXML: `<AttrsTest Answer="42" Int="8" int="9" Float="23.5" Uint8="255" Bool="true" Str="str" Bytes="byt"></AttrsTest>`, + }, + { + Value: &AttrsTest{ + Attrs: []Attr{ + {Name: Name{Local: "Int"}, Value: "0"}, + {Name: Name{Local: "int"}, Value: "0"}, + {Name: Name{Local: "Float"}, Value: "0"}, + {Name: Name{Local: "Uint8"}, Value: "0"}, + {Name: Name{Local: "Bool"}, Value: "false"}, + {Name: Name{Local: "Str"}}, + {Name: Name{Local: "Bytes"}}, + }, + Bytes: []byte{}, + }, + ExpectXML: `<AttrsTest Int="0" int="0" Float="0" Uint8="0" Bool="false" Str="" Bytes="" Int="0" int="0" Float="0" Uint8="0" Bool="false" Str="" Bytes=""></AttrsTest>`, + MarshalOnly: true, + }, + { + Value: &OmitAttrTest{ + Int: 8, + Named: 9, + Float: 23.5, + Uint8: 255, + Bool: true, + Str: "str", + Bytes: []byte("byt"), + PStr: &empty, + }, + ExpectXML: `<OmitAttrTest Int="8" int="9" Float="23.5" Uint8="255"` + + ` Bool="true" Str="str" Bytes="byt" PStr=""></OmitAttrTest>`, + }, + { + Value: &OmitAttrTest{}, + ExpectXML: `<OmitAttrTest></OmitAttrTest>`, + }, + + // pointer fields + { + Value: &PointerFieldsTest{Name: &nameAttr, Age: &ageAttr, Contents: &contentsAttr}, + ExpectXML: `<dummy name="Sarah" age="12">lorem ipsum</dummy>`, + MarshalOnly: true, + }, + + // empty chardata pointer field + { + Value: &ChardataEmptyTest{}, + ExpectXML: `<test></test>`, + MarshalOnly: true, + }, + + // omitempty on fields + { + Value: &OmitFieldTest{ + Int: 8, + Named: 9, + Float: 23.5, + Uint8: 255, + Bool: true, + Str: "str", + Bytes: []byte("byt"), + PStr: &empty, + Ptr: &PresenceTest{}, + }, + ExpectXML: `<OmitFieldTest>` + + `<Int>8</Int>` + + `<int>9</int>` + + `<Float>23.5</Float>` + + `<Uint8>255</Uint8>` + + `<Bool>true</Bool>` + + `<Str>str</Str>` + + `<Bytes>byt</Bytes>` + + `<PStr></PStr>` + + `<Ptr></Ptr>` + + `</OmitFieldTest>`, + }, + { + Value: &OmitFieldTest{}, + ExpectXML: `<OmitFieldTest></OmitFieldTest>`, + }, + + // Test ",any" + { + ExpectXML: `<a><nested><value>known</value></nested><other><sub>unknown</sub></other></a>`, + Value: &AnyTest{ + Nested: "known", + AnyField: AnyHolder{ + XMLName: Name{Local: "other"}, + XML: "<sub>unknown</sub>", + }, + }, + }, + { + Value: &AnyTest{Nested: "known", + AnyField: AnyHolder{ + XML: "<unknown/>", + XMLName: Name{Local: "AnyField"}, + }, + }, + ExpectXML: `<a><nested><value>known</value></nested><AnyField><unknown/></AnyField></a>`, + }, + { + ExpectXML: `<a><nested><value>b</value></nested></a>`, + Value: &AnyOmitTest{ + Nested: "b", + }, + }, + { + ExpectXML: `<a><nested><value>b</value></nested><c><d>e</d></c><g xmlns="f"><h>i</h></g></a>`, + Value: &AnySliceTest{ + Nested: "b", + AnyField: []AnyHolder{ + { + XMLName: Name{Local: "c"}, + XML: "<d>e</d>", + }, + { + XMLName: Name{Space: "f", Local: "g"}, + XML: "<h>i</h>", + }, + }, + }, + }, + { + ExpectXML: `<a><nested><value>b</value></nested></a>`, + Value: &AnySliceTest{ + Nested: "b", + }, + }, + + // Test recursive types. + { + Value: &RecurseA{ + A: "a1", + B: &RecurseB{ + A: &RecurseA{"a2", nil}, + B: "b1", + }, + }, + ExpectXML: `<RecurseA><A>a1</A><B><A><A>a2</A></A><B>b1</B></B></RecurseA>`, + }, + + // Test ignoring fields via "-" tag + { + ExpectXML: `<IgnoreTest></IgnoreTest>`, + Value: &IgnoreTest{}, + }, + { + ExpectXML: `<IgnoreTest></IgnoreTest>`, + Value: &IgnoreTest{PublicSecret: "can't tell"}, + MarshalOnly: true, + }, + { + ExpectXML: `<IgnoreTest><PublicSecret>ignore me</PublicSecret></IgnoreTest>`, + Value: &IgnoreTest{}, + UnmarshalOnly: true, + }, + + // Test escaping. + { + ExpectXML: `<a><nested><value>dquote: "; squote: '; ampersand: &; less: <; greater: >;</value></nested><empty></empty></a>`, + Value: &AnyTest{ + Nested: `dquote: "; squote: '; ampersand: &; less: <; greater: >;`, + AnyField: AnyHolder{XMLName: Name{Local: "empty"}}, + }, + }, + { + ExpectXML: `<a><nested><value>newline: 
; cr: 
; tab: 	;</value></nested><AnyField></AnyField></a>`, + Value: &AnyTest{ + Nested: "newline: \n; cr: \r; tab: \t;", + AnyField: AnyHolder{XMLName: Name{Local: "AnyField"}}, + }, + }, + { + ExpectXML: "<a><nested><value>1\r2\r\n3\n\r4\n5</value></nested></a>", + Value: &AnyTest{ + Nested: "1\n2\n3\n\n4\n5", + }, + UnmarshalOnly: true, + }, + { + ExpectXML: `<EmbedInt><MyInt>42</MyInt></EmbedInt>`, + Value: &EmbedInt{ + MyInt: 42, + }, + }, + // Test outputting CDATA-wrapped text. + { + ExpectXML: `<CDataTest></CDataTest>`, + Value: &CDataTest{}, + }, + { + ExpectXML: `<CDataTest><![CDATA[http://example.com/tests/1?foo=1&bar=baz]]></CDataTest>`, + Value: &CDataTest{ + Chardata: "http://example.com/tests/1?foo=1&bar=baz", + }, + }, + { + ExpectXML: `<CDataTest><![CDATA[Literal <![CDATA[Nested]]]]><![CDATA[>!]]></CDataTest>`, + Value: &CDataTest{ + Chardata: "Literal <![CDATA[Nested]]>!", + }, + }, + { + ExpectXML: `<CDataTest><![CDATA[<![CDATA[Nested]]]]><![CDATA[> Literal!]]></CDataTest>`, + Value: &CDataTest{ + Chardata: "<![CDATA[Nested]]> Literal!", + }, + }, + { + ExpectXML: `<CDataTest><![CDATA[<![CDATA[Nested]]]]><![CDATA[> Literal! <![CDATA[Nested]]]]><![CDATA[> Literal!]]></CDataTest>`, + Value: &CDataTest{ + Chardata: "<![CDATA[Nested]]> Literal! <![CDATA[Nested]]> Literal!", + }, + }, + { + ExpectXML: `<CDataTest><![CDATA[<![CDATA[<![CDATA[Nested]]]]><![CDATA[>]]]]><![CDATA[>]]></CDataTest>`, + Value: &CDataTest{ + Chardata: "<![CDATA[<![CDATA[Nested]]>]]>", + }, + }, + + // Test omitempty with parent chain; see golang.org/issue/4168. + { + ExpectXML: `<Strings><A></A></Strings>`, + Value: &Strings{}, + }, + // Custom marshalers. + { + ExpectXML: `<MyMarshalerTest>hello world</MyMarshalerTest>`, + Value: &MyMarshalerTest{}, + }, + { + ExpectXML: `<MarshalerStruct Foo="hello world"></MarshalerStruct>`, + Value: &MarshalerStruct{}, + }, + { + ExpectXML: `<outer xmlns="testns" int="10"></outer>`, + Value: &OuterStruct{IntAttr: 10}, + }, + { + ExpectXML: `<test xmlns="outerns" int="10"></test>`, + Value: &OuterNamedStruct{XMLName: Name{Space: "outerns", Local: "test"}, IntAttr: 10}, + }, + { + ExpectXML: `<test xmlns="outerns" int="10"></test>`, + Value: &OuterNamedOrderedStruct{XMLName: Name{Space: "outerns", Local: "test"}, IntAttr: 10}, + }, + { + ExpectXML: `<outer xmlns="testns" int="10"></outer>`, + Value: &OuterOuterStruct{OuterStruct{IntAttr: 10}}, + }, + { + ExpectXML: `<NestedAndChardata><A><B></B><B></B></A>test</NestedAndChardata>`, + Value: &NestedAndChardata{AB: make([]string, 2), Chardata: "test"}, + }, + { + ExpectXML: `<NestedAndComment><A><B></B><B></B></A><!--test--></NestedAndComment>`, + Value: &NestedAndComment{AB: make([]string, 2), Comment: "test"}, + }, + { + ExpectXML: `<NestedAndCData><A><B></B><B></B></A><![CDATA[test]]></NestedAndCData>`, + Value: &NestedAndCData{AB: make([]string, 2), CDATA: "test"}, + }, + // Test pointer indirection in various kinds of fields. + // https://golang.org/issue/19063 + { + ExpectXML: `<IndirComment><T1></T1><!--hi--><T2></T2></IndirComment>`, + Value: &IndirComment{Comment: stringptr("hi")}, + MarshalOnly: true, + }, + { + ExpectXML: `<IndirComment><T1></T1><T2></T2></IndirComment>`, + Value: &IndirComment{Comment: stringptr("")}, + MarshalOnly: true, + }, + { + ExpectXML: `<IndirComment><T1></T1><T2></T2></IndirComment>`, + Value: &IndirComment{Comment: nil}, + MarshalError: "xml: bad type for comment field of xml.IndirComment", + }, + { + ExpectXML: `<IndirComment><T1></T1><!--hi--><T2></T2></IndirComment>`, + Value: &IndirComment{Comment: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<IfaceComment><T1></T1><!--hi--><T2></T2></IfaceComment>`, + Value: &IfaceComment{Comment: "hi"}, + MarshalOnly: true, + }, + { + ExpectXML: `<IfaceComment><T1></T1><!--hi--><T2></T2></IfaceComment>`, + Value: &IfaceComment{Comment: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<IfaceComment><T1></T1><T2></T2></IfaceComment>`, + Value: &IfaceComment{Comment: nil}, + MarshalError: "xml: bad type for comment field of xml.IfaceComment", + }, + { + ExpectXML: `<IfaceComment><T1></T1><T2></T2></IfaceComment>`, + Value: &IfaceComment{Comment: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<DirectComment><T1></T1><!--hi--><T2></T2></DirectComment>`, + Value: &DirectComment{Comment: string("hi")}, + }, + { + ExpectXML: `<DirectComment><T1></T1><T2></T2></DirectComment>`, + Value: &DirectComment{Comment: string("")}, + }, + { + ExpectXML: `<IndirChardata><T1></T1>hi<T2></T2></IndirChardata>`, + Value: &IndirChardata{Chardata: stringptr("hi")}, + }, + { + ExpectXML: `<IndirChardata><T1></T1><![CDATA[hi]]><T2></T2></IndirChardata>`, + Value: &IndirChardata{Chardata: stringptr("hi")}, + UnmarshalOnly: true, // marshals without CDATA + }, + { + ExpectXML: `<IndirChardata><T1></T1><T2></T2></IndirChardata>`, + Value: &IndirChardata{Chardata: stringptr("")}, + }, + { + ExpectXML: `<IndirChardata><T1></T1><T2></T2></IndirChardata>`, + Value: &IndirChardata{Chardata: nil}, + MarshalOnly: true, // unmarshal leaves Chardata=stringptr("") + }, + { + ExpectXML: `<IfaceChardata><T1></T1>hi<T2></T2></IfaceChardata>`, + Value: &IfaceChardata{Chardata: string("hi")}, + UnmarshalError: "cannot unmarshal into interface {}", + }, + { + ExpectXML: `<IfaceChardata><T1></T1><![CDATA[hi]]><T2></T2></IfaceChardata>`, + Value: &IfaceChardata{Chardata: string("hi")}, + UnmarshalOnly: true, // marshals without CDATA + UnmarshalError: "cannot unmarshal into interface {}", + }, + { + ExpectXML: `<IfaceChardata><T1></T1><T2></T2></IfaceChardata>`, + Value: &IfaceChardata{Chardata: string("")}, + UnmarshalError: "cannot unmarshal into interface {}", + }, + { + ExpectXML: `<IfaceChardata><T1></T1><T2></T2></IfaceChardata>`, + Value: &IfaceChardata{Chardata: nil}, + UnmarshalError: "cannot unmarshal into interface {}", + }, + { + ExpectXML: `<DirectChardata><T1></T1>hi<T2></T2></DirectChardata>`, + Value: &DirectChardata{Chardata: string("hi")}, + }, + { + ExpectXML: `<DirectChardata><T1></T1><![CDATA[hi]]><T2></T2></DirectChardata>`, + Value: &DirectChardata{Chardata: string("hi")}, + UnmarshalOnly: true, // marshals without CDATA + }, + { + ExpectXML: `<DirectChardata><T1></T1><T2></T2></DirectChardata>`, + Value: &DirectChardata{Chardata: string("")}, + }, + { + ExpectXML: `<IndirCDATA><T1></T1><![CDATA[hi]]><T2></T2></IndirCDATA>`, + Value: &IndirCDATA{CDATA: stringptr("hi")}, + }, + { + ExpectXML: `<IndirCDATA><T1></T1>hi<T2></T2></IndirCDATA>`, + Value: &IndirCDATA{CDATA: stringptr("hi")}, + UnmarshalOnly: true, // marshals with CDATA + }, + { + ExpectXML: `<IndirCDATA><T1></T1><T2></T2></IndirCDATA>`, + Value: &IndirCDATA{CDATA: stringptr("")}, + }, + { + ExpectXML: `<IndirCDATA><T1></T1><T2></T2></IndirCDATA>`, + Value: &IndirCDATA{CDATA: nil}, + MarshalOnly: true, // unmarshal leaves CDATA=stringptr("") + }, + { + ExpectXML: `<IfaceCDATA><T1></T1><![CDATA[hi]]><T2></T2></IfaceCDATA>`, + Value: &IfaceCDATA{CDATA: string("hi")}, + UnmarshalError: "cannot unmarshal into interface {}", + }, + { + ExpectXML: `<IfaceCDATA><T1></T1>hi<T2></T2></IfaceCDATA>`, + Value: &IfaceCDATA{CDATA: string("hi")}, + UnmarshalOnly: true, // marshals with CDATA + UnmarshalError: "cannot unmarshal into interface {}", + }, + { + ExpectXML: `<IfaceCDATA><T1></T1><T2></T2></IfaceCDATA>`, + Value: &IfaceCDATA{CDATA: string("")}, + UnmarshalError: "cannot unmarshal into interface {}", + }, + { + ExpectXML: `<IfaceCDATA><T1></T1><T2></T2></IfaceCDATA>`, + Value: &IfaceCDATA{CDATA: nil}, + UnmarshalError: "cannot unmarshal into interface {}", + }, + { + ExpectXML: `<DirectCDATA><T1></T1><![CDATA[hi]]><T2></T2></DirectCDATA>`, + Value: &DirectCDATA{CDATA: string("hi")}, + }, + { + ExpectXML: `<DirectCDATA><T1></T1>hi<T2></T2></DirectCDATA>`, + Value: &DirectCDATA{CDATA: string("hi")}, + UnmarshalOnly: true, // marshals with CDATA + }, + { + ExpectXML: `<DirectCDATA><T1></T1><T2></T2></DirectCDATA>`, + Value: &DirectCDATA{CDATA: string("")}, + }, + { + ExpectXML: `<IndirInnerXML><T1></T1><hi/><T2></T2></IndirInnerXML>`, + Value: &IndirInnerXML{InnerXML: stringptr("<hi/>")}, + MarshalOnly: true, + }, + { + ExpectXML: `<IndirInnerXML><T1></T1><T2></T2></IndirInnerXML>`, + Value: &IndirInnerXML{InnerXML: stringptr("")}, + MarshalOnly: true, + }, + { + ExpectXML: `<IndirInnerXML><T1></T1><T2></T2></IndirInnerXML>`, + Value: &IndirInnerXML{InnerXML: nil}, + }, + { + ExpectXML: `<IndirInnerXML><T1></T1><hi/><T2></T2></IndirInnerXML>`, + Value: &IndirInnerXML{InnerXML: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<IfaceInnerXML><T1></T1><hi/><T2></T2></IfaceInnerXML>`, + Value: &IfaceInnerXML{InnerXML: "<hi/>"}, + MarshalOnly: true, + }, + { + ExpectXML: `<IfaceInnerXML><T1></T1><hi/><T2></T2></IfaceInnerXML>`, + Value: &IfaceInnerXML{InnerXML: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<IfaceInnerXML><T1></T1><T2></T2></IfaceInnerXML>`, + Value: &IfaceInnerXML{InnerXML: nil}, + }, + { + ExpectXML: `<IfaceInnerXML><T1></T1><T2></T2></IfaceInnerXML>`, + Value: &IfaceInnerXML{InnerXML: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<DirectInnerXML><T1></T1><hi/><T2></T2></DirectInnerXML>`, + Value: &DirectInnerXML{InnerXML: string("<hi/>")}, + MarshalOnly: true, + }, + { + ExpectXML: `<DirectInnerXML><T1></T1><hi/><T2></T2></DirectInnerXML>`, + Value: &DirectInnerXML{InnerXML: string("<T1></T1><hi/><T2></T2>")}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<DirectInnerXML><T1></T1><T2></T2></DirectInnerXML>`, + Value: &DirectInnerXML{InnerXML: string("")}, + MarshalOnly: true, + }, + { + ExpectXML: `<DirectInnerXML><T1></T1><T2></T2></DirectInnerXML>`, + Value: &DirectInnerXML{InnerXML: string("<T1></T1><T2></T2>")}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<IndirElement><T1></T1><Element>hi</Element><T2></T2></IndirElement>`, + Value: &IndirElement{Element: stringptr("hi")}, + }, + { + ExpectXML: `<IndirElement><T1></T1><Element></Element><T2></T2></IndirElement>`, + Value: &IndirElement{Element: stringptr("")}, + }, + { + ExpectXML: `<IndirElement><T1></T1><T2></T2></IndirElement>`, + Value: &IndirElement{Element: nil}, + }, + { + ExpectXML: `<IfaceElement><T1></T1><Element>hi</Element><T2></T2></IfaceElement>`, + Value: &IfaceElement{Element: "hi"}, + MarshalOnly: true, + }, + { + ExpectXML: `<IfaceElement><T1></T1><Element>hi</Element><T2></T2></IfaceElement>`, + Value: &IfaceElement{Element: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<IfaceElement><T1></T1><T2></T2></IfaceElement>`, + Value: &IfaceElement{Element: nil}, + }, + { + ExpectXML: `<IfaceElement><T1></T1><T2></T2></IfaceElement>`, + Value: &IfaceElement{Element: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<DirectElement><T1></T1><Element>hi</Element><T2></T2></DirectElement>`, + Value: &DirectElement{Element: string("hi")}, + }, + { + ExpectXML: `<DirectElement><T1></T1><Element></Element><T2></T2></DirectElement>`, + Value: &DirectElement{Element: string("")}, + }, + { + ExpectXML: `<IndirOmitEmpty><T1></T1><OmitEmpty>hi</OmitEmpty><T2></T2></IndirOmitEmpty>`, + Value: &IndirOmitEmpty{OmitEmpty: stringptr("hi")}, + }, + { + // Note: Changed in Go 1.8 to include <OmitEmpty> element (because x.OmitEmpty != nil). + ExpectXML: `<IndirOmitEmpty><T1></T1><OmitEmpty></OmitEmpty><T2></T2></IndirOmitEmpty>`, + Value: &IndirOmitEmpty{OmitEmpty: stringptr("")}, + MarshalOnly: true, + }, + { + ExpectXML: `<IndirOmitEmpty><T1></T1><OmitEmpty></OmitEmpty><T2></T2></IndirOmitEmpty>`, + Value: &IndirOmitEmpty{OmitEmpty: stringptr("")}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<IndirOmitEmpty><T1></T1><T2></T2></IndirOmitEmpty>`, + Value: &IndirOmitEmpty{OmitEmpty: nil}, + }, + { + ExpectXML: `<IfaceOmitEmpty><T1></T1><OmitEmpty>hi</OmitEmpty><T2></T2></IfaceOmitEmpty>`, + Value: &IfaceOmitEmpty{OmitEmpty: "hi"}, + MarshalOnly: true, + }, + { + ExpectXML: `<IfaceOmitEmpty><T1></T1><OmitEmpty>hi</OmitEmpty><T2></T2></IfaceOmitEmpty>`, + Value: &IfaceOmitEmpty{OmitEmpty: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<IfaceOmitEmpty><T1></T1><T2></T2></IfaceOmitEmpty>`, + Value: &IfaceOmitEmpty{OmitEmpty: nil}, + }, + { + ExpectXML: `<IfaceOmitEmpty><T1></T1><T2></T2></IfaceOmitEmpty>`, + Value: &IfaceOmitEmpty{OmitEmpty: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<DirectOmitEmpty><T1></T1><OmitEmpty>hi</OmitEmpty><T2></T2></DirectOmitEmpty>`, + Value: &DirectOmitEmpty{OmitEmpty: string("hi")}, + }, + { + ExpectXML: `<DirectOmitEmpty><T1></T1><T2></T2></DirectOmitEmpty>`, + Value: &DirectOmitEmpty{OmitEmpty: string("")}, + }, + { + ExpectXML: `<IndirAny><T1></T1><Any>hi</Any><T2></T2></IndirAny>`, + Value: &IndirAny{Any: stringptr("hi")}, + }, + { + ExpectXML: `<IndirAny><T1></T1><Any></Any><T2></T2></IndirAny>`, + Value: &IndirAny{Any: stringptr("")}, + }, + { + ExpectXML: `<IndirAny><T1></T1><T2></T2></IndirAny>`, + Value: &IndirAny{Any: nil}, + }, + { + ExpectXML: `<IfaceAny><T1></T1><Any>hi</Any><T2></T2></IfaceAny>`, + Value: &IfaceAny{Any: "hi"}, + MarshalOnly: true, + }, + { + ExpectXML: `<IfaceAny><T1></T1><Any>hi</Any><T2></T2></IfaceAny>`, + Value: &IfaceAny{Any: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<IfaceAny><T1></T1><T2></T2></IfaceAny>`, + Value: &IfaceAny{Any: nil}, + }, + { + ExpectXML: `<IfaceAny><T1></T1><T2></T2></IfaceAny>`, + Value: &IfaceAny{Any: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<DirectAny><T1></T1><Any>hi</Any><T2></T2></DirectAny>`, + Value: &DirectAny{Any: string("hi")}, + }, + { + ExpectXML: `<DirectAny><T1></T1><Any></Any><T2></T2></DirectAny>`, + Value: &DirectAny{Any: string("")}, + }, + { + ExpectXML: `<IndirFoo><T1></T1><Foo>hi</Foo><T2></T2></IndirFoo>`, + Value: &IndirAny{Any: stringptr("hi")}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<IndirFoo><T1></T1><Foo></Foo><T2></T2></IndirFoo>`, + Value: &IndirAny{Any: stringptr("")}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<IndirFoo><T1></T1><T2></T2></IndirFoo>`, + Value: &IndirAny{Any: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<IfaceFoo><T1></T1><Foo>hi</Foo><T2></T2></IfaceFoo>`, + Value: &IfaceAny{Any: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<IfaceFoo><T1></T1><T2></T2></IfaceFoo>`, + Value: &IfaceAny{Any: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<IfaceFoo><T1></T1><T2></T2></IfaceFoo>`, + Value: &IfaceAny{Any: nil}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<DirectFoo><T1></T1><Foo>hi</Foo><T2></T2></DirectFoo>`, + Value: &DirectAny{Any: string("hi")}, + UnmarshalOnly: true, + }, + { + ExpectXML: `<DirectFoo><T1></T1><Foo></Foo><T2></T2></DirectFoo>`, + Value: &DirectAny{Any: string("")}, + UnmarshalOnly: true, + }, +} + +func TestMarshal(t *testing.T) { + for idx, test := range marshalTests { + if test.UnmarshalOnly { + continue + } + + t.Run(fmt.Sprintf("%d", idx), func(t *testing.T) { + data, err := Marshal(test.Value) + if err != nil { + if test.MarshalError == "" { + t.Errorf("marshal(%#v): %s", test.Value, err) + return + } + if !strings.Contains(err.Error(), test.MarshalError) { + t.Errorf("marshal(%#v): %s, want %q", test.Value, err, test.MarshalError) + } + return + } + if test.MarshalError != "" { + t.Errorf("Marshal succeeded, want error %q", test.MarshalError) + return + } + if got, want := string(data), test.ExpectXML; got != want { + if strings.Contains(want, "\n") { + t.Errorf("marshal(%#v):\nHAVE:\n%s\nWANT:\n%s", test.Value, got, want) + } else { + t.Errorf("marshal(%#v):\nhave %#q\nwant %#q", test.Value, got, want) + } + } + }) + } +} + +type AttrParent struct { + X string `xml:"X>Y,attr"` +} + +type BadAttr struct { + Name map[string]string `xml:"name,attr"` +} + +var marshalErrorTests = []struct { + Value any + Err string + Kind reflect.Kind +}{ + { + Value: make(chan bool), + Err: "xml: unsupported type: chan bool", + Kind: reflect.Chan, + }, + { + Value: map[string]string{ + "question": "What do you get when you multiply six by nine?", + "answer": "42", + }, + Err: "xml: unsupported type: map[string]string", + Kind: reflect.Map, + }, + { + Value: map[*Ship]bool{nil: false}, + Err: "xml: unsupported type: map[*xml.Ship]bool", + Kind: reflect.Map, + }, + { + Value: &Domain{Comment: []byte("f--bar")}, + Err: `xml: comments must not contain "--"`, + }, + // Reject parent chain with attr, never worked; see golang.org/issue/5033. + { + Value: &AttrParent{}, + Err: `xml: X>Y chain not valid with attr flag`, + }, + { + Value: BadAttr{map[string]string{"X": "Y"}}, + Err: `xml: unsupported type: map[string]string`, + }, +} + +var marshalIndentTests = []struct { + Value any + Prefix string + Indent string + ExpectXML string +}{ + { + Value: &SecretAgent{ + Handle: "007", + Identity: "James Bond", + Obfuscate: "<redacted/>", + }, + Prefix: "", + Indent: "\t", + ExpectXML: fmt.Sprintf("<agent handle=\"007\">\n\t<Identity>James Bond</Identity><redacted/>\n</agent>"), + }, +} + +func TestMarshalErrors(t *testing.T) { + for idx, test := range marshalErrorTests { + data, err := Marshal(test.Value) + if err == nil { + t.Errorf("#%d: marshal(%#v) = [success] %q, want error %v", idx, test.Value, data, test.Err) + continue + } + if err.Error() != test.Err { + t.Errorf("#%d: marshal(%#v) = [error] %v, want %v", idx, test.Value, err, test.Err) + } + if test.Kind != reflect.Invalid { + if kind := err.(*UnsupportedTypeError).Type.Kind(); kind != test.Kind { + t.Errorf("#%d: marshal(%#v) = [error kind] %s, want %s", idx, test.Value, kind, test.Kind) + } + } + } +} + +// Do invertibility testing on the various structures that we test +func TestUnmarshal(t *testing.T) { + for i, test := range marshalTests { + if test.MarshalOnly { + continue + } + if _, ok := test.Value.(*Plain); ok { + continue + } + if test.ExpectXML == `<top>`+ + `<x><b xmlns="space">b</b>`+ + `<b xmlns="space1">b1</b></x>`+ + `</top>` { + // TODO(rogpeppe): re-enable this test in + // https://go-review.googlesource.com/#/c/5910/ + continue + } + + vt := reflect.TypeOf(test.Value) + dest := reflect.New(vt.Elem()).Interface() + err := Unmarshal([]byte(test.ExpectXML), dest) + + t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + switch fix := dest.(type) { + case *Feed: + fix.Author.InnerXML = "" + for i := range fix.Entry { + fix.Entry[i].Author.InnerXML = "" + } + } + + if err != nil { + if test.UnmarshalError == "" { + t.Errorf("unmarshal(%#v): %s", test.ExpectXML, err) + return + } + if !strings.Contains(err.Error(), test.UnmarshalError) { + t.Errorf("unmarshal(%#v): %s, want %q", test.ExpectXML, err, test.UnmarshalError) + } + return + } + if got, want := dest, test.Value; !reflect.DeepEqual(got, want) { + t.Errorf("unmarshal(%q):\nhave %#v\nwant %#v", test.ExpectXML, got, want) + } + }) + } +} + +func TestMarshalIndent(t *testing.T) { + for i, test := range marshalIndentTests { + data, err := MarshalIndent(test.Value, test.Prefix, test.Indent) + if err != nil { + t.Errorf("#%d: Error: %s", i, err) + continue + } + if got, want := string(data), test.ExpectXML; got != want { + t.Errorf("#%d: MarshalIndent:\nGot:%s\nWant:\n%s", i, got, want) + } + } +} + +type limitedBytesWriter struct { + w io.Writer + remain int // until writes fail +} + +func (lw *limitedBytesWriter) Write(p []byte) (n int, err error) { + if lw.remain <= 0 { + println("error") + return 0, errors.New("write limit hit") + } + if len(p) > lw.remain { + p = p[:lw.remain] + n, _ = lw.w.Write(p) + lw.remain = 0 + return n, errors.New("write limit hit") + } + n, err = lw.w.Write(p) + lw.remain -= n + return n, err +} + +func TestMarshalWriteErrors(t *testing.T) { + var buf bytes.Buffer + const writeCap = 1024 + w := &limitedBytesWriter{&buf, writeCap} + enc := NewEncoder(w) + var err error + var i int + const n = 4000 + for i = 1; i <= n; i++ { + err = enc.Encode(&Passenger{ + Name: []string{"Alice", "Bob"}, + Weight: 5, + }) + if err != nil { + break + } + } + if err == nil { + t.Error("expected an error") + } + if i == n { + t.Errorf("expected to fail before the end") + } + if buf.Len() != writeCap { + t.Errorf("buf.Len() = %d; want %d", buf.Len(), writeCap) + } +} + +func TestMarshalWriteIOErrors(t *testing.T) { + enc := NewEncoder(errWriter{}) + + expectErr := "unwritable" + err := enc.Encode(&Passenger{}) + if err == nil || err.Error() != expectErr { + t.Errorf("EscapeTest = [error] %v, want %v", err, expectErr) + } +} + +func TestMarshalFlush(t *testing.T) { + var buf strings.Builder + enc := NewEncoder(&buf) + if err := enc.EncodeToken(CharData("hello world")); err != nil { + t.Fatalf("enc.EncodeToken: %v", err) + } + if buf.Len() > 0 { + t.Fatalf("enc.EncodeToken caused actual write: %q", buf.String()) + } + if err := enc.Flush(); err != nil { + t.Fatalf("enc.Flush: %v", err) + } + if buf.String() != "hello world" { + t.Fatalf("after enc.Flush, buf.String() = %q, want %q", buf.String(), "hello world") + } +} + +func BenchmarkMarshal(b *testing.B) { + b.ReportAllocs() + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + Marshal(atomValue) + } + }) +} + +func BenchmarkUnmarshal(b *testing.B) { + b.ReportAllocs() + xml := []byte(atomXML) + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + Unmarshal(xml, &Feed{}) + } + }) +} + +// golang.org/issue/6556 +func TestStructPointerMarshal(t *testing.T) { + type A struct { + XMLName string `xml:"a"` + B []any + } + type C struct { + XMLName Name + Value string `xml:"value"` + } + + a := new(A) + a.B = append(a.B, &C{ + XMLName: Name{Local: "c"}, + Value: "x", + }) + + b, err := Marshal(a) + if err != nil { + t.Fatal(err) + } + if x := string(b); x != "<a><c><value>x</value></c></a>" { + t.Fatal(x) + } + var v A + err = Unmarshal(b, &v) + if err != nil { + t.Fatal(err) + } +} + +var encodeTokenTests = []struct { + desc string + toks []Token + want string + err string +}{{ + desc: "start element with name space", + toks: []Token{ + StartElement{Name{"space", "local"}, nil}, + }, + want: `<local xmlns="space">`, +}, { + desc: "start element with no name", + toks: []Token{ + StartElement{Name{"space", ""}, nil}, + }, + err: "xml: start tag with no name", +}, { + desc: "end element with no name", + toks: []Token{ + EndElement{Name{"space", ""}}, + }, + err: "xml: end tag with no name", +}, { + desc: "char data", + toks: []Token{ + CharData("foo"), + }, + want: `foo`, +}, { + desc: "char data with escaped chars", + toks: []Token{ + CharData(" \t\n"), + }, + want: " 	\n", +}, { + desc: "comment", + toks: []Token{ + Comment("foo"), + }, + want: `<!--foo-->`, +}, { + desc: "comment with invalid content", + toks: []Token{ + Comment("foo-->"), + }, + err: "xml: EncodeToken of Comment containing --> marker", +}, { + desc: "proc instruction", + toks: []Token{ + ProcInst{"Target", []byte("Instruction")}, + }, + want: `<?Target Instruction?>`, +}, { + desc: "proc instruction with empty target", + toks: []Token{ + ProcInst{"", []byte("Instruction")}, + }, + err: "xml: EncodeToken of ProcInst with invalid Target", +}, { + desc: "proc instruction with bad content", + toks: []Token{ + ProcInst{"", []byte("Instruction?>")}, + }, + err: "xml: EncodeToken of ProcInst with invalid Target", +}, { + desc: "directive", + toks: []Token{ + Directive("foo"), + }, + want: `<!foo>`, +}, { + desc: "more complex directive", + toks: []Token{ + Directive("DOCTYPE doc [ <!ELEMENT doc '>'> <!-- com>ment --> ]"), + }, + want: `<!DOCTYPE doc [ <!ELEMENT doc '>'> <!-- com>ment --> ]>`, +}, { + desc: "directive instruction with bad name", + toks: []Token{ + Directive("foo>"), + }, + err: "xml: EncodeToken of Directive containing wrong < or > markers", +}, { + desc: "end tag without start tag", + toks: []Token{ + EndElement{Name{"foo", "bar"}}, + }, + err: "xml: end tag </bar> without start tag", +}, { + desc: "mismatching end tag local name", + toks: []Token{ + StartElement{Name{"", "foo"}, nil}, + EndElement{Name{"", "bar"}}, + }, + err: "xml: end tag </bar> does not match start tag <foo>", + want: `<foo>`, +}, { + desc: "mismatching end tag namespace", + toks: []Token{ + StartElement{Name{"space", "foo"}, nil}, + EndElement{Name{"another", "foo"}}, + }, + err: "xml: end tag </foo> in namespace another does not match start tag <foo> in namespace space", + want: `<foo xmlns="space">`, +}, { + desc: "start element with explicit namespace", + toks: []Token{ + StartElement{Name{"space", "local"}, []Attr{ + {Name{"xmlns", "x"}, "space"}, + {Name{"space", "foo"}, "value"}, + }}, + }, + want: `<local xmlns="space" xmlns:_xmlns="xmlns" _xmlns:x="space" xmlns:space="space" space:foo="value">`, +}, { + desc: "start element with explicit namespace and colliding prefix", + toks: []Token{ + StartElement{Name{"space", "local"}, []Attr{ + {Name{"xmlns", "x"}, "space"}, + {Name{"space", "foo"}, "value"}, + {Name{"x", "bar"}, "other"}, + }}, + }, + want: `<local xmlns="space" xmlns:_xmlns="xmlns" _xmlns:x="space" xmlns:space="space" space:foo="value" xmlns:x="x" x:bar="other">`, +}, { + desc: "start element using previously defined namespace", + toks: []Token{ + StartElement{Name{"", "local"}, []Attr{ + {Name{"xmlns", "x"}, "space"}, + }}, + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"space", "x"}, "y"}, + }}, + }, + want: `<local xmlns:_xmlns="xmlns" _xmlns:x="space"><foo xmlns="space" xmlns:space="space" space:x="y">`, +}, { + desc: "nested name space with same prefix", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"xmlns", "x"}, "space1"}, + }}, + StartElement{Name{"", "foo"}, []Attr{ + {Name{"xmlns", "x"}, "space2"}, + }}, + StartElement{Name{"", "foo"}, []Attr{ + {Name{"space1", "a"}, "space1 value"}, + {Name{"space2", "b"}, "space2 value"}, + }}, + EndElement{Name{"", "foo"}}, + EndElement{Name{"", "foo"}}, + StartElement{Name{"", "foo"}, []Attr{ + {Name{"space1", "a"}, "space1 value"}, + {Name{"space2", "b"}, "space2 value"}, + }}, + }, + want: `<foo xmlns:_xmlns="xmlns" _xmlns:x="space1"><foo _xmlns:x="space2"><foo xmlns:space1="space1" space1:a="space1 value" xmlns:space2="space2" space2:b="space2 value"></foo></foo><foo xmlns:space1="space1" space1:a="space1 value" xmlns:space2="space2" space2:b="space2 value">`, +}, { + desc: "start element defining several prefixes for the same name space", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"xmlns", "a"}, "space"}, + {Name{"xmlns", "b"}, "space"}, + {Name{"space", "x"}, "value"}, + }}, + }, + want: `<foo xmlns="space" xmlns:_xmlns="xmlns" _xmlns:a="space" _xmlns:b="space" xmlns:space="space" space:x="value">`, +}, { + desc: "nested element redefines name space", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"xmlns", "x"}, "space"}, + }}, + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"xmlns", "y"}, "space"}, + {Name{"space", "a"}, "value"}, + }}, + }, + want: `<foo xmlns:_xmlns="xmlns" _xmlns:x="space"><foo xmlns="space" _xmlns:y="space" xmlns:space="space" space:a="value">`, +}, { + desc: "nested element creates alias for default name space", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"", "xmlns"}, "space"}, + }}, + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"xmlns", "y"}, "space"}, + {Name{"space", "a"}, "value"}, + }}, + }, + want: `<foo xmlns="space" xmlns="space"><foo xmlns="space" xmlns:_xmlns="xmlns" _xmlns:y="space" xmlns:space="space" space:a="value">`, +}, { + desc: "nested element defines default name space with existing prefix", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"xmlns", "x"}, "space"}, + }}, + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"", "xmlns"}, "space"}, + {Name{"space", "a"}, "value"}, + }}, + }, + want: `<foo xmlns:_xmlns="xmlns" _xmlns:x="space"><foo xmlns="space" xmlns="space" xmlns:space="space" space:a="value">`, +}, { + desc: "nested element uses empty attribute name space when default ns defined", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"", "xmlns"}, "space"}, + }}, + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"", "attr"}, "value"}, + }}, + }, + want: `<foo xmlns="space" xmlns="space"><foo xmlns="space" attr="value">`, +}, { + desc: "redefine xmlns", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"foo", "xmlns"}, "space"}, + }}, + }, + want: `<foo xmlns:foo="foo" foo:xmlns="space">`, +}, { + desc: "xmlns with explicit name space #1", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"xml", "xmlns"}, "space"}, + }}, + }, + want: `<foo xmlns="space" xmlns:_xml="xml" _xml:xmlns="space">`, +}, { + desc: "xmlns with explicit name space #2", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{xmlURL, "xmlns"}, "space"}, + }}, + }, + want: `<foo xmlns="space" xml:xmlns="space">`, +}, { + desc: "empty name space declaration is ignored", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"xmlns", "foo"}, ""}, + }}, + }, + want: `<foo xmlns:_xmlns="xmlns" _xmlns:foo="">`, +}, { + desc: "attribute with no name is ignored", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"", ""}, "value"}, + }}, + }, + want: `<foo>`, +}, { + desc: "namespace URL with non-valid name", + toks: []Token{ + StartElement{Name{"/34", "foo"}, []Attr{ + {Name{"/34", "x"}, "value"}, + }}, + }, + want: `<foo xmlns="/34" xmlns:_="/34" _:x="value">`, +}, { + desc: "nested element resets default namespace to empty", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"", "xmlns"}, "space"}, + }}, + StartElement{Name{"", "foo"}, []Attr{ + {Name{"", "xmlns"}, ""}, + {Name{"", "x"}, "value"}, + {Name{"space", "x"}, "value"}, + }}, + }, + want: `<foo xmlns="space" xmlns="space"><foo xmlns="" x="value" xmlns:space="space" space:x="value">`, +}, { + desc: "nested element requires empty default name space", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"", "xmlns"}, "space"}, + }}, + StartElement{Name{"", "foo"}, nil}, + }, + want: `<foo xmlns="space" xmlns="space"><foo>`, +}, { + desc: "attribute uses name space from xmlns", + toks: []Token{ + StartElement{Name{"some/space", "foo"}, []Attr{ + {Name{"", "attr"}, "value"}, + {Name{"some/space", "other"}, "other value"}, + }}, + }, + want: `<foo xmlns="some/space" attr="value" xmlns:space="some/space" space:other="other value">`, +}, { + desc: "default name space should not be used by attributes", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"", "xmlns"}, "space"}, + {Name{"xmlns", "bar"}, "space"}, + {Name{"space", "baz"}, "foo"}, + }}, + StartElement{Name{"space", "baz"}, nil}, + EndElement{Name{"space", "baz"}}, + EndElement{Name{"space", "foo"}}, + }, + want: `<foo xmlns="space" xmlns="space" xmlns:_xmlns="xmlns" _xmlns:bar="space" xmlns:space="space" space:baz="foo"><baz xmlns="space"></baz></foo>`, +}, { + desc: "default name space not used by attributes, not explicitly defined", + toks: []Token{ + StartElement{Name{"space", "foo"}, []Attr{ + {Name{"", "xmlns"}, "space"}, + {Name{"space", "baz"}, "foo"}, + }}, + StartElement{Name{"space", "baz"}, nil}, + EndElement{Name{"space", "baz"}}, + EndElement{Name{"space", "foo"}}, + }, + want: `<foo xmlns="space" xmlns="space" xmlns:space="space" space:baz="foo"><baz xmlns="space"></baz></foo>`, +}, { + desc: "impossible xmlns declaration", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"", "xmlns"}, "space"}, + }}, + StartElement{Name{"space", "bar"}, []Attr{ + {Name{"space", "attr"}, "value"}, + }}, + }, + want: `<foo xmlns="space"><bar xmlns="space" xmlns:space="space" space:attr="value">`, +}, { + desc: "reserved namespace prefix -- all lower case", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"http://www.w3.org/2001/xmlSchema-instance", "nil"}, "true"}, + }}, + }, + want: `<foo xmlns:_xmlSchema-instance="http://www.w3.org/2001/xmlSchema-instance" _xmlSchema-instance:nil="true">`, +}, { + desc: "reserved namespace prefix -- all upper case", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"http://www.w3.org/2001/XMLSchema-instance", "nil"}, "true"}, + }}, + }, + want: `<foo xmlns:_XMLSchema-instance="http://www.w3.org/2001/XMLSchema-instance" _XMLSchema-instance:nil="true">`, +}, { + desc: "reserved namespace prefix -- all mixed case", + toks: []Token{ + StartElement{Name{"", "foo"}, []Attr{ + {Name{"http://www.w3.org/2001/XmLSchema-instance", "nil"}, "true"}, + }}, + }, + want: `<foo xmlns:_XmLSchema-instance="http://www.w3.org/2001/XmLSchema-instance" _XmLSchema-instance:nil="true">`, +}} + +func TestEncodeToken(t *testing.T) { +loop: + for i, tt := range encodeTokenTests { + var buf strings.Builder + enc := NewEncoder(&buf) + var err error + for j, tok := range tt.toks { + err = enc.EncodeToken(tok) + if err != nil && j < len(tt.toks)-1 { + t.Errorf("#%d %s token #%d: %v", i, tt.desc, j, err) + continue loop + } + } + errorf := func(f string, a ...any) { + t.Errorf("#%d %s token #%d:%s", i, tt.desc, len(tt.toks)-1, fmt.Sprintf(f, a...)) + } + switch { + case tt.err != "" && err == nil: + errorf(" expected error; got none") + continue + case tt.err == "" && err != nil: + errorf(" got error: %v", err) + continue + case tt.err != "" && err != nil && tt.err != err.Error(): + errorf(" error mismatch; got %v, want %v", err, tt.err) + continue + } + if err := enc.Flush(); err != nil { + errorf(" %v", err) + continue + } + if got := buf.String(); got != tt.want { + errorf("\ngot %v\nwant %v", got, tt.want) + continue + } + } +} + +func TestProcInstEncodeToken(t *testing.T) { + var buf bytes.Buffer + enc := NewEncoder(&buf) + + if err := enc.EncodeToken(ProcInst{"xml", []byte("Instruction")}); err != nil { + t.Fatalf("enc.EncodeToken: expected to be able to encode xml target ProcInst as first token, %s", err) + } + + if err := enc.EncodeToken(ProcInst{"Target", []byte("Instruction")}); err != nil { + t.Fatalf("enc.EncodeToken: expected to be able to add non-xml target ProcInst") + } + + if err := enc.EncodeToken(ProcInst{"xml", []byte("Instruction")}); err == nil { + t.Fatalf("enc.EncodeToken: expected to not be allowed to encode xml target ProcInst when not first token") + } +} + +func TestDecodeEncode(t *testing.T) { + var in, out bytes.Buffer + in.WriteString(`<?xml version="1.0" encoding="UTF-8"?> +<?Target Instruction?> +<root> +</root> +`) + dec := NewDecoder(&in) + enc := NewEncoder(&out) + for tok, err := dec.Token(); err == nil; tok, err = dec.Token() { + err = enc.EncodeToken(tok) + if err != nil { + t.Fatalf("enc.EncodeToken: Unable to encode token (%#v), %v", tok, err) + } + } +} + +// Issue 9796. Used to fail with GORACE="halt_on_error=1" -race. +func TestRace9796(t *testing.T) { + type A struct{} + type B struct { + C []A `xml:"X>Y"` + } + var wg sync.WaitGroup + for i := 0; i < 2; i++ { + wg.Add(1) + go func() { + Marshal(B{[]A{{}}}) + wg.Done() + }() + } + wg.Wait() +} + +func TestIsValidDirective(t *testing.T) { + testOK := []string{ + "<>", + "< < > >", + "<!DOCTYPE '<' '>' '>' <!--nothing-->>", + "<!DOCTYPE doc [ <!ELEMENT doc ANY> <!ELEMENT doc ANY> ]>", + "<!DOCTYPE doc [ <!ELEMENT doc \"ANY> '<' <!E\" LEMENT '>' doc ANY> ]>", + "<!DOCTYPE doc <!-- just>>>> a < comment --> [ <!ITEM anything> ] >", + } + testKO := []string{ + "<", + ">", + "<!--", + "-->", + "< > > < < >", + "<!dummy <!-- > -->", + "<!DOCTYPE doc '>", + "<!DOCTYPE doc '>'", + "<!DOCTYPE doc <!--comment>", + } + for _, s := range testOK { + if !isValidDirective(Directive(s)) { + t.Errorf("Directive %q is expected to be valid", s) + } + } + for _, s := range testKO { + if isValidDirective(Directive(s)) { + t.Errorf("Directive %q is expected to be invalid", s) + } + } +} + +// Issue 11719. EncodeToken used to silently eat tokens with an invalid type. +func TestSimpleUseOfEncodeToken(t *testing.T) { + var buf strings.Builder + enc := NewEncoder(&buf) + if err := enc.EncodeToken(&StartElement{Name: Name{"", "object1"}}); err == nil { + t.Errorf("enc.EncodeToken: pointer type should be rejected") + } + if err := enc.EncodeToken(&EndElement{Name: Name{"", "object1"}}); err == nil { + t.Errorf("enc.EncodeToken: pointer type should be rejected") + } + if err := enc.EncodeToken(StartElement{Name: Name{"", "object2"}}); err != nil { + t.Errorf("enc.EncodeToken: StartElement %s", err) + } + if err := enc.EncodeToken(EndElement{Name: Name{"", "object2"}}); err != nil { + t.Errorf("enc.EncodeToken: EndElement %s", err) + } + if err := enc.EncodeToken(Universe{}); err == nil { + t.Errorf("enc.EncodeToken: invalid type not caught") + } + if err := enc.Flush(); err != nil { + t.Errorf("enc.Flush: %s", err) + } + if buf.Len() == 0 { + t.Errorf("enc.EncodeToken: empty buffer") + } + want := "<object2></object2>" + if buf.String() != want { + t.Errorf("enc.EncodeToken: expected %q; got %q", want, buf.String()) + } +} + +// Issue 16158. Decoder.unmarshalAttr ignores the return value of copyValue. +func TestIssue16158(t *testing.T) { + const data = `<foo b="HELLOWORLD"></foo>` + err := Unmarshal([]byte(data), &struct { + B byte `xml:"b,attr,omitempty"` + }{}) + if err == nil { + t.Errorf("Unmarshal: expected error, got nil") + } +} + +// Issue 20953. Crash on invalid XMLName attribute. + +type InvalidXMLName struct { + XMLName Name `xml:"error"` + Type struct { + XMLName Name `xml:"type,attr"` + } +} + +func TestInvalidXMLName(t *testing.T) { + var buf bytes.Buffer + enc := NewEncoder(&buf) + if err := enc.Encode(InvalidXMLName{}); err == nil { + t.Error("unexpected success") + } else if want := "invalid tag"; !strings.Contains(err.Error(), want) { + t.Errorf("error %q does not contain %q", err, want) + } +} + +// Issue 50164. Crash on zero value XML attribute. +type LayerOne struct { + XMLName Name `xml:"l1"` + + Value *float64 `xml:"value,omitempty"` + *LayerTwo `xml:",omitempty"` +} + +type LayerTwo struct { + ValueTwo *int `xml:"value_two,attr,omitempty"` +} + +func TestMarshalZeroValue(t *testing.T) { + proofXml := `<l1><value>1.2345</value></l1>` + var l1 LayerOne + err := Unmarshal([]byte(proofXml), &l1) + if err != nil { + t.Fatalf("unmarshal XML error: %v", err) + } + want := float64(1.2345) + got := *l1.Value + if got != want { + t.Fatalf("unexpected unmarshal result, want %f but got %f", want, got) + } + + // Marshal again (or Encode again) + // In issue 50164, here `Marshal(l1)` will panic because of the zero value of xml attribute ValueTwo `value_two`. + anotherXML, err := Marshal(l1) + if err != nil { + t.Fatalf("marshal XML error: %v", err) + } + if string(anotherXML) != proofXml { + t.Fatalf("unexpected unmarshal result, want %q but got %q", proofXml, anotherXML) + } +} + +var closeTests = []struct { + desc string + toks []Token + want string + err string +}{{ + desc: "unclosed start element", + toks: []Token{ + StartElement{Name{"", "foo"}, nil}, + }, + want: `<foo>`, + err: "unclosed tag <foo>", +}, { + desc: "closed element", + toks: []Token{ + StartElement{Name{"", "foo"}, nil}, + EndElement{Name{"", "foo"}}, + }, + want: `<foo></foo>`, +}, { + desc: "directive", + toks: []Token{ + Directive("foo"), + }, + want: `<!foo>`, +}} + +func TestClose(t *testing.T) { + for _, tt := range closeTests { + tt := tt + t.Run(tt.desc, func(t *testing.T) { + var out strings.Builder + enc := NewEncoder(&out) + for j, tok := range tt.toks { + if err := enc.EncodeToken(tok); err != nil { + t.Fatalf("token #%d: %v", j, err) + } + } + err := enc.Close() + switch { + case tt.err != "" && err == nil: + t.Error(" expected error; got none") + case tt.err == "" && err != nil: + t.Errorf(" got error: %v", err) + case tt.err != "" && err != nil && tt.err != err.Error(): + t.Errorf(" error mismatch; got %v, want %v", err, tt.err) + } + if got := out.String(); got != tt.want { + t.Errorf("\ngot %v\nwant %v", got, tt.want) + } + t.Log(enc.p.closed) + if err := enc.EncodeToken(Directive("foo")); err == nil { + t.Errorf("unexpected success when encoding after Close") + } + }) + } +} diff --git a/src/encoding/xml/read.go b/src/encoding/xml/read.go new file mode 100644 index 0000000..3cc4968 --- /dev/null +++ b/src/encoding/xml/read.go @@ -0,0 +1,777 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package xml + +import ( + "bytes" + "encoding" + "errors" + "fmt" + "reflect" + "runtime" + "strconv" + "strings" +) + +// BUG(rsc): Mapping between XML elements and data structures is inherently flawed: +// an XML element is an order-dependent collection of anonymous +// values, while a data structure is an order-independent collection +// of named values. +// See [encoding/json] for a textual representation more suitable +// to data structures. + +// Unmarshal parses the XML-encoded data and stores the result in +// the value pointed to by v, which must be an arbitrary struct, +// slice, or string. Well-formed data that does not fit into v is +// discarded. +// +// Because Unmarshal uses the reflect package, it can only assign +// to exported (upper case) fields. Unmarshal uses a case-sensitive +// comparison to match XML element names to tag values and struct +// field names. +// +// Unmarshal maps an XML element to a struct using the following rules. +// In the rules, the tag of a field refers to the value associated with the +// key 'xml' in the struct field's tag (see the example above). +// +// - If the struct has a field of type []byte or string with tag +// ",innerxml", Unmarshal accumulates the raw XML nested inside the +// element in that field. The rest of the rules still apply. +// +// - If the struct has a field named XMLName of type Name, +// Unmarshal records the element name in that field. +// +// - If the XMLName field has an associated tag of the form +// "name" or "namespace-URL name", the XML element must have +// the given name (and, optionally, name space) or else Unmarshal +// returns an error. +// +// - If the XML element has an attribute whose name matches a +// struct field name with an associated tag containing ",attr" or +// the explicit name in a struct field tag of the form "name,attr", +// Unmarshal records the attribute value in that field. +// +// - If the XML element has an attribute not handled by the previous +// rule and the struct has a field with an associated tag containing +// ",any,attr", Unmarshal records the attribute value in the first +// such field. +// +// - If the XML element contains character data, that data is +// accumulated in the first struct field that has tag ",chardata". +// The struct field may have type []byte or string. +// If there is no such field, the character data is discarded. +// +// - If the XML element contains comments, they are accumulated in +// the first struct field that has tag ",comment". The struct +// field may have type []byte or string. If there is no such +// field, the comments are discarded. +// +// - If the XML element contains a sub-element whose name matches +// the prefix of a tag formatted as "a" or "a>b>c", unmarshal +// will descend into the XML structure looking for elements with the +// given names, and will map the innermost elements to that struct +// field. A tag starting with ">" is equivalent to one starting +// with the field name followed by ">". +// +// - If the XML element contains a sub-element whose name matches +// a struct field's XMLName tag and the struct field has no +// explicit name tag as per the previous rule, unmarshal maps +// the sub-element to that struct field. +// +// - If the XML element contains a sub-element whose name matches a +// field without any mode flags (",attr", ",chardata", etc), Unmarshal +// maps the sub-element to that struct field. +// +// - If the XML element contains a sub-element that hasn't matched any +// of the above rules and the struct has a field with tag ",any", +// unmarshal maps the sub-element to that struct field. +// +// - An anonymous struct field is handled as if the fields of its +// value were part of the outer struct. +// +// - A struct field with tag "-" is never unmarshaled into. +// +// If Unmarshal encounters a field type that implements the Unmarshaler +// interface, Unmarshal calls its UnmarshalXML method to produce the value from +// the XML element. Otherwise, if the value implements +// [encoding.TextUnmarshaler], Unmarshal calls that value's UnmarshalText method. +// +// Unmarshal maps an XML element to a string or []byte by saving the +// concatenation of that element's character data in the string or +// []byte. The saved []byte is never nil. +// +// Unmarshal maps an attribute value to a string or []byte by saving +// the value in the string or slice. +// +// Unmarshal maps an attribute value to an [Attr] by saving the attribute, +// including its name, in the Attr. +// +// Unmarshal maps an XML element or attribute value to a slice by +// extending the length of the slice and mapping the element or attribute +// to the newly created value. +// +// Unmarshal maps an XML element or attribute value to a bool by +// setting it to the boolean value represented by the string. Whitespace +// is trimmed and ignored. +// +// Unmarshal maps an XML element or attribute value to an integer or +// floating-point field by setting the field to the result of +// interpreting the string value in decimal. There is no check for +// overflow. Whitespace is trimmed and ignored. +// +// Unmarshal maps an XML element to a Name by recording the element +// name. +// +// Unmarshal maps an XML element to a pointer by setting the pointer +// to a freshly allocated value and then mapping the element to that value. +// +// A missing element or empty attribute value will be unmarshaled as a zero value. +// If the field is a slice, a zero value will be appended to the field. Otherwise, the +// field will be set to its zero value. +func Unmarshal(data []byte, v any) error { + return NewDecoder(bytes.NewReader(data)).Decode(v) +} + +// Decode works like [Unmarshal], except it reads the decoder +// stream to find the start element. +func (d *Decoder) Decode(v any) error { + return d.DecodeElement(v, nil) +} + +// DecodeElement works like [Unmarshal] except that it takes +// a pointer to the start XML element to decode into v. +// It is useful when a client reads some raw XML tokens itself +// but also wants to defer to [Unmarshal] for some elements. +func (d *Decoder) DecodeElement(v any, start *StartElement) error { + val := reflect.ValueOf(v) + if val.Kind() != reflect.Pointer { + return errors.New("non-pointer passed to Unmarshal") + } + + if val.IsNil() { + return errors.New("nil pointer passed to Unmarshal") + } + return d.unmarshal(val.Elem(), start, 0) +} + +// An UnmarshalError represents an error in the unmarshaling process. +type UnmarshalError string + +func (e UnmarshalError) Error() string { return string(e) } + +// Unmarshaler is the interface implemented by objects that can unmarshal +// an XML element description of themselves. +// +// UnmarshalXML decodes a single XML element +// beginning with the given start element. +// If it returns an error, the outer call to Unmarshal stops and +// returns that error. +// UnmarshalXML must consume exactly one XML element. +// One common implementation strategy is to unmarshal into +// a separate value with a layout matching the expected XML +// using d.DecodeElement, and then to copy the data from +// that value into the receiver. +// Another common strategy is to use d.Token to process the +// XML object one token at a time. +// UnmarshalXML may not use d.RawToken. +type Unmarshaler interface { + UnmarshalXML(d *Decoder, start StartElement) error +} + +// UnmarshalerAttr is the interface implemented by objects that can unmarshal +// an XML attribute description of themselves. +// +// UnmarshalXMLAttr decodes a single XML attribute. +// If it returns an error, the outer call to [Unmarshal] stops and +// returns that error. +// UnmarshalXMLAttr is used only for struct fields with the +// "attr" option in the field tag. +type UnmarshalerAttr interface { + UnmarshalXMLAttr(attr Attr) error +} + +// receiverType returns the receiver type to use in an expression like "%s.MethodName". +func receiverType(val any) string { + t := reflect.TypeOf(val) + if t.Name() != "" { + return t.String() + } + return "(" + t.String() + ")" +} + +// unmarshalInterface unmarshals a single XML element into val. +// start is the opening tag of the element. +func (d *Decoder) unmarshalInterface(val Unmarshaler, start *StartElement) error { + // Record that decoder must stop at end tag corresponding to start. + d.pushEOF() + + d.unmarshalDepth++ + err := val.UnmarshalXML(d, *start) + d.unmarshalDepth-- + if err != nil { + d.popEOF() + return err + } + + if !d.popEOF() { + return fmt.Errorf("xml: %s.UnmarshalXML did not consume entire <%s> element", receiverType(val), start.Name.Local) + } + + return nil +} + +// unmarshalTextInterface unmarshals a single XML element into val. +// The chardata contained in the element (but not its children) +// is passed to the text unmarshaler. +func (d *Decoder) unmarshalTextInterface(val encoding.TextUnmarshaler) error { + var buf []byte + depth := 1 + for depth > 0 { + t, err := d.Token() + if err != nil { + return err + } + switch t := t.(type) { + case CharData: + if depth == 1 { + buf = append(buf, t...) + } + case StartElement: + depth++ + case EndElement: + depth-- + } + } + return val.UnmarshalText(buf) +} + +// unmarshalAttr unmarshals a single XML attribute into val. +func (d *Decoder) unmarshalAttr(val reflect.Value, attr Attr) error { + if val.Kind() == reflect.Pointer { + if val.IsNil() { + val.Set(reflect.New(val.Type().Elem())) + } + val = val.Elem() + } + if val.CanInterface() && val.Type().Implements(unmarshalerAttrType) { + // This is an unmarshaler with a non-pointer receiver, + // so it's likely to be incorrect, but we do what we're told. + return val.Interface().(UnmarshalerAttr).UnmarshalXMLAttr(attr) + } + if val.CanAddr() { + pv := val.Addr() + if pv.CanInterface() && pv.Type().Implements(unmarshalerAttrType) { + return pv.Interface().(UnmarshalerAttr).UnmarshalXMLAttr(attr) + } + } + + // Not an UnmarshalerAttr; try encoding.TextUnmarshaler. + if val.CanInterface() && val.Type().Implements(textUnmarshalerType) { + // This is an unmarshaler with a non-pointer receiver, + // so it's likely to be incorrect, but we do what we're told. + return val.Interface().(encoding.TextUnmarshaler).UnmarshalText([]byte(attr.Value)) + } + if val.CanAddr() { + pv := val.Addr() + if pv.CanInterface() && pv.Type().Implements(textUnmarshalerType) { + return pv.Interface().(encoding.TextUnmarshaler).UnmarshalText([]byte(attr.Value)) + } + } + + if val.Type().Kind() == reflect.Slice && val.Type().Elem().Kind() != reflect.Uint8 { + // Slice of element values. + // Grow slice. + n := val.Len() + val.Grow(1) + val.SetLen(n + 1) + + // Recur to read element into slice. + if err := d.unmarshalAttr(val.Index(n), attr); err != nil { + val.SetLen(n) + return err + } + return nil + } + + if val.Type() == attrType { + val.Set(reflect.ValueOf(attr)) + return nil + } + + return copyValue(val, []byte(attr.Value)) +} + +var ( + attrType = reflect.TypeFor[Attr]() + unmarshalerType = reflect.TypeFor[Unmarshaler]() + unmarshalerAttrType = reflect.TypeFor[UnmarshalerAttr]() + textUnmarshalerType = reflect.TypeFor[encoding.TextUnmarshaler]() +) + +const ( + maxUnmarshalDepth = 10000 + maxUnmarshalDepthWasm = 5000 // go.dev/issue/56498 +) + +var errUnmarshalDepth = errors.New("exceeded max depth") + +// Unmarshal a single XML element into val. +func (d *Decoder) unmarshal(val reflect.Value, start *StartElement, depth int) error { + if depth >= maxUnmarshalDepth || runtime.GOARCH == "wasm" && depth >= maxUnmarshalDepthWasm { + return errUnmarshalDepth + } + // Find start element if we need it. + if start == nil { + for { + tok, err := d.Token() + if err != nil { + return err + } + if t, ok := tok.(StartElement); ok { + start = &t + break + } + } + } + + // Load value from interface, but only if the result will be + // usefully addressable. + if val.Kind() == reflect.Interface && !val.IsNil() { + e := val.Elem() + if e.Kind() == reflect.Pointer && !e.IsNil() { + val = e + } + } + + if val.Kind() == reflect.Pointer { + if val.IsNil() { + val.Set(reflect.New(val.Type().Elem())) + } + val = val.Elem() + } + + if val.CanInterface() && val.Type().Implements(unmarshalerType) { + // This is an unmarshaler with a non-pointer receiver, + // so it's likely to be incorrect, but we do what we're told. + return d.unmarshalInterface(val.Interface().(Unmarshaler), start) + } + + if val.CanAddr() { + pv := val.Addr() + if pv.CanInterface() && pv.Type().Implements(unmarshalerType) { + return d.unmarshalInterface(pv.Interface().(Unmarshaler), start) + } + } + + if val.CanInterface() && val.Type().Implements(textUnmarshalerType) { + return d.unmarshalTextInterface(val.Interface().(encoding.TextUnmarshaler)) + } + + if val.CanAddr() { + pv := val.Addr() + if pv.CanInterface() && pv.Type().Implements(textUnmarshalerType) { + return d.unmarshalTextInterface(pv.Interface().(encoding.TextUnmarshaler)) + } + } + + var ( + data []byte + saveData reflect.Value + comment []byte + saveComment reflect.Value + saveXML reflect.Value + saveXMLIndex int + saveXMLData []byte + saveAny reflect.Value + sv reflect.Value + tinfo *typeInfo + err error + ) + + switch v := val; v.Kind() { + default: + return errors.New("unknown type " + v.Type().String()) + + case reflect.Interface: + // TODO: For now, simply ignore the field. In the near + // future we may choose to unmarshal the start + // element on it, if not nil. + return d.Skip() + + case reflect.Slice: + typ := v.Type() + if typ.Elem().Kind() == reflect.Uint8 { + // []byte + saveData = v + break + } + + // Slice of element values. + // Grow slice. + n := v.Len() + v.Grow(1) + v.SetLen(n + 1) + + // Recur to read element into slice. + if err := d.unmarshal(v.Index(n), start, depth+1); err != nil { + v.SetLen(n) + return err + } + return nil + + case reflect.Bool, reflect.Float32, reflect.Float64, reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, reflect.String: + saveData = v + + case reflect.Struct: + typ := v.Type() + if typ == nameType { + v.Set(reflect.ValueOf(start.Name)) + break + } + + sv = v + tinfo, err = getTypeInfo(typ) + if err != nil { + return err + } + + // Validate and assign element name. + if tinfo.xmlname != nil { + finfo := tinfo.xmlname + if finfo.name != "" && finfo.name != start.Name.Local { + return UnmarshalError("expected element type <" + finfo.name + "> but have <" + start.Name.Local + ">") + } + if finfo.xmlns != "" && finfo.xmlns != start.Name.Space { + e := "expected element <" + finfo.name + "> in name space " + finfo.xmlns + " but have " + if start.Name.Space == "" { + e += "no name space" + } else { + e += start.Name.Space + } + return UnmarshalError(e) + } + fv := finfo.value(sv, initNilPointers) + if _, ok := fv.Interface().(Name); ok { + fv.Set(reflect.ValueOf(start.Name)) + } + } + + // Assign attributes. + for _, a := range start.Attr { + handled := false + any := -1 + for i := range tinfo.fields { + finfo := &tinfo.fields[i] + switch finfo.flags & fMode { + case fAttr: + strv := finfo.value(sv, initNilPointers) + if a.Name.Local == finfo.name && (finfo.xmlns == "" || finfo.xmlns == a.Name.Space) { + if err := d.unmarshalAttr(strv, a); err != nil { + return err + } + handled = true + } + + case fAny | fAttr: + if any == -1 { + any = i + } + } + } + if !handled && any >= 0 { + finfo := &tinfo.fields[any] + strv := finfo.value(sv, initNilPointers) + if err := d.unmarshalAttr(strv, a); err != nil { + return err + } + } + } + + // Determine whether we need to save character data or comments. + for i := range tinfo.fields { + finfo := &tinfo.fields[i] + switch finfo.flags & fMode { + case fCDATA, fCharData: + if !saveData.IsValid() { + saveData = finfo.value(sv, initNilPointers) + } + + case fComment: + if !saveComment.IsValid() { + saveComment = finfo.value(sv, initNilPointers) + } + + case fAny, fAny | fElement: + if !saveAny.IsValid() { + saveAny = finfo.value(sv, initNilPointers) + } + + case fInnerXML: + if !saveXML.IsValid() { + saveXML = finfo.value(sv, initNilPointers) + if d.saved == nil { + saveXMLIndex = 0 + d.saved = new(bytes.Buffer) + } else { + saveXMLIndex = d.savedOffset() + } + } + } + } + } + + // Find end element. + // Process sub-elements along the way. +Loop: + for { + var savedOffset int + if saveXML.IsValid() { + savedOffset = d.savedOffset() + } + tok, err := d.Token() + if err != nil { + return err + } + switch t := tok.(type) { + case StartElement: + consumed := false + if sv.IsValid() { + // unmarshalPath can call unmarshal, so we need to pass the depth through so that + // we can continue to enforce the maximum recursion limit. + consumed, err = d.unmarshalPath(tinfo, sv, nil, &t, depth) + if err != nil { + return err + } + if !consumed && saveAny.IsValid() { + consumed = true + if err := d.unmarshal(saveAny, &t, depth+1); err != nil { + return err + } + } + } + if !consumed { + if err := d.Skip(); err != nil { + return err + } + } + + case EndElement: + if saveXML.IsValid() { + saveXMLData = d.saved.Bytes()[saveXMLIndex:savedOffset] + if saveXMLIndex == 0 { + d.saved = nil + } + } + break Loop + + case CharData: + if saveData.IsValid() { + data = append(data, t...) + } + + case Comment: + if saveComment.IsValid() { + comment = append(comment, t...) + } + } + } + + if saveData.IsValid() && saveData.CanInterface() && saveData.Type().Implements(textUnmarshalerType) { + if err := saveData.Interface().(encoding.TextUnmarshaler).UnmarshalText(data); err != nil { + return err + } + saveData = reflect.Value{} + } + + if saveData.IsValid() && saveData.CanAddr() { + pv := saveData.Addr() + if pv.CanInterface() && pv.Type().Implements(textUnmarshalerType) { + if err := pv.Interface().(encoding.TextUnmarshaler).UnmarshalText(data); err != nil { + return err + } + saveData = reflect.Value{} + } + } + + if err := copyValue(saveData, data); err != nil { + return err + } + + switch t := saveComment; t.Kind() { + case reflect.String: + t.SetString(string(comment)) + case reflect.Slice: + t.Set(reflect.ValueOf(comment)) + } + + switch t := saveXML; t.Kind() { + case reflect.String: + t.SetString(string(saveXMLData)) + case reflect.Slice: + if t.Type().Elem().Kind() == reflect.Uint8 { + t.Set(reflect.ValueOf(saveXMLData)) + } + } + + return nil +} + +func copyValue(dst reflect.Value, src []byte) (err error) { + dst0 := dst + + if dst.Kind() == reflect.Pointer { + if dst.IsNil() { + dst.Set(reflect.New(dst.Type().Elem())) + } + dst = dst.Elem() + } + + // Save accumulated data. + switch dst.Kind() { + case reflect.Invalid: + // Probably a comment. + default: + return errors.New("cannot unmarshal into " + dst0.Type().String()) + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + if len(src) == 0 { + dst.SetInt(0) + return nil + } + itmp, err := strconv.ParseInt(strings.TrimSpace(string(src)), 10, dst.Type().Bits()) + if err != nil { + return err + } + dst.SetInt(itmp) + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + if len(src) == 0 { + dst.SetUint(0) + return nil + } + utmp, err := strconv.ParseUint(strings.TrimSpace(string(src)), 10, dst.Type().Bits()) + if err != nil { + return err + } + dst.SetUint(utmp) + case reflect.Float32, reflect.Float64: + if len(src) == 0 { + dst.SetFloat(0) + return nil + } + ftmp, err := strconv.ParseFloat(strings.TrimSpace(string(src)), dst.Type().Bits()) + if err != nil { + return err + } + dst.SetFloat(ftmp) + case reflect.Bool: + if len(src) == 0 { + dst.SetBool(false) + return nil + } + value, err := strconv.ParseBool(strings.TrimSpace(string(src))) + if err != nil { + return err + } + dst.SetBool(value) + case reflect.String: + dst.SetString(string(src)) + case reflect.Slice: + if len(src) == 0 { + // non-nil to flag presence + src = []byte{} + } + dst.SetBytes(src) + } + return nil +} + +// unmarshalPath walks down an XML structure looking for wanted +// paths, and calls unmarshal on them. +// The consumed result tells whether XML elements have been consumed +// from the Decoder until start's matching end element, or if it's +// still untouched because start is uninteresting for sv's fields. +func (d *Decoder) unmarshalPath(tinfo *typeInfo, sv reflect.Value, parents []string, start *StartElement, depth int) (consumed bool, err error) { + recurse := false +Loop: + for i := range tinfo.fields { + finfo := &tinfo.fields[i] + if finfo.flags&fElement == 0 || len(finfo.parents) < len(parents) || finfo.xmlns != "" && finfo.xmlns != start.Name.Space { + continue + } + for j := range parents { + if parents[j] != finfo.parents[j] { + continue Loop + } + } + if len(finfo.parents) == len(parents) && finfo.name == start.Name.Local { + // It's a perfect match, unmarshal the field. + return true, d.unmarshal(finfo.value(sv, initNilPointers), start, depth+1) + } + if len(finfo.parents) > len(parents) && finfo.parents[len(parents)] == start.Name.Local { + // It's a prefix for the field. Break and recurse + // since it's not ok for one field path to be itself + // the prefix for another field path. + recurse = true + + // We can reuse the same slice as long as we + // don't try to append to it. + parents = finfo.parents[:len(parents)+1] + break + } + } + if !recurse { + // We have no business with this element. + return false, nil + } + // The element is not a perfect match for any field, but one + // or more fields have the path to this element as a parent + // prefix. Recurse and attempt to match these. + for { + var tok Token + tok, err = d.Token() + if err != nil { + return true, err + } + switch t := tok.(type) { + case StartElement: + // the recursion depth of unmarshalPath is limited to the path length specified + // by the struct field tag, so we don't increment the depth here. + consumed2, err := d.unmarshalPath(tinfo, sv, parents, &t, depth) + if err != nil { + return true, err + } + if !consumed2 { + if err := d.Skip(); err != nil { + return true, err + } + } + case EndElement: + return true, nil + } + } +} + +// Skip reads tokens until it has consumed the end element +// matching the most recent start element already consumed, +// skipping nested structures. +// It returns nil if it finds an end element matching the start +// element; otherwise it returns an error describing the problem. +func (d *Decoder) Skip() error { + var depth int64 + for { + tok, err := d.Token() + if err != nil { + return err + } + switch tok.(type) { + case StartElement: + depth++ + case EndElement: + if depth == 0 { + return nil + } + depth-- + } + } +} diff --git a/src/encoding/xml/read_test.go b/src/encoding/xml/read_test.go new file mode 100644 index 0000000..ce99894 --- /dev/null +++ b/src/encoding/xml/read_test.go @@ -0,0 +1,1128 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package xml + +import ( + "bytes" + "errors" + "io" + "reflect" + "runtime" + "strings" + "testing" + "time" +) + +// Stripped down Atom feed data structures. + +func TestUnmarshalFeed(t *testing.T) { + var f Feed + if err := Unmarshal([]byte(atomFeedString), &f); err != nil { + t.Fatalf("Unmarshal: %s", err) + } + if !reflect.DeepEqual(f, atomFeed) { + t.Fatalf("have %#v\nwant %#v", f, atomFeed) + } +} + +// hget http://codereview.appspot.com/rss/mine/rsc +const atomFeedString = ` +<?xml version="1.0" encoding="utf-8"?> +<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en-us" updated="2009-10-04T01:35:58+00:00"><title>Code Review - My issues</title><link href="http://codereview.appspot.com/" rel="alternate"></link><link href="http://codereview.appspot.com/rss/mine/rsc" rel="self"></link><id>http://codereview.appspot.com/</id><author><name>rietveld<></name></author><entry><title>rietveld: an attempt at pubsubhubbub +</title><link href="http://codereview.appspot.com/126085" rel="alternate"></link><updated>2009-10-04T01:35:58+00:00</updated><author><name>email-address-removed</name></author><id>urn:md5:134d9179c41f806be79b3a5f7877d19a</id><summary type="html"> + An attempt at adding pubsubhubbub support to Rietveld. +http://code.google.com/p/pubsubhubbub +http://code.google.com/p/rietveld/issues/detail?id=155 + +The server side of the protocol is trivial: + 1. add a &lt;link rel=&quot;hub&quot; href=&quot;hub-server&quot;&gt; tag to all + feeds that will be pubsubhubbubbed. + 2. every time one of those feeds changes, tell the hub + with a simple POST request. + +I have tested this by adding debug prints to a local hub +server and checking that the server got the right publish +requests. + +I can&#39;t quite get the server to work, but I think the bug +is not in my code. I think that the server expects to be +able to grab the feed and see the feed&#39;s actual URL in +the link rel=&quot;self&quot;, but the default value for that drops +the :port from the URL, and I cannot for the life of me +figure out how to get the Atom generator deep inside +django not to do that, or even where it is doing that, +or even what code is running to generate the Atom feed. +(I thought I knew but I added some assert False statements +and it kept running!) + +Ignoring that particular problem, I would appreciate +feedback on the right way to get the two values at +the top of feeds.py marked NOTE(rsc). + + +</summary></entry><entry><title>rietveld: correct tab handling +</title><link href="http://codereview.appspot.com/124106" rel="alternate"></link><updated>2009-10-03T23:02:17+00:00</updated><author><name>email-address-removed</name></author><id>urn:md5:0a2a4f19bb815101f0ba2904aed7c35a</id><summary type="html"> + This fixes the buggy tab rendering that can be seen at +http://codereview.appspot.com/116075/diff/1/2 + +The fundamental problem was that the tab code was +not being told what column the text began in, so it +didn&#39;t know where to put the tab stops. Another problem +was that some of the code assumed that string byte +offsets were the same as column offsets, which is only +true if there are no tabs. + +In the process of fixing this, I cleaned up the arguments +to Fold and ExpandTabs and renamed them Break and +_ExpandTabs so that I could be sure that I found all the +call sites. I also wanted to verify that ExpandTabs was +not being used from outside intra_region_diff.py. + + +</summary></entry></feed> ` + +type Feed struct { + XMLName Name `xml:"http://www.w3.org/2005/Atom feed"` + Title string `xml:"title"` + ID string `xml:"id"` + Link []Link `xml:"link"` + Updated time.Time `xml:"updated,attr"` + Author Person `xml:"author"` + Entry []Entry `xml:"entry"` +} + +type Entry struct { + Title string `xml:"title"` + ID string `xml:"id"` + Link []Link `xml:"link"` + Updated time.Time `xml:"updated"` + Author Person `xml:"author"` + Summary Text `xml:"summary"` +} + +type Link struct { + Rel string `xml:"rel,attr,omitempty"` + Href string `xml:"href,attr"` +} + +type Person struct { + Name string `xml:"name"` + URI string `xml:"uri"` + Email string `xml:"email"` + InnerXML string `xml:",innerxml"` +} + +type Text struct { + Type string `xml:"type,attr,omitempty"` + Body string `xml:",chardata"` +} + +var atomFeed = Feed{ + XMLName: Name{"http://www.w3.org/2005/Atom", "feed"}, + Title: "Code Review - My issues", + Link: []Link{ + {Rel: "alternate", Href: "http://codereview.appspot.com/"}, + {Rel: "self", Href: "http://codereview.appspot.com/rss/mine/rsc"}, + }, + ID: "http://codereview.appspot.com/", + Updated: ParseTime("2009-10-04T01:35:58+00:00"), + Author: Person{ + Name: "rietveld<>", + InnerXML: "<name>rietveld<></name>", + }, + Entry: []Entry{ + { + Title: "rietveld: an attempt at pubsubhubbub\n", + Link: []Link{ + {Rel: "alternate", Href: "http://codereview.appspot.com/126085"}, + }, + Updated: ParseTime("2009-10-04T01:35:58+00:00"), + Author: Person{ + Name: "email-address-removed", + InnerXML: "<name>email-address-removed</name>", + }, + ID: "urn:md5:134d9179c41f806be79b3a5f7877d19a", + Summary: Text{ + Type: "html", + Body: ` + An attempt at adding pubsubhubbub support to Rietveld. +http://code.google.com/p/pubsubhubbub +http://code.google.com/p/rietveld/issues/detail?id=155 + +The server side of the protocol is trivial: + 1. add a <link rel="hub" href="hub-server"> tag to all + feeds that will be pubsubhubbubbed. + 2. every time one of those feeds changes, tell the hub + with a simple POST request. + +I have tested this by adding debug prints to a local hub +server and checking that the server got the right publish +requests. + +I can't quite get the server to work, but I think the bug +is not in my code. I think that the server expects to be +able to grab the feed and see the feed's actual URL in +the link rel="self", but the default value for that drops +the :port from the URL, and I cannot for the life of me +figure out how to get the Atom generator deep inside +django not to do that, or even where it is doing that, +or even what code is running to generate the Atom feed. +(I thought I knew but I added some assert False statements +and it kept running!) + +Ignoring that particular problem, I would appreciate +feedback on the right way to get the two values at +the top of feeds.py marked NOTE(rsc). + + +`, + }, + }, + { + Title: "rietveld: correct tab handling\n", + Link: []Link{ + {Rel: "alternate", Href: "http://codereview.appspot.com/124106"}, + }, + Updated: ParseTime("2009-10-03T23:02:17+00:00"), + Author: Person{ + Name: "email-address-removed", + InnerXML: "<name>email-address-removed</name>", + }, + ID: "urn:md5:0a2a4f19bb815101f0ba2904aed7c35a", + Summary: Text{ + Type: "html", + Body: ` + This fixes the buggy tab rendering that can be seen at +http://codereview.appspot.com/116075/diff/1/2 + +The fundamental problem was that the tab code was +not being told what column the text began in, so it +didn't know where to put the tab stops. Another problem +was that some of the code assumed that string byte +offsets were the same as column offsets, which is only +true if there are no tabs. + +In the process of fixing this, I cleaned up the arguments +to Fold and ExpandTabs and renamed them Break and +_ExpandTabs so that I could be sure that I found all the +call sites. I also wanted to verify that ExpandTabs was +not being used from outside intra_region_diff.py. + + +`, + }, + }, + }, +} + +const pathTestString = ` +<Result> + <Before>1</Before> + <Items> + <Item1> + <Value>A</Value> + </Item1> + <Item2> + <Value>B</Value> + </Item2> + <Item1> + <Value>C</Value> + <Value>D</Value> + </Item1> + <_> + <Value>E</Value> + </_> + </Items> + <After>2</After> +</Result> +` + +type PathTestItem struct { + Value string +} + +type PathTestA struct { + Items []PathTestItem `xml:">Item1"` + Before, After string +} + +type PathTestB struct { + Other []PathTestItem `xml:"Items>Item1"` + Before, After string +} + +type PathTestC struct { + Values1 []string `xml:"Items>Item1>Value"` + Values2 []string `xml:"Items>Item2>Value"` + Before, After string +} + +type PathTestSet struct { + Item1 []PathTestItem +} + +type PathTestD struct { + Other PathTestSet `xml:"Items"` + Before, After string +} + +type PathTestE struct { + Underline string `xml:"Items>_>Value"` + Before, After string +} + +var pathTests = []any{ + &PathTestA{Items: []PathTestItem{{"A"}, {"D"}}, Before: "1", After: "2"}, + &PathTestB{Other: []PathTestItem{{"A"}, {"D"}}, Before: "1", After: "2"}, + &PathTestC{Values1: []string{"A", "C", "D"}, Values2: []string{"B"}, Before: "1", After: "2"}, + &PathTestD{Other: PathTestSet{Item1: []PathTestItem{{"A"}, {"D"}}}, Before: "1", After: "2"}, + &PathTestE{Underline: "E", Before: "1", After: "2"}, +} + +func TestUnmarshalPaths(t *testing.T) { + for _, pt := range pathTests { + v := reflect.New(reflect.TypeOf(pt).Elem()).Interface() + if err := Unmarshal([]byte(pathTestString), v); err != nil { + t.Fatalf("Unmarshal: %s", err) + } + if !reflect.DeepEqual(v, pt) { + t.Fatalf("have %#v\nwant %#v", v, pt) + } + } +} + +type BadPathTestA struct { + First string `xml:"items>item1"` + Other string `xml:"items>item2"` + Second string `xml:"items"` +} + +type BadPathTestB struct { + Other string `xml:"items>item2>value"` + First string `xml:"items>item1"` + Second string `xml:"items>item1>value"` +} + +type BadPathTestC struct { + First string + Second string `xml:"First"` +} + +type BadPathTestD struct { + BadPathEmbeddedA + BadPathEmbeddedB +} + +type BadPathEmbeddedA struct { + First string +} + +type BadPathEmbeddedB struct { + Second string `xml:"First"` +} + +var badPathTests = []struct { + v, e any +}{ + {&BadPathTestA{}, &TagPathError{reflect.TypeFor[BadPathTestA](), "First", "items>item1", "Second", "items"}}, + {&BadPathTestB{}, &TagPathError{reflect.TypeFor[BadPathTestB](), "First", "items>item1", "Second", "items>item1>value"}}, + {&BadPathTestC{}, &TagPathError{reflect.TypeFor[BadPathTestC](), "First", "", "Second", "First"}}, + {&BadPathTestD{}, &TagPathError{reflect.TypeFor[BadPathTestD](), "First", "", "Second", "First"}}, +} + +func TestUnmarshalBadPaths(t *testing.T) { + for _, tt := range badPathTests { + err := Unmarshal([]byte(pathTestString), tt.v) + if !reflect.DeepEqual(err, tt.e) { + t.Fatalf("Unmarshal with %#v didn't fail properly:\nhave %#v,\nwant %#v", tt.v, err, tt.e) + } + } +} + +const OK = "OK" +const withoutNameTypeData = ` +<?xml version="1.0" charset="utf-8"?> +<Test3 Attr="OK" />` + +type TestThree struct { + XMLName Name `xml:"Test3"` + Attr string `xml:",attr"` +} + +func TestUnmarshalWithoutNameType(t *testing.T) { + var x TestThree + if err := Unmarshal([]byte(withoutNameTypeData), &x); err != nil { + t.Fatalf("Unmarshal: %s", err) + } + if x.Attr != OK { + t.Fatalf("have %v\nwant %v", x.Attr, OK) + } +} + +func TestUnmarshalAttr(t *testing.T) { + type ParamVal struct { + Int int `xml:"int,attr"` + } + + type ParamPtr struct { + Int *int `xml:"int,attr"` + } + + type ParamStringPtr struct { + Int *string `xml:"int,attr"` + } + + x := []byte(`<Param int="1" />`) + + p1 := &ParamPtr{} + if err := Unmarshal(x, p1); err != nil { + t.Fatalf("Unmarshal: %s", err) + } + if p1.Int == nil { + t.Fatalf("Unmarshal failed in to *int field") + } else if *p1.Int != 1 { + t.Fatalf("Unmarshal with %s failed:\nhave %#v,\n want %#v", x, p1.Int, 1) + } + + p2 := &ParamVal{} + if err := Unmarshal(x, p2); err != nil { + t.Fatalf("Unmarshal: %s", err) + } + if p2.Int != 1 { + t.Fatalf("Unmarshal with %s failed:\nhave %#v,\n want %#v", x, p2.Int, 1) + } + + p3 := &ParamStringPtr{} + if err := Unmarshal(x, p3); err != nil { + t.Fatalf("Unmarshal: %s", err) + } + if p3.Int == nil { + t.Fatalf("Unmarshal failed in to *string field") + } else if *p3.Int != "1" { + t.Fatalf("Unmarshal with %s failed:\nhave %#v,\n want %#v", x, p3.Int, 1) + } +} + +type Tables struct { + HTable string `xml:"http://www.w3.org/TR/html4/ table"` + FTable string `xml:"http://www.w3schools.com/furniture table"` +} + +var tables = []struct { + xml string + tab Tables + ns string +}{ + { + xml: `<Tables>` + + `<table xmlns="http://www.w3.org/TR/html4/">hello</table>` + + `<table xmlns="http://www.w3schools.com/furniture">world</table>` + + `</Tables>`, + tab: Tables{"hello", "world"}, + }, + { + xml: `<Tables>` + + `<table xmlns="http://www.w3schools.com/furniture">world</table>` + + `<table xmlns="http://www.w3.org/TR/html4/">hello</table>` + + `</Tables>`, + tab: Tables{"hello", "world"}, + }, + { + xml: `<Tables xmlns:f="http://www.w3schools.com/furniture" xmlns:h="http://www.w3.org/TR/html4/">` + + `<f:table>world</f:table>` + + `<h:table>hello</h:table>` + + `</Tables>`, + tab: Tables{"hello", "world"}, + }, + { + xml: `<Tables>` + + `<table>bogus</table>` + + `</Tables>`, + tab: Tables{}, + }, + { + xml: `<Tables>` + + `<table>only</table>` + + `</Tables>`, + tab: Tables{HTable: "only"}, + ns: "http://www.w3.org/TR/html4/", + }, + { + xml: `<Tables>` + + `<table>only</table>` + + `</Tables>`, + tab: Tables{FTable: "only"}, + ns: "http://www.w3schools.com/furniture", + }, + { + xml: `<Tables>` + + `<table>only</table>` + + `</Tables>`, + tab: Tables{}, + ns: "something else entirely", + }, +} + +func TestUnmarshalNS(t *testing.T) { + for i, tt := range tables { + var dst Tables + var err error + if tt.ns != "" { + d := NewDecoder(strings.NewReader(tt.xml)) + d.DefaultSpace = tt.ns + err = d.Decode(&dst) + } else { + err = Unmarshal([]byte(tt.xml), &dst) + } + if err != nil { + t.Errorf("#%d: Unmarshal: %v", i, err) + continue + } + want := tt.tab + if dst != want { + t.Errorf("#%d: dst=%+v, want %+v", i, dst, want) + } + } +} + +func TestMarshalNS(t *testing.T) { + dst := Tables{"hello", "world"} + data, err := Marshal(&dst) + if err != nil { + t.Fatalf("Marshal: %v", err) + } + want := `<Tables><table xmlns="http://www.w3.org/TR/html4/">hello</table><table xmlns="http://www.w3schools.com/furniture">world</table></Tables>` + str := string(data) + if str != want { + t.Errorf("have: %q\nwant: %q\n", str, want) + } +} + +type TableAttrs struct { + TAttr TAttr +} + +type TAttr struct { + HTable string `xml:"http://www.w3.org/TR/html4/ table,attr"` + FTable string `xml:"http://www.w3schools.com/furniture table,attr"` + Lang string `xml:"http://www.w3.org/XML/1998/namespace lang,attr,omitempty"` + Other1 string `xml:"http://golang.org/xml/ other,attr,omitempty"` + Other2 string `xml:"http://golang.org/xmlfoo/ other,attr,omitempty"` + Other3 string `xml:"http://golang.org/json/ other,attr,omitempty"` + Other4 string `xml:"http://golang.org/2/json/ other,attr,omitempty"` +} + +var tableAttrs = []struct { + xml string + tab TableAttrs + ns string +}{ + { + xml: `<TableAttrs xmlns:f="http://www.w3schools.com/furniture" xmlns:h="http://www.w3.org/TR/html4/"><TAttr ` + + `h:table="hello" f:table="world" ` + + `/></TableAttrs>`, + tab: TableAttrs{TAttr{HTable: "hello", FTable: "world"}}, + }, + { + xml: `<TableAttrs><TAttr xmlns:f="http://www.w3schools.com/furniture" xmlns:h="http://www.w3.org/TR/html4/" ` + + `h:table="hello" f:table="world" ` + + `/></TableAttrs>`, + tab: TableAttrs{TAttr{HTable: "hello", FTable: "world"}}, + }, + { + xml: `<TableAttrs><TAttr ` + + `h:table="hello" f:table="world" xmlns:f="http://www.w3schools.com/furniture" xmlns:h="http://www.w3.org/TR/html4/" ` + + `/></TableAttrs>`, + tab: TableAttrs{TAttr{HTable: "hello", FTable: "world"}}, + }, + { + // Default space does not apply to attribute names. + xml: `<TableAttrs xmlns="http://www.w3schools.com/furniture" xmlns:h="http://www.w3.org/TR/html4/"><TAttr ` + + `h:table="hello" table="world" ` + + `/></TableAttrs>`, + tab: TableAttrs{TAttr{HTable: "hello", FTable: ""}}, + }, + { + // Default space does not apply to attribute names. + xml: `<TableAttrs xmlns:f="http://www.w3schools.com/furniture"><TAttr xmlns="http://www.w3.org/TR/html4/" ` + + `table="hello" f:table="world" ` + + `/></TableAttrs>`, + tab: TableAttrs{TAttr{HTable: "", FTable: "world"}}, + }, + { + xml: `<TableAttrs><TAttr ` + + `table="bogus" ` + + `/></TableAttrs>`, + tab: TableAttrs{}, + }, + { + // Default space does not apply to attribute names. + xml: `<TableAttrs xmlns:h="http://www.w3.org/TR/html4/"><TAttr ` + + `h:table="hello" table="world" ` + + `/></TableAttrs>`, + tab: TableAttrs{TAttr{HTable: "hello", FTable: ""}}, + ns: "http://www.w3schools.com/furniture", + }, + { + // Default space does not apply to attribute names. + xml: `<TableAttrs xmlns:f="http://www.w3schools.com/furniture"><TAttr ` + + `table="hello" f:table="world" ` + + `/></TableAttrs>`, + tab: TableAttrs{TAttr{HTable: "", FTable: "world"}}, + ns: "http://www.w3.org/TR/html4/", + }, + { + xml: `<TableAttrs><TAttr ` + + `table="bogus" ` + + `/></TableAttrs>`, + tab: TableAttrs{}, + ns: "something else entirely", + }, +} + +func TestUnmarshalNSAttr(t *testing.T) { + for i, tt := range tableAttrs { + var dst TableAttrs + var err error + if tt.ns != "" { + d := NewDecoder(strings.NewReader(tt.xml)) + d.DefaultSpace = tt.ns + err = d.Decode(&dst) + } else { + err = Unmarshal([]byte(tt.xml), &dst) + } + if err != nil { + t.Errorf("#%d: Unmarshal: %v", i, err) + continue + } + want := tt.tab + if dst != want { + t.Errorf("#%d: dst=%+v, want %+v", i, dst, want) + } + } +} + +func TestMarshalNSAttr(t *testing.T) { + src := TableAttrs{TAttr{"hello", "world", "en_US", "other1", "other2", "other3", "other4"}} + data, err := Marshal(&src) + if err != nil { + t.Fatalf("Marshal: %v", err) + } + want := `<TableAttrs><TAttr xmlns:html4="http://www.w3.org/TR/html4/" html4:table="hello" xmlns:furniture="http://www.w3schools.com/furniture" furniture:table="world" xml:lang="en_US" xmlns:_xml="http://golang.org/xml/" _xml:other="other1" xmlns:_xmlfoo="http://golang.org/xmlfoo/" _xmlfoo:other="other2" xmlns:json="http://golang.org/json/" json:other="other3" xmlns:json_1="http://golang.org/2/json/" json_1:other="other4"></TAttr></TableAttrs>` + str := string(data) + if str != want { + t.Errorf("Marshal:\nhave: %#q\nwant: %#q\n", str, want) + } + + var dst TableAttrs + if err := Unmarshal(data, &dst); err != nil { + t.Errorf("Unmarshal: %v", err) + } + + if dst != src { + t.Errorf("Unmarshal = %q, want %q", dst, src) + } +} + +type MyCharData struct { + body string +} + +func (m *MyCharData) UnmarshalXML(d *Decoder, start StartElement) error { + for { + t, err := d.Token() + if err == io.EOF { // found end of element + break + } + if err != nil { + return err + } + if char, ok := t.(CharData); ok { + m.body += string(char) + } + } + return nil +} + +var _ Unmarshaler = (*MyCharData)(nil) + +func (m *MyCharData) UnmarshalXMLAttr(attr Attr) error { + panic("must not call") +} + +type MyAttr struct { + attr string +} + +func (m *MyAttr) UnmarshalXMLAttr(attr Attr) error { + m.attr = attr.Value + return nil +} + +var _ UnmarshalerAttr = (*MyAttr)(nil) + +type MyStruct struct { + Data *MyCharData + Attr *MyAttr `xml:",attr"` + + Data2 MyCharData + Attr2 MyAttr `xml:",attr"` +} + +func TestUnmarshaler(t *testing.T) { + xml := `<?xml version="1.0" encoding="utf-8"?> + <MyStruct Attr="attr1" Attr2="attr2"> + <Data>hello <!-- comment -->world</Data> + <Data2>howdy <!-- comment -->world</Data2> + </MyStruct> + ` + + var m MyStruct + if err := Unmarshal([]byte(xml), &m); err != nil { + t.Fatal(err) + } + + if m.Data == nil || m.Attr == nil || m.Data.body != "hello world" || m.Attr.attr != "attr1" || m.Data2.body != "howdy world" || m.Attr2.attr != "attr2" { + t.Errorf("m=%#+v\n", m) + } +} + +type Pea struct { + Cotelydon string +} + +type Pod struct { + Pea any `xml:"Pea"` +} + +// https://golang.org/issue/6836 +func TestUnmarshalIntoInterface(t *testing.T) { + pod := new(Pod) + pod.Pea = new(Pea) + xml := `<Pod><Pea><Cotelydon>Green stuff</Cotelydon></Pea></Pod>` + err := Unmarshal([]byte(xml), pod) + if err != nil { + t.Fatalf("failed to unmarshal %q: %v", xml, err) + } + pea, ok := pod.Pea.(*Pea) + if !ok { + t.Fatalf("unmarshaled into wrong type: have %T want *Pea", pod.Pea) + } + have, want := pea.Cotelydon, "Green stuff" + if have != want { + t.Errorf("failed to unmarshal into interface, have %q want %q", have, want) + } +} + +type X struct { + D string `xml:",comment"` +} + +// Issue 11112. Unmarshal must reject invalid comments. +func TestMalformedComment(t *testing.T) { + testData := []string{ + "<X><!-- a---></X>", + "<X><!-- -- --></X>", + "<X><!-- a--b --></X>", + "<X><!------></X>", + } + for i, test := range testData { + data := []byte(test) + v := new(X) + if err := Unmarshal(data, v); err == nil { + t.Errorf("%d: unmarshal should reject invalid comments", i) + } + } +} + +type IXField struct { + Five int `xml:"five"` + NotInnerXML []string `xml:",innerxml"` +} + +// Issue 15600. ",innerxml" on a field that can't hold it. +func TestInvalidInnerXMLType(t *testing.T) { + v := new(IXField) + if err := Unmarshal([]byte(`<tag><five>5</five><innertag/></tag>`), v); err != nil { + t.Errorf("Unmarshal failed: got %v", err) + } + if v.Five != 5 { + t.Errorf("Five = %v, want 5", v.Five) + } + if v.NotInnerXML != nil { + t.Errorf("NotInnerXML = %v, want nil", v.NotInnerXML) + } +} + +type Child struct { + G struct { + I int + } +} + +type ChildToEmbed struct { + X bool +} + +type Parent struct { + I int + IPtr *int + Is []int + IPtrs []*int + F float32 + FPtr *float32 + Fs []float32 + FPtrs []*float32 + B bool + BPtr *bool + Bs []bool + BPtrs []*bool + Bytes []byte + BytesPtr *[]byte + S string + SPtr *string + Ss []string + SPtrs []*string + MyI MyInt + Child Child + Children []Child + ChildPtr *Child + ChildToEmbed +} + +const ( + emptyXML = ` +<Parent> + <I></I> + <IPtr></IPtr> + <Is></Is> + <IPtrs></IPtrs> + <F></F> + <FPtr></FPtr> + <Fs></Fs> + <FPtrs></FPtrs> + <B></B> + <BPtr></BPtr> + <Bs></Bs> + <BPtrs></BPtrs> + <Bytes></Bytes> + <BytesPtr></BytesPtr> + <S></S> + <SPtr></SPtr> + <Ss></Ss> + <SPtrs></SPtrs> + <MyI></MyI> + <Child></Child> + <Children></Children> + <ChildPtr></ChildPtr> + <X></X> +</Parent> +` +) + +// golang.org/issues/13417 +func TestUnmarshalEmptyValues(t *testing.T) { + // Test first with a zero-valued dst. + v := new(Parent) + if err := Unmarshal([]byte(emptyXML), v); err != nil { + t.Fatalf("zero: Unmarshal failed: got %v", err) + } + + zBytes, zInt, zStr, zFloat, zBool := []byte{}, 0, "", float32(0), false + want := &Parent{ + IPtr: &zInt, + Is: []int{zInt}, + IPtrs: []*int{&zInt}, + FPtr: &zFloat, + Fs: []float32{zFloat}, + FPtrs: []*float32{&zFloat}, + BPtr: &zBool, + Bs: []bool{zBool}, + BPtrs: []*bool{&zBool}, + Bytes: []byte{}, + BytesPtr: &zBytes, + SPtr: &zStr, + Ss: []string{zStr}, + SPtrs: []*string{&zStr}, + Children: []Child{{}}, + ChildPtr: new(Child), + ChildToEmbed: ChildToEmbed{}, + } + if !reflect.DeepEqual(v, want) { + t.Fatalf("zero: Unmarshal:\nhave: %#+v\nwant: %#+v", v, want) + } + + // Test with a pre-populated dst. + // Multiple addressable copies, as pointer-to fields will replace value during unmarshal. + vBytes0, vInt0, vStr0, vFloat0, vBool0 := []byte("x"), 1, "x", float32(1), true + vBytes1, vInt1, vStr1, vFloat1, vBool1 := []byte("x"), 1, "x", float32(1), true + vInt2, vStr2, vFloat2, vBool2 := 1, "x", float32(1), true + v = &Parent{ + I: vInt0, + IPtr: &vInt1, + Is: []int{vInt0}, + IPtrs: []*int{&vInt2}, + F: vFloat0, + FPtr: &vFloat1, + Fs: []float32{vFloat0}, + FPtrs: []*float32{&vFloat2}, + B: vBool0, + BPtr: &vBool1, + Bs: []bool{vBool0}, + BPtrs: []*bool{&vBool2}, + Bytes: vBytes0, + BytesPtr: &vBytes1, + S: vStr0, + SPtr: &vStr1, + Ss: []string{vStr0}, + SPtrs: []*string{&vStr2}, + MyI: MyInt(vInt0), + Child: Child{G: struct{ I int }{I: vInt0}}, + Children: []Child{{G: struct{ I int }{I: vInt0}}}, + ChildPtr: &Child{G: struct{ I int }{I: vInt0}}, + ChildToEmbed: ChildToEmbed{X: vBool0}, + } + if err := Unmarshal([]byte(emptyXML), v); err != nil { + t.Fatalf("populated: Unmarshal failed: got %v", err) + } + + want = &Parent{ + IPtr: &zInt, + Is: []int{vInt0, zInt}, + IPtrs: []*int{&vInt0, &zInt}, + FPtr: &zFloat, + Fs: []float32{vFloat0, zFloat}, + FPtrs: []*float32{&vFloat0, &zFloat}, + BPtr: &zBool, + Bs: []bool{vBool0, zBool}, + BPtrs: []*bool{&vBool0, &zBool}, + Bytes: []byte{}, + BytesPtr: &zBytes, + SPtr: &zStr, + Ss: []string{vStr0, zStr}, + SPtrs: []*string{&vStr0, &zStr}, + Child: Child{G: struct{ I int }{I: vInt0}}, // I should == zInt0? (zero value) + Children: []Child{{G: struct{ I int }{I: vInt0}}, {}}, + ChildPtr: &Child{G: struct{ I int }{I: vInt0}}, // I should == zInt0? (zero value) + } + if !reflect.DeepEqual(v, want) { + t.Fatalf("populated: Unmarshal:\nhave: %#+v\nwant: %#+v", v, want) + } +} + +type WhitespaceValuesParent struct { + BFalse bool + BTrue bool + I int + INeg int + I8 int8 + I8Neg int8 + I16 int16 + I16Neg int16 + I32 int32 + I32Neg int32 + I64 int64 + I64Neg int64 + UI uint + UI8 uint8 + UI16 uint16 + UI32 uint32 + UI64 uint64 + F32 float32 + F32Neg float32 + F64 float64 + F64Neg float64 +} + +const whitespaceValuesXML = ` +<WhitespaceValuesParent> + <BFalse> false </BFalse> + <BTrue> true </BTrue> + <I> 266703 </I> + <INeg> -266703 </INeg> + <I8> 112 </I8> + <I8Neg> -112 </I8Neg> + <I16> 6703 </I16> + <I16Neg> -6703 </I16Neg> + <I32> 266703 </I32> + <I32Neg> -266703 </I32Neg> + <I64> 266703 </I64> + <I64Neg> -266703 </I64Neg> + <UI> 266703 </UI> + <UI8> 112 </UI8> + <UI16> 6703 </UI16> + <UI32> 266703 </UI32> + <UI64> 266703 </UI64> + <F32> 266.703 </F32> + <F32Neg> -266.703 </F32Neg> + <F64> 266.703 </F64> + <F64Neg> -266.703 </F64Neg> +</WhitespaceValuesParent> +` + +// golang.org/issues/22146 +func TestUnmarshalWhitespaceValues(t *testing.T) { + v := WhitespaceValuesParent{} + if err := Unmarshal([]byte(whitespaceValuesXML), &v); err != nil { + t.Fatalf("whitespace values: Unmarshal failed: got %v", err) + } + + want := WhitespaceValuesParent{ + BFalse: false, + BTrue: true, + I: 266703, + INeg: -266703, + I8: 112, + I8Neg: -112, + I16: 6703, + I16Neg: -6703, + I32: 266703, + I32Neg: -266703, + I64: 266703, + I64Neg: -266703, + UI: 266703, + UI8: 112, + UI16: 6703, + UI32: 266703, + UI64: 266703, + F32: 266.703, + F32Neg: -266.703, + F64: 266.703, + F64Neg: -266.703, + } + if v != want { + t.Fatalf("whitespace values: Unmarshal:\nhave: %#+v\nwant: %#+v", v, want) + } +} + +type WhitespaceAttrsParent struct { + BFalse bool `xml:",attr"` + BTrue bool `xml:",attr"` + I int `xml:",attr"` + INeg int `xml:",attr"` + I8 int8 `xml:",attr"` + I8Neg int8 `xml:",attr"` + I16 int16 `xml:",attr"` + I16Neg int16 `xml:",attr"` + I32 int32 `xml:",attr"` + I32Neg int32 `xml:",attr"` + I64 int64 `xml:",attr"` + I64Neg int64 `xml:",attr"` + UI uint `xml:",attr"` + UI8 uint8 `xml:",attr"` + UI16 uint16 `xml:",attr"` + UI32 uint32 `xml:",attr"` + UI64 uint64 `xml:",attr"` + F32 float32 `xml:",attr"` + F32Neg float32 `xml:",attr"` + F64 float64 `xml:",attr"` + F64Neg float64 `xml:",attr"` +} + +const whitespaceAttrsXML = ` +<WhitespaceAttrsParent + BFalse=" false " + BTrue=" true " + I=" 266703 " + INeg=" -266703 " + I8=" 112 " + I8Neg=" -112 " + I16=" 6703 " + I16Neg=" -6703 " + I32=" 266703 " + I32Neg=" -266703 " + I64=" 266703 " + I64Neg=" -266703 " + UI=" 266703 " + UI8=" 112 " + UI16=" 6703 " + UI32=" 266703 " + UI64=" 266703 " + F32=" 266.703 " + F32Neg=" -266.703 " + F64=" 266.703 " + F64Neg=" -266.703 " +> +</WhitespaceAttrsParent> +` + +// golang.org/issues/22146 +func TestUnmarshalWhitespaceAttrs(t *testing.T) { + v := WhitespaceAttrsParent{} + if err := Unmarshal([]byte(whitespaceAttrsXML), &v); err != nil { + t.Fatalf("whitespace attrs: Unmarshal failed: got %v", err) + } + + want := WhitespaceAttrsParent{ + BFalse: false, + BTrue: true, + I: 266703, + INeg: -266703, + I8: 112, + I8Neg: -112, + I16: 6703, + I16Neg: -6703, + I32: 266703, + I32Neg: -266703, + I64: 266703, + I64Neg: -266703, + UI: 266703, + UI8: 112, + UI16: 6703, + UI32: 266703, + UI64: 266703, + F32: 266.703, + F32Neg: -266.703, + F64: 266.703, + F64Neg: -266.703, + } + if v != want { + t.Fatalf("whitespace attrs: Unmarshal:\nhave: %#+v\nwant: %#+v", v, want) + } +} + +// golang.org/issues/53350 +func TestUnmarshalIntoNil(t *testing.T) { + type T struct { + A int `xml:"A"` + } + + var nilPointer *T + err := Unmarshal([]byte("<T><A>1</A></T>"), nilPointer) + + if err == nil { + t.Fatalf("no error in unmarshalling") + } + +} + +func TestCVE202228131(t *testing.T) { + type nested struct { + Parent *nested `xml:",any"` + } + var n nested + err := Unmarshal(bytes.Repeat([]byte("<a>"), maxUnmarshalDepth+1), &n) + if err == nil { + t.Fatal("Unmarshal did not fail") + } else if !errors.Is(err, errUnmarshalDepth) { + t.Fatalf("Unmarshal unexpected error: got %q, want %q", err, errUnmarshalDepth) + } +} + +func TestCVE202230633(t *testing.T) { + if testing.Short() || runtime.GOARCH == "wasm" { + t.Skip("test requires significant memory") + } + defer func() { + p := recover() + if p != nil { + t.Fatal("Unmarshal panicked") + } + }() + var example struct { + Things []string + } + Unmarshal(bytes.Repeat([]byte("<a>"), 17_000_000), &example) +} diff --git a/src/encoding/xml/typeinfo.go b/src/encoding/xml/typeinfo.go new file mode 100644 index 0000000..b18ed28 --- /dev/null +++ b/src/encoding/xml/typeinfo.go @@ -0,0 +1,367 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package xml + +import ( + "fmt" + "reflect" + "strings" + "sync" +) + +// typeInfo holds details for the xml representation of a type. +type typeInfo struct { + xmlname *fieldInfo + fields []fieldInfo +} + +// fieldInfo holds details for the xml representation of a single field. +type fieldInfo struct { + idx []int + name string + xmlns string + flags fieldFlags + parents []string +} + +type fieldFlags int + +const ( + fElement fieldFlags = 1 << iota + fAttr + fCDATA + fCharData + fInnerXML + fComment + fAny + + fOmitEmpty + + fMode = fElement | fAttr | fCDATA | fCharData | fInnerXML | fComment | fAny + + xmlName = "XMLName" +) + +var tinfoMap sync.Map // map[reflect.Type]*typeInfo + +var nameType = reflect.TypeFor[Name]() + +// getTypeInfo returns the typeInfo structure with details necessary +// for marshaling and unmarshaling typ. +func getTypeInfo(typ reflect.Type) (*typeInfo, error) { + if ti, ok := tinfoMap.Load(typ); ok { + return ti.(*typeInfo), nil + } + + tinfo := &typeInfo{} + if typ.Kind() == reflect.Struct && typ != nameType { + n := typ.NumField() + for i := 0; i < n; i++ { + f := typ.Field(i) + if (!f.IsExported() && !f.Anonymous) || f.Tag.Get("xml") == "-" { + continue // Private field + } + + // For embedded structs, embed its fields. + if f.Anonymous { + t := f.Type + if t.Kind() == reflect.Pointer { + t = t.Elem() + } + if t.Kind() == reflect.Struct { + inner, err := getTypeInfo(t) + if err != nil { + return nil, err + } + if tinfo.xmlname == nil { + tinfo.xmlname = inner.xmlname + } + for _, finfo := range inner.fields { + finfo.idx = append([]int{i}, finfo.idx...) + if err := addFieldInfo(typ, tinfo, &finfo); err != nil { + return nil, err + } + } + continue + } + } + + finfo, err := structFieldInfo(typ, &f) + if err != nil { + return nil, err + } + + if f.Name == xmlName { + tinfo.xmlname = finfo + continue + } + + // Add the field if it doesn't conflict with other fields. + if err := addFieldInfo(typ, tinfo, finfo); err != nil { + return nil, err + } + } + } + + ti, _ := tinfoMap.LoadOrStore(typ, tinfo) + return ti.(*typeInfo), nil +} + +// structFieldInfo builds and returns a fieldInfo for f. +func structFieldInfo(typ reflect.Type, f *reflect.StructField) (*fieldInfo, error) { + finfo := &fieldInfo{idx: f.Index} + + // Split the tag from the xml namespace if necessary. + tag := f.Tag.Get("xml") + if ns, t, ok := strings.Cut(tag, " "); ok { + finfo.xmlns, tag = ns, t + } + + // Parse flags. + tokens := strings.Split(tag, ",") + if len(tokens) == 1 { + finfo.flags = fElement + } else { + tag = tokens[0] + for _, flag := range tokens[1:] { + switch flag { + case "attr": + finfo.flags |= fAttr + case "cdata": + finfo.flags |= fCDATA + case "chardata": + finfo.flags |= fCharData + case "innerxml": + finfo.flags |= fInnerXML + case "comment": + finfo.flags |= fComment + case "any": + finfo.flags |= fAny + case "omitempty": + finfo.flags |= fOmitEmpty + } + } + + // Validate the flags used. + valid := true + switch mode := finfo.flags & fMode; mode { + case 0: + finfo.flags |= fElement + case fAttr, fCDATA, fCharData, fInnerXML, fComment, fAny, fAny | fAttr: + if f.Name == xmlName || tag != "" && mode != fAttr { + valid = false + } + default: + // This will also catch multiple modes in a single field. + valid = false + } + if finfo.flags&fMode == fAny { + finfo.flags |= fElement + } + if finfo.flags&fOmitEmpty != 0 && finfo.flags&(fElement|fAttr) == 0 { + valid = false + } + if !valid { + return nil, fmt.Errorf("xml: invalid tag in field %s of type %s: %q", + f.Name, typ, f.Tag.Get("xml")) + } + } + + // Use of xmlns without a name is not allowed. + if finfo.xmlns != "" && tag == "" { + return nil, fmt.Errorf("xml: namespace without name in field %s of type %s: %q", + f.Name, typ, f.Tag.Get("xml")) + } + + if f.Name == xmlName { + // The XMLName field records the XML element name. Don't + // process it as usual because its name should default to + // empty rather than to the field name. + finfo.name = tag + return finfo, nil + } + + if tag == "" { + // If the name part of the tag is completely empty, get + // default from XMLName of underlying struct if feasible, + // or field name otherwise. + if xmlname := lookupXMLName(f.Type); xmlname != nil { + finfo.xmlns, finfo.name = xmlname.xmlns, xmlname.name + } else { + finfo.name = f.Name + } + return finfo, nil + } + + // Prepare field name and parents. + parents := strings.Split(tag, ">") + if parents[0] == "" { + parents[0] = f.Name + } + if parents[len(parents)-1] == "" { + return nil, fmt.Errorf("xml: trailing '>' in field %s of type %s", f.Name, typ) + } + finfo.name = parents[len(parents)-1] + if len(parents) > 1 { + if (finfo.flags & fElement) == 0 { + return nil, fmt.Errorf("xml: %s chain not valid with %s flag", tag, strings.Join(tokens[1:], ",")) + } + finfo.parents = parents[:len(parents)-1] + } + + // If the field type has an XMLName field, the names must match + // so that the behavior of both marshaling and unmarshaling + // is straightforward and unambiguous. + if finfo.flags&fElement != 0 { + ftyp := f.Type + xmlname := lookupXMLName(ftyp) + if xmlname != nil && xmlname.name != finfo.name { + return nil, fmt.Errorf("xml: name %q in tag of %s.%s conflicts with name %q in %s.XMLName", + finfo.name, typ, f.Name, xmlname.name, ftyp) + } + } + return finfo, nil +} + +// lookupXMLName returns the fieldInfo for typ's XMLName field +// in case it exists and has a valid xml field tag, otherwise +// it returns nil. +func lookupXMLName(typ reflect.Type) (xmlname *fieldInfo) { + for typ.Kind() == reflect.Pointer { + typ = typ.Elem() + } + if typ.Kind() != reflect.Struct { + return nil + } + for i, n := 0, typ.NumField(); i < n; i++ { + f := typ.Field(i) + if f.Name != xmlName { + continue + } + finfo, err := structFieldInfo(typ, &f) + if err == nil && finfo.name != "" { + return finfo + } + // Also consider errors as a non-existent field tag + // and let getTypeInfo itself report the error. + break + } + return nil +} + +// addFieldInfo adds finfo to tinfo.fields if there are no +// conflicts, or if conflicts arise from previous fields that were +// obtained from deeper embedded structures than finfo. In the latter +// case, the conflicting entries are dropped. +// A conflict occurs when the path (parent + name) to a field is +// itself a prefix of another path, or when two paths match exactly. +// It is okay for field paths to share a common, shorter prefix. +func addFieldInfo(typ reflect.Type, tinfo *typeInfo, newf *fieldInfo) error { + var conflicts []int +Loop: + // First, figure all conflicts. Most working code will have none. + for i := range tinfo.fields { + oldf := &tinfo.fields[i] + if oldf.flags&fMode != newf.flags&fMode { + continue + } + if oldf.xmlns != "" && newf.xmlns != "" && oldf.xmlns != newf.xmlns { + continue + } + minl := min(len(newf.parents), len(oldf.parents)) + for p := 0; p < minl; p++ { + if oldf.parents[p] != newf.parents[p] { + continue Loop + } + } + if len(oldf.parents) > len(newf.parents) { + if oldf.parents[len(newf.parents)] == newf.name { + conflicts = append(conflicts, i) + } + } else if len(oldf.parents) < len(newf.parents) { + if newf.parents[len(oldf.parents)] == oldf.name { + conflicts = append(conflicts, i) + } + } else { + if newf.name == oldf.name && newf.xmlns == oldf.xmlns { + conflicts = append(conflicts, i) + } + } + } + // Without conflicts, add the new field and return. + if conflicts == nil { + tinfo.fields = append(tinfo.fields, *newf) + return nil + } + + // If any conflict is shallower, ignore the new field. + // This matches the Go field resolution on embedding. + for _, i := range conflicts { + if len(tinfo.fields[i].idx) < len(newf.idx) { + return nil + } + } + + // Otherwise, if any of them is at the same depth level, it's an error. + for _, i := range conflicts { + oldf := &tinfo.fields[i] + if len(oldf.idx) == len(newf.idx) { + f1 := typ.FieldByIndex(oldf.idx) + f2 := typ.FieldByIndex(newf.idx) + return &TagPathError{typ, f1.Name, f1.Tag.Get("xml"), f2.Name, f2.Tag.Get("xml")} + } + } + + // Otherwise, the new field is shallower, and thus takes precedence, + // so drop the conflicting fields from tinfo and append the new one. + for c := len(conflicts) - 1; c >= 0; c-- { + i := conflicts[c] + copy(tinfo.fields[i:], tinfo.fields[i+1:]) + tinfo.fields = tinfo.fields[:len(tinfo.fields)-1] + } + tinfo.fields = append(tinfo.fields, *newf) + return nil +} + +// A TagPathError represents an error in the unmarshaling process +// caused by the use of field tags with conflicting paths. +type TagPathError struct { + Struct reflect.Type + Field1, Tag1 string + Field2, Tag2 string +} + +func (e *TagPathError) Error() string { + return fmt.Sprintf("%s field %q with tag %q conflicts with field %q with tag %q", e.Struct, e.Field1, e.Tag1, e.Field2, e.Tag2) +} + +const ( + initNilPointers = true + dontInitNilPointers = false +) + +// value returns v's field value corresponding to finfo. +// It's equivalent to v.FieldByIndex(finfo.idx), but when passed +// initNilPointers, it initializes and dereferences pointers as necessary. +// When passed dontInitNilPointers and a nil pointer is reached, the function +// returns a zero reflect.Value. +func (finfo *fieldInfo) value(v reflect.Value, shouldInitNilPointers bool) reflect.Value { + for i, x := range finfo.idx { + if i > 0 { + t := v.Type() + if t.Kind() == reflect.Pointer && t.Elem().Kind() == reflect.Struct { + if v.IsNil() { + if !shouldInitNilPointers { + return reflect.Value{} + } + v.Set(reflect.New(v.Type().Elem())) + } + v = v.Elem() + } + } + v = v.Field(x) + } + return v +} diff --git a/src/encoding/xml/xml.go b/src/encoding/xml/xml.go new file mode 100644 index 0000000..73eedad --- /dev/null +++ b/src/encoding/xml/xml.go @@ -0,0 +1,2060 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package xml implements a simple XML 1.0 parser that +// understands XML name spaces. +package xml + +// References: +// Annotated XML spec: https://www.xml.com/axml/testaxml.htm +// XML name spaces: https://www.w3.org/TR/REC-xml-names/ + +import ( + "bufio" + "bytes" + "errors" + "fmt" + "io" + "strconv" + "strings" + "unicode" + "unicode/utf8" +) + +// A SyntaxError represents a syntax error in the XML input stream. +type SyntaxError struct { + Msg string + Line int +} + +func (e *SyntaxError) Error() string { + return "XML syntax error on line " + strconv.Itoa(e.Line) + ": " + e.Msg +} + +// A Name represents an XML name (Local) annotated +// with a name space identifier (Space). +// In tokens returned by [Decoder.Token], the Space identifier +// is given as a canonical URL, not the short prefix used +// in the document being parsed. +type Name struct { + Space, Local string +} + +// An Attr represents an attribute in an XML element (Name=Value). +type Attr struct { + Name Name + Value string +} + +// A Token is an interface holding one of the token types: +// [StartElement], [EndElement], [CharData], [Comment], [ProcInst], or [Directive]. +type Token any + +// A StartElement represents an XML start element. +type StartElement struct { + Name Name + Attr []Attr +} + +// Copy creates a new copy of StartElement. +func (e StartElement) Copy() StartElement { + attrs := make([]Attr, len(e.Attr)) + copy(attrs, e.Attr) + e.Attr = attrs + return e +} + +// End returns the corresponding XML end element. +func (e StartElement) End() EndElement { + return EndElement{e.Name} +} + +// An EndElement represents an XML end element. +type EndElement struct { + Name Name +} + +// A CharData represents XML character data (raw text), +// in which XML escape sequences have been replaced by +// the characters they represent. +type CharData []byte + +// Copy creates a new copy of CharData. +func (c CharData) Copy() CharData { return CharData(bytes.Clone(c)) } + +// A Comment represents an XML comment of the form <!--comment-->. +// The bytes do not include the <!-- and --> comment markers. +type Comment []byte + +// Copy creates a new copy of Comment. +func (c Comment) Copy() Comment { return Comment(bytes.Clone(c)) } + +// A ProcInst represents an XML processing instruction of the form <?target inst?> +type ProcInst struct { + Target string + Inst []byte +} + +// Copy creates a new copy of ProcInst. +func (p ProcInst) Copy() ProcInst { + p.Inst = bytes.Clone(p.Inst) + return p +} + +// A Directive represents an XML directive of the form <!text>. +// The bytes do not include the <! and > markers. +type Directive []byte + +// Copy creates a new copy of Directive. +func (d Directive) Copy() Directive { return Directive(bytes.Clone(d)) } + +// CopyToken returns a copy of a Token. +func CopyToken(t Token) Token { + switch v := t.(type) { + case CharData: + return v.Copy() + case Comment: + return v.Copy() + case Directive: + return v.Copy() + case ProcInst: + return v.Copy() + case StartElement: + return v.Copy() + } + return t +} + +// A TokenReader is anything that can decode a stream of XML tokens, including a +// [Decoder]. +// +// When Token encounters an error or end-of-file condition after successfully +// reading a token, it returns the token. It may return the (non-nil) error from +// the same call or return the error (and a nil token) from a subsequent call. +// An instance of this general case is that a TokenReader returning a non-nil +// token at the end of the token stream may return either io.EOF or a nil error. +// The next Read should return nil, [io.EOF]. +// +// Implementations of Token are discouraged from returning a nil token with a +// nil error. Callers should treat a return of nil, nil as indicating that +// nothing happened; in particular it does not indicate EOF. +type TokenReader interface { + Token() (Token, error) +} + +// A Decoder represents an XML parser reading a particular input stream. +// The parser assumes that its input is encoded in UTF-8. +type Decoder struct { + // Strict defaults to true, enforcing the requirements + // of the XML specification. + // If set to false, the parser allows input containing common + // mistakes: + // * If an element is missing an end tag, the parser invents + // end tags as necessary to keep the return values from Token + // properly balanced. + // * In attribute values and character data, unknown or malformed + // character entities (sequences beginning with &) are left alone. + // + // Setting: + // + // d.Strict = false + // d.AutoClose = xml.HTMLAutoClose + // d.Entity = xml.HTMLEntity + // + // creates a parser that can handle typical HTML. + // + // Strict mode does not enforce the requirements of the XML name spaces TR. + // In particular it does not reject name space tags using undefined prefixes. + // Such tags are recorded with the unknown prefix as the name space URL. + Strict bool + + // When Strict == false, AutoClose indicates a set of elements to + // consider closed immediately after they are opened, regardless + // of whether an end element is present. + AutoClose []string + + // Entity can be used to map non-standard entity names to string replacements. + // The parser behaves as if these standard mappings are present in the map, + // regardless of the actual map content: + // + // "lt": "<", + // "gt": ">", + // "amp": "&", + // "apos": "'", + // "quot": `"`, + Entity map[string]string + + // CharsetReader, if non-nil, defines a function to generate + // charset-conversion readers, converting from the provided + // non-UTF-8 charset into UTF-8. If CharsetReader is nil or + // returns an error, parsing stops with an error. One of the + // CharsetReader's result values must be non-nil. + CharsetReader func(charset string, input io.Reader) (io.Reader, error) + + // DefaultSpace sets the default name space used for unadorned tags, + // as if the entire XML stream were wrapped in an element containing + // the attribute xmlns="DefaultSpace". + DefaultSpace string + + r io.ByteReader + t TokenReader + buf bytes.Buffer + saved *bytes.Buffer + stk *stack + free *stack + needClose bool + toClose Name + nextToken Token + nextByte int + ns map[string]string + err error + line int + linestart int64 + offset int64 + unmarshalDepth int +} + +// NewDecoder creates a new XML parser reading from r. +// If r does not implement [io.ByteReader], NewDecoder will +// do its own buffering. +func NewDecoder(r io.Reader) *Decoder { + d := &Decoder{ + ns: make(map[string]string), + nextByte: -1, + line: 1, + Strict: true, + } + d.switchToReader(r) + return d +} + +// NewTokenDecoder creates a new XML parser using an underlying token stream. +func NewTokenDecoder(t TokenReader) *Decoder { + // Is it already a Decoder? + if d, ok := t.(*Decoder); ok { + return d + } + d := &Decoder{ + ns: make(map[string]string), + t: t, + nextByte: -1, + line: 1, + Strict: true, + } + return d +} + +// Token returns the next XML token in the input stream. +// At the end of the input stream, Token returns nil, [io.EOF]. +// +// Slices of bytes in the returned token data refer to the +// parser's internal buffer and remain valid only until the next +// call to Token. To acquire a copy of the bytes, call [CopyToken] +// or the token's Copy method. +// +// Token expands self-closing elements such as <br> +// into separate start and end elements returned by successive calls. +// +// Token guarantees that the [StartElement] and [EndElement] +// tokens it returns are properly nested and matched: +// if Token encounters an unexpected end element +// or EOF before all expected end elements, +// it will return an error. +// +// If [Decoder.CharsetReader] is called and returns an error, +// the error is wrapped and returned. +// +// Token implements XML name spaces as described by +// https://www.w3.org/TR/REC-xml-names/. Each of the +// [Name] structures contained in the Token has the Space +// set to the URL identifying its name space when known. +// If Token encounters an unrecognized name space prefix, +// it uses the prefix as the Space rather than report an error. +func (d *Decoder) Token() (Token, error) { + var t Token + var err error + if d.stk != nil && d.stk.kind == stkEOF { + return nil, io.EOF + } + if d.nextToken != nil { + t = d.nextToken + d.nextToken = nil + } else { + if t, err = d.rawToken(); t == nil && err != nil { + if err == io.EOF && d.stk != nil && d.stk.kind != stkEOF { + err = d.syntaxError("unexpected EOF") + } + return nil, err + } + // We still have a token to process, so clear any + // errors (e.g. EOF) and proceed. + err = nil + } + if !d.Strict { + if t1, ok := d.autoClose(t); ok { + d.nextToken = t + t = t1 + } + } + switch t1 := t.(type) { + case StartElement: + // In XML name spaces, the translations listed in the + // attributes apply to the element name and + // to the other attribute names, so process + // the translations first. + for _, a := range t1.Attr { + if a.Name.Space == xmlnsPrefix { + v, ok := d.ns[a.Name.Local] + d.pushNs(a.Name.Local, v, ok) + d.ns[a.Name.Local] = a.Value + } + if a.Name.Space == "" && a.Name.Local == xmlnsPrefix { + // Default space for untagged names + v, ok := d.ns[""] + d.pushNs("", v, ok) + d.ns[""] = a.Value + } + } + + d.pushElement(t1.Name) + d.translate(&t1.Name, true) + for i := range t1.Attr { + d.translate(&t1.Attr[i].Name, false) + } + t = t1 + + case EndElement: + if !d.popElement(&t1) { + return nil, d.err + } + t = t1 + } + return t, err +} + +const ( + xmlURL = "http://www.w3.org/XML/1998/namespace" + xmlnsPrefix = "xmlns" + xmlPrefix = "xml" +) + +// Apply name space translation to name n. +// The default name space (for Space=="") +// applies only to element names, not to attribute names. +func (d *Decoder) translate(n *Name, isElementName bool) { + switch { + case n.Space == xmlnsPrefix: + return + case n.Space == "" && !isElementName: + return + case n.Space == xmlPrefix: + n.Space = xmlURL + case n.Space == "" && n.Local == xmlnsPrefix: + return + } + if v, ok := d.ns[n.Space]; ok { + n.Space = v + } else if n.Space == "" { + n.Space = d.DefaultSpace + } +} + +func (d *Decoder) switchToReader(r io.Reader) { + // Get efficient byte at a time reader. + // Assume that if reader has its own + // ReadByte, it's efficient enough. + // Otherwise, use bufio. + if rb, ok := r.(io.ByteReader); ok { + d.r = rb + } else { + d.r = bufio.NewReader(r) + } +} + +// Parsing state - stack holds old name space translations +// and the current set of open elements. The translations to pop when +// ending a given tag are *below* it on the stack, which is +// more work but forced on us by XML. +type stack struct { + next *stack + kind int + name Name + ok bool +} + +const ( + stkStart = iota + stkNs + stkEOF +) + +func (d *Decoder) push(kind int) *stack { + s := d.free + if s != nil { + d.free = s.next + } else { + s = new(stack) + } + s.next = d.stk + s.kind = kind + d.stk = s + return s +} + +func (d *Decoder) pop() *stack { + s := d.stk + if s != nil { + d.stk = s.next + s.next = d.free + d.free = s + } + return s +} + +// Record that after the current element is finished +// (that element is already pushed on the stack) +// Token should return EOF until popEOF is called. +func (d *Decoder) pushEOF() { + // Walk down stack to find Start. + // It might not be the top, because there might be stkNs + // entries above it. + start := d.stk + for start.kind != stkStart { + start = start.next + } + // The stkNs entries below a start are associated with that + // element too; skip over them. + for start.next != nil && start.next.kind == stkNs { + start = start.next + } + s := d.free + if s != nil { + d.free = s.next + } else { + s = new(stack) + } + s.kind = stkEOF + s.next = start.next + start.next = s +} + +// Undo a pushEOF. +// The element must have been finished, so the EOF should be at the top of the stack. +func (d *Decoder) popEOF() bool { + if d.stk == nil || d.stk.kind != stkEOF { + return false + } + d.pop() + return true +} + +// Record that we are starting an element with the given name. +func (d *Decoder) pushElement(name Name) { + s := d.push(stkStart) + s.name = name +} + +// Record that we are changing the value of ns[local]. +// The old value is url, ok. +func (d *Decoder) pushNs(local string, url string, ok bool) { + s := d.push(stkNs) + s.name.Local = local + s.name.Space = url + s.ok = ok +} + +// Creates a SyntaxError with the current line number. +func (d *Decoder) syntaxError(msg string) error { + return &SyntaxError{Msg: msg, Line: d.line} +} + +// Record that we are ending an element with the given name. +// The name must match the record at the top of the stack, +// which must be a pushElement record. +// After popping the element, apply any undo records from +// the stack to restore the name translations that existed +// before we saw this element. +func (d *Decoder) popElement(t *EndElement) bool { + s := d.pop() + name := t.Name + switch { + case s == nil || s.kind != stkStart: + d.err = d.syntaxError("unexpected end element </" + name.Local + ">") + return false + case s.name.Local != name.Local: + if !d.Strict { + d.needClose = true + d.toClose = t.Name + t.Name = s.name + return true + } + d.err = d.syntaxError("element <" + s.name.Local + "> closed by </" + name.Local + ">") + return false + case s.name.Space != name.Space: + d.err = d.syntaxError("element <" + s.name.Local + "> in space " + s.name.Space + + " closed by </" + name.Local + "> in space " + name.Space) + return false + } + + d.translate(&t.Name, true) + + // Pop stack until a Start or EOF is on the top, undoing the + // translations that were associated with the element we just closed. + for d.stk != nil && d.stk.kind != stkStart && d.stk.kind != stkEOF { + s := d.pop() + if s.ok { + d.ns[s.name.Local] = s.name.Space + } else { + delete(d.ns, s.name.Local) + } + } + + return true +} + +// If the top element on the stack is autoclosing and +// t is not the end tag, invent the end tag. +func (d *Decoder) autoClose(t Token) (Token, bool) { + if d.stk == nil || d.stk.kind != stkStart { + return nil, false + } + for _, s := range d.AutoClose { + if strings.EqualFold(s, d.stk.name.Local) { + // This one should be auto closed if t doesn't close it. + et, ok := t.(EndElement) + if !ok || !strings.EqualFold(et.Name.Local, d.stk.name.Local) { + return EndElement{d.stk.name}, true + } + break + } + } + return nil, false +} + +var errRawToken = errors.New("xml: cannot use RawToken from UnmarshalXML method") + +// RawToken is like [Decoder.Token] but does not verify that +// start and end elements match and does not translate +// name space prefixes to their corresponding URLs. +func (d *Decoder) RawToken() (Token, error) { + if d.unmarshalDepth > 0 { + return nil, errRawToken + } + return d.rawToken() +} + +func (d *Decoder) rawToken() (Token, error) { + if d.t != nil { + return d.t.Token() + } + if d.err != nil { + return nil, d.err + } + if d.needClose { + // The last element we read was self-closing and + // we returned just the StartElement half. + // Return the EndElement half now. + d.needClose = false + return EndElement{d.toClose}, nil + } + + b, ok := d.getc() + if !ok { + return nil, d.err + } + + if b != '<' { + // Text section. + d.ungetc(b) + data := d.text(-1, false) + if data == nil { + return nil, d.err + } + return CharData(data), nil + } + + if b, ok = d.mustgetc(); !ok { + return nil, d.err + } + switch b { + case '/': + // </: End element + var name Name + if name, ok = d.nsname(); !ok { + if d.err == nil { + d.err = d.syntaxError("expected element name after </") + } + return nil, d.err + } + d.space() + if b, ok = d.mustgetc(); !ok { + return nil, d.err + } + if b != '>' { + d.err = d.syntaxError("invalid characters between </" + name.Local + " and >") + return nil, d.err + } + return EndElement{name}, nil + + case '?': + // <?: Processing instruction. + var target string + if target, ok = d.name(); !ok { + if d.err == nil { + d.err = d.syntaxError("expected target name after <?") + } + return nil, d.err + } + d.space() + d.buf.Reset() + var b0 byte + for { + if b, ok = d.mustgetc(); !ok { + return nil, d.err + } + d.buf.WriteByte(b) + if b0 == '?' && b == '>' { + break + } + b0 = b + } + data := d.buf.Bytes() + data = data[0 : len(data)-2] // chop ?> + + if target == "xml" { + content := string(data) + ver := procInst("version", content) + if ver != "" && ver != "1.0" { + d.err = fmt.Errorf("xml: unsupported version %q; only version 1.0 is supported", ver) + return nil, d.err + } + enc := procInst("encoding", content) + if enc != "" && enc != "utf-8" && enc != "UTF-8" && !strings.EqualFold(enc, "utf-8") { + if d.CharsetReader == nil { + d.err = fmt.Errorf("xml: encoding %q declared but Decoder.CharsetReader is nil", enc) + return nil, d.err + } + newr, err := d.CharsetReader(enc, d.r.(io.Reader)) + if err != nil { + d.err = fmt.Errorf("xml: opening charset %q: %w", enc, err) + return nil, d.err + } + if newr == nil { + panic("CharsetReader returned a nil Reader for charset " + enc) + } + d.switchToReader(newr) + } + } + return ProcInst{target, data}, nil + + case '!': + // <!: Maybe comment, maybe CDATA. + if b, ok = d.mustgetc(); !ok { + return nil, d.err + } + switch b { + case '-': // <!- + // Probably <!-- for a comment. + if b, ok = d.mustgetc(); !ok { + return nil, d.err + } + if b != '-' { + d.err = d.syntaxError("invalid sequence <!- not part of <!--") + return nil, d.err + } + // Look for terminator. + d.buf.Reset() + var b0, b1 byte + for { + if b, ok = d.mustgetc(); !ok { + return nil, d.err + } + d.buf.WriteByte(b) + if b0 == '-' && b1 == '-' { + if b != '>' { + d.err = d.syntaxError( + `invalid sequence "--" not allowed in comments`) + return nil, d.err + } + break + } + b0, b1 = b1, b + } + data := d.buf.Bytes() + data = data[0 : len(data)-3] // chop --> + return Comment(data), nil + + case '[': // <![ + // Probably <![CDATA[. + for i := 0; i < 6; i++ { + if b, ok = d.mustgetc(); !ok { + return nil, d.err + } + if b != "CDATA["[i] { + d.err = d.syntaxError("invalid <![ sequence") + return nil, d.err + } + } + // Have <![CDATA[. Read text until ]]>. + data := d.text(-1, true) + if data == nil { + return nil, d.err + } + return CharData(data), nil + } + + // Probably a directive: <!DOCTYPE ...>, <!ENTITY ...>, etc. + // We don't care, but accumulate for caller. Quoted angle + // brackets do not count for nesting. + d.buf.Reset() + d.buf.WriteByte(b) + inquote := uint8(0) + depth := 0 + for { + if b, ok = d.mustgetc(); !ok { + return nil, d.err + } + if inquote == 0 && b == '>' && depth == 0 { + break + } + HandleB: + d.buf.WriteByte(b) + switch { + case b == inquote: + inquote = 0 + + case inquote != 0: + // in quotes, no special action + + case b == '\'' || b == '"': + inquote = b + + case b == '>' && inquote == 0: + depth-- + + case b == '<' && inquote == 0: + // Look for <!-- to begin comment. + s := "!--" + for i := 0; i < len(s); i++ { + if b, ok = d.mustgetc(); !ok { + return nil, d.err + } + if b != s[i] { + for j := 0; j < i; j++ { + d.buf.WriteByte(s[j]) + } + depth++ + goto HandleB + } + } + + // Remove < that was written above. + d.buf.Truncate(d.buf.Len() - 1) + + // Look for terminator. + var b0, b1 byte + for { + if b, ok = d.mustgetc(); !ok { + return nil, d.err + } + if b0 == '-' && b1 == '-' && b == '>' { + break + } + b0, b1 = b1, b + } + + // Replace the comment with a space in the returned Directive + // body, so that markup parts that were separated by the comment + // (like a "<" and a "!") don't get joined when re-encoding the + // Directive, taking new semantic meaning. + d.buf.WriteByte(' ') + } + } + return Directive(d.buf.Bytes()), nil + } + + // Must be an open element like <a href="foo"> + d.ungetc(b) + + var ( + name Name + empty bool + attr []Attr + ) + if name, ok = d.nsname(); !ok { + if d.err == nil { + d.err = d.syntaxError("expected element name after <") + } + return nil, d.err + } + + attr = []Attr{} + for { + d.space() + if b, ok = d.mustgetc(); !ok { + return nil, d.err + } + if b == '/' { + empty = true + if b, ok = d.mustgetc(); !ok { + return nil, d.err + } + if b != '>' { + d.err = d.syntaxError("expected /> in element") + return nil, d.err + } + break + } + if b == '>' { + break + } + d.ungetc(b) + + a := Attr{} + if a.Name, ok = d.nsname(); !ok { + if d.err == nil { + d.err = d.syntaxError("expected attribute name in element") + } + return nil, d.err + } + d.space() + if b, ok = d.mustgetc(); !ok { + return nil, d.err + } + if b != '=' { + if d.Strict { + d.err = d.syntaxError("attribute name without = in element") + return nil, d.err + } + d.ungetc(b) + a.Value = a.Name.Local + } else { + d.space() + data := d.attrval() + if data == nil { + return nil, d.err + } + a.Value = string(data) + } + attr = append(attr, a) + } + if empty { + d.needClose = true + d.toClose = name + } + return StartElement{name, attr}, nil +} + +func (d *Decoder) attrval() []byte { + b, ok := d.mustgetc() + if !ok { + return nil + } + // Handle quoted attribute values + if b == '"' || b == '\'' { + return d.text(int(b), false) + } + // Handle unquoted attribute values for strict parsers + if d.Strict { + d.err = d.syntaxError("unquoted or missing attribute value in element") + return nil + } + // Handle unquoted attribute values for unstrict parsers + d.ungetc(b) + d.buf.Reset() + for { + b, ok = d.mustgetc() + if !ok { + return nil + } + // https://www.w3.org/TR/REC-html40/intro/sgmltut.html#h-3.2.2 + if 'a' <= b && b <= 'z' || 'A' <= b && b <= 'Z' || + '0' <= b && b <= '9' || b == '_' || b == ':' || b == '-' { + d.buf.WriteByte(b) + } else { + d.ungetc(b) + break + } + } + return d.buf.Bytes() +} + +// Skip spaces if any +func (d *Decoder) space() { + for { + b, ok := d.getc() + if !ok { + return + } + switch b { + case ' ', '\r', '\n', '\t': + default: + d.ungetc(b) + return + } + } +} + +// Read a single byte. +// If there is no byte to read, return ok==false +// and leave the error in d.err. +// Maintain line number. +func (d *Decoder) getc() (b byte, ok bool) { + if d.err != nil { + return 0, false + } + if d.nextByte >= 0 { + b = byte(d.nextByte) + d.nextByte = -1 + } else { + b, d.err = d.r.ReadByte() + if d.err != nil { + return 0, false + } + if d.saved != nil { + d.saved.WriteByte(b) + } + } + if b == '\n' { + d.line++ + d.linestart = d.offset + 1 + } + d.offset++ + return b, true +} + +// InputOffset returns the input stream byte offset of the current decoder position. +// The offset gives the location of the end of the most recently returned token +// and the beginning of the next token. +func (d *Decoder) InputOffset() int64 { + return d.offset +} + +// InputPos returns the line of the current decoder position and the 1 based +// input position of the line. The position gives the location of the end of the +// most recently returned token. +func (d *Decoder) InputPos() (line, column int) { + return d.line, int(d.offset-d.linestart) + 1 +} + +// Return saved offset. +// If we did ungetc (nextByte >= 0), have to back up one. +func (d *Decoder) savedOffset() int { + n := d.saved.Len() + if d.nextByte >= 0 { + n-- + } + return n +} + +// Must read a single byte. +// If there is no byte to read, +// set d.err to SyntaxError("unexpected EOF") +// and return ok==false +func (d *Decoder) mustgetc() (b byte, ok bool) { + if b, ok = d.getc(); !ok { + if d.err == io.EOF { + d.err = d.syntaxError("unexpected EOF") + } + } + return +} + +// Unread a single byte. +func (d *Decoder) ungetc(b byte) { + if b == '\n' { + d.line-- + } + d.nextByte = int(b) + d.offset-- +} + +var entity = map[string]rune{ + "lt": '<', + "gt": '>', + "amp": '&', + "apos": '\'', + "quot": '"', +} + +// Read plain text section (XML calls it character data). +// If quote >= 0, we are in a quoted string and need to find the matching quote. +// If cdata == true, we are in a <![CDATA[ section and need to find ]]>. +// On failure return nil and leave the error in d.err. +func (d *Decoder) text(quote int, cdata bool) []byte { + var b0, b1 byte + var trunc int + d.buf.Reset() +Input: + for { + b, ok := d.getc() + if !ok { + if cdata { + if d.err == io.EOF { + d.err = d.syntaxError("unexpected EOF in CDATA section") + } + return nil + } + break Input + } + + // <![CDATA[ section ends with ]]>. + // It is an error for ]]> to appear in ordinary text. + if b0 == ']' && b1 == ']' && b == '>' { + if cdata { + trunc = 2 + break Input + } + d.err = d.syntaxError("unescaped ]]> not in CDATA section") + return nil + } + + // Stop reading text if we see a <. + if b == '<' && !cdata { + if quote >= 0 { + d.err = d.syntaxError("unescaped < inside quoted string") + return nil + } + d.ungetc('<') + break Input + } + if quote >= 0 && b == byte(quote) { + break Input + } + if b == '&' && !cdata { + // Read escaped character expression up to semicolon. + // XML in all its glory allows a document to define and use + // its own character names with <!ENTITY ...> directives. + // Parsers are required to recognize lt, gt, amp, apos, and quot + // even if they have not been declared. + before := d.buf.Len() + d.buf.WriteByte('&') + var ok bool + var text string + var haveText bool + if b, ok = d.mustgetc(); !ok { + return nil + } + if b == '#' { + d.buf.WriteByte(b) + if b, ok = d.mustgetc(); !ok { + return nil + } + base := 10 + if b == 'x' { + base = 16 + d.buf.WriteByte(b) + if b, ok = d.mustgetc(); !ok { + return nil + } + } + start := d.buf.Len() + for '0' <= b && b <= '9' || + base == 16 && 'a' <= b && b <= 'f' || + base == 16 && 'A' <= b && b <= 'F' { + d.buf.WriteByte(b) + if b, ok = d.mustgetc(); !ok { + return nil + } + } + if b != ';' { + d.ungetc(b) + } else { + s := string(d.buf.Bytes()[start:]) + d.buf.WriteByte(';') + n, err := strconv.ParseUint(s, base, 64) + if err == nil && n <= unicode.MaxRune { + text = string(rune(n)) + haveText = true + } + } + } else { + d.ungetc(b) + if !d.readName() { + if d.err != nil { + return nil + } + } + if b, ok = d.mustgetc(); !ok { + return nil + } + if b != ';' { + d.ungetc(b) + } else { + name := d.buf.Bytes()[before+1:] + d.buf.WriteByte(';') + if isName(name) { + s := string(name) + if r, ok := entity[s]; ok { + text = string(r) + haveText = true + } else if d.Entity != nil { + text, haveText = d.Entity[s] + } + } + } + } + + if haveText { + d.buf.Truncate(before) + d.buf.WriteString(text) + b0, b1 = 0, 0 + continue Input + } + if !d.Strict { + b0, b1 = 0, 0 + continue Input + } + ent := string(d.buf.Bytes()[before:]) + if ent[len(ent)-1] != ';' { + ent += " (no semicolon)" + } + d.err = d.syntaxError("invalid character entity " + ent) + return nil + } + + // We must rewrite unescaped \r and \r\n into \n. + if b == '\r' { + d.buf.WriteByte('\n') + } else if b1 == '\r' && b == '\n' { + // Skip \r\n--we already wrote \n. + } else { + d.buf.WriteByte(b) + } + + b0, b1 = b1, b + } + data := d.buf.Bytes() + data = data[0 : len(data)-trunc] + + // Inspect each rune for being a disallowed character. + buf := data + for len(buf) > 0 { + r, size := utf8.DecodeRune(buf) + if r == utf8.RuneError && size == 1 { + d.err = d.syntaxError("invalid UTF-8") + return nil + } + buf = buf[size:] + if !isInCharacterRange(r) { + d.err = d.syntaxError(fmt.Sprintf("illegal character code %U", r)) + return nil + } + } + + return data +} + +// Decide whether the given rune is in the XML Character Range, per +// the Char production of https://www.xml.com/axml/testaxml.htm, +// Section 2.2 Characters. +func isInCharacterRange(r rune) (inrange bool) { + return r == 0x09 || + r == 0x0A || + r == 0x0D || + r >= 0x20 && r <= 0xD7FF || + r >= 0xE000 && r <= 0xFFFD || + r >= 0x10000 && r <= 0x10FFFF +} + +// Get name space name: name with a : stuck in the middle. +// The part before the : is the name space identifier. +func (d *Decoder) nsname() (name Name, ok bool) { + s, ok := d.name() + if !ok { + return + } + if strings.Count(s, ":") > 1 { + return name, false + } else if space, local, ok := strings.Cut(s, ":"); !ok || space == "" || local == "" { + name.Local = s + } else { + name.Space = space + name.Local = local + } + return name, true +} + +// Get name: /first(first|second)*/ +// Do not set d.err if the name is missing (unless unexpected EOF is received): +// let the caller provide better context. +func (d *Decoder) name() (s string, ok bool) { + d.buf.Reset() + if !d.readName() { + return "", false + } + + // Now we check the characters. + b := d.buf.Bytes() + if !isName(b) { + d.err = d.syntaxError("invalid XML name: " + string(b)) + return "", false + } + return string(b), true +} + +// Read a name and append its bytes to d.buf. +// The name is delimited by any single-byte character not valid in names. +// All multi-byte characters are accepted; the caller must check their validity. +func (d *Decoder) readName() (ok bool) { + var b byte + if b, ok = d.mustgetc(); !ok { + return + } + if b < utf8.RuneSelf && !isNameByte(b) { + d.ungetc(b) + return false + } + d.buf.WriteByte(b) + + for { + if b, ok = d.mustgetc(); !ok { + return + } + if b < utf8.RuneSelf && !isNameByte(b) { + d.ungetc(b) + break + } + d.buf.WriteByte(b) + } + return true +} + +func isNameByte(c byte) bool { + return 'A' <= c && c <= 'Z' || + 'a' <= c && c <= 'z' || + '0' <= c && c <= '9' || + c == '_' || c == ':' || c == '.' || c == '-' +} + +func isName(s []byte) bool { + if len(s) == 0 { + return false + } + c, n := utf8.DecodeRune(s) + if c == utf8.RuneError && n == 1 { + return false + } + if !unicode.Is(first, c) { + return false + } + for n < len(s) { + s = s[n:] + c, n = utf8.DecodeRune(s) + if c == utf8.RuneError && n == 1 { + return false + } + if !unicode.Is(first, c) && !unicode.Is(second, c) { + return false + } + } + return true +} + +func isNameString(s string) bool { + if len(s) == 0 { + return false + } + c, n := utf8.DecodeRuneInString(s) + if c == utf8.RuneError && n == 1 { + return false + } + if !unicode.Is(first, c) { + return false + } + for n < len(s) { + s = s[n:] + c, n = utf8.DecodeRuneInString(s) + if c == utf8.RuneError && n == 1 { + return false + } + if !unicode.Is(first, c) && !unicode.Is(second, c) { + return false + } + } + return true +} + +// These tables were generated by cut and paste from Appendix B of +// the XML spec at https://www.xml.com/axml/testaxml.htm +// and then reformatting. First corresponds to (Letter | '_' | ':') +// and second corresponds to NameChar. + +var first = &unicode.RangeTable{ + R16: []unicode.Range16{ + {0x003A, 0x003A, 1}, + {0x0041, 0x005A, 1}, + {0x005F, 0x005F, 1}, + {0x0061, 0x007A, 1}, + {0x00C0, 0x00D6, 1}, + {0x00D8, 0x00F6, 1}, + {0x00F8, 0x00FF, 1}, + {0x0100, 0x0131, 1}, + {0x0134, 0x013E, 1}, + {0x0141, 0x0148, 1}, + {0x014A, 0x017E, 1}, + {0x0180, 0x01C3, 1}, + {0x01CD, 0x01F0, 1}, + {0x01F4, 0x01F5, 1}, + {0x01FA, 0x0217, 1}, + {0x0250, 0x02A8, 1}, + {0x02BB, 0x02C1, 1}, + {0x0386, 0x0386, 1}, + {0x0388, 0x038A, 1}, + {0x038C, 0x038C, 1}, + {0x038E, 0x03A1, 1}, + {0x03A3, 0x03CE, 1}, + {0x03D0, 0x03D6, 1}, + {0x03DA, 0x03E0, 2}, + {0x03E2, 0x03F3, 1}, + {0x0401, 0x040C, 1}, + {0x040E, 0x044F, 1}, + {0x0451, 0x045C, 1}, + {0x045E, 0x0481, 1}, + {0x0490, 0x04C4, 1}, + {0x04C7, 0x04C8, 1}, + {0x04CB, 0x04CC, 1}, + {0x04D0, 0x04EB, 1}, + {0x04EE, 0x04F5, 1}, + {0x04F8, 0x04F9, 1}, + {0x0531, 0x0556, 1}, + {0x0559, 0x0559, 1}, + {0x0561, 0x0586, 1}, + {0x05D0, 0x05EA, 1}, + {0x05F0, 0x05F2, 1}, + {0x0621, 0x063A, 1}, + {0x0641, 0x064A, 1}, + {0x0671, 0x06B7, 1}, + {0x06BA, 0x06BE, 1}, + {0x06C0, 0x06CE, 1}, + {0x06D0, 0x06D3, 1}, + {0x06D5, 0x06D5, 1}, + {0x06E5, 0x06E6, 1}, + {0x0905, 0x0939, 1}, + {0x093D, 0x093D, 1}, + {0x0958, 0x0961, 1}, + {0x0985, 0x098C, 1}, + {0x098F, 0x0990, 1}, + {0x0993, 0x09A8, 1}, + {0x09AA, 0x09B0, 1}, + {0x09B2, 0x09B2, 1}, + {0x09B6, 0x09B9, 1}, + {0x09DC, 0x09DD, 1}, + {0x09DF, 0x09E1, 1}, + {0x09F0, 0x09F1, 1}, + {0x0A05, 0x0A0A, 1}, + {0x0A0F, 0x0A10, 1}, + {0x0A13, 0x0A28, 1}, + {0x0A2A, 0x0A30, 1}, + {0x0A32, 0x0A33, 1}, + {0x0A35, 0x0A36, 1}, + {0x0A38, 0x0A39, 1}, + {0x0A59, 0x0A5C, 1}, + {0x0A5E, 0x0A5E, 1}, + {0x0A72, 0x0A74, 1}, + {0x0A85, 0x0A8B, 1}, + {0x0A8D, 0x0A8D, 1}, + {0x0A8F, 0x0A91, 1}, + {0x0A93, 0x0AA8, 1}, + {0x0AAA, 0x0AB0, 1}, + {0x0AB2, 0x0AB3, 1}, + {0x0AB5, 0x0AB9, 1}, + {0x0ABD, 0x0AE0, 0x23}, + {0x0B05, 0x0B0C, 1}, + {0x0B0F, 0x0B10, 1}, + {0x0B13, 0x0B28, 1}, + {0x0B2A, 0x0B30, 1}, + {0x0B32, 0x0B33, 1}, + {0x0B36, 0x0B39, 1}, + {0x0B3D, 0x0B3D, 1}, + {0x0B5C, 0x0B5D, 1}, + {0x0B5F, 0x0B61, 1}, + {0x0B85, 0x0B8A, 1}, + {0x0B8E, 0x0B90, 1}, + {0x0B92, 0x0B95, 1}, + {0x0B99, 0x0B9A, 1}, + {0x0B9C, 0x0B9C, 1}, + {0x0B9E, 0x0B9F, 1}, + {0x0BA3, 0x0BA4, 1}, + {0x0BA8, 0x0BAA, 1}, + {0x0BAE, 0x0BB5, 1}, + {0x0BB7, 0x0BB9, 1}, + {0x0C05, 0x0C0C, 1}, + {0x0C0E, 0x0C10, 1}, + {0x0C12, 0x0C28, 1}, + {0x0C2A, 0x0C33, 1}, + {0x0C35, 0x0C39, 1}, + {0x0C60, 0x0C61, 1}, + {0x0C85, 0x0C8C, 1}, + {0x0C8E, 0x0C90, 1}, + {0x0C92, 0x0CA8, 1}, + {0x0CAA, 0x0CB3, 1}, + {0x0CB5, 0x0CB9, 1}, + {0x0CDE, 0x0CDE, 1}, + {0x0CE0, 0x0CE1, 1}, + {0x0D05, 0x0D0C, 1}, + {0x0D0E, 0x0D10, 1}, + {0x0D12, 0x0D28, 1}, + {0x0D2A, 0x0D39, 1}, + {0x0D60, 0x0D61, 1}, + {0x0E01, 0x0E2E, 1}, + {0x0E30, 0x0E30, 1}, + {0x0E32, 0x0E33, 1}, + {0x0E40, 0x0E45, 1}, + {0x0E81, 0x0E82, 1}, + {0x0E84, 0x0E84, 1}, + {0x0E87, 0x0E88, 1}, + {0x0E8A, 0x0E8D, 3}, + {0x0E94, 0x0E97, 1}, + {0x0E99, 0x0E9F, 1}, + {0x0EA1, 0x0EA3, 1}, + {0x0EA5, 0x0EA7, 2}, + {0x0EAA, 0x0EAB, 1}, + {0x0EAD, 0x0EAE, 1}, + {0x0EB0, 0x0EB0, 1}, + {0x0EB2, 0x0EB3, 1}, + {0x0EBD, 0x0EBD, 1}, + {0x0EC0, 0x0EC4, 1}, + {0x0F40, 0x0F47, 1}, + {0x0F49, 0x0F69, 1}, + {0x10A0, 0x10C5, 1}, + {0x10D0, 0x10F6, 1}, + {0x1100, 0x1100, 1}, + {0x1102, 0x1103, 1}, + {0x1105, 0x1107, 1}, + {0x1109, 0x1109, 1}, + {0x110B, 0x110C, 1}, + {0x110E, 0x1112, 1}, + {0x113C, 0x1140, 2}, + {0x114C, 0x1150, 2}, + {0x1154, 0x1155, 1}, + {0x1159, 0x1159, 1}, + {0x115F, 0x1161, 1}, + {0x1163, 0x1169, 2}, + {0x116D, 0x116E, 1}, + {0x1172, 0x1173, 1}, + {0x1175, 0x119E, 0x119E - 0x1175}, + {0x11A8, 0x11AB, 0x11AB - 0x11A8}, + {0x11AE, 0x11AF, 1}, + {0x11B7, 0x11B8, 1}, + {0x11BA, 0x11BA, 1}, + {0x11BC, 0x11C2, 1}, + {0x11EB, 0x11F0, 0x11F0 - 0x11EB}, + {0x11F9, 0x11F9, 1}, + {0x1E00, 0x1E9B, 1}, + {0x1EA0, 0x1EF9, 1}, + {0x1F00, 0x1F15, 1}, + {0x1F18, 0x1F1D, 1}, + {0x1F20, 0x1F45, 1}, + {0x1F48, 0x1F4D, 1}, + {0x1F50, 0x1F57, 1}, + {0x1F59, 0x1F5B, 0x1F5B - 0x1F59}, + {0x1F5D, 0x1F5D, 1}, + {0x1F5F, 0x1F7D, 1}, + {0x1F80, 0x1FB4, 1}, + {0x1FB6, 0x1FBC, 1}, + {0x1FBE, 0x1FBE, 1}, + {0x1FC2, 0x1FC4, 1}, + {0x1FC6, 0x1FCC, 1}, + {0x1FD0, 0x1FD3, 1}, + {0x1FD6, 0x1FDB, 1}, + {0x1FE0, 0x1FEC, 1}, + {0x1FF2, 0x1FF4, 1}, + {0x1FF6, 0x1FFC, 1}, + {0x2126, 0x2126, 1}, + {0x212A, 0x212B, 1}, + {0x212E, 0x212E, 1}, + {0x2180, 0x2182, 1}, + {0x3007, 0x3007, 1}, + {0x3021, 0x3029, 1}, + {0x3041, 0x3094, 1}, + {0x30A1, 0x30FA, 1}, + {0x3105, 0x312C, 1}, + {0x4E00, 0x9FA5, 1}, + {0xAC00, 0xD7A3, 1}, + }, +} + +var second = &unicode.RangeTable{ + R16: []unicode.Range16{ + {0x002D, 0x002E, 1}, + {0x0030, 0x0039, 1}, + {0x00B7, 0x00B7, 1}, + {0x02D0, 0x02D1, 1}, + {0x0300, 0x0345, 1}, + {0x0360, 0x0361, 1}, + {0x0387, 0x0387, 1}, + {0x0483, 0x0486, 1}, + {0x0591, 0x05A1, 1}, + {0x05A3, 0x05B9, 1}, + {0x05BB, 0x05BD, 1}, + {0x05BF, 0x05BF, 1}, + {0x05C1, 0x05C2, 1}, + {0x05C4, 0x0640, 0x0640 - 0x05C4}, + {0x064B, 0x0652, 1}, + {0x0660, 0x0669, 1}, + {0x0670, 0x0670, 1}, + {0x06D6, 0x06DC, 1}, + {0x06DD, 0x06DF, 1}, + {0x06E0, 0x06E4, 1}, + {0x06E7, 0x06E8, 1}, + {0x06EA, 0x06ED, 1}, + {0x06F0, 0x06F9, 1}, + {0x0901, 0x0903, 1}, + {0x093C, 0x093C, 1}, + {0x093E, 0x094C, 1}, + {0x094D, 0x094D, 1}, + {0x0951, 0x0954, 1}, + {0x0962, 0x0963, 1}, + {0x0966, 0x096F, 1}, + {0x0981, 0x0983, 1}, + {0x09BC, 0x09BC, 1}, + {0x09BE, 0x09BF, 1}, + {0x09C0, 0x09C4, 1}, + {0x09C7, 0x09C8, 1}, + {0x09CB, 0x09CD, 1}, + {0x09D7, 0x09D7, 1}, + {0x09E2, 0x09E3, 1}, + {0x09E6, 0x09EF, 1}, + {0x0A02, 0x0A3C, 0x3A}, + {0x0A3E, 0x0A3F, 1}, + {0x0A40, 0x0A42, 1}, + {0x0A47, 0x0A48, 1}, + {0x0A4B, 0x0A4D, 1}, + {0x0A66, 0x0A6F, 1}, + {0x0A70, 0x0A71, 1}, + {0x0A81, 0x0A83, 1}, + {0x0ABC, 0x0ABC, 1}, + {0x0ABE, 0x0AC5, 1}, + {0x0AC7, 0x0AC9, 1}, + {0x0ACB, 0x0ACD, 1}, + {0x0AE6, 0x0AEF, 1}, + {0x0B01, 0x0B03, 1}, + {0x0B3C, 0x0B3C, 1}, + {0x0B3E, 0x0B43, 1}, + {0x0B47, 0x0B48, 1}, + {0x0B4B, 0x0B4D, 1}, + {0x0B56, 0x0B57, 1}, + {0x0B66, 0x0B6F, 1}, + {0x0B82, 0x0B83, 1}, + {0x0BBE, 0x0BC2, 1}, + {0x0BC6, 0x0BC8, 1}, + {0x0BCA, 0x0BCD, 1}, + {0x0BD7, 0x0BD7, 1}, + {0x0BE7, 0x0BEF, 1}, + {0x0C01, 0x0C03, 1}, + {0x0C3E, 0x0C44, 1}, + {0x0C46, 0x0C48, 1}, + {0x0C4A, 0x0C4D, 1}, + {0x0C55, 0x0C56, 1}, + {0x0C66, 0x0C6F, 1}, + {0x0C82, 0x0C83, 1}, + {0x0CBE, 0x0CC4, 1}, + {0x0CC6, 0x0CC8, 1}, + {0x0CCA, 0x0CCD, 1}, + {0x0CD5, 0x0CD6, 1}, + {0x0CE6, 0x0CEF, 1}, + {0x0D02, 0x0D03, 1}, + {0x0D3E, 0x0D43, 1}, + {0x0D46, 0x0D48, 1}, + {0x0D4A, 0x0D4D, 1}, + {0x0D57, 0x0D57, 1}, + {0x0D66, 0x0D6F, 1}, + {0x0E31, 0x0E31, 1}, + {0x0E34, 0x0E3A, 1}, + {0x0E46, 0x0E46, 1}, + {0x0E47, 0x0E4E, 1}, + {0x0E50, 0x0E59, 1}, + {0x0EB1, 0x0EB1, 1}, + {0x0EB4, 0x0EB9, 1}, + {0x0EBB, 0x0EBC, 1}, + {0x0EC6, 0x0EC6, 1}, + {0x0EC8, 0x0ECD, 1}, + {0x0ED0, 0x0ED9, 1}, + {0x0F18, 0x0F19, 1}, + {0x0F20, 0x0F29, 1}, + {0x0F35, 0x0F39, 2}, + {0x0F3E, 0x0F3F, 1}, + {0x0F71, 0x0F84, 1}, + {0x0F86, 0x0F8B, 1}, + {0x0F90, 0x0F95, 1}, + {0x0F97, 0x0F97, 1}, + {0x0F99, 0x0FAD, 1}, + {0x0FB1, 0x0FB7, 1}, + {0x0FB9, 0x0FB9, 1}, + {0x20D0, 0x20DC, 1}, + {0x20E1, 0x3005, 0x3005 - 0x20E1}, + {0x302A, 0x302F, 1}, + {0x3031, 0x3035, 1}, + {0x3099, 0x309A, 1}, + {0x309D, 0x309E, 1}, + {0x30FC, 0x30FE, 1}, + }, +} + +// HTMLEntity is an entity map containing translations for the +// standard HTML entity characters. +// +// See the [Decoder.Strict] and [Decoder.Entity] fields' documentation. +var HTMLEntity map[string]string = htmlEntity + +var htmlEntity = map[string]string{ + /* + hget http://www.w3.org/TR/html4/sgml/entities.html | + ssam ' + ,y /\>/ x/\<(.|\n)+/ s/\n/ /g + ,x v/^\<!ENTITY/d + ,s/\<!ENTITY ([^ ]+) .*U\+([0-9A-F][0-9A-F][0-9A-F][0-9A-F]) .+/ "\1": "\\u\2",/g + ' + */ + "nbsp": "\u00A0", + "iexcl": "\u00A1", + "cent": "\u00A2", + "pound": "\u00A3", + "curren": "\u00A4", + "yen": "\u00A5", + "brvbar": "\u00A6", + "sect": "\u00A7", + "uml": "\u00A8", + "copy": "\u00A9", + "ordf": "\u00AA", + "laquo": "\u00AB", + "not": "\u00AC", + "shy": "\u00AD", + "reg": "\u00AE", + "macr": "\u00AF", + "deg": "\u00B0", + "plusmn": "\u00B1", + "sup2": "\u00B2", + "sup3": "\u00B3", + "acute": "\u00B4", + "micro": "\u00B5", + "para": "\u00B6", + "middot": "\u00B7", + "cedil": "\u00B8", + "sup1": "\u00B9", + "ordm": "\u00BA", + "raquo": "\u00BB", + "frac14": "\u00BC", + "frac12": "\u00BD", + "frac34": "\u00BE", + "iquest": "\u00BF", + "Agrave": "\u00C0", + "Aacute": "\u00C1", + "Acirc": "\u00C2", + "Atilde": "\u00C3", + "Auml": "\u00C4", + "Aring": "\u00C5", + "AElig": "\u00C6", + "Ccedil": "\u00C7", + "Egrave": "\u00C8", + "Eacute": "\u00C9", + "Ecirc": "\u00CA", + "Euml": "\u00CB", + "Igrave": "\u00CC", + "Iacute": "\u00CD", + "Icirc": "\u00CE", + "Iuml": "\u00CF", + "ETH": "\u00D0", + "Ntilde": "\u00D1", + "Ograve": "\u00D2", + "Oacute": "\u00D3", + "Ocirc": "\u00D4", + "Otilde": "\u00D5", + "Ouml": "\u00D6", + "times": "\u00D7", + "Oslash": "\u00D8", + "Ugrave": "\u00D9", + "Uacute": "\u00DA", + "Ucirc": "\u00DB", + "Uuml": "\u00DC", + "Yacute": "\u00DD", + "THORN": "\u00DE", + "szlig": "\u00DF", + "agrave": "\u00E0", + "aacute": "\u00E1", + "acirc": "\u00E2", + "atilde": "\u00E3", + "auml": "\u00E4", + "aring": "\u00E5", + "aelig": "\u00E6", + "ccedil": "\u00E7", + "egrave": "\u00E8", + "eacute": "\u00E9", + "ecirc": "\u00EA", + "euml": "\u00EB", + "igrave": "\u00EC", + "iacute": "\u00ED", + "icirc": "\u00EE", + "iuml": "\u00EF", + "eth": "\u00F0", + "ntilde": "\u00F1", + "ograve": "\u00F2", + "oacute": "\u00F3", + "ocirc": "\u00F4", + "otilde": "\u00F5", + "ouml": "\u00F6", + "divide": "\u00F7", + "oslash": "\u00F8", + "ugrave": "\u00F9", + "uacute": "\u00FA", + "ucirc": "\u00FB", + "uuml": "\u00FC", + "yacute": "\u00FD", + "thorn": "\u00FE", + "yuml": "\u00FF", + "fnof": "\u0192", + "Alpha": "\u0391", + "Beta": "\u0392", + "Gamma": "\u0393", + "Delta": "\u0394", + "Epsilon": "\u0395", + "Zeta": "\u0396", + "Eta": "\u0397", + "Theta": "\u0398", + "Iota": "\u0399", + "Kappa": "\u039A", + "Lambda": "\u039B", + "Mu": "\u039C", + "Nu": "\u039D", + "Xi": "\u039E", + "Omicron": "\u039F", + "Pi": "\u03A0", + "Rho": "\u03A1", + "Sigma": "\u03A3", + "Tau": "\u03A4", + "Upsilon": "\u03A5", + "Phi": "\u03A6", + "Chi": "\u03A7", + "Psi": "\u03A8", + "Omega": "\u03A9", + "alpha": "\u03B1", + "beta": "\u03B2", + "gamma": "\u03B3", + "delta": "\u03B4", + "epsilon": "\u03B5", + "zeta": "\u03B6", + "eta": "\u03B7", + "theta": "\u03B8", + "iota": "\u03B9", + "kappa": "\u03BA", + "lambda": "\u03BB", + "mu": "\u03BC", + "nu": "\u03BD", + "xi": "\u03BE", + "omicron": "\u03BF", + "pi": "\u03C0", + "rho": "\u03C1", + "sigmaf": "\u03C2", + "sigma": "\u03C3", + "tau": "\u03C4", + "upsilon": "\u03C5", + "phi": "\u03C6", + "chi": "\u03C7", + "psi": "\u03C8", + "omega": "\u03C9", + "thetasym": "\u03D1", + "upsih": "\u03D2", + "piv": "\u03D6", + "bull": "\u2022", + "hellip": "\u2026", + "prime": "\u2032", + "Prime": "\u2033", + "oline": "\u203E", + "frasl": "\u2044", + "weierp": "\u2118", + "image": "\u2111", + "real": "\u211C", + "trade": "\u2122", + "alefsym": "\u2135", + "larr": "\u2190", + "uarr": "\u2191", + "rarr": "\u2192", + "darr": "\u2193", + "harr": "\u2194", + "crarr": "\u21B5", + "lArr": "\u21D0", + "uArr": "\u21D1", + "rArr": "\u21D2", + "dArr": "\u21D3", + "hArr": "\u21D4", + "forall": "\u2200", + "part": "\u2202", + "exist": "\u2203", + "empty": "\u2205", + "nabla": "\u2207", + "isin": "\u2208", + "notin": "\u2209", + "ni": "\u220B", + "prod": "\u220F", + "sum": "\u2211", + "minus": "\u2212", + "lowast": "\u2217", + "radic": "\u221A", + "prop": "\u221D", + "infin": "\u221E", + "ang": "\u2220", + "and": "\u2227", + "or": "\u2228", + "cap": "\u2229", + "cup": "\u222A", + "int": "\u222B", + "there4": "\u2234", + "sim": "\u223C", + "cong": "\u2245", + "asymp": "\u2248", + "ne": "\u2260", + "equiv": "\u2261", + "le": "\u2264", + "ge": "\u2265", + "sub": "\u2282", + "sup": "\u2283", + "nsub": "\u2284", + "sube": "\u2286", + "supe": "\u2287", + "oplus": "\u2295", + "otimes": "\u2297", + "perp": "\u22A5", + "sdot": "\u22C5", + "lceil": "\u2308", + "rceil": "\u2309", + "lfloor": "\u230A", + "rfloor": "\u230B", + "lang": "\u2329", + "rang": "\u232A", + "loz": "\u25CA", + "spades": "\u2660", + "clubs": "\u2663", + "hearts": "\u2665", + "diams": "\u2666", + "quot": "\u0022", + "amp": "\u0026", + "lt": "\u003C", + "gt": "\u003E", + "OElig": "\u0152", + "oelig": "\u0153", + "Scaron": "\u0160", + "scaron": "\u0161", + "Yuml": "\u0178", + "circ": "\u02C6", + "tilde": "\u02DC", + "ensp": "\u2002", + "emsp": "\u2003", + "thinsp": "\u2009", + "zwnj": "\u200C", + "zwj": "\u200D", + "lrm": "\u200E", + "rlm": "\u200F", + "ndash": "\u2013", + "mdash": "\u2014", + "lsquo": "\u2018", + "rsquo": "\u2019", + "sbquo": "\u201A", + "ldquo": "\u201C", + "rdquo": "\u201D", + "bdquo": "\u201E", + "dagger": "\u2020", + "Dagger": "\u2021", + "permil": "\u2030", + "lsaquo": "\u2039", + "rsaquo": "\u203A", + "euro": "\u20AC", +} + +// HTMLAutoClose is the set of HTML elements that +// should be considered to close automatically. +// +// See the [Decoder.Strict] and [Decoder.Entity] fields' documentation. +var HTMLAutoClose []string = htmlAutoClose + +var htmlAutoClose = []string{ + /* + hget http://www.w3.org/TR/html4/loose.dtd | + 9 sed -n 's/<!ELEMENT ([^ ]*) +- O EMPTY.+/ "\1",/p' | tr A-Z a-z + */ + "basefont", + "br", + "area", + "link", + "img", + "param", + "hr", + "input", + "col", + "frame", + "isindex", + "base", + "meta", +} + +var ( + escQuot = []byte(""") // shorter than """ + escApos = []byte("'") // shorter than "'" + escAmp = []byte("&") + escLT = []byte("<") + escGT = []byte(">") + escTab = []byte("	") + escNL = []byte("
") + escCR = []byte("
") + escFFFD = []byte("\uFFFD") // Unicode replacement character +) + +// EscapeText writes to w the properly escaped XML equivalent +// of the plain text data s. +func EscapeText(w io.Writer, s []byte) error { + return escapeText(w, s, true) +} + +// escapeText writes to w the properly escaped XML equivalent +// of the plain text data s. If escapeNewline is true, newline +// characters will be escaped. +func escapeText(w io.Writer, s []byte, escapeNewline bool) error { + var esc []byte + last := 0 + for i := 0; i < len(s); { + r, width := utf8.DecodeRune(s[i:]) + i += width + switch r { + case '"': + esc = escQuot + case '\'': + esc = escApos + case '&': + esc = escAmp + case '<': + esc = escLT + case '>': + esc = escGT + case '\t': + esc = escTab + case '\n': + if !escapeNewline { + continue + } + esc = escNL + case '\r': + esc = escCR + default: + if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) { + esc = escFFFD + break + } + continue + } + if _, err := w.Write(s[last : i-width]); err != nil { + return err + } + if _, err := w.Write(esc); err != nil { + return err + } + last = i + } + _, err := w.Write(s[last:]) + return err +} + +// EscapeString writes to p the properly escaped XML equivalent +// of the plain text data s. +func (p *printer) EscapeString(s string) { + var esc []byte + last := 0 + for i := 0; i < len(s); { + r, width := utf8.DecodeRuneInString(s[i:]) + i += width + switch r { + case '"': + esc = escQuot + case '\'': + esc = escApos + case '&': + esc = escAmp + case '<': + esc = escLT + case '>': + esc = escGT + case '\t': + esc = escTab + case '\n': + esc = escNL + case '\r': + esc = escCR + default: + if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) { + esc = escFFFD + break + } + continue + } + p.WriteString(s[last : i-width]) + p.Write(esc) + last = i + } + p.WriteString(s[last:]) +} + +// Escape is like [EscapeText] but omits the error return value. +// It is provided for backwards compatibility with Go 1.0. +// Code targeting Go 1.1 or later should use [EscapeText]. +func Escape(w io.Writer, s []byte) { + EscapeText(w, s) +} + +var ( + cdataStart = []byte("<![CDATA[") + cdataEnd = []byte("]]>") + cdataEscape = []byte("]]]]><![CDATA[>") +) + +// emitCDATA writes to w the CDATA-wrapped plain text data s. +// It escapes CDATA directives nested in s. +func emitCDATA(w io.Writer, s []byte) error { + if len(s) == 0 { + return nil + } + if _, err := w.Write(cdataStart); err != nil { + return err + } + + for { + before, after, ok := bytes.Cut(s, cdataEnd) + if !ok { + break + } + // Found a nested CDATA directive end. + if _, err := w.Write(before); err != nil { + return err + } + if _, err := w.Write(cdataEscape); err != nil { + return err + } + s = after + } + + if _, err := w.Write(s); err != nil { + return err + } + + _, err := w.Write(cdataEnd) + return err +} + +// procInst parses the `param="..."` or `param='...'` +// value out of the provided string, returning "" if not found. +func procInst(param, s string) string { + // TODO: this parsing is somewhat lame and not exact. + // It works for all actual cases, though. + param = param + "=" + _, v, _ := strings.Cut(s, param) + if v == "" { + return "" + } + if v[0] != '\'' && v[0] != '"' { + return "" + } + unquote, _, ok := strings.Cut(v[1:], v[:1]) + if !ok { + return "" + } + return unquote +} diff --git a/src/encoding/xml/xml_test.go b/src/encoding/xml/xml_test.go new file mode 100644 index 0000000..42f5f5f --- /dev/null +++ b/src/encoding/xml/xml_test.go @@ -0,0 +1,1479 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package xml + +import ( + "bytes" + "fmt" + "io" + "reflect" + "strings" + "testing" + "unicode/utf8" +) + +type toks struct { + earlyEOF bool + t []Token +} + +func (t *toks) Token() (Token, error) { + if len(t.t) == 0 { + return nil, io.EOF + } + var tok Token + tok, t.t = t.t[0], t.t[1:] + if t.earlyEOF && len(t.t) == 0 { + return tok, io.EOF + } + return tok, nil +} + +func TestDecodeEOF(t *testing.T) { + start := StartElement{Name: Name{Local: "test"}} + tests := []struct { + name string + tokens []Token + ok bool + }{ + { + name: "OK", + tokens: []Token{ + start, + start.End(), + }, + ok: true, + }, + { + name: "Malformed", + tokens: []Token{ + start, + StartElement{Name: Name{Local: "bad"}}, + start.End(), + }, + ok: false, + }, + } + for _, tc := range tests { + for _, eof := range []bool{true, false} { + name := fmt.Sprintf("%s/earlyEOF=%v", tc.name, eof) + t.Run(name, func(t *testing.T) { + d := NewTokenDecoder(&toks{ + earlyEOF: eof, + t: tc.tokens, + }) + err := d.Decode(&struct { + XMLName Name `xml:"test"` + }{}) + if tc.ok && err != nil { + t.Fatalf("d.Decode: expected nil error, got %v", err) + } + if _, ok := err.(*SyntaxError); !tc.ok && !ok { + t.Errorf("d.Decode: expected syntax error, got %v", err) + } + }) + } + } +} + +type toksNil struct { + returnEOF bool + t []Token +} + +func (t *toksNil) Token() (Token, error) { + if len(t.t) == 0 { + if !t.returnEOF { + // Return nil, nil before returning an EOF. It's legal, but + // discouraged. + t.returnEOF = true + return nil, nil + } + return nil, io.EOF + } + var tok Token + tok, t.t = t.t[0], t.t[1:] + return tok, nil +} + +func TestDecodeNilToken(t *testing.T) { + for _, strict := range []bool{true, false} { + name := fmt.Sprintf("Strict=%v", strict) + t.Run(name, func(t *testing.T) { + start := StartElement{Name: Name{Local: "test"}} + bad := StartElement{Name: Name{Local: "bad"}} + d := NewTokenDecoder(&toksNil{ + // Malformed + t: []Token{start, bad, start.End()}, + }) + d.Strict = strict + err := d.Decode(&struct { + XMLName Name `xml:"test"` + }{}) + if _, ok := err.(*SyntaxError); !ok { + t.Errorf("d.Decode: expected syntax error, got %v", err) + } + }) + } +} + +const testInput = ` +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` + + "\r\n\t" + ` > + <hello lang="en">World <>'" 白鵬翔</hello> + <query>&何; &is-it;</query> + <goodbye /> + <outer foo:attr="value" xmlns:tag="ns4"> + <inner/> + </outer> + <tag:name> + <![CDATA[Some text here.]]> + </tag:name> +</body><!-- missing final newline -->` + +var testEntity = map[string]string{"何": "What", "is-it": "is it?"} + +var rawTokens = []Token{ + CharData("\n"), + ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, + CharData("\n"), + Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`), + CharData("\n"), + StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}}, + CharData("\n "), + StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}, + CharData("World <>'\" 白鵬翔"), + EndElement{Name{"", "hello"}}, + CharData("\n "), + StartElement{Name{"", "query"}, []Attr{}}, + CharData("What is it?"), + EndElement{Name{"", "query"}}, + CharData("\n "), + StartElement{Name{"", "goodbye"}, []Attr{}}, + EndElement{Name{"", "goodbye"}}, + CharData("\n "), + StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}}, + CharData("\n "), + StartElement{Name{"", "inner"}, []Attr{}}, + EndElement{Name{"", "inner"}}, + CharData("\n "), + EndElement{Name{"", "outer"}}, + CharData("\n "), + StartElement{Name{"tag", "name"}, []Attr{}}, + CharData("\n "), + CharData("Some text here."), + CharData("\n "), + EndElement{Name{"tag", "name"}}, + CharData("\n"), + EndElement{Name{"", "body"}}, + Comment(" missing final newline "), +} + +var cookedTokens = []Token{ + CharData("\n"), + ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, + CharData("\n"), + Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`), + CharData("\n"), + StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}}, + CharData("\n "), + StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}, + CharData("World <>'\" 白鵬翔"), + EndElement{Name{"ns2", "hello"}}, + CharData("\n "), + StartElement{Name{"ns2", "query"}, []Attr{}}, + CharData("What is it?"), + EndElement{Name{"ns2", "query"}}, + CharData("\n "), + StartElement{Name{"ns2", "goodbye"}, []Attr{}}, + EndElement{Name{"ns2", "goodbye"}}, + CharData("\n "), + StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}}, + CharData("\n "), + StartElement{Name{"ns2", "inner"}, []Attr{}}, + EndElement{Name{"ns2", "inner"}}, + CharData("\n "), + EndElement{Name{"ns2", "outer"}}, + CharData("\n "), + StartElement{Name{"ns3", "name"}, []Attr{}}, + CharData("\n "), + CharData("Some text here."), + CharData("\n "), + EndElement{Name{"ns3", "name"}}, + CharData("\n"), + EndElement{Name{"ns2", "body"}}, + Comment(" missing final newline "), +} + +const testInputAltEncoding = ` +<?xml version="1.0" encoding="x-testing-uppercase"?> +<TAG>VALUE</TAG>` + +var rawTokensAltEncoding = []Token{ + CharData("\n"), + ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("value"), + EndElement{Name{"", "tag"}}, +} + +var xmlInput = []string{ + // unexpected EOF cases + "<", + "<t", + "<t ", + "<t/", + "<!", + "<!-", + "<!--", + "<!--c-", + "<!--c--", + "<!d", + "<t></", + "<t></t", + "<?", + "<?p", + "<t a", + "<t a=", + "<t a='", + "<t a=''", + "<t/><![", + "<t/><![C", + "<t/><![CDATA[d", + "<t/><![CDATA[d]", + "<t/><![CDATA[d]]", + + // other Syntax errors + "<>", + "<t/a", + "<0 />", + "<?0 >", + // "<!0 >", // let the Token() caller handle + "</0>", + "<t 0=''>", + "<t a='&'>", + "<t a='<'>", + "<t> c;</t>", + "<t a>", + "<t a=>", + "<t a=v>", + // "<![CDATA[d]]>", // let the Token() caller handle + "<t></e>", + "<t></>", + "<t></t!", + "<t>cdata]]></t>", +} + +func TestRawToken(t *testing.T) { + d := NewDecoder(strings.NewReader(testInput)) + d.Entity = testEntity + testRawToken(t, d, testInput, rawTokens) +} + +const nonStrictInput = ` +<tag>non&entity</tag> +<tag>&unknown;entity</tag> +<tag>{</tag> +<tag>&#zzz;</tag> +<tag>&なまえ3;</tag> +<tag><-gt;</tag> +<tag>&;</tag> +<tag>&0a;</tag> +` + +var nonStrictTokens = []Token{ + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("non&entity"), + EndElement{Name{"", "tag"}}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("&unknown;entity"), + EndElement{Name{"", "tag"}}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("{"), + EndElement{Name{"", "tag"}}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("&#zzz;"), + EndElement{Name{"", "tag"}}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("&なまえ3;"), + EndElement{Name{"", "tag"}}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("<-gt;"), + EndElement{Name{"", "tag"}}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("&;"), + EndElement{Name{"", "tag"}}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("&0a;"), + EndElement{Name{"", "tag"}}, + CharData("\n"), +} + +func TestNonStrictRawToken(t *testing.T) { + d := NewDecoder(strings.NewReader(nonStrictInput)) + d.Strict = false + testRawToken(t, d, nonStrictInput, nonStrictTokens) +} + +type downCaser struct { + t *testing.T + r io.ByteReader +} + +func (d *downCaser) ReadByte() (c byte, err error) { + c, err = d.r.ReadByte() + if c >= 'A' && c <= 'Z' { + c += 'a' - 'A' + } + return +} + +func (d *downCaser) Read(p []byte) (int, error) { + d.t.Fatalf("unexpected Read call on downCaser reader") + panic("unreachable") +} + +func TestRawTokenAltEncoding(t *testing.T) { + d := NewDecoder(strings.NewReader(testInputAltEncoding)) + d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) { + if charset != "x-testing-uppercase" { + t.Fatalf("unexpected charset %q", charset) + } + return &downCaser{t, input.(io.ByteReader)}, nil + } + testRawToken(t, d, testInputAltEncoding, rawTokensAltEncoding) +} + +func TestRawTokenAltEncodingNoConverter(t *testing.T) { + d := NewDecoder(strings.NewReader(testInputAltEncoding)) + token, err := d.RawToken() + if token == nil { + t.Fatalf("expected a token on first RawToken call") + } + if err != nil { + t.Fatal(err) + } + token, err = d.RawToken() + if token != nil { + t.Errorf("expected a nil token; got %#v", token) + } + if err == nil { + t.Fatalf("expected an error on second RawToken call") + } + const encoding = "x-testing-uppercase" + if !strings.Contains(err.Error(), encoding) { + t.Errorf("expected error to contain %q; got error: %v", + encoding, err) + } +} + +func testRawToken(t *testing.T, d *Decoder, raw string, rawTokens []Token) { + lastEnd := int64(0) + for i, want := range rawTokens { + start := d.InputOffset() + have, err := d.RawToken() + end := d.InputOffset() + if err != nil { + t.Fatalf("token %d: unexpected error: %s", i, err) + } + if !reflect.DeepEqual(have, want) { + var shave, swant string + if _, ok := have.(CharData); ok { + shave = fmt.Sprintf("CharData(%q)", have) + } else { + shave = fmt.Sprintf("%#v", have) + } + if _, ok := want.(CharData); ok { + swant = fmt.Sprintf("CharData(%q)", want) + } else { + swant = fmt.Sprintf("%#v", want) + } + t.Errorf("token %d = %s, want %s", i, shave, swant) + } + + // Check that InputOffset returned actual token. + switch { + case start < lastEnd: + t.Errorf("token %d: position [%d,%d) for %T is before previous token", i, start, end, have) + case start >= end: + // Special case: EndElement can be synthesized. + if start == end && end == lastEnd { + break + } + t.Errorf("token %d: position [%d,%d) for %T is empty", i, start, end, have) + case end > int64(len(raw)): + t.Errorf("token %d: position [%d,%d) for %T extends beyond input", i, start, end, have) + default: + text := raw[start:end] + if strings.ContainsAny(text, "<>") && (!strings.HasPrefix(text, "<") || !strings.HasSuffix(text, ">")) { + t.Errorf("token %d: misaligned raw token %#q for %T", i, text, have) + } + } + lastEnd = end + } +} + +// Ensure that directives (specifically !DOCTYPE) include the complete +// text of any nested directives, noting that < and > do not change +// nesting depth if they are in single or double quotes. + +var nestedDirectivesInput = ` +<!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]> +<!DOCTYPE [<!ENTITY xlt ">">]> +<!DOCTYPE [<!ENTITY xlt "<">]> +<!DOCTYPE [<!ENTITY xlt '>'>]> +<!DOCTYPE [<!ENTITY xlt '<'>]> +<!DOCTYPE [<!ENTITY xlt '">'>]> +<!DOCTYPE [<!ENTITY xlt "'<">]> +` + +var nestedDirectivesTokens = []Token{ + CharData("\n"), + Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`), + CharData("\n"), + Directive(`DOCTYPE [<!ENTITY xlt ">">]`), + CharData("\n"), + Directive(`DOCTYPE [<!ENTITY xlt "<">]`), + CharData("\n"), + Directive(`DOCTYPE [<!ENTITY xlt '>'>]`), + CharData("\n"), + Directive(`DOCTYPE [<!ENTITY xlt '<'>]`), + CharData("\n"), + Directive(`DOCTYPE [<!ENTITY xlt '">'>]`), + CharData("\n"), + Directive(`DOCTYPE [<!ENTITY xlt "'<">]`), + CharData("\n"), +} + +func TestNestedDirectives(t *testing.T) { + d := NewDecoder(strings.NewReader(nestedDirectivesInput)) + + for i, want := range nestedDirectivesTokens { + have, err := d.Token() + if err != nil { + t.Fatalf("token %d: unexpected error: %s", i, err) + } + if !reflect.DeepEqual(have, want) { + t.Errorf("token %d = %#v want %#v", i, have, want) + } + } +} + +func TestToken(t *testing.T) { + d := NewDecoder(strings.NewReader(testInput)) + d.Entity = testEntity + + for i, want := range cookedTokens { + have, err := d.Token() + if err != nil { + t.Fatalf("token %d: unexpected error: %s", i, err) + } + if !reflect.DeepEqual(have, want) { + t.Errorf("token %d = %#v want %#v", i, have, want) + } + } +} + +func TestSyntax(t *testing.T) { + for i := range xmlInput { + d := NewDecoder(strings.NewReader(xmlInput[i])) + var err error + for _, err = d.Token(); err == nil; _, err = d.Token() { + } + if _, ok := err.(*SyntaxError); !ok { + t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i]) + } + } +} + +func TestInputLinePos(t *testing.T) { + testInput := `<root> +<?pi + ?> <elt +att += +"val"> +<![CDATA[ +]]><!-- + +--></elt> +</root>` + linePos := [][]int{ + {1, 7}, + {2, 1}, + {3, 4}, + {3, 6}, + {6, 7}, + {7, 1}, + {8, 4}, + {10, 4}, + {10, 10}, + {11, 1}, + {11, 8}, + } + dec := NewDecoder(strings.NewReader(testInput)) + for _, want := range linePos { + if _, err := dec.Token(); err != nil { + t.Errorf("Unexpected error: %v", err) + continue + } + + gotLine, gotCol := dec.InputPos() + if gotLine != want[0] || gotCol != want[1] { + t.Errorf("dec.InputPos() = %d,%d, want %d,%d", gotLine, gotCol, want[0], want[1]) + } + } +} + +type allScalars struct { + True1 bool + True2 bool + False1 bool + False2 bool + Int int + Int8 int8 + Int16 int16 + Int32 int32 + Int64 int64 + Uint int + Uint8 uint8 + Uint16 uint16 + Uint32 uint32 + Uint64 uint64 + Uintptr uintptr + Float32 float32 + Float64 float64 + String string + PtrString *string +} + +var all = allScalars{ + True1: true, + True2: true, + False1: false, + False2: false, + Int: 1, + Int8: -2, + Int16: 3, + Int32: -4, + Int64: 5, + Uint: 6, + Uint8: 7, + Uint16: 8, + Uint32: 9, + Uint64: 10, + Uintptr: 11, + Float32: 13.0, + Float64: 14.0, + String: "15", + PtrString: &sixteen, +} + +var sixteen = "16" + +const testScalarsInput = `<allscalars> + <True1>true</True1> + <True2>1</True2> + <False1>false</False1> + <False2>0</False2> + <Int>1</Int> + <Int8>-2</Int8> + <Int16>3</Int16> + <Int32>-4</Int32> + <Int64>5</Int64> + <Uint>6</Uint> + <Uint8>7</Uint8> + <Uint16>8</Uint16> + <Uint32>9</Uint32> + <Uint64>10</Uint64> + <Uintptr>11</Uintptr> + <Float>12.0</Float> + <Float32>13.0</Float32> + <Float64>14.0</Float64> + <String>15</String> + <PtrString>16</PtrString> +</allscalars>` + +func TestAllScalars(t *testing.T) { + var a allScalars + err := Unmarshal([]byte(testScalarsInput), &a) + + if err != nil { + t.Fatal(err) + } + if !reflect.DeepEqual(a, all) { + t.Errorf("have %+v want %+v", a, all) + } +} + +type item struct { + FieldA string +} + +func TestIssue569(t *testing.T) { + data := `<item><FieldA>abcd</FieldA></item>` + var i item + err := Unmarshal([]byte(data), &i) + + if err != nil || i.FieldA != "abcd" { + t.Fatal("Expecting abcd") + } +} + +func TestUnquotedAttrs(t *testing.T) { + data := "<tag attr=azAZ09:-_\t>" + d := NewDecoder(strings.NewReader(data)) + d.Strict = false + token, err := d.Token() + if _, ok := err.(*SyntaxError); ok { + t.Errorf("Unexpected error: %v", err) + } + if token.(StartElement).Name.Local != "tag" { + t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local) + } + attr := token.(StartElement).Attr[0] + if attr.Value != "azAZ09:-_" { + t.Errorf("Unexpected attribute value: %v", attr.Value) + } + if attr.Name.Local != "attr" { + t.Errorf("Unexpected attribute name: %v", attr.Name.Local) + } +} + +func TestValuelessAttrs(t *testing.T) { + tests := [][3]string{ + {"<p nowrap>", "p", "nowrap"}, + {"<p nowrap >", "p", "nowrap"}, + {"<input checked/>", "input", "checked"}, + {"<input checked />", "input", "checked"}, + } + for _, test := range tests { + d := NewDecoder(strings.NewReader(test[0])) + d.Strict = false + token, err := d.Token() + if _, ok := err.(*SyntaxError); ok { + t.Errorf("Unexpected error: %v", err) + } + if token.(StartElement).Name.Local != test[1] { + t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local) + } + attr := token.(StartElement).Attr[0] + if attr.Value != test[2] { + t.Errorf("Unexpected attribute value: %v", attr.Value) + } + if attr.Name.Local != test[2] { + t.Errorf("Unexpected attribute name: %v", attr.Name.Local) + } + } +} + +func TestCopyTokenCharData(t *testing.T) { + data := []byte("same data") + var tok1 Token = CharData(data) + tok2 := CopyToken(tok1) + if !reflect.DeepEqual(tok1, tok2) { + t.Error("CopyToken(CharData) != CharData") + } + data[1] = 'o' + if reflect.DeepEqual(tok1, tok2) { + t.Error("CopyToken(CharData) uses same buffer.") + } +} + +func TestCopyTokenStartElement(t *testing.T) { + elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}} + var tok1 Token = elt + tok2 := CopyToken(tok1) + if tok1.(StartElement).Attr[0].Value != "en" { + t.Error("CopyToken overwrote Attr[0]") + } + if !reflect.DeepEqual(tok1, tok2) { + t.Error("CopyToken(StartElement) != StartElement") + } + tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"} + if reflect.DeepEqual(tok1, tok2) { + t.Error("CopyToken(CharData) uses same buffer.") + } +} + +func TestCopyTokenComment(t *testing.T) { + data := []byte("<!-- some comment -->") + var tok1 Token = Comment(data) + tok2 := CopyToken(tok1) + if !reflect.DeepEqual(tok1, tok2) { + t.Error("CopyToken(Comment) != Comment") + } + data[1] = 'o' + if reflect.DeepEqual(tok1, tok2) { + t.Error("CopyToken(Comment) uses same buffer.") + } +} + +func TestSyntaxErrorLineNum(t *testing.T) { + testInput := "<P>Foo<P>\n\n<P>Bar</>\n" + d := NewDecoder(strings.NewReader(testInput)) + var err error + for _, err = d.Token(); err == nil; _, err = d.Token() { + } + synerr, ok := err.(*SyntaxError) + if !ok { + t.Error("Expected SyntaxError.") + } + if synerr.Line != 3 { + t.Error("SyntaxError didn't have correct line number.") + } +} + +func TestTrailingRawToken(t *testing.T) { + input := `<FOO></FOO> ` + d := NewDecoder(strings.NewReader(input)) + var err error + for _, err = d.RawToken(); err == nil; _, err = d.RawToken() { + } + if err != io.EOF { + t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err) + } +} + +func TestTrailingToken(t *testing.T) { + input := `<FOO></FOO> ` + d := NewDecoder(strings.NewReader(input)) + var err error + for _, err = d.Token(); err == nil; _, err = d.Token() { + } + if err != io.EOF { + t.Fatalf("d.Token() = _, %v, want _, io.EOF", err) + } +} + +func TestEntityInsideCDATA(t *testing.T) { + input := `<test><![CDATA[ &val=foo ]]></test>` + d := NewDecoder(strings.NewReader(input)) + var err error + for _, err = d.Token(); err == nil; _, err = d.Token() { + } + if err != io.EOF { + t.Fatalf("d.Token() = _, %v, want _, io.EOF", err) + } +} + +var characterTests = []struct { + in string + err string +}{ + {"\x12<doc/>", "illegal character code U+0012"}, + {"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"}, + {"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"}, + {"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"}, + {"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"}, + {"<doc>&abc\x01;</doc>", "invalid character entity &abc (no semicolon)"}, + {"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"}, + {"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &\uFFFE;"}, + {"<doc>&hello;</doc>", "invalid character entity &hello;"}, +} + +func TestDisallowedCharacters(t *testing.T) { + + for i, tt := range characterTests { + d := NewDecoder(strings.NewReader(tt.in)) + var err error + + for err == nil { + _, err = d.Token() + } + synerr, ok := err.(*SyntaxError) + if !ok { + t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err) + } + if synerr.Msg != tt.err { + t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg) + } + } +} + +func TestIsInCharacterRange(t *testing.T) { + invalid := []rune{ + utf8.MaxRune + 1, + 0xD800, // surrogate min + 0xDFFF, // surrogate max + -1, + } + for _, r := range invalid { + if isInCharacterRange(r) { + t.Errorf("rune %U considered valid", r) + } + } +} + +var procInstTests = []struct { + input string + expect [2]string +}{ + {`version="1.0" encoding="utf-8"`, [2]string{"1.0", "utf-8"}}, + {`version="1.0" encoding='utf-8'`, [2]string{"1.0", "utf-8"}}, + {`version="1.0" encoding='utf-8' `, [2]string{"1.0", "utf-8"}}, + {`version="1.0" encoding=utf-8`, [2]string{"1.0", ""}}, + {`encoding="FOO" `, [2]string{"", "FOO"}}, +} + +func TestProcInstEncoding(t *testing.T) { + for _, test := range procInstTests { + if got := procInst("version", test.input); got != test.expect[0] { + t.Errorf("procInst(version, %q) = %q; want %q", test.input, got, test.expect[0]) + } + if got := procInst("encoding", test.input); got != test.expect[1] { + t.Errorf("procInst(encoding, %q) = %q; want %q", test.input, got, test.expect[1]) + } + } +} + +// Ensure that directives with comments include the complete +// text of any nested directives. + +var directivesWithCommentsInput = ` +<!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]> +<!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]> +<!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]> +` + +var directivesWithCommentsTokens = []Token{ + CharData("\n"), + Directive(`DOCTYPE [ <!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`), + CharData("\n"), + Directive(`DOCTYPE [<!ENTITY go "Golang"> ]`), + CharData("\n"), + Directive(`DOCTYPE <!-> <!> [<!ENTITY go "Golang"> ]`), + CharData("\n"), +} + +func TestDirectivesWithComments(t *testing.T) { + d := NewDecoder(strings.NewReader(directivesWithCommentsInput)) + + for i, want := range directivesWithCommentsTokens { + have, err := d.Token() + if err != nil { + t.Fatalf("token %d: unexpected error: %s", i, err) + } + if !reflect.DeepEqual(have, want) { + t.Errorf("token %d = %#v want %#v", i, have, want) + } + } +} + +// Writer whose Write method always returns an error. +type errWriter struct{} + +func (errWriter) Write(p []byte) (n int, err error) { return 0, fmt.Errorf("unwritable") } + +func TestEscapeTextIOErrors(t *testing.T) { + expectErr := "unwritable" + err := EscapeText(errWriter{}, []byte{'A'}) + + if err == nil || err.Error() != expectErr { + t.Errorf("have %v, want %v", err, expectErr) + } +} + +func TestEscapeTextInvalidChar(t *testing.T) { + input := []byte("A \x00 terminated string.") + expected := "A \uFFFD terminated string." + + buff := new(strings.Builder) + if err := EscapeText(buff, input); err != nil { + t.Fatalf("have %v, want nil", err) + } + text := buff.String() + + if text != expected { + t.Errorf("have %v, want %v", text, expected) + } +} + +func TestIssue5880(t *testing.T) { + type T []byte + data, err := Marshal(T{192, 168, 0, 1}) + if err != nil { + t.Errorf("Marshal error: %v", err) + } + if !utf8.Valid(data) { + t.Errorf("Marshal generated invalid UTF-8: %x", data) + } +} + +func TestIssue8535(t *testing.T) { + + type ExampleConflict struct { + XMLName Name `xml:"example"` + Link string `xml:"link"` + AtomLink string `xml:"http://www.w3.org/2005/Atom link"` // Same name in a different name space + } + testCase := `<example> + <title>Example</title> + <link>http://example.com/default</link> <!-- not assigned --> + <link>http://example.com/home</link> <!-- not assigned --> + <ns:link xmlns:ns="http://www.w3.org/2005/Atom">http://example.com/ns</ns:link> + </example>` + + var dest ExampleConflict + d := NewDecoder(strings.NewReader(testCase)) + if err := d.Decode(&dest); err != nil { + t.Fatal(err) + } +} + +func TestEncodeXMLNS(t *testing.T) { + testCases := []struct { + f func() ([]byte, error) + want string + ok bool + }{ + {encodeXMLNS1, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, true}, + {encodeXMLNS2, `<Test><body xmlns="http://example.com/ns">hello world</body></Test>`, true}, + {encodeXMLNS3, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, true}, + {encodeXMLNS4, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, false}, + } + + for i, tc := range testCases { + if b, err := tc.f(); err == nil { + if got, want := string(b), tc.want; got != want { + t.Errorf("%d: got %s, want %s \n", i, got, want) + } + } else { + t.Errorf("%d: marshal failed with %s", i, err) + } + } +} + +func encodeXMLNS1() ([]byte, error) { + + type T struct { + XMLName Name `xml:"Test"` + Ns string `xml:"xmlns,attr"` + Body string + } + + s := &T{Ns: "http://example.com/ns", Body: "hello world"} + return Marshal(s) +} + +func encodeXMLNS2() ([]byte, error) { + + type Test struct { + Body string `xml:"http://example.com/ns body"` + } + + s := &Test{Body: "hello world"} + return Marshal(s) +} + +func encodeXMLNS3() ([]byte, error) { + + type Test struct { + XMLName Name `xml:"http://example.com/ns Test"` + Body string + } + + //s := &Test{XMLName: Name{"http://example.com/ns",""}, Body: "hello world"} is unusable as the "-" is missing + // as documentation states + s := &Test{Body: "hello world"} + return Marshal(s) +} + +func encodeXMLNS4() ([]byte, error) { + + type Test struct { + Ns string `xml:"xmlns,attr"` + Body string + } + + s := &Test{Ns: "http://example.com/ns", Body: "hello world"} + return Marshal(s) +} + +func TestIssue11405(t *testing.T) { + testCases := []string{ + "<root>", + "<root><foo>", + "<root><foo></foo>", + } + for _, tc := range testCases { + d := NewDecoder(strings.NewReader(tc)) + var err error + for { + _, err = d.Token() + if err != nil { + break + } + } + if _, ok := err.(*SyntaxError); !ok { + t.Errorf("%s: Token: Got error %v, want SyntaxError", tc, err) + } + } +} + +func TestIssue12417(t *testing.T) { + testCases := []struct { + s string + ok bool + }{ + {`<?xml encoding="UtF-8" version="1.0"?><root/>`, true}, + {`<?xml encoding="UTF-8" version="1.0"?><root/>`, true}, + {`<?xml encoding="utf-8" version="1.0"?><root/>`, true}, + {`<?xml encoding="uuu-9" version="1.0"?><root/>`, false}, + } + for _, tc := range testCases { + d := NewDecoder(strings.NewReader(tc.s)) + var err error + for { + _, err = d.Token() + if err != nil { + if err == io.EOF { + err = nil + } + break + } + } + if err != nil && tc.ok { + t.Errorf("%q: Encoding charset: expected no error, got %s", tc.s, err) + continue + } + if err == nil && !tc.ok { + t.Errorf("%q: Encoding charset: expected error, got nil", tc.s) + } + } +} + +func TestIssue7113(t *testing.T) { + type C struct { + XMLName Name `xml:""` // Sets empty namespace + } + + type D struct { + XMLName Name `xml:"d"` + } + + type A struct { + XMLName Name `xml:""` + C C `xml:""` + D D + } + + var a A + structSpace := "b" + xmlTest := `<A xmlns="` + structSpace + `"><C xmlns=""></C><d></d></A>` + t.Log(xmlTest) + err := Unmarshal([]byte(xmlTest), &a) + if err != nil { + t.Fatal(err) + } + + if a.XMLName.Space != structSpace { + t.Errorf("overidding with empty namespace: unmarshalling, got %s, want %s\n", a.XMLName.Space, structSpace) + } + if len(a.C.XMLName.Space) != 0 { + t.Fatalf("overidding with empty namespace: unmarshalling, got %s, want empty\n", a.C.XMLName.Space) + } + + var b []byte + b, err = Marshal(&a) + if err != nil { + t.Fatal(err) + } + if len(a.C.XMLName.Space) != 0 { + t.Errorf("overidding with empty namespace: marshaling, got %s in C tag which should be empty\n", a.C.XMLName.Space) + } + if string(b) != xmlTest { + t.Fatalf("overidding with empty namespace: marshalling, got %s, want %s\n", b, xmlTest) + } + var c A + err = Unmarshal(b, &c) + if err != nil { + t.Fatalf("second Unmarshal failed: %s", err) + } + if c.XMLName.Space != "b" { + t.Errorf("overidding with empty namespace: after marshaling & unmarshaling, XML name space: got %s, want %s\n", a.XMLName.Space, structSpace) + } + if len(c.C.XMLName.Space) != 0 { + t.Errorf("overidding with empty namespace: after marshaling & unmarshaling, got %s, want empty\n", a.C.XMLName.Space) + } +} + +func TestIssue20396(t *testing.T) { + + var attrError = UnmarshalError("XML syntax error on line 1: expected attribute name in element") + + testCases := []struct { + s string + wantErr error + }{ + {`<a:te:st xmlns:a="abcd"/>`, // Issue 20396 + UnmarshalError("XML syntax error on line 1: expected element name after <")}, + {`<a:te=st xmlns:a="abcd"/>`, attrError}, + {`<a:te&st xmlns:a="abcd"/>`, attrError}, + {`<a:test xmlns:a="abcd"/>`, nil}, + {`<a:te:st xmlns:a="abcd">1</a:te:st>`, + UnmarshalError("XML syntax error on line 1: expected element name after <")}, + {`<a:te=st xmlns:a="abcd">1</a:te=st>`, attrError}, + {`<a:te&st xmlns:a="abcd">1</a:te&st>`, attrError}, + {`<a:test xmlns:a="abcd">1</a:test>`, nil}, + } + + var dest string + for _, tc := range testCases { + if got, want := Unmarshal([]byte(tc.s), &dest), tc.wantErr; got != want { + if got == nil { + t.Errorf("%s: Unexpected success, want %v", tc.s, want) + } else if want == nil { + t.Errorf("%s: Unexpected error, got %v", tc.s, got) + } else if got.Error() != want.Error() { + t.Errorf("%s: got %v, want %v", tc.s, got, want) + } + } + } +} + +func TestIssue20685(t *testing.T) { + testCases := []struct { + s string + ok bool + }{ + {`<x:book xmlns:x="abcd" xmlns:y="abcd"><unclosetag>one</x:book>`, false}, + {`<x:book xmlns:x="abcd" xmlns:y="abcd">one</x:book>`, true}, + {`<x:book xmlns:x="abcd" xmlns:y="abcd">one</y:book>`, false}, + {`<x:book xmlns:y="abcd" xmlns:x="abcd">one</y:book>`, false}, + {`<x:book xmlns:x="abcd">one</y:book>`, false}, + {`<x:book>one</y:book>`, false}, + {`<xbook>one</ybook>`, false}, + } + for _, tc := range testCases { + d := NewDecoder(strings.NewReader(tc.s)) + var err error + for { + _, err = d.Token() + if err != nil { + if err == io.EOF { + err = nil + } + break + } + } + if err != nil && tc.ok { + t.Errorf("%q: Closing tag with namespace : expected no error, got %s", tc.s, err) + continue + } + if err == nil && !tc.ok { + t.Errorf("%q: Closing tag with namespace : expected error, got nil", tc.s) + } + } +} + +func tokenMap(mapping func(t Token) Token) func(TokenReader) TokenReader { + return func(src TokenReader) TokenReader { + return mapper{ + t: src, + f: mapping, + } + } +} + +type mapper struct { + t TokenReader + f func(Token) Token +} + +func (m mapper) Token() (Token, error) { + tok, err := m.t.Token() + if err != nil { + return nil, err + } + return m.f(tok), nil +} + +func TestNewTokenDecoderIdempotent(t *testing.T) { + d := NewDecoder(strings.NewReader(`<br>`)) + d2 := NewTokenDecoder(d) + if d != d2 { + t.Error("NewTokenDecoder did not detect underlying Decoder") + } +} + +func TestWrapDecoder(t *testing.T) { + d := NewDecoder(strings.NewReader(`<quote>[Re-enter Clown with a letter, and FABIAN]</quote>`)) + m := tokenMap(func(t Token) Token { + switch tok := t.(type) { + case StartElement: + if tok.Name.Local == "quote" { + tok.Name.Local = "blocking" + return tok + } + case EndElement: + if tok.Name.Local == "quote" { + tok.Name.Local = "blocking" + return tok + } + } + return t + }) + + d = NewTokenDecoder(m(d)) + + o := struct { + XMLName Name `xml:"blocking"` + Chardata string `xml:",chardata"` + }{} + + if err := d.Decode(&o); err != nil { + t.Fatal("Got unexpected error while decoding:", err) + } + + if o.Chardata != "[Re-enter Clown with a letter, and FABIAN]" { + t.Fatalf("Got unexpected chardata: `%s`\n", o.Chardata) + } +} + +type tokReader struct{} + +func (tokReader) Token() (Token, error) { + return StartElement{}, nil +} + +type Failure struct{} + +func (Failure) UnmarshalXML(*Decoder, StartElement) error { + return nil +} + +func TestTokenUnmarshaler(t *testing.T) { + defer func() { + if r := recover(); r != nil { + t.Error("Unexpected panic using custom token unmarshaler") + } + }() + + d := NewTokenDecoder(tokReader{}) + d.Decode(&Failure{}) +} + +func testRoundTrip(t *testing.T, input string) { + d := NewDecoder(strings.NewReader(input)) + var tokens []Token + var buf bytes.Buffer + e := NewEncoder(&buf) + for { + tok, err := d.Token() + if err == io.EOF { + break + } + if err != nil { + t.Fatalf("invalid input: %v", err) + } + if err := e.EncodeToken(tok); err != nil { + t.Fatalf("failed to re-encode input: %v", err) + } + tokens = append(tokens, CopyToken(tok)) + } + if err := e.Flush(); err != nil { + t.Fatal(err) + } + + d = NewDecoder(&buf) + for { + tok, err := d.Token() + if err == io.EOF { + break + } + if err != nil { + t.Fatalf("failed to decode output: %v", err) + } + if len(tokens) == 0 { + t.Fatalf("unexpected token: %#v", tok) + } + a, b := tokens[0], tok + if !reflect.DeepEqual(a, b) { + t.Fatalf("token mismatch: %#v vs %#v", a, b) + } + tokens = tokens[1:] + } + if len(tokens) > 0 { + t.Fatalf("lost tokens: %#v", tokens) + } +} + +func TestRoundTrip(t *testing.T) { + tests := map[string]string{ + "trailing colon": `<foo abc:="x"></foo>`, + "comments in directives": `<!ENTITY x<!<!-- c1 [ " -->--x --> > <e></e> <!DOCTYPE xxx [ x<!-- c2 " -->--x ]>`, + } + for name, input := range tests { + t.Run(name, func(t *testing.T) { testRoundTrip(t, input) }) + } +} + +func TestParseErrors(t *testing.T) { + withDefaultHeader := func(s string) string { + return `<?xml version="1.0" encoding="UTF-8"?>` + s + } + tests := []struct { + src string + err string + }{ + {withDefaultHeader(`</foo>`), `unexpected end element </foo>`}, + {withDefaultHeader(`<x:foo></y:foo>`), `element <foo> in space x closed by </foo> in space y`}, + {withDefaultHeader(`<? not ok ?>`), `expected target name after <?`}, + {withDefaultHeader(`<!- not ok -->`), `invalid sequence <!- not part of <!--`}, + {withDefaultHeader(`<!-? not ok -->`), `invalid sequence <!- not part of <!--`}, + {withDefaultHeader(`<![not ok]>`), `invalid <![ sequence`}, + {withDefaultHeader("\xf1"), `invalid UTF-8`}, + + // Header-related errors. + {`<?xml version="1.1" encoding="UTF-8"?>`, `unsupported version "1.1"; only version 1.0 is supported`}, + + // Cases below are for "no errors". + {withDefaultHeader(`<?ok?>`), ``}, + {withDefaultHeader(`<?ok version="ok"?>`), ``}, + } + + for _, test := range tests { + d := NewDecoder(strings.NewReader(test.src)) + var err error + for { + _, err = d.Token() + if err != nil { + break + } + } + if test.err == "" { + if err != io.EOF { + t.Errorf("parse %s: have %q error, expected none", test.src, err) + } + continue + } + // Inv: err != nil + if err == io.EOF { + t.Errorf("parse %s: unexpected EOF", test.src) + continue + } + if !strings.Contains(err.Error(), test.err) { + t.Errorf("parse %s: can't find %q error sudbstring\nerror: %q", test.src, test.err, err) + continue + } + } +} + +const testInputHTMLAutoClose = `<?xml version="1.0" encoding="UTF-8"?> +<br> +<br/><br/> +<br><br> +<br></br> +<BR> +<BR/><BR/> +<Br></Br> +<BR><span id="test">abc</span><br/><br/>` + +func BenchmarkHTMLAutoClose(b *testing.B) { + b.RunParallel(func(p *testing.PB) { + for p.Next() { + d := NewDecoder(strings.NewReader(testInputHTMLAutoClose)) + d.Strict = false + d.AutoClose = HTMLAutoClose + d.Entity = HTMLEntity + for { + _, err := d.Token() + if err != nil { + if err == io.EOF { + break + } + b.Fatalf("unexpected error: %v", err) + } + } + } + }) +} + +func TestHTMLAutoClose(t *testing.T) { + wantTokens := []Token{ + ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, + CharData("\n"), + StartElement{Name{"", "br"}, []Attr{}}, + EndElement{Name{"", "br"}}, + CharData("\n"), + StartElement{Name{"", "br"}, []Attr{}}, + EndElement{Name{"", "br"}}, + StartElement{Name{"", "br"}, []Attr{}}, + EndElement{Name{"", "br"}}, + CharData("\n"), + StartElement{Name{"", "br"}, []Attr{}}, + EndElement{Name{"", "br"}}, + StartElement{Name{"", "br"}, []Attr{}}, + EndElement{Name{"", "br"}}, + CharData("\n"), + StartElement{Name{"", "br"}, []Attr{}}, + EndElement{Name{"", "br"}}, + CharData("\n"), + StartElement{Name{"", "BR"}, []Attr{}}, + EndElement{Name{"", "BR"}}, + CharData("\n"), + StartElement{Name{"", "BR"}, []Attr{}}, + EndElement{Name{"", "BR"}}, + StartElement{Name{"", "BR"}, []Attr{}}, + EndElement{Name{"", "BR"}}, + CharData("\n"), + StartElement{Name{"", "Br"}, []Attr{}}, + EndElement{Name{"", "Br"}}, + CharData("\n"), + StartElement{Name{"", "BR"}, []Attr{}}, + EndElement{Name{"", "BR"}}, + StartElement{Name{"", "span"}, []Attr{{Name: Name{"", "id"}, Value: "test"}}}, + CharData("abc"), + EndElement{Name{"", "span"}}, + StartElement{Name{"", "br"}, []Attr{}}, + EndElement{Name{"", "br"}}, + StartElement{Name{"", "br"}, []Attr{}}, + EndElement{Name{"", "br"}}, + } + + d := NewDecoder(strings.NewReader(testInputHTMLAutoClose)) + d.Strict = false + d.AutoClose = HTMLAutoClose + d.Entity = HTMLEntity + var haveTokens []Token + for { + tok, err := d.Token() + if err != nil { + if err == io.EOF { + break + } + t.Fatalf("unexpected error: %v", err) + } + haveTokens = append(haveTokens, CopyToken(tok)) + } + if len(haveTokens) != len(wantTokens) { + t.Errorf("tokens count mismatch: have %d, want %d", len(haveTokens), len(wantTokens)) + } + for i, want := range wantTokens { + if i >= len(haveTokens) { + t.Errorf("token[%d] expected %#v, have no token", i, want) + } else { + have := haveTokens[i] + if !reflect.DeepEqual(have, want) { + t.Errorf("token[%d] mismatch:\nhave: %#v\nwant: %#v", i, have, want) + } + } + } +} |