diff --git a/CHANGELOG.md b/CHANGELOG.md index 95c434d..4ef8a73 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -75,6 +75,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Fixed a bug in YAML and TOML parsers that caused them to fail when parsing non-base10 numbers (e.g. hex, binary, octal). - Fixed a bug in the `toInt` function that caused it to fail when parsing non-base10 numbers (e.g. hex, binary, octal). - XML child element ordering now has more comprehensive round-trip handling. Thanks @takeokunn. +- XML writer now validates element and attribute names per the XML 1.0 spec, returning a clear error for invalid names instead of producing malformed XML. Thanks @lawrence3699. ## [v3.4.1] - 2026-03-30 diff --git a/parsing/xml/writer.go b/parsing/xml/writer.go index 0539241..6efb3a6 100644 --- a/parsing/xml/writer.go +++ b/parsing/xml/writer.go @@ -5,11 +5,62 @@ import ( "encoding/xml" "fmt" "strings" + "unicode/utf8" "github.com/tomwright/dasel/v3/model" "github.com/tomwright/dasel/v3/parsing" ) +// isValidXMLNameStartChar reports whether r is a valid first character of an +// XML Name, per the XML 1.0 (Fifth Edition) specification §2.3. +func isValidXMLNameStartChar(r rune) bool { + return r == ':' || r == '_' || + (r >= 'A' && r <= 'Z') || + (r >= 'a' && r <= 'z') || + (r >= 0xC0 && r <= 0xD6) || + (r >= 0xD8 && r <= 0xF6) || + (r >= 0xF8 && r <= 0x2FF) || + (r >= 0x370 && r <= 0x37D) || + (r >= 0x37F && r <= 0x1FFF) || + (r >= 0x200C && r <= 0x200D) || + (r >= 0x2070 && r <= 0x218F) || + (r >= 0x2C00 && r <= 0x2FEF) || + (r >= 0x3001 && r <= 0xD7FF) || + (r >= 0xF900 && r <= 0xFDCF) || + (r >= 0xFDF0 && r <= 0xFFFD) || + (r >= 0x10000 && r <= 0xEFFFF) +} + +// isValidXMLNameChar reports whether r is a valid subsequent character of an +// XML Name, per the XML 1.0 (Fifth Edition) specification §2.3. +func isValidXMLNameChar(r rune) bool { + return isValidXMLNameStartChar(r) || + r == '-' || r == '.' || + (r >= '0' && r <= '9') || + r == 0xB7 || + (r >= 0x0300 && r <= 0x036F) || + (r >= 0x203F && r <= 0x2040) +} + +// isValidXMLName reports whether s is a valid XML element or attribute name. +func isValidXMLName(s string) bool { + if s == "" { + return false + } + first, size := utf8.DecodeRuneInString(s) + if (first == utf8.RuneError && size == 1) || !isValidXMLNameStartChar(first) { + return false + } + for i := size; i < len(s); { + r, w := utf8.DecodeRuneInString(s[i:]) + if (r == utf8.RuneError && w == 1) || !isValidXMLNameChar(r) { + return false + } + i += w + } + return true +} + func newXMLWriter(options parsing.WriterOptions) (parsing.Writer, error) { return &xmlWriter{ options: options, @@ -60,6 +111,9 @@ func (j *xmlWriter) Write(value *model.Value) ([]byte, error) { } func (j *xmlWriter) toElement(key string, value *model.Value) (*xmlElement, error) { + if !isValidXMLName(key) { + return nil, fmt.Errorf("key %q is not a valid XML element name", key) + } readProcessingInstructions := func() []*xmlProcessingInstruction { if piMeta, ok := value.MetadataValue("xml_processing_instructions"); ok && piMeta != nil { if pis, ok := piMeta.([]*xmlProcessingInstruction); ok { @@ -151,8 +205,12 @@ func (j *xmlWriter) toElement(key string, value *model.Value) (*xmlElement, erro func extractAttrsAndText(kvs []model.KeyValue, el *xmlElement) error { for _, kv := range kvs { if strings.HasPrefix(kv.Key, "-") { + attrName := kv.Key[1:] + if !isValidXMLName(attrName) { + return fmt.Errorf("invalid XML attribute name %q from map key %q", attrName, kv.Key) + } attr := xmlAttr{ - Name: kv.Key[1:], + Name: attrName, } var err error attr.Value, err = valueToString(kv.Value) diff --git a/parsing/xml/writer_internal_test.go b/parsing/xml/writer_internal_test.go index bea71e8..7102271 100644 --- a/parsing/xml/writer_internal_test.go +++ b/parsing/xml/writer_internal_test.go @@ -70,6 +70,23 @@ func TestXmlWriter_CommentValidation(t *testing.T) { }) } +// Test_isValidXMLName tests XML name validation per the XML 1.0 spec. +func Test_isValidXMLName(t *testing.T) { + valid := []string{"foo", "Foo", "_bar", "ns:local", "a1", "a-b", "a.b", "über"} + for _, name := range valid { + if !isValidXMLName(name) { + t.Errorf("Expected %q to be a valid XML name", name) + } + } + + invalid := []string{"", "<", ">", "&", "foo bar", "1abc", "-abc", ".abc", "ab", "a&b"} + for _, name := range invalid { + if isValidXMLName(name) { + t.Errorf("Expected %q to be an invalid XML name", name) + } + } +} + // Test_valueToString tests the valueToString function for all supported types func Test_valueToString(t *testing.T) { t.Run("null value returns empty string", func(t *testing.T) { diff --git a/parsing/xml/writer_test.go b/parsing/xml/writer_test.go index 0dcf6ae..10880a4 100644 --- a/parsing/xml/writer_test.go +++ b/parsing/xml/writer_test.go @@ -260,6 +260,76 @@ func TestXmlReader_Write(t *testing.T) { } }) + t.Run("invalid element name", func(t *testing.T) { + w, err := xml.XML.NewWriter(parsing.DefaultWriterOptions()) + if err != nil { + t.Fatalf("Unexpected error: %s", err) + } + + toEncode := model.NewMapValue() + _ = toEncode.SetMapKey("<", model.NewStringValue("value")) + _, err = w.Write(toEncode) + if err == nil { + t.Fatal("Expected error for invalid XML element name, got nil") + } + if !strings.Contains(err.Error(), `"<"`) || !strings.Contains(err.Error(), "not a valid XML element name") { + t.Fatalf("Expected error to mention the offending key, got: %s", err) + } + }) + + t.Run("invalid element name ampersand", func(t *testing.T) { + w, err := xml.XML.NewWriter(parsing.DefaultWriterOptions()) + if err != nil { + t.Fatalf("Unexpected error: %s", err) + } + + toEncode := model.NewMapValue() + _ = toEncode.SetMapKey("&", model.NewStringValue("value")) + _, err = w.Write(toEncode) + if err == nil { + t.Fatal("Expected error for invalid XML element name, got nil") + } + if !strings.Contains(err.Error(), `"&"`) || !strings.Contains(err.Error(), "not a valid XML element name") { + t.Fatalf("Expected error to mention the offending key, got: %s", err) + } + }) + + t.Run("invalid element name with space", func(t *testing.T) { + w, err := xml.XML.NewWriter(parsing.DefaultWriterOptions()) + if err != nil { + t.Fatalf("Unexpected error: %s", err) + } + + toEncode := model.NewMapValue() + _ = toEncode.SetMapKey("foo bar", model.NewStringValue("value")) + _, err = w.Write(toEncode) + if err == nil { + t.Fatal("Expected error for invalid XML element name, got nil") + } + if !strings.Contains(err.Error(), `"foo bar"`) || !strings.Contains(err.Error(), "not a valid XML element name") { + t.Fatalf("Expected error to mention the offending key, got: %s", err) + } + }) + + t.Run("invalid attribute name", func(t *testing.T) { + w, err := xml.XML.NewWriter(parsing.DefaultWriterOptions()) + if err != nil { + t.Fatalf("Unexpected error: %s", err) + } + + toEncode := model.NewMapValue() + child := model.NewMapValue() + _ = child.SetMapKey("-<", model.NewStringValue("value")) + _ = toEncode.SetMapKey("foo", child) + _, err = w.Write(toEncode) + if err == nil { + t.Fatal("Expected error for invalid XML attribute name, got nil") + } + if !strings.Contains(err.Error(), "invalid XML attribute name") || !strings.Contains(err.Error(), `"-<"`) { + t.Fatalf("Expected error to mention the offending attribute key, got: %s", err) + } + }) + t.Run("encode cdata", func(t *testing.T) { w, err := xml.XML.NewWriter(parsing.DefaultWriterOptions()) if err != nil {