-
-
Notifications
You must be signed in to change notification settings - Fork 166
fix(xml): reject invalid XML element and attribute names #536
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,11 +5,62 @@ import ( | |
| "encoding/xml" | ||
| "fmt" | ||
| "strings" | ||
| "unicode/utf8" | ||
|
|
||
| "github.com/tomwright/dasel/v3/model" | ||
| "github.com/tomwright/dasel/v3/parsing" | ||
| ) | ||
|
|
||
| // isValidXMLNameStartChar reports whether r is a valid first character of an | ||
| // XML Name, per the XML 1.0 (Fifth Edition) specification §2.3. | ||
| func isValidXMLNameStartChar(r rune) bool { | ||
| return r == ':' || r == '_' || | ||
| (r >= 'A' && r <= 'Z') || | ||
| (r >= 'a' && r <= 'z') || | ||
| (r >= 0xC0 && r <= 0xD6) || | ||
| (r >= 0xD8 && r <= 0xF6) || | ||
| (r >= 0xF8 && r <= 0x2FF) || | ||
| (r >= 0x370 && r <= 0x37D) || | ||
| (r >= 0x37F && r <= 0x1FFF) || | ||
| (r >= 0x200C && r <= 0x200D) || | ||
| (r >= 0x2070 && r <= 0x218F) || | ||
| (r >= 0x2C00 && r <= 0x2FEF) || | ||
| (r >= 0x3001 && r <= 0xD7FF) || | ||
| (r >= 0xF900 && r <= 0xFDCF) || | ||
| (r >= 0xFDF0 && r <= 0xFFFD) || | ||
| (r >= 0x10000 && r <= 0xEFFFF) | ||
| } | ||
|
|
||
| // isValidXMLNameChar reports whether r is a valid subsequent character of an | ||
| // XML Name, per the XML 1.0 (Fifth Edition) specification §2.3. | ||
| func isValidXMLNameChar(r rune) bool { | ||
| return isValidXMLNameStartChar(r) || | ||
| r == '-' || r == '.' || | ||
| (r >= '0' && r <= '9') || | ||
| r == 0xB7 || | ||
| (r >= 0x0300 && r <= 0x036F) || | ||
| (r >= 0x203F && r <= 0x2040) | ||
| } | ||
|
|
||
| // isValidXMLName reports whether s is a valid XML element or attribute name. | ||
| func isValidXMLName(s string) bool { | ||
| if s == "" { | ||
| return false | ||
| } | ||
| first, size := utf8.DecodeRuneInString(s) | ||
| if (first == utf8.RuneError && size == 1) || !isValidXMLNameStartChar(first) { | ||
| return false | ||
| } | ||
| for i := size; i < len(s); { | ||
| r, w := utf8.DecodeRuneInString(s[i:]) | ||
| if (r == utf8.RuneError && w == 1) || !isValidXMLNameChar(r) { | ||
| return false | ||
| } | ||
| i += w | ||
| } | ||
| return true | ||
| } | ||
|
|
||
| func newXMLWriter(options parsing.WriterOptions) (parsing.Writer, error) { | ||
| return &xmlWriter{ | ||
| options: options, | ||
|
|
@@ -53,6 +104,9 @@ func (j *xmlWriter) Write(value *model.Value) ([]byte, error) { | |
| } | ||
|
|
||
| func (j *xmlWriter) toElement(key string, value *model.Value) (*xmlElement, error) { | ||
| if !isValidXMLName(key) { | ||
| return nil, fmt.Errorf("key %q is not a valid XML element name", key) | ||
| } | ||
| readProcessingInstructions := func() []*xmlProcessingInstruction { | ||
| if piMeta, ok := value.MetadataValue("xml_processing_instructions"); ok && piMeta != nil { | ||
| if pis, ok := piMeta.([]*xmlProcessingInstruction); ok { | ||
|
|
@@ -144,8 +198,12 @@ func (j *xmlWriter) toElement(key string, value *model.Value) (*xmlElement, erro | |
| func extractAttrsAndText(kvs []model.KeyValue, el *xmlElement) error { | ||
| for _, kv := range kvs { | ||
| if strings.HasPrefix(kv.Key, "-") { | ||
| attrName := kv.Key[1:] | ||
| if !isValidXMLName(attrName) { | ||
| return fmt.Errorf("key %q is not a valid XML attribute name", attrName) | ||
| } | ||
|
Comment on lines
207
to
+211
|
||
| attr := xmlAttr{ | ||
| Name: kv.Key[1:], | ||
| Name: attrName, | ||
| } | ||
| var err error | ||
| attr.Value, err = valueToString(kv.Value) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -151,6 +151,48 @@ func TestXmlReader_Write(t *testing.T) { | |
| } | ||
| }) | ||
|
|
||
| t.Run("invalid element name", func(t *testing.T) { | ||
| w, err := xml.XML.NewWriter(parsing.DefaultWriterOptions()) | ||
| if err != nil { | ||
| t.Fatalf("Unexpected error: %s", err) | ||
| } | ||
|
|
||
| toEncode := model.NewMapValue() | ||
| _ = toEncode.SetMapKey("<", model.NewStringValue("value")) | ||
| _, err = w.Write(toEncode) | ||
| if err == nil { | ||
| t.Fatal("Expected error for invalid XML element name, got nil") | ||
| } | ||
| }) | ||
|
Comment on lines
+263
to
+278
|
||
|
|
||
| t.Run("invalid element name ampersand", func(t *testing.T) { | ||
| w, err := xml.XML.NewWriter(parsing.DefaultWriterOptions()) | ||
| if err != nil { | ||
| t.Fatalf("Unexpected error: %s", err) | ||
| } | ||
|
|
||
| toEncode := model.NewMapValue() | ||
| _ = toEncode.SetMapKey("&", model.NewStringValue("value")) | ||
| _, err = w.Write(toEncode) | ||
| if err == nil { | ||
| t.Fatal("Expected error for invalid XML element name, got nil") | ||
| } | ||
| }) | ||
|
|
||
| t.Run("invalid element name with space", func(t *testing.T) { | ||
| w, err := xml.XML.NewWriter(parsing.DefaultWriterOptions()) | ||
| if err != nil { | ||
| t.Fatalf("Unexpected error: %s", err) | ||
| } | ||
|
|
||
| toEncode := model.NewMapValue() | ||
| _ = toEncode.SetMapKey("foo bar", model.NewStringValue("value")) | ||
| _, err = w.Write(toEncode) | ||
| if err == nil { | ||
| t.Fatal("Expected error for invalid XML element name, got nil") | ||
| } | ||
| }) | ||
|
|
||
| t.Run("encode cdata", func(t *testing.T) { | ||
| w, err := xml.XML.NewWriter(parsing.DefaultWriterOptions()) | ||
| if err != nil { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The attribute-name error message reports the stripped attribute name as a “key” (e.g. input key "-lang" would produce an error about key "lang"). That can be confusing for users debugging map keys. Consider wording the message as an attribute name error and/or include the original map key (including the leading "-") in the error details.