Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions pkg/yqlib/base64_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,13 @@ var base64Scenarios = []formatScenario{
scenarioType: "decode",
},
{
skipDoc: true,
description: "decode yaml document",
input: base64EncodedYaml,
expected: base64DecodedYaml + "\n",
skipDoc: true,
description: "decode yaml document",
input: base64EncodedYaml,
// The decoded payload ("a: apple\n") would re-parse as a map if
// emitted bare, so the yaml encoder keeps it as a block literal to
// preserve roundtrip safety. See issue #2608.
expected: "|\n a: apple\n",
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is what worries me:
a) how would you now emit this as a bare string if you wanted to;
b) this change could break a whole bunch of scripts / CI/CD pipelines for people

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah that's a valid concern. A couple thoughts:

(a) Users can still get the raw value with -o=raw / --outputformat=raw, which bypasses the yaml encoder entirely. So there is an escape hatch.

(b) On breakage — this only kicks in for strings whose bare form re-parses as a map or sequence (like a: b or - item). Plain strings, numbers, bools are untouched. So it's a pretty narrow change, but I get that even narrow changes can bite someone.

If you'd prefer a flag to opt into this behavior instead of making it the default, I can rework it that way.

scenarioType: "decode",
},
{
Expand Down
27 changes: 26 additions & 1 deletion pkg/yqlib/encoder_yaml.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ func (ye *yamlEncoder) Encode(writer io.Writer, node *CandidateNode) error {
if strings.Contains(node.LeadingContent, "\r\n") {
lineEnding = "\r\n"
}
if node.Kind == ScalarNode && ye.prefs.UnwrapScalar {
if node.Kind == ScalarNode && ye.prefs.UnwrapScalar && !bareStringNeedsQuoting(node) {
valueToPrint := node.Value
if node.LeadingContent == "" || valueToPrint != "" {
valueToPrint = valueToPrint + lineEnding
Expand Down Expand Up @@ -78,3 +78,28 @@ func (ye *yamlEncoder) Encode(writer io.Writer, node *CandidateNode) error {
}
return nil
}

// bareStringNeedsQuoting reports whether a top-level string scalar would be
// structurally reinterpreted if emitted as an unquoted bare value. The
// unwrap-scalar fast-path writes node.Value verbatim, which silently turns a
// string like "this: should really work" into a mapping on the next read, or
// "- item" into a sequence. When this returns true the caller falls through
// to the full yaml encoder, which applies the quoting style required by the
// YAML spec. Scalar-to-scalar reinterpretations (e.g. "123" parsing as an int
// tag) are not covered here: they preserve the node shape and are handled by
// callers that care about explicit tag preservation.
func bareStringNeedsQuoting(node *CandidateNode) bool {
if node.Tag != "!!str" {
return false
}
var parsed yaml.Node
if err := yaml.Unmarshal([]byte(node.Value), &parsed); err != nil {
// Unparseable bare form (e.g. control characters): leave it on the
// fast-path so callers that check for those characters still see them.
return false
}
if parsed.Kind != yaml.DocumentNode || len(parsed.Content) != 1 {
return false
}
return parsed.Content[0].Kind != yaml.ScalarNode
}
84 changes: 84 additions & 0 deletions pkg/yqlib/encoder_yaml_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
package yqlib

import (
"bytes"
"strings"
"testing"
)

// TestYamlEncoderUnwrapScalarRoundtripSafety verifies that a top-level string
// scalar whose unquoted form would re-parse as a non-scalar node (map or
// sequence) is emitted quoted even when UnwrapScalar is enabled. Safe plain
// strings continue to round-trip through the existing fast-path. See #2608.
func TestYamlEncoderUnwrapScalarRoundtripSafety(t *testing.T) {
cases := []struct {
name string
value string
wantBare bool // true: output equals value+"\n"; false: output must differ
}{
{name: "colon_parses_as_map", value: "this: should really work"},
{name: "dash_parses_as_seq", value: "- item"},
{name: "multiline_maplike", value: "a: a\nb: b"},
{name: "safe_plain_string", value: "hello world", wantBare: true},
{name: "safe_identifier", value: "cat", wantBare: true},
{name: "safe_digits_preserved", value: "123", wantBare: true},
{name: "safe_null_word_preserved", value: "null", wantBare: true},
{name: "safe_tag_shorthand_preserved", value: "!!int", wantBare: true},
}

for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
prefs := NewDefaultYamlPreferences()
prefs.UnwrapScalar = true

var buf bytes.Buffer
err := NewYamlEncoder(prefs).Encode(&buf, &CandidateNode{
Kind: ScalarNode,
Tag: "!!str",
Value: tc.value,
})
if err != nil {
t.Fatalf("encode failed: %v", err)
}
got := buf.String()

if tc.wantBare {
if got != tc.value+"\n" {
t.Fatalf("expected bare %q, got %q", tc.value+"\n", got)
}
return
}

// Ambiguous input: must not be emitted as the bare value.
if got == tc.value+"\n" {
t.Fatalf("value %q was emitted bare; expected quoted form", tc.value)
}

// The output must round-trip back to a string scalar with the
// same value, proving structural roundtrip safety.
decoder := NewYamlDecoder(NewDefaultYamlPreferences())
nodes, err := readDocuments(strings.NewReader(got), "test.yaml", 0, decoder)
if err != nil {
t.Fatalf("decode of %q failed: %v", got, err)
}
if nodes.Len() != 1 {
t.Fatalf("expected one document, got %d", nodes.Len())
}
candidate := nodes.Front().Value.(*CandidateNode)
// readDocuments wraps the document; descend to the scalar.
scalar := candidate
for scalar.Kind != ScalarNode && len(scalar.Content) == 1 {
scalar = scalar.Content[0]
}
if scalar.Kind != ScalarNode {
t.Fatalf("round-tripped node is not a scalar: kind=%v value=%q", scalar.Kind, scalar.Value)
}
if scalar.Tag != "!!str" {
t.Fatalf("round-tripped tag is %q, want !!str", scalar.Tag)
}
if scalar.Value != tc.value {
t.Fatalf("round-tripped value is %q, want %q", scalar.Value, tc.value)
}
})
}
}