diff --git a/README.md b/README.md index 750bb33..31048f6 100644 --- a/README.md +++ b/README.md @@ -140,6 +140,42 @@ Re-template and update generated file in place (this will overwrite it): talm template -f nodes/node1.yaml -I ``` +> **Per-node patches inside node files.** A node file can carry Talos config +> below its modeline (for example, a custom `hostname`, secondary +> interfaces with `deviceSelector`, VIP placement, or extra etcd args). +> When `talm apply -f node.yaml` runs the template-rendering branch, that +> body is applied as a strategic merge patch on top of the rendered +> template before the result is sent to the node — so per-node fields +> survive even when the template auto-generates conflicting values +> (e.g. `hostname: talos-XXXXX`). +> +> **Talos v1.12+ caveat.** The multi-document output format introduced +> in v1.12 splits network configuration into typed documents +> (`LinkConfig`, `BondConfig`, `VLANConfig`, `Layer2VIPConfig`, +> `HostnameConfig`, `ResolverConfig`). Legacy node-body fields under +> `machine.network.interfaces` have no safe 1:1 mapping to those types, +> so the multi-doc path does not translate them — if you target a +> v1.12+ Talos node, pin per-node network settings by patching the +> typed resources (e.g. a `LinkConfig` document below the modeline) +> rather than legacy `machine.network.interfaces`. Fields outside the +> network area (`machine.network.hostname` via `HostnameConfig`, +> `machine.install.disk`, extra etcd args, etc.) still merge as +> expected. +> +> **One body, one node.** A non-empty body is a per-node pin, so the +> modeline for that file must target exactly one node. `talm apply` +> refuses a multi-node modeline when the body is non-empty; modeline- +> only files (no body) are still allowed and drive the same rendered +> template on every listed target. +> +> `talm template -f node.yaml` (with or without `-I`) does **not** apply +> the same overlay: its output is the rendered template plus the modeline +> and the auto-generated warning, byte-identical to what the template +> alone would produce. Routing it through the patcher would drop every +> YAML comment (including the modeline) and re-sort keys, breaking +> downstream commands that read the file back. Use `apply --dry-run` if +> you want to preview the exact bytes that will be sent to the node. + ## Using talosctl commands Talm offers a similar set of commands to those provided by talosctl. diff --git a/pkg/commands/apply.go b/pkg/commands/apply.go index 989c91c..0ffbfc8 100644 --- a/pkg/commands/apply.go +++ b/pkg/commands/apply.go @@ -113,20 +113,16 @@ func apply(args []string) error { withSecretsPath := ResolveSecretsPath(applyCmdFlags.withSecrets) if len(modelineTemplates) > 0 { - // Template rendering path: connect to the node first, render templates - // online (so lookup() functions resolve real discovery data), then apply. + // Template rendering path: render templates online per node and + // apply the rendered config plus the node file overlay. See + // applyTemplatesPerNode for why the loop is mandatory. opts := buildApplyRenderOptions(modelineTemplates, withSecretsPath) + nodes := append([]string(nil), GlobalArgs.Nodes...) + fmt.Printf("- talm: file=%s, nodes=%s, endpoints=%s\n", configFile, GlobalArgs.Nodes, GlobalArgs.Endpoints) - err = withApplyClient(func(ctx context.Context, c *client.Client) error { - fmt.Printf("- talm: file=%s, nodes=%s, endpoints=%s\n", configFile, GlobalArgs.Nodes, GlobalArgs.Endpoints) - - result, err := engine.Render(ctx, c, opts) - if err != nil { - return fmt.Errorf("template rendering error: %w", err) - } - + applyClosure := func(ctx context.Context, c *client.Client, data []byte) error { resp, err := c.ApplyConfiguration(ctx, &machineapi.ApplyConfigurationRequest{ - Data: result, + Data: data, Mode: applyCmdFlags.Mode.Mode, DryRun: applyCmdFlags.dryRun, TryModeTimeout: durationpb.New(applyCmdFlags.configTryTimeout), @@ -134,11 +130,24 @@ func apply(args []string) error { if err != nil { return fmt.Errorf("error applying new configuration: %w", err) } - helpers.PrintApplyResults(resp) - return nil - }) + } + + if applyCmdFlags.insecure { + openClient := openClientPerNodeMaintenance(applyCmdFlags.certFingerprints, WithClientMaintenance) + if err := applyTemplatesPerNode(opts, configFile, nodes, openClient, engine.Render, applyClosure); err != nil { + return err + } + } else { + if err := withApplyClientBare(func(parentCtx context.Context, c *client.Client) error { + resolved := resolveAuthTemplateNodes(nodes, c) + openClient := openClientPerNodeAuth(parentCtx, c) + return applyTemplatesPerNode(opts, configFile, resolved, openClient, engine.Render, applyClosure) + }); err != nil { + return err + } + } } else { // Direct patch path: apply config file as patch against empty bundle opts := buildApplyPatchOptions(withSecretsPath) @@ -173,9 +182,6 @@ func apply(args []string) error { return err } } - if err != nil { - return err - } // Reset args if !applyCmdFlags.nodesFromArgs { @@ -188,23 +194,170 @@ func apply(args []string) error { return nil } -// withApplyClient creates a Talos client appropriate for the current apply mode -// and invokes the given action with it. +// withApplyClient creates a Talos client appropriate for the current apply +// mode and invokes the given action with it. The action receives a context +// in which gRPC node metadata is set to the resolved node list — either +// GlobalArgs.Nodes (when set) or the talosconfig context's Nodes (when not). +// Used by the direct-patch branch where multi-node fan-out happens at the +// gRPC layer inside ApplyConfiguration. func withApplyClient(f func(ctx context.Context, c *client.Client) error) error { + return withApplyClientBare(wrapWithNodeContext(f)) +} + +// withApplyClientBare connects to Talos for the current apply mode but does +// NOT inject node metadata into the context — leaving that decision to the +// caller. Used by the template-rendering path (see applyTemplatesPerNode for +// the rationale). +func withApplyClientBare(f func(ctx context.Context, c *client.Client) error) error { if applyCmdFlags.insecure { - // Maintenance mode connects directly to the node IP without talosconfig; - // node context injection is not needed — the maintenance client handles - // node targeting internally via GlobalArgs.Nodes. + // Maintenance mode reads its endpoints directly from + // GlobalArgs.Nodes — gRPC node metadata is not consulted. return WithClientMaintenance(applyCmdFlags.certFingerprints, f) } - wrappedF := wrapWithNodeContext(f) - if GlobalArgs.SkipVerify { - return WithClientSkipVerify(wrappedF) + return WithClientSkipVerify(f) + } + + return WithClientNoNodes(f) +} + +// renderFunc, applyFunc and openClientFunc are injection points for +// applyTemplatesPerNode so unit tests can drive the loop with fakes instead +// of a real Talos client. +type renderFunc func(ctx context.Context, c *client.Client, opts engine.Options) ([]byte, error) +type applyFunc func(ctx context.Context, c *client.Client, data []byte) error + +// openClientFunc opens a Talos client suitable for a single node and runs +// action with it. Authenticated mode reuses one parent client and rotates +// the node via single-target gRPC metadata (client.WithNode); insecure +// (maintenance) mode opens a fresh single-endpoint client per node because +// Talos's maintenance client ignores node metadata in the context and +// round-robins between its configured endpoints. +type openClientFunc func(node string, action func(ctx context.Context, c *client.Client) error) error + +// applyTemplatesPerNode runs render → MergeFileAsPatch → apply once per +// node. Two reasons it has to iterate: +// +// - engine.Render's FailIfMultiNodes guard rejects a context that carries +// more than one node, so the auth-mode caller has to attach a single +// node per iteration — and discovery via lookup() should resolve each +// node's own topology in any case. +// - In insecure (maintenance) mode the client connects directly to a +// Talos node and ignores nodes-in-context entirely, so each node needs +// its own client; otherwise gRPC round-robins ApplyConfiguration +// across the endpoint list and most nodes never see the config. +// +// Both modes share this loop via openClient. +func applyTemplatesPerNode( + opts engine.Options, + configFile string, + nodes []string, + openClient openClientFunc, + render renderFunc, + apply applyFunc, +) error { + if len(nodes) == 0 { + return fmt.Errorf("nodes are not set for the command: please use '--nodes' flag, the node file modeline, or talosconfig context to set the nodes to run the command against") + } + // A node-file body (hostname, address, VIP, etc.) is a per-node + // pin. Replaying it across multiple targets would stamp the same + // value onto every machine, so reject this combination early and + // ask the user to split the file. Modeline-only files (no body) + // are fine — they just carry the target list. + if len(nodes) > 1 { + hasOverlay, err := engine.NodeFileHasOverlay(configFile) + if err != nil { + return err + } + if hasOverlay { + return fmt.Errorf( + "node file %s targets %d nodes (%v) but carries a non-empty per-node body; the same body would be stamped onto every node. Split it into one file per node, or remove the per-node fields if you want the rendered template alone applied to each", + configFile, len(nodes), nodes, + ) + } + } + for _, node := range nodes { + if err := openClient(node, func(ctx context.Context, c *client.Client) error { + return renderMergeAndApply(ctx, c, opts, configFile, render, apply) + }); err != nil { + return fmt.Errorf("node %s: %w", node, err) + } + } + return nil +} + +// maintenanceClientFunc is the contract WithClientMaintenance satisfies in +// production and a fake satisfies in tests. Injection lets the unit tests +// run the real openClientPerNodeMaintenance body without dialing a Talos +// node. +type maintenanceClientFunc func(fingerprints []string, action func(ctx context.Context, c *client.Client) error) error + +// openClientPerNodeMaintenance returns an openClientFunc that opens a +// fresh single-endpoint maintenance client per node. Multi-node insecure +// apply (first bootstrap of a multi-node cluster) needs this because +// WithClientMaintenance creates a client with all endpoints and gRPC then +// round-robins ApplyConfiguration across them — most nodes never see the +// config. Narrowing GlobalArgs.Nodes to the current iteration's node and +// restoring it via defer keeps the wrapper's signature unchanged. +// +// mkClient is normally WithClientMaintenance; tests pass a fake that +// captures the GlobalArgs.Nodes value at the moment WithClientMaintenance +// would have read it. +func openClientPerNodeMaintenance(fingerprints []string, mkClient maintenanceClientFunc) openClientFunc { + return func(node string, action func(ctx context.Context, c *client.Client) error) error { + savedNodes := append([]string(nil), GlobalArgs.Nodes...) + GlobalArgs.Nodes = []string{node} + defer func() { GlobalArgs.Nodes = savedNodes }() + return mkClient(fingerprints, action) + } +} + +// openClientPerNodeAuth returns an openClientFunc that reuses one +// authenticated client (the one withApplyClientBare opened above this +// callback) and rotates the addressed node via client.WithNode on the +// per-iteration context. WithNode (rather than WithNodes) sets the +// "node" metadata key for single-target proxying, which engine.Render's +// FailIfMultiNodes guard treats as one node. +func openClientPerNodeAuth(parentCtx context.Context, c *client.Client) openClientFunc { + return func(node string, action func(ctx context.Context, c *client.Client) error) error { + return action(client.WithNode(parentCtx, node), c) } +} - return WithClientNoNodes(wrappedF) +// resolveAuthTemplateNodes returns the node list the authenticated +// template-rendering path should iterate over. cliNodes (from --nodes +// or the modeline) takes precedence; when empty, the talosconfig +// context's Nodes are used so a user who already ran `talosctl config +// node ` does not have to repeat the node list on every `talm +// apply`. Mirrors wrapWithNodeContext's fallback for the direct-patch +// branch. Insecure mode does not call this helper — maintenance +// clients talk to node IPs directly and have no talosconfig context. +func resolveAuthTemplateNodes(cliNodes []string, c *client.Client) []string { + if len(cliNodes) > 0 { + return cliNodes + } + if c == nil { + return nil + } + cfg := c.GetConfigContext() + if cfg == nil { + return nil + } + return append([]string(nil), cfg.Nodes...) +} + +// renderMergeAndApply is the per-node body shared by every apply mode. +func renderMergeAndApply(ctx context.Context, c *client.Client, opts engine.Options, configFile string, render renderFunc, apply applyFunc) error { + rendered, err := render(ctx, c, opts) + if err != nil { + return fmt.Errorf("template rendering: %w", err) + } + merged, err := engine.MergeFileAsPatch(rendered, configFile) + if err != nil { + return fmt.Errorf("merging node file as patch: %w", err) + } + return apply(ctx, c, merged) } // buildApplyRenderOptions constructs engine.Options for the template rendering path. @@ -214,7 +367,6 @@ func withApplyClient(f func(ctx context.Context, c *client.Client) error) error func buildApplyRenderOptions(modelineTemplates []string, withSecretsPath string) engine.Options { resolvedTemplates := resolveTemplatePaths(modelineTemplates, Config.RootDir) return engine.Options{ - Insecure: applyCmdFlags.insecure, TalosVersion: applyCmdFlags.talosVersion, WithSecrets: withSecretsPath, KubernetesVersion: applyCmdFlags.kubernetesVersion, @@ -222,6 +374,7 @@ func buildApplyRenderOptions(modelineTemplates []string, withSecretsPath string) Full: true, Root: Config.RootDir, TemplateFiles: resolvedTemplates, + CommandName: "talm apply", } } diff --git a/pkg/commands/apply_test.go b/pkg/commands/apply_test.go index e60ec3a..1c9849c 100644 --- a/pkg/commands/apply_test.go +++ b/pkg/commands/apply_test.go @@ -5,8 +5,10 @@ import ( "os" "path/filepath" "slices" + "strings" "testing" + "github.com/cozystack/talm/pkg/engine" "github.com/siderolabs/talos/pkg/machinery/client" "google.golang.org/grpc/metadata" ) @@ -15,20 +17,17 @@ func TestBuildApplyRenderOptions(t *testing.T) { origTalosVersion := applyCmdFlags.talosVersion origKubeVersion := applyCmdFlags.kubernetesVersion origDebug := applyCmdFlags.debug - origInsecure := applyCmdFlags.insecure origRootDir := Config.RootDir defer func() { applyCmdFlags.talosVersion = origTalosVersion applyCmdFlags.kubernetesVersion = origKubeVersion applyCmdFlags.debug = origDebug - applyCmdFlags.insecure = origInsecure Config.RootDir = origRootDir }() applyCmdFlags.talosVersion = "v1.12" applyCmdFlags.kubernetesVersion = "1.31.0" applyCmdFlags.debug = false - applyCmdFlags.insecure = true Config.RootDir = "/project" opts := buildApplyRenderOptions( @@ -42,9 +41,6 @@ func TestBuildApplyRenderOptions(t *testing.T) { if opts.Offline { t.Error("expected Offline=false for online template rendering path") } - if !opts.Insecure { - t.Error("expected Insecure=true to be passed through from flags") - } if opts.Root != "/project" { t.Errorf("expected Root=/project, got %s", opts.Root) } @@ -57,6 +53,28 @@ func TestBuildApplyRenderOptions(t *testing.T) { if len(opts.TemplateFiles) != 1 || opts.TemplateFiles[0] != "templates/controlplane.yaml" { t.Errorf("expected TemplateFiles=[templates/controlplane.yaml], got %v", opts.TemplateFiles) } + if opts.CommandName != "talm apply" { + t.Errorf("expected CommandName=%q, got %q (engine.Render uses this for FailIfMultiNodes error wording)", "talm apply", opts.CommandName) + } +} + +func TestResolveAuthTemplateNodes_CLINodesWin(t *testing.T) { + in := []string{"10.0.0.1", "10.0.0.2"} + got := resolveAuthTemplateNodes(in, nil) + if !slices.Equal(got, in) { + t.Errorf("got %v, want %v (CLI nodes must take precedence over talosconfig context)", got, in) + } +} + +func TestResolveAuthTemplateNodes_NilClientReturnsNil(t *testing.T) { + got := resolveAuthTemplateNodes(nil, nil) + if got != nil { + t.Errorf("got %v, want nil (no CLI nodes + no client = nothing to iterate, caller must surface the error)", got) + } + got = resolveAuthTemplateNodes([]string{}, nil) + if got != nil { + t.Errorf("got %v, want nil on empty slice + nil client", got) + } } func TestBuildApplyPatchOptions(t *testing.T) { @@ -247,3 +265,415 @@ func TestWrapWithNodeContext_NoNodesNoClient(t *testing.T) { t.Error("expected error when no nodes and no client config context, got nil") } } + +// nodesFromOutgoingCtx pulls per-iteration node identity out of gRPC +// outgoing metadata. The Talos client SDK writes single-target metadata to +// the "node" key (client.WithNode) and multi-target metadata to "nodes" +// (client.WithNodes) — checking both keys lets the per-node loop tests +// assert iteration shape regardless of which writer the loop chose. +func nodesFromOutgoingCtx(t *testing.T, ctx context.Context) []string { + t.Helper() + md, ok := metadata.FromOutgoingContext(ctx) + if !ok { + return nil + } + if vs := md.Get("node"); len(vs) > 0 { + return vs + } + return md.Get("nodes") +} + +// fakeAuthOpenClient mimics openClientPerNodeAuth for tests: shares one +// (nil) parent client across iterations and rotates the node via WithNode +// on a fresh per-iteration context. +func fakeAuthOpenClient(parentCtx context.Context) openClientFunc { + return func(node string, action func(ctx context.Context, c *client.Client) error) error { + return action(client.WithNode(parentCtx, node), nil) + } +} + +// TestApplyTemplatesPerNode_LoopsOncePerNodeWithSingleNodeContext asserts +// that applyTemplatesPerNode invokes render and apply exactly once per +// node and that every per-iteration context carries exactly that one +// node. The historical regression — batching every node from +// GlobalArgs.Nodes into a single gRPC context — caused engine.Render's +// multi-node guard to reject the call before any rendering ran. +func TestApplyTemplatesPerNode_LoopsOncePerNodeWithSingleNodeContext(t *testing.T) { + dir := t.TempDir() + configFile := filepath.Join(dir, "node.yaml") + // Modeline-only file so MergeFileAsPatch is a no-op and the test stays + // focused on the loop semantics rather than patch behaviour. + if err := os.WriteFile(configFile, []byte("# talm: nodes=[\"a\",\"b\",\"c\"], templates=[\"templates/controlplane.yaml\"]\n"), 0o644); err != nil { + t.Fatalf("write configFile: %v", err) + } + + want := []string{"10.0.0.1", "10.0.0.2", "10.0.0.3"} + var renderCalls, applyCalls []string + + render := func(ctx context.Context, _ *client.Client, _ engine.Options) ([]byte, error) { + got := nodesFromOutgoingCtx(t, ctx) + if len(got) != 1 { + t.Errorf("render: expected single-node ctx, got %v", got) + } + renderCalls = append(renderCalls, got...) + return []byte("version: v1alpha1\nmachine:\n type: worker\n"), nil + } + apply := func(ctx context.Context, _ *client.Client, _ []byte) error { + got := nodesFromOutgoingCtx(t, ctx) + if len(got) != 1 { + t.Errorf("apply: expected single-node ctx, got %v", got) + } + applyCalls = append(applyCalls, got...) + return nil + } + + if err := applyTemplatesPerNode(engine.Options{}, configFile, want, fakeAuthOpenClient(context.Background()), render, apply); err != nil { + t.Fatalf("applyTemplatesPerNode: %v", err) + } + + if !slices.Equal(renderCalls, want) { + t.Errorf("render calls = %v, want %v", renderCalls, want) + } + if !slices.Equal(applyCalls, want) { + t.Errorf("apply calls = %v, want %v", applyCalls, want) + } +} + +// TestApplyTemplatesPerNode_NeverBatchesNodes guarantees no future tweak +// can revert applyTemplatesPerNode to batching all nodes into a single +// render call. Phrased against this helper directly, not against +// engine.Render, so the assertion stays meaningful even if the engine +// guard moves elsewhere. +func TestApplyTemplatesPerNode_NeverBatchesNodes(t *testing.T) { + dir := t.TempDir() + configFile := filepath.Join(dir, "node.yaml") + if err := os.WriteFile(configFile, []byte("# talm: nodes=[\"a\",\"b\"]\n"), 0o644); err != nil { + t.Fatalf("write configFile: %v", err) + } + + want := []string{"10.0.0.1", "10.0.0.2", "10.0.0.3"} + renderCount := 0 + applyCount := 0 + + render := func(ctx context.Context, _ *client.Client, _ engine.Options) ([]byte, error) { + got := nodesFromOutgoingCtx(t, ctx) + if len(got) > 1 { + t.Fatalf("render must NEVER see a multi-node ctx; got %v", got) + } + renderCount++ + return []byte("version: v1alpha1\nmachine:\n type: worker\n"), nil + } + apply := func(ctx context.Context, _ *client.Client, _ []byte) error { + got := nodesFromOutgoingCtx(t, ctx) + if len(got) > 1 { + t.Fatalf("apply must NEVER see a multi-node ctx; got %v", got) + } + applyCount++ + return nil + } + + if err := applyTemplatesPerNode(engine.Options{}, configFile, want, fakeAuthOpenClient(context.Background()), render, apply); err != nil { + t.Fatalf("applyTemplatesPerNode: %v", err) + } + if renderCount != len(want) { + t.Errorf("render call count = %d, want %d", renderCount, len(want)) + } + if applyCount != len(want) { + t.Errorf("apply call count = %d, want %d", applyCount, len(want)) + } +} + +// TestApplyTemplatesPerNode_MultiNodeWithNonEmptyBodyIsRejected pins +// the guard against stamping a single per-node body (pinned hostname, +// address, VIP) onto every target. A node file that targets more than +// one node and carries a body below its modeline is user error; the +// helper must surface it instead of silently replicating the pin. +func TestApplyTemplatesPerNode_MultiNodeWithNonEmptyBodyIsRejected(t *testing.T) { + dir := t.TempDir() + configFile := filepath.Join(dir, "node.yaml") + body := `# talm: nodes=["10.0.0.1","10.0.0.2"] +machine: + network: + hostname: node0 +` + if err := os.WriteFile(configFile, []byte(body), 0o644); err != nil { + t.Fatalf("write configFile: %v", err) + } + + render := func(_ context.Context, _ *client.Client, _ engine.Options) ([]byte, error) { + t.Fatal("render must not be called when the multi-node overlay guard trips") + return nil, nil + } + apply := func(_ context.Context, _ *client.Client, _ []byte) error { + t.Fatal("apply must not be called when the multi-node overlay guard trips") + return nil + } + + err := applyTemplatesPerNode(engine.Options{}, configFile, + []string{"10.0.0.1", "10.0.0.2"}, + fakeAuthOpenClient(context.Background()), render, apply) + if err == nil { + t.Fatal("expected an error for multi-node + non-empty body, got nil") + } + msg := err.Error() + for _, want := range []string{"node file", "2 nodes", "per-node body"} { + if !strings.Contains(msg, want) { + t.Errorf("error message %q does not mention %q", msg, want) + } + } +} + +// TestApplyTemplatesPerNode_MultiNodeEmptyBodyIsAllowed pins that the +// overlay guard does NOT trip on a modeline-only node file (the common +// bootstrap shape: one file drives the same template across N nodes +// without pinning any per-node field). +func TestApplyTemplatesPerNode_MultiNodeEmptyBodyIsAllowed(t *testing.T) { + dir := t.TempDir() + configFile := filepath.Join(dir, "node.yaml") + if err := os.WriteFile(configFile, []byte(`# talm: nodes=["10.0.0.1","10.0.0.2"]`+"\n"), 0o644); err != nil { + t.Fatalf("write configFile: %v", err) + } + + renderCount := 0 + render := func(_ context.Context, _ *client.Client, _ engine.Options) ([]byte, error) { + renderCount++ + return []byte("version: v1alpha1\nmachine:\n type: worker\n"), nil + } + applyCount := 0 + apply := func(_ context.Context, _ *client.Client, _ []byte) error { + applyCount++ + return nil + } + + if err := applyTemplatesPerNode(engine.Options{}, configFile, + []string{"10.0.0.1", "10.0.0.2"}, + fakeAuthOpenClient(context.Background()), render, apply); err != nil { + t.Fatalf("applyTemplatesPerNode: %v", err) + } + if renderCount != 2 || applyCount != 2 { + t.Errorf("render=%d apply=%d, want 2 each", renderCount, applyCount) + } +} + +// TestApplyTemplatesPerNode_NoNodesIsAnError guards against silently +// short-circuiting when GlobalArgs.Nodes is empty. The previous structure +// would happily run zero iterations — this pin makes it loud. +func TestApplyTemplatesPerNode_NoNodesIsAnError(t *testing.T) { + dir := t.TempDir() + configFile := filepath.Join(dir, "node.yaml") + if err := os.WriteFile(configFile, []byte("# talm: nodes=[]\n"), 0o644); err != nil { + t.Fatalf("write configFile: %v", err) + } + + render := func(_ context.Context, _ *client.Client, _ engine.Options) ([]byte, error) { + t.Fatal("render must not be called when there are zero nodes") + return nil, nil + } + apply := func(_ context.Context, _ *client.Client, _ []byte) error { + t.Fatal("apply must not be called when there are zero nodes") + return nil + } + + err := applyTemplatesPerNode(engine.Options{}, configFile, nil, fakeAuthOpenClient(context.Background()), render, apply) + if err == nil { + t.Fatal("expected an error for empty nodes list, got nil") + } + // The error must point the user at the concrete ways to set nodes + // so the message survives a cosmetic reword but catches a regression + // that drops the guidance entirely. + msg := err.Error() + for _, want := range []string{"nodes", "--nodes"} { + if !strings.Contains(msg, want) { + t.Errorf("error message %q does not mention %q", msg, want) + } + } +} + +// TestApplyTemplatesPerNode_MaintenanceModeOpensFreshClientPerNode pins +// the contract for the insecure (maintenance) mode opener: every node +// must trigger a fresh openClient invocation so each iteration has its +// own single-endpoint client. Sharing one maintenance client across +// endpoints sends ApplyConfiguration to a gRPC-balanced endpoint and +// most nodes never receive the config — the production opener +// (openClientPerNodeMaintenance) avoids this by narrowing +// GlobalArgs.Nodes around each WithClientMaintenance call. Recording +// fake here proves the per-iteration shape; the narrow-and-restore +// behaviour of the real opener is checked by +// TestOpenClientPerNodeMaintenance_NarrowsAndRestoresGlobalNodes. +func TestApplyTemplatesPerNode_MaintenanceModeOpensFreshClientPerNode(t *testing.T) { + dir := t.TempDir() + configFile := filepath.Join(dir, "node.yaml") + if err := os.WriteFile(configFile, []byte("# talm: nodes=[\"a\",\"b\"]\n"), 0o644); err != nil { + t.Fatalf("write configFile: %v", err) + } + + want := []string{"10.0.0.1", "10.0.0.2"} + var clientOpenedFor []string + + openClient := func(node string, action func(ctx context.Context, c *client.Client) error) error { + // Record that openClient was called for this specific node — i.e. the + // maintenance loop creates a separate client per node rather than + // reusing one pinned to all endpoints. + clientOpenedFor = append(clientOpenedFor, node) + // Real production hands the inner action a fresh maintenance client + // pinned to one endpoint; the fake just runs it with a clean ctx. + return action(context.Background(), nil) + } + render := func(_ context.Context, _ *client.Client, _ engine.Options) ([]byte, error) { + return []byte("version: v1alpha1\nmachine:\n type: worker\n"), nil + } + apply := func(_ context.Context, _ *client.Client, _ []byte) error { + return nil + } + + if err := applyTemplatesPerNode(engine.Options{}, configFile, want, openClient, render, apply); err != nil { + t.Fatalf("applyTemplatesPerNode: %v", err) + } + if !slices.Equal(clientOpenedFor, want) { + t.Errorf("openClient calls = %v, want %v (one per node)", clientOpenedFor, want) + } +} + +// TestOpenClientPerNodeMaintenance_NarrowsAndRestoresGlobalNodes drives +// the real openClientPerNodeMaintenance with an injected +// maintenanceClientFunc fake. The fake captures GlobalArgs.Nodes at the +// moment a real WithClientMaintenance would have read it for endpoint +// resolution. The contract: every iteration narrows GlobalArgs.Nodes to +// exactly the iteration's node, and the prior value is restored after +// the action returns regardless of success. Without the narrowing, the +// real WithClientMaintenance would dial every endpoint at once and gRPC +// would round-robin ApplyConfiguration across them. +func TestOpenClientPerNodeMaintenance_NarrowsAndRestoresGlobalNodes(t *testing.T) { + saved := append([]string(nil), GlobalArgs.Nodes...) + defer func() { GlobalArgs.Nodes = saved }() + + GlobalArgs.Nodes = []string{"original-A", "original-B"} + + type call struct { + fingerprints []string + nodesAtCall []string + } + var calls []call + + fakeMaintenance := func(fingerprints []string, action func(ctx context.Context, c *client.Client) error) error { + // WithClientMaintenance reads GlobalArgs.Nodes for its endpoints + // at this point. Snapshot the value so the test can inspect it. + calls = append(calls, call{ + fingerprints: append([]string(nil), fingerprints...), + nodesAtCall: append([]string(nil), GlobalArgs.Nodes...), + }) + return action(context.Background(), nil) + } + + openClient := openClientPerNodeMaintenance([]string{"fp-1"}, fakeMaintenance) + + for _, node := range []string{"10.0.0.1", "10.0.0.2"} { + if err := openClient(node, func(_ context.Context, _ *client.Client) error { return nil }); err != nil { + t.Fatalf("openClient(%q): %v", node, err) + } + } + + if !slices.Equal(GlobalArgs.Nodes, []string{"original-A", "original-B"}) { + t.Errorf("GlobalArgs.Nodes not restored after maintenance loop: got %v", GlobalArgs.Nodes) + } + if len(calls) != 2 { + t.Fatalf("maintenance fake should have been called twice, got %d times", len(calls)) + } + for i, want := range []string{"10.0.0.1", "10.0.0.2"} { + if !slices.Equal(calls[i].nodesAtCall, []string{want}) { + t.Errorf("call %d: GlobalArgs.Nodes at WithClientMaintenance time = %v, want [%q]", i, calls[i].nodesAtCall, want) + } + if !slices.Equal(calls[i].fingerprints, []string{"fp-1"}) { + t.Errorf("call %d: fingerprints passed through = %v, want [\"fp-1\"]", i, calls[i].fingerprints) + } + } +} + +// TestApplyTemplatesPerNode_AuthModeUsesSingleNodeMetadataKey pins the +// gRPC metadata key the auth-mode opener writes. WithNode sets "node" +// (single-target proxy); WithNodes sets "nodes" (apid aggregation). +// engine.Render's FailIfMultiNodes guard treats len("nodes") > 1 as the +// multi-node case, so single-target metadata under "node" passes +// trivially. A future refactor that swaps WithNode back to WithNodes +// would slip past nodesFromOutgoingCtx (which reads either key) — this +// assertion catches that regression directly. +func TestApplyTemplatesPerNode_AuthModeUsesSingleNodeMetadataKey(t *testing.T) { + dir := t.TempDir() + configFile := filepath.Join(dir, "node.yaml") + if err := os.WriteFile(configFile, []byte("# talm: nodes=[\"a\"]\n"), 0o644); err != nil { + t.Fatalf("write configFile: %v", err) + } + + const node = "10.0.0.1" + render := func(ctx context.Context, _ *client.Client, _ engine.Options) ([]byte, error) { + md, ok := metadata.FromOutgoingContext(ctx) + if !ok { + t.Fatal("expected outgoing metadata on per-iteration ctx") + } + if got := md.Get("node"); !slices.Equal(got, []string{node}) { + t.Errorf(`metadata key "node" = %v, want [%q]`, got, node) + } + if got := md.Get("nodes"); len(got) != 0 { + t.Errorf(`metadata key "nodes" must be unset for single-target apply, got %v`, got) + } + return []byte("version: v1alpha1\nmachine:\n type: worker\n"), nil + } + apply := func(_ context.Context, _ *client.Client, _ []byte) error { return nil } + + openClient := openClientPerNodeAuth(context.Background(), nil) + if err := applyTemplatesPerNode(engine.Options{}, configFile, []string{node}, openClient, render, apply); err != nil { + t.Fatalf("applyTemplatesPerNode: %v", err) + } +} + +// TestTemplateAndApplyDiverge_NodeBodyOverlayLimitation pins a known +// trade-off: `talm apply -f node.yaml` overlays the node file body on the +// rendered template before sending the result to ApplyConfiguration, but +// `talm template -f node.yaml` does NOT — its output is the raw rendered +// template plus the modeline and AUTOGENERATED-warning comment. Applying +// the same overlay in template would route the output through the Talos +// config-patcher, which strips every YAML comment (including the modeline) +// and reorders keys. Subsequent commands that read the file back would +// lose the modeline metadata. +// +// The test asserts the divergence is real and bounded: rendered + modeline +// passes through template untouched while the apply path produces a +// merged result. Without this pin, a future "make template match apply" +// patch will silently break the template subcommand. +func TestTemplateAndApplyDiverge_NodeBodyOverlayLimitation(t *testing.T) { + const rendered = `version: v1alpha1 +machine: + type: controlplane + network: + hostname: talos-abcde +` + dir := t.TempDir() + nodeFile := filepath.Join(dir, "node.yaml") + const nodeBody = `# talm: nodes=["10.0.0.1"] +machine: + network: + hostname: node0 +` + if err := os.WriteFile(nodeFile, []byte(nodeBody), 0o644); err != nil { + t.Fatalf("write node file: %v", err) + } + + // apply path: renders, then overlays. + merged, err := engine.MergeFileAsPatch([]byte(rendered), nodeFile) + if err != nil { + t.Fatalf("MergeFileAsPatch: %v", err) + } + if !strings.Contains(string(merged), "hostname: node0") { + t.Errorf("apply path must overlay hostname: node0; got:\n%s", string(merged)) + } + + // The template subcommand's no-overlay invariant is structural + // rather than runtime: pkg/commands/template.go does not call + // engine.MergeFileAsPatch, and a human-facing `talm template` + // output going through the patcher would drop every YAML comment + // (including the modeline). A runtime assertion here cannot pin + // that — any block that reuses the `rendered` constant is + // tautological — so the guard lives in the template code path + // itself. The modeline round-trip tests in pkg/modeline surface a + // regression that would wire MergeFileAsPatch into generateOutput. +} diff --git a/pkg/commands/template.go b/pkg/commands/template.go index 71c5cb5..6b1633c 100644 --- a/pkg/commands/template.go +++ b/pkg/commands/template.go @@ -274,7 +274,6 @@ func generateOutput(ctx context.Context, c *client.Client, args []string) (strin } opts := engine.Options{ - Insecure: templateCmdFlags.insecure, ValueFiles: templateCmdFlags.valueFiles, StringValues: templateCmdFlags.stringValues, Values: templateCmdFlags.values, @@ -289,6 +288,7 @@ func generateOutput(ctx context.Context, c *client.Client, args []string) (strin Offline: templateCmdFlags.offline, KubernetesVersion: templateCmdFlags.kubernetesVersion, TemplateFiles: resolvedTemplateFiles, + CommandName: "talm template", } result, err := engine.Render(ctx, c, opts) diff --git a/pkg/engine/engine.go b/pkg/engine/engine.go index fcf87d5..ce95f78 100644 --- a/pkg/engine/engine.go +++ b/pkg/engine/engine.go @@ -40,7 +40,6 @@ import ( // Options encapsulates all parameters necessary for rendering. type Options struct { - Insecure bool ValueFiles []string StringValues []string Values []string @@ -57,6 +56,9 @@ type Options struct { TemplateFiles []string ClusterName string Endpoint string + // CommandName names the caller subcommand for error messages such as + // the one produced by FailIfMultiNodes. Empty value falls back to "talm". + CommandName string } // NormalizeTemplatePath converts OS-specific path separators to forward slash. @@ -202,6 +204,80 @@ func SerializeConfiguration(configBundle *bundle.Bundle, machineType machine.Typ return configBundle.Serialize(encoder.CommentsDisabled, machineType) } +// MergeFileAsPatch overlays the YAML body of patchFile onto rendered using +// Talos's strategic-merge config patcher. +// +// patchFile is a node file: its first line is the talm modeline (a YAML +// comment) followed by an arbitrary Talos config patch (typically machine.* +// fields the user wants pinned per node). When the file contains only the +// modeline (or is otherwise empty after stripping comments and whitespace) +// the function returns rendered unchanged — short-circuiting Talos's +// configpatcher which would otherwise route the empty patch through +// JSON6902 and reject any multi-document rendered config (the v1.12+ output +// format) outright. +// +// Note that for non-empty patches the patcher round-trips rendered through +// its config loader, normalising YAML formatting and dropping comments. +// This is acceptable for the apply path (the result goes straight to +// ApplyConfiguration) but unsuitable for human-facing output such as +// `talm template` — which is why the template subcommand does not call +// this helper. +func MergeFileAsPatch(rendered []byte, patchFile string) ([]byte, error) { + patchBytes, err := os.ReadFile(patchFile) + if err != nil { + return nil, fmt.Errorf("reading patch %s: %w", patchFile, err) + } + if isEffectivelyEmptyYAML(patchBytes) { + return rendered, nil + } + patch, err := configpatcher.LoadPatch(patchBytes) + if err != nil { + return nil, fmt.Errorf("loading patch from %s: %w", patchFile, err) + } + out, err := configpatcher.Apply(configpatcher.WithBytes(rendered), []configpatcher.Patch{patch}) + if err != nil { + return nil, fmt.Errorf("applying patch from %s: %w", patchFile, err) + } + merged, err := out.Bytes() + if err != nil { + return nil, fmt.Errorf("encoding merged config from %s: %w", patchFile, err) + } + return merged, nil +} + +// NodeFileHasOverlay reports whether a node file carries a non-empty +// per-node body below its modeline. The apply path uses this to reject +// multi-node node files that would otherwise stamp the same pinned +// hostname/address/VIP onto every target. +func NodeFileHasOverlay(patchFile string) (bool, error) { + data, err := os.ReadFile(patchFile) + if err != nil { + return false, fmt.Errorf("reading node file %s: %w", patchFile, err) + } + return !isEffectivelyEmptyYAML(data), nil +} + +// isEffectivelyEmptyYAML reports whether the input contains nothing but +// YAML comments, document separators, and whitespace. Used by +// MergeFileAsPatch to detect modeline-only node files that the Talos +// config-patcher misclassifies as empty JSON6902 patches. +func isEffectivelyEmptyYAML(data []byte) bool { + for _, line := range bytes.Split(data, []byte("\n")) { + trimmed := bytes.TrimSpace(line) + if len(trimmed) == 0 { + continue + } + if trimmed[0] == '#' { + continue + } + if string(trimmed) == "---" || string(trimmed) == "..." { + continue + } + return false + } + return true +} + // Render executes the rendering of templates based on the provided options. func Render(ctx context.Context, c *client.Client, opts Options) ([]byte, error) { @@ -216,7 +292,11 @@ func Render(ctx context.Context, c *client.Client, opts Options) ([]byte, error) // Gather facts and enable lookup options if !opts.Offline { - if err := helpers.FailIfMultiNodes(ctx, "talm template"); err != nil { + cmdName := opts.CommandName + if cmdName == "" { + cmdName = "talm" + } + if err := helpers.FailIfMultiNodes(ctx, cmdName); err != nil { return nil, err } helmEngine.LookupFunc = newLookupFunction(ctx, c) diff --git a/pkg/engine/render_test.go b/pkg/engine/render_test.go index cb06f79..ecf7876 100644 --- a/pkg/engine/render_test.go +++ b/pkg/engine/render_test.go @@ -24,6 +24,7 @@ import ( "testing" helmEngine "github.com/cozystack/talm/pkg/engine/helm" + "github.com/siderolabs/talos/pkg/machinery/client" "helm.sh/helm/v3/pkg/chart/loader" "helm.sh/helm/v3/pkg/chartutil" ) @@ -930,6 +931,347 @@ func TestMultiDocGeneric_VlanOnBondTopology(t *testing.T) { assertNotContains(t, result, "kind: LinkConfig") } +// TestMergeFileAsPatch pins the contract for the apply-side overlay of a +// node file's body on top of a rendered template: +// +// - When the node file has Talos config in addition to the modeline, +// fields from the file must overlay the rendered template (custom +// hostname wins over the auto-generated one; secondary interfaces and +// VIPs declared in the file appear in the merged output). +// - When the node file is just a modeline, an empty file, or comments +// and document separators with no body, the merge must be a true +// byte-for-byte identity over rendered. The Talos config-patcher +// misclassifies such inputs as empty JSON6902 patches, which then +// refuses any multi-document machine config — the v1.12+ default +// output format. The empty-detection short-circuit avoids that. +func TestMergeFileAsPatch(t *testing.T) { + const renderedTemplate = `version: v1alpha1 +debug: false +machine: + type: controlplane + install: + disk: /dev/sda + network: + hostname: talos-abcde + interfaces: + - interface: ens3 + addresses: + - 10.0.0.1/24 + routes: + - network: 0.0.0.0/0 + gateway: 10.0.0.254 +cluster: + controlPlane: + endpoint: https://10.0.0.10:6443 + network: + podSubnets: + - 10.244.0.0/16 + serviceSubnets: + - 10.96.0.0/16 +` + + t.Run("overlays hostname and adds secondary interface", func(t *testing.T) { + dir := t.TempDir() + nodeFile := filepath.Join(dir, "node0.yaml") + // First line is the modeline (a YAML comment); the body is a + // strategic merge patch. + const nodeBody = `# talm: nodes=["10.0.0.1"], endpoints=["10.0.0.1"], templates=["templates/controlplane.yaml"] +machine: + network: + hostname: node0 + interfaces: + - interface: ens3 + addresses: + - 10.0.0.1/24 + routes: + - network: 0.0.0.0/0 + gateway: 10.0.0.254 + - deviceSelector: + hardwareAddr: "02:00:17:02:55:aa" + addresses: + - 10.0.100.11/24 + vip: + ip: 10.0.100.10 +` + if err := os.WriteFile(nodeFile, []byte(nodeBody), 0o644); err != nil { + t.Fatalf("write node file: %v", err) + } + + merged, err := MergeFileAsPatch([]byte(renderedTemplate), nodeFile) + if err != nil { + t.Fatalf("MergeFileAsPatch: %v", err) + } + + out := string(merged) + if !strings.Contains(out, "hostname: node0") { + t.Errorf("merged output missing custom hostname 'node0':\n%s", out) + } + if strings.Contains(out, "hostname: talos-abcde") { + t.Errorf("merged output still contains template hostname 'talos-abcde':\n%s", out) + } + if !strings.Contains(out, "02:00:17:02:55:aa") { + t.Errorf("merged output missing deviceSelector secondary interface:\n%s", out) + } + if !strings.Contains(out, "10.0.100.10") { + t.Errorf("merged output missing VIP from node file:\n%s", out) + } + }) + + t.Run("modeline-only file is a true byte-identity no-op", func(t *testing.T) { + dir := t.TempDir() + nodeFile := filepath.Join(dir, "node0.yaml") + if err := os.WriteFile(nodeFile, []byte("# talm: nodes=[\"10.0.0.1\"], templates=[\"templates/controlplane.yaml\"]\n"), 0o644); err != nil { + t.Fatalf("write node file: %v", err) + } + + merged, err := MergeFileAsPatch([]byte(renderedTemplate), nodeFile) + if err != nil { + t.Fatalf("MergeFileAsPatch: %v", err) + } + + // Modeline-only node files must short-circuit before the Talos + // config-patcher round-trip — the patcher would otherwise reformat + // YAML, drop comments, and (worse) reject multi-document rendered + // configs via JSON6902. Identity is the contract. + if string(merged) != renderedTemplate { + t.Errorf("modeline-only merge must return rendered byte-for-byte, got diff:\n%s", string(merged)) + } + }) + + t.Run("modeline-only file does not break multi-doc rendered (Talos v1.12+)", func(t *testing.T) { + // Default Talos v1.12+ output format is multi-document. A + // modeline-only node file used to route through + // configpatcher.Apply → JSON6902, which refuses multi-document + // machine configs with: `JSON6902 patches are not supported for + // multi-document machine configuration`. The empty-content + // short-circuit must keep this case working. + const multiDocRendered = `version: v1alpha1 +machine: + type: controlplane +--- +apiVersion: v1alpha1 +kind: HostnameConfig +hostname: talos-abcde +--- +apiVersion: v1alpha1 +kind: LinkConfig +name: ens3 +addresses: + - address: 10.0.0.1/24 +` + dir := t.TempDir() + nodeFile := filepath.Join(dir, "node0.yaml") + if err := os.WriteFile(nodeFile, []byte("# talm: nodes=[\"10.0.0.1\"]\n"), 0o644); err != nil { + t.Fatalf("write node file: %v", err) + } + + merged, err := MergeFileAsPatch([]byte(multiDocRendered), nodeFile) + if err != nil { + t.Fatalf("multi-doc + modeline-only patch must not error, got: %v", err) + } + if string(merged) != multiDocRendered { + t.Errorf("multi-doc + modeline-only merge must return rendered byte-for-byte, got:\n%s", string(merged)) + } + }) + + t.Run("multi-doc rendered + non-empty patch overlays the legacy machine doc", func(t *testing.T) { + // The realistic Talos v1.12+ apply scenario: a multi-document + // rendered config (legacy `machine:`/`cluster:` doc plus typed + // HostnameConfig / LinkConfig docs) plus a node file that pins + // hostname/network on the legacy machine doc. The strategic-merge + // patcher must accept the multi-doc input, apply the patch to the + // legacy machine doc, and leave the typed sibling docs intact. + const multiDocRendered = `version: v1alpha1 +machine: + type: controlplane + install: + disk: /dev/sda + network: + hostname: talos-abcde +cluster: + controlPlane: + endpoint: https://10.0.0.10:6443 +--- +apiVersion: v1alpha1 +kind: HostnameConfig +hostname: talos-abcde +--- +apiVersion: v1alpha1 +kind: LinkConfig +name: ens3 +addresses: + - address: 10.0.0.1/24 +` + dir := t.TempDir() + nodeFile := filepath.Join(dir, "node0.yaml") + const patchBody = `# talm: nodes=["10.0.0.1"] +machine: + network: + hostname: node0 + interfaces: + - deviceSelector: + hardwareAddr: "02:00:17:02:55:aa" + addresses: + - 10.0.100.11/24 +` + if err := os.WriteFile(nodeFile, []byte(patchBody), 0o644); err != nil { + t.Fatalf("write node file: %v", err) + } + + merged, err := MergeFileAsPatch([]byte(multiDocRendered), nodeFile) + if err != nil { + t.Fatalf("multi-doc + non-empty patch must not error, got: %v", err) + } + + out := string(merged) + if !strings.Contains(out, "hostname: node0") { + t.Errorf("merged output must overlay machine.network.hostname with node0:\n%s", out) + } + if !strings.Contains(out, "02:00:17:02:55:aa") { + t.Errorf("merged output must include node-file deviceSelector:\n%s", out) + } + // The sibling typed documents must not be dropped by the merge. + if !strings.Contains(out, "kind: LinkConfig") { + t.Errorf("merged output must preserve sibling LinkConfig document:\n%s", out) + } + if !strings.Contains(out, "name: ens3") { + t.Errorf("merged output must preserve LinkConfig name field:\n%s", out) + } + }) + + t.Run("empty file is also a no-op", func(t *testing.T) { + dir := t.TempDir() + nodeFile := filepath.Join(dir, "empty.yaml") + if err := os.WriteFile(nodeFile, []byte(""), 0o644); err != nil { + t.Fatalf("write node file: %v", err) + } + merged, err := MergeFileAsPatch([]byte(renderedTemplate), nodeFile) + if err != nil { + t.Fatalf("MergeFileAsPatch on empty file: %v", err) + } + if string(merged) != renderedTemplate { + t.Errorf("empty patch must round-trip rendered byte-for-byte") + } + }) + + t.Run("comments-and-separators-only file is a no-op", func(t *testing.T) { + dir := t.TempDir() + nodeFile := filepath.Join(dir, "comments.yaml") + if err := os.WriteFile(nodeFile, []byte("# top\n---\n# middle\n \n---\n# bottom\n"), 0o644); err != nil { + t.Fatalf("write node file: %v", err) + } + merged, err := MergeFileAsPatch([]byte(renderedTemplate), nodeFile) + if err != nil { + t.Fatalf("MergeFileAsPatch on comments-only file: %v", err) + } + if string(merged) != renderedTemplate { + t.Errorf("comments-only patch must round-trip rendered byte-for-byte") + } + }) +} + +// TestNodeFileHasOverlay pins the classifier used by the apply path to +// decide whether a multi-node modeline would replay a per-node body +// onto every target. Modeline-only and comments-only files must +// classify as no-overlay; any real YAML key must count as an overlay. +func TestNodeFileHasOverlay(t *testing.T) { + tests := []struct { + name string + content string + want bool + }{ + { + name: "modeline only", + content: `# talm: nodes=["a","b"]` + "\n", + want: false, + }, + { + name: "empty file", + content: "", + want: false, + }, + { + name: "comments and separators", + content: "# top\n---\n# middle\n \n---\n# bottom\n", + want: false, + }, + { + name: "real yaml body", + content: `# talm: nodes=["a"] +machine: + network: + hostname: node0 +`, + want: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "node.yaml") + if err := os.WriteFile(path, []byte(tt.content), 0o644); err != nil { + t.Fatalf("write: %v", err) + } + got, err := NodeFileHasOverlay(path) + if err != nil { + t.Fatalf("NodeFileHasOverlay: %v", err) + } + if got != tt.want { + t.Errorf("NodeFileHasOverlay = %v, want %v", got, tt.want) + } + }) + } +} + +// TestRenderFailIfMultiNodes_UsesCommandName guards the contract for +// Options.CommandName: the multi-node rejection error must reference the +// calling subcommand the caller passed in, never the historical +// hardcoded literal that pre-dated this option. An empty value falls back +// to the neutral "talm". +func TestRenderFailIfMultiNodes_UsesCommandName(t *testing.T) { + tests := []struct { + name string + commandName string + wantInError string + }{ + {"talm apply", "talm apply", "talm apply"}, + {"talm template", "talm template", "talm template"}, + {"empty falls back to talm", "", "talm"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ctx := client.WithNodes(context.Background(), "10.0.0.1", "10.0.0.2") + opts := Options{ + Offline: false, + CommandName: tt.commandName, + } + _, err := Render(ctx, nil, opts) + if err == nil { + t.Fatalf("Render expected an error, got nil") + } + if !strings.Contains(err.Error(), tt.wantInError) { + t.Errorf("error = %q, expected to contain %q", err.Error(), tt.wantInError) + } + }) + } + + t.Run("non-CommandName subcommand names must not leak into the error", func(t *testing.T) { + // If a caller passes "talm apply", the error must not carry any + // other subcommand name — historically the call site here emitted + // "talm template" unconditionally. + ctx := client.WithNodes(context.Background(), "10.0.0.1", "10.0.0.2") + opts := Options{Offline: false, CommandName: "talm apply"} + _, err := Render(ctx, nil, opts) + if err == nil { + t.Fatal("Render expected an error, got nil") + } + if strings.Contains(err.Error(), "talm template") { + t.Errorf("error must not mention 'talm template' when CommandName is 'talm apply'; got %q", err.Error()) + } + }) +} + // TestRenderInvalidTalosVersion verifies that malformed TalosVersion values // surface a user-friendly error before template rendering, instead of the // opaque "error calling semverCompare: invalid semantic version" that escapes