-
Notifications
You must be signed in to change notification settings - Fork 23
fix(commands): apply batch — multi-node, node-file patch merge, error handling #128
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
84dcf99
8c0148c
2a3dcaa
03f1bfb
c4b5a5d
1164b5e
dcb214c
b7df415
405b5ca
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -113,32 +113,40 @@ func apply(args []string) error { | |||||
| withSecretsPath := ResolveSecretsPath(applyCmdFlags.withSecrets) | ||||||
|
|
||||||
| if len(modelineTemplates) > 0 { | ||||||
| // Template rendering path: connect to the node first, render templates | ||||||
| // online (so lookup() functions resolve real discovery data), then apply. | ||||||
| // Template rendering path: render templates online per node and | ||||||
| // apply the rendered config plus the node file overlay. See | ||||||
| // applyTemplatesPerNode for why the loop is mandatory. | ||||||
| opts := buildApplyRenderOptions(modelineTemplates, withSecretsPath) | ||||||
| nodes := append([]string(nil), GlobalArgs.Nodes...) | ||||||
| fmt.Printf("- talm: file=%s, nodes=%s, endpoints=%s\n", configFile, GlobalArgs.Nodes, GlobalArgs.Endpoints) | ||||||
|
|
||||||
| err = withApplyClient(func(ctx context.Context, c *client.Client) error { | ||||||
| fmt.Printf("- talm: file=%s, nodes=%s, endpoints=%s\n", configFile, GlobalArgs.Nodes, GlobalArgs.Endpoints) | ||||||
|
|
||||||
| result, err := engine.Render(ctx, c, opts) | ||||||
| if err != nil { | ||||||
| return fmt.Errorf("template rendering error: %w", err) | ||||||
| } | ||||||
|
|
||||||
| applyClosure := func(ctx context.Context, c *client.Client, data []byte) error { | ||||||
| resp, err := c.ApplyConfiguration(ctx, &machineapi.ApplyConfigurationRequest{ | ||||||
| Data: result, | ||||||
| Data: data, | ||||||
| Mode: applyCmdFlags.Mode.Mode, | ||||||
| DryRun: applyCmdFlags.dryRun, | ||||||
| TryModeTimeout: durationpb.New(applyCmdFlags.configTryTimeout), | ||||||
| }) | ||||||
| if err != nil { | ||||||
| return fmt.Errorf("error applying new configuration: %w", err) | ||||||
| } | ||||||
|
|
||||||
| helpers.PrintApplyResults(resp) | ||||||
|
|
||||||
| return nil | ||||||
| }) | ||||||
| } | ||||||
|
|
||||||
| if applyCmdFlags.insecure { | ||||||
| openClient := openClientPerNodeMaintenance(applyCmdFlags.certFingerprints, WithClientMaintenance) | ||||||
| if err := applyTemplatesPerNode(opts, configFile, nodes, openClient, engine.Render, applyClosure); err != nil { | ||||||
| return err | ||||||
| } | ||||||
| } else { | ||||||
| if err := withApplyClientBare(func(parentCtx context.Context, c *client.Client) error { | ||||||
| openClient := openClientPerNodeAuth(parentCtx, c) | ||||||
|
Comment on lines
+143
to
+144
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In the authenticated template rendering path, the list of nodes is captured from if err := withApplyClientBare(func(parentCtx context.Context, c *client.Client) error {
if len(nodes) == 0 {
if configContext := c.GetConfigContext(); configContext != nil {
nodes = configContext.Nodes
}
}
openClient := openClientPerNodeAuth(parentCtx, c) |
||||||
| return applyTemplatesPerNode(opts, configFile, nodes, openClient, engine.Render, applyClosure) | ||||||
| }); err != nil { | ||||||
| return err | ||||||
| } | ||||||
| } | ||||||
| } else { | ||||||
| // Direct patch path: apply config file as patch against empty bundle | ||||||
| opts := buildApplyPatchOptions(withSecretsPath) | ||||||
|
|
@@ -173,9 +181,6 @@ func apply(args []string) error { | |||||
| return err | ||||||
| } | ||||||
| } | ||||||
| if err != nil { | ||||||
| return err | ||||||
| } | ||||||
|
|
||||||
| // Reset args | ||||||
| if !applyCmdFlags.nodesFromArgs { | ||||||
|
|
@@ -188,23 +193,131 @@ func apply(args []string) error { | |||||
| return nil | ||||||
| } | ||||||
|
|
||||||
| // withApplyClient creates a Talos client appropriate for the current apply mode | ||||||
| // and invokes the given action with it. | ||||||
| // withApplyClient creates a Talos client appropriate for the current apply | ||||||
| // mode and invokes the given action with it. The action receives a context | ||||||
| // in which gRPC node metadata is set to the resolved node list — either | ||||||
| // GlobalArgs.Nodes (when set) or the talosconfig context's Nodes (when not). | ||||||
| // Used by the direct-patch branch where multi-node fan-out happens at the | ||||||
| // gRPC layer inside ApplyConfiguration. | ||||||
| func withApplyClient(f func(ctx context.Context, c *client.Client) error) error { | ||||||
| return withApplyClientBare(wrapWithNodeContext(f)) | ||||||
| } | ||||||
|
|
||||||
| // withApplyClientBare connects to Talos for the current apply mode but does | ||||||
| // NOT inject node metadata into the context — leaving that decision to the | ||||||
| // caller. Used by the template-rendering path (see applyTemplatesPerNode for | ||||||
| // the rationale). | ||||||
| func withApplyClientBare(f func(ctx context.Context, c *client.Client) error) error { | ||||||
| if applyCmdFlags.insecure { | ||||||
| // Maintenance mode connects directly to the node IP without talosconfig; | ||||||
| // node context injection is not needed — the maintenance client handles | ||||||
| // node targeting internally via GlobalArgs.Nodes. | ||||||
| // Maintenance mode reads its endpoints directly from | ||||||
| // GlobalArgs.Nodes — gRPC node metadata is not consulted. | ||||||
| return WithClientMaintenance(applyCmdFlags.certFingerprints, f) | ||||||
| } | ||||||
|
|
||||||
| wrappedF := wrapWithNodeContext(f) | ||||||
|
|
||||||
| if GlobalArgs.SkipVerify { | ||||||
| return WithClientSkipVerify(wrappedF) | ||||||
| return WithClientSkipVerify(f) | ||||||
| } | ||||||
|
|
||||||
| return WithClientNoNodes(f) | ||||||
| } | ||||||
|
|
||||||
| // renderFunc, applyFunc and openClientFunc are injection points for | ||||||
| // applyTemplatesPerNode so unit tests can drive the loop with fakes instead | ||||||
| // of a real Talos client. | ||||||
| type renderFunc func(ctx context.Context, c *client.Client, opts engine.Options) ([]byte, error) | ||||||
| type applyFunc func(ctx context.Context, c *client.Client, data []byte) error | ||||||
|
|
||||||
| // openClientFunc opens a Talos client suitable for a single node and runs | ||||||
| // action with it. Authenticated mode reuses one parent client and rotates | ||||||
| // the node via single-target gRPC metadata (client.WithNode); insecure | ||||||
| // (maintenance) mode opens a fresh single-endpoint client per node because | ||||||
| // Talos's maintenance client ignores node metadata in the context and | ||||||
| // round-robins between its configured endpoints. | ||||||
| type openClientFunc func(node string, action func(ctx context.Context, c *client.Client) error) error | ||||||
|
|
||||||
| // applyTemplatesPerNode runs render → MergeFileAsPatch → apply once per | ||||||
| // node. Two reasons it has to iterate: | ||||||
| // | ||||||
| // - engine.Render's FailIfMultiNodes guard rejects a context that carries | ||||||
| // more than one node, so the auth-mode caller has to attach a single | ||||||
| // node per iteration — and discovery via lookup() should resolve each | ||||||
| // node's own topology in any case. | ||||||
| // - In insecure (maintenance) mode the client connects directly to a | ||||||
| // Talos node and ignores nodes-in-context entirely, so each node needs | ||||||
| // its own client; otherwise gRPC round-robins ApplyConfiguration | ||||||
| // across the endpoint list and most nodes never see the config. | ||||||
| // | ||||||
| // Both modes share this loop via openClient. | ||||||
| func applyTemplatesPerNode( | ||||||
| opts engine.Options, | ||||||
| configFile string, | ||||||
| nodes []string, | ||||||
| openClient openClientFunc, | ||||||
| render renderFunc, | ||||||
| apply applyFunc, | ||||||
| ) error { | ||||||
| if len(nodes) == 0 { | ||||||
| return fmt.Errorf("no nodes specified for template-rendering apply") | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The error message when no nodes are specified is less descriptive than the one used in the
Suggested change
|
||||||
| } | ||||||
| for _, node := range nodes { | ||||||
| if err := openClient(node, func(ctx context.Context, c *client.Client) error { | ||||||
| return renderMergeAndApply(ctx, c, opts, configFile, render, apply) | ||||||
| }); err != nil { | ||||||
|
Comment on lines
+251
to
+265
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Don't replay the same node-body overlay across a multi-node modeline.
Also applies to: 311-320 🤖 Prompt for AI Agents |
||||||
| return fmt.Errorf("node %s: %w", node, err) | ||||||
| } | ||||||
| } | ||||||
| return nil | ||||||
| } | ||||||
|
|
||||||
| // maintenanceClientFunc is the contract WithClientMaintenance satisfies in | ||||||
| // production and a fake satisfies in tests. Injection lets the unit tests | ||||||
| // run the real openClientPerNodeMaintenance body without dialing a Talos | ||||||
| // node. | ||||||
| type maintenanceClientFunc func(fingerprints []string, action func(ctx context.Context, c *client.Client) error) error | ||||||
|
|
||||||
| // openClientPerNodeMaintenance returns an openClientFunc that opens a | ||||||
| // fresh single-endpoint maintenance client per node. Multi-node insecure | ||||||
| // apply (first bootstrap of a multi-node cluster) needs this because | ||||||
| // WithClientMaintenance creates a client with all endpoints and gRPC then | ||||||
| // round-robins ApplyConfiguration across them — most nodes never see the | ||||||
| // config. Narrowing GlobalArgs.Nodes to the current iteration's node and | ||||||
| // restoring it via defer keeps the wrapper's signature unchanged. | ||||||
| // | ||||||
| // mkClient is normally WithClientMaintenance; tests pass a fake that | ||||||
| // captures the GlobalArgs.Nodes value at the moment WithClientMaintenance | ||||||
| // would have read it. | ||||||
| func openClientPerNodeMaintenance(fingerprints []string, mkClient maintenanceClientFunc) openClientFunc { | ||||||
| return func(node string, action func(ctx context.Context, c *client.Client) error) error { | ||||||
| savedNodes := append([]string(nil), GlobalArgs.Nodes...) | ||||||
| GlobalArgs.Nodes = []string{node} | ||||||
| defer func() { GlobalArgs.Nodes = savedNodes }() | ||||||
| return mkClient(fingerprints, action) | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| // openClientPerNodeAuth returns an openClientFunc that reuses one | ||||||
| // authenticated client (the one withApplyClientBare opened above this | ||||||
| // callback) and rotates the addressed node via client.WithNode on the | ||||||
| // per-iteration context. WithNode (rather than WithNodes) sets the | ||||||
| // "node" metadata key for single-target proxying, which engine.Render's | ||||||
| // FailIfMultiNodes guard treats as one node. | ||||||
| func openClientPerNodeAuth(parentCtx context.Context, c *client.Client) openClientFunc { | ||||||
| return func(node string, action func(ctx context.Context, c *client.Client) error) error { | ||||||
| return action(client.WithNode(parentCtx, node), c) | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| return WithClientNoNodes(wrappedF) | ||||||
| // renderMergeAndApply is the per-node body shared by every apply mode. | ||||||
| func renderMergeAndApply(ctx context.Context, c *client.Client, opts engine.Options, configFile string, render renderFunc, apply applyFunc) error { | ||||||
| rendered, err := render(ctx, c, opts) | ||||||
| if err != nil { | ||||||
| return fmt.Errorf("template rendering: %w", err) | ||||||
| } | ||||||
| merged, err := engine.MergeFileAsPatch(rendered, configFile) | ||||||
| if err != nil { | ||||||
| return fmt.Errorf("merging node file as patch: %w", err) | ||||||
| } | ||||||
| return apply(ctx, c, merged) | ||||||
| } | ||||||
|
|
||||||
| // buildApplyRenderOptions constructs engine.Options for the template rendering path. | ||||||
|
|
@@ -214,14 +327,14 @@ func withApplyClient(f func(ctx context.Context, c *client.Client) error) error | |||||
| func buildApplyRenderOptions(modelineTemplates []string, withSecretsPath string) engine.Options { | ||||||
| resolvedTemplates := resolveTemplatePaths(modelineTemplates, Config.RootDir) | ||||||
| return engine.Options{ | ||||||
| Insecure: applyCmdFlags.insecure, | ||||||
| TalosVersion: applyCmdFlags.talosVersion, | ||||||
| WithSecrets: withSecretsPath, | ||||||
| KubernetesVersion: applyCmdFlags.kubernetesVersion, | ||||||
| Debug: applyCmdFlags.debug, | ||||||
| Full: true, | ||||||
| Root: Config.RootDir, | ||||||
| TemplateFiles: resolvedTemplates, | ||||||
| CommandName: "talm apply", | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
|
|
||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Scope this overlay note away from Talos v1.12+ network overrides.
This text currently promises that legacy node-body fields such as
deviceSelectorinterfaces and VIPs survivetalm apply, but on the multi-doc path thosemachine.network.interfacesfragments still do not have a safe 1:1 mapping toLinkConfig/BondConfig/VLANConfig/Layer2VIPConfig. As written, this will mislead v1.12+ users into relying on overrides that are not represented semantically the way the docs imply. Please either limit the claim to legacy/single-doc Talos output or explicitly say that v1.12+ requires patching the typed resources instead.Based on learnings, the multidoc path intentionally ignores legacy
machine.network.interfacesbecause it has no safe 1:1 translation to Talos v1.12 typed resources.🤖 Prompt for AI Agents