-
Notifications
You must be signed in to change notification settings - Fork 353
Expand file tree
/
Copy pathdomains.go
More file actions
826 lines (734 loc) · 28.6 KB
/
domains.go
File metadata and controls
826 lines (734 loc) · 28.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
package workflow
import (
_ "embed"
"encoding/json"
"fmt"
"sort"
"strings"
"github.com/github/gh-aw/pkg/constants"
"github.com/github/gh-aw/pkg/logger"
"github.com/github/gh-aw/pkg/stringutil"
)
// domainsLog is the logger used by the domain helpers in this file; it is
// created under the "workflow:domains" name.
var domainsLog = logger.New("workflow:domains")
// ecosystemDomainsJSON holds the raw bytes of data/ecosystem_domains.json,
// embedded into the binary at build time by the directive below.
//go:embed data/ecosystem_domains.json
var ecosystemDomainsJSON []byte
// ecosystemDomains maps an ecosystem category name to its list of domains.
// It is populated by init from ecosystemDomainsJSON and read through
// getEcosystemDomains.
var ecosystemDomains map[string][]string
// CopilotDefaultDomains are the default domains required for GitHub Copilot CLI authentication and operation.
// The list is registered for constants.CopilotEngine in engineDefaultDomains.
// The merge helpers in this file sort and deduplicate their output, so the
// order of the entries here is for readability only.
var CopilotDefaultDomains = []string{
"api.business.githubcopilot.com",
"api.enterprise.githubcopilot.com",
"api.github.com",
"api.githubcopilot.com",
"api.individual.githubcopilot.com",
"github.com",
"host.docker.internal",
"raw.githubusercontent.com",
"registry.npmjs.org",
"telemetry.enterprise.githubcopilot.com",
}
// CodexDefaultDomains are the minimal default domains required for Codex CLI operation.
// The list is registered for constants.CodexEngine in engineDefaultDomains.
// Note that one entry is a literal IP address rather than a hostname.
var CodexDefaultDomains = []string{
"172.30.0.1", // AWF gateway IP - Codex resolves host.docker.internal to this IP for Rust DNS compatibility
"api.openai.com",
"host.docker.internal",
"openai.com",
}
// ClaudeDefaultDomains are the default domains required for Claude Code CLI authentication and operation.
// The list is registered for constants.ClaudeEngine in engineDefaultDomains.
//
// The first entry is a wildcard. Under matchesDomain's wildcard semantics it
// already covers the explicit *.githubusercontent.com hosts listed further
// down; those explicit entries are kept as written.
// NOTE(review): whether the downstream firewall applies the same wildcard
// semantics is not visible from this file - confirm before pruning entries.
var ClaudeDefaultDomains = []string{
"*.githubusercontent.com",
"anthropic.com",
"api.anthropic.com",
"api.github.com",
"api.snapcraft.io",
"archive.ubuntu.com",
"azure.archive.ubuntu.com",
"cdn.playwright.dev",
"codeload.github.com",
"crl.geotrust.com",
"crl.globalsign.com",
"crl.identrust.com",
"crl.sectigo.com",
"crl.thawte.com",
"crl.usertrust.com",
"crl.verisign.com",
"crl3.digicert.com",
"crl4.digicert.com",
"crls.ssl.com",
"files.pythonhosted.org",
"ghcr.io",
"github-cloud.githubusercontent.com",
"github-cloud.s3.amazonaws.com",
"github.com",
"host.docker.internal",
"json-schema.org",
"json.schemastore.org",
"keyserver.ubuntu.com",
"lfs.github.com",
"objects.githubusercontent.com",
"ocsp.digicert.com",
"ocsp.geotrust.com",
"ocsp.globalsign.com",
"ocsp.identrust.com",
"ocsp.sectigo.com",
"ocsp.ssl.com",
"ocsp.thawte.com",
"ocsp.usertrust.com",
"ocsp.verisign.com",
"packagecloud.io",
"packages.cloud.google.com",
"packages.microsoft.com",
"playwright.download.prss.microsoft.com",
"ppa.launchpad.net",
"pypi.org",
"raw.githubusercontent.com",
"registry.npmjs.org",
"s.symcb.com",
"s.symcd.com",
"security.ubuntu.com",
"sentry.io",
"statsig.anthropic.com",
"ts-crl.ws.symantec.com",
"ts-ocsp.ws.symantec.com",
}
// GeminiDefaultDomains are the default domains required for Google Gemini CLI authentication and operation.
// The list is registered for constants.GeminiEngine in engineDefaultDomains.
//
// Under matchesDomain's wildcard semantics "*.googleapis.com" already covers
// "generativelanguage.googleapis.com"; the explicit entry is kept as written.
var GeminiDefaultDomains = []string{
"*.googleapis.com",
"generativelanguage.googleapis.com",
"github.com",
"host.docker.internal",
"raw.githubusercontent.com",
"registry.npmjs.org",
}
// PlaywrightDomains are the domains required for Playwright browser downloads.
// These domains are needed when Playwright MCP server initializes in the Docker container.
// extractPlaywrightDomains hands this list out whenever a "playwright" tool is
// configured. Both entries also appear in ClaudeDefaultDomains.
var PlaywrightDomains = []string{
"cdn.playwright.dev",
"playwright.download.prss.microsoft.com",
}
// init decodes the embedded ecosystem_domains.json payload into
// ecosystemDomains. If the payload cannot be decoded it panics with the
// decoder's error, because no ecosystem lookup can work without this data.
func init() {
	domainsLog.Print("Loading ecosystem domains from embedded JSON")

	err := json.Unmarshal(ecosystemDomainsJSON, &ecosystemDomains)
	if err != nil {
		panic(fmt.Sprintf("failed to load ecosystem domains from JSON: %v", err))
	}

	domainsLog.Printf("Loaded %d ecosystem categories", len(ecosystemDomains))
}
// compoundEcosystems defines ecosystem identifiers that expand to the union of multiple
// component ecosystems. These are resolved at lookup time, so they stay in sync with
// any future changes to the component ecosystems.
//
// getEcosystemDomains resolves each component by calling itself recursively, so a
// compound entry must never list itself (directly or through another compound),
// otherwise the lookup would not terminate.
var compoundEcosystems = map[string][]string{
// default-safe-outputs: the recommended baseline for URL redaction in safe-outputs.
// Covers common infrastructure certificate/OCSP hosts (via "defaults"), popular
// developer-tool and CI/CD service domains (via "dev-tools"), GitHub domains (via "github"),
// and loopback/localhost addresses (via "local").
"default-safe-outputs": {"defaults", "dev-tools", "github", "local"},
}
// getEcosystemDomains resolves an ecosystem category to its domains.
//
// A category listed in compoundEcosystems is expanded to the deduplicated
// union of its components (each resolved recursively). Any other category is
// looked up in ecosystemDomains. Unknown categories yield an empty, non-nil
// slice. The result is always a freshly allocated, sorted slice, so callers
// may modify it without affecting the loaded data.
func getEcosystemDomains(category string) []string {
	components, isCompound := compoundEcosystems[category]
	if !isCompound {
		listed, found := ecosystemDomains[category]
		if !found {
			return []string{}
		}
		// Work on a private copy so sorting never reorders the shared data.
		sorted := append(make([]string, 0, len(listed)), listed...)
		sort.Strings(sorted)
		return sorted
	}

	// Compound identifier: gather every component's domains into a set.
	seen := make(map[string]bool)
	for _, name := range components {
		for _, host := range getEcosystemDomains(name) {
			seen[host] = true
		}
	}

	union := make([]string, 0, len(seen))
	for host := range seen {
		union = append(union, host)
	}
	sort.Strings(union)
	return union
}
// runtimeToEcosystem maps runtime IDs to their corresponding ecosystem categories in ecosystem_domains.json.
// Some runtimes share ecosystems (e.g., bun and deno use node ecosystem domains).
// getDomainsFromRuntimes consults this table; a runtime ID that is absent here
// contributes no domains and is only logged.
// NOTE(review): GetAllowedDomains documents a separate "deno" ecosystem and
// ecosystemPriority lists one as well - confirm that mapping the deno runtime
// to "node" is still intended.
var runtimeToEcosystem = map[string]string{
"node": "node",
"python": "python",
"go": "go",
"java": "java",
"ruby": "ruby",
"dotnet": "dotnet",
"haskell": "haskell",
"bun": "node", // bun.sh is in the node ecosystem
"deno": "node", // deno.land is in the node ecosystem
"uv": "python", // uv is a Python package manager
"clojure": "clojure",
"dart": "dart",
"elixir": "elixir",
"kotlin": "kotlin",
"php": "php",
"scala": "scala",
"swift": "swift",
"zig": "zig",
}
// getDomainsFromRuntimes collects the ecosystem domains implied by the
// configured runtimes. Only the keys of runtimes (the runtime IDs) are used.
// IDs without an entry in runtimeToEcosystem are logged and skipped.
// The result is sorted, deduplicated and never nil.
func getDomainsFromRuntimes(runtimes map[string]any) []string {
	if len(runtimes) == 0 {
		return []string{}
	}

	collected := make(map[string]bool)
	for id := range runtimes {
		category, known := runtimeToEcosystem[id]
		if !known {
			domainsLog.Printf("No ecosystem mapping for runtime '%s'", id)
			continue
		}

		hosts := getEcosystemDomains(category)
		if len(hosts) == 0 {
			// Nothing to add (and nothing worth logging) for an empty ecosystem.
			continue
		}
		domainsLog.Printf("Runtime '%s' mapped to ecosystem '%s' with %d domains", id, category, len(hosts))
		for _, host := range hosts {
			collected[host] = true
		}
	}

	// Flatten the set into a sorted slice for stable output.
	sorted := make([]string, 0, len(collected))
	for host := range collected {
		sorted = append(sorted, host)
	}
	sort.Strings(sorted)
	return sorted
}
// GetAllowedDomains resolves network permissions into a concrete domain allow-list.
//
// # Resolution rules
//
//   - network is nil: the "defaults" ecosystem is returned, for backwards
//     compatibility with workflows that declare no network permissions.
//   - network.Allowed is empty, e.g. network: {} or network: { allowed: [] }:
//     an empty, non-nil slice is returned (deny-all, no network access).
//   - otherwise each entry of network.Allowed is processed in turn. An entry
//     that names an ecosystem with at least one domain is replaced by that
//     ecosystem's domains; every other entry is kept as-is and treated as a
//     plain domain name.
//
// Ecosystem identifiers and plain domains can be mixed freely:
//
//	network:
//	  allowed:
//	    - defaults
//	    - github
//	    - example.com
//	    - python
//
// The returned list is sorted and deduplicated.
//
// # Supported ecosystem identifiers
//
//   - "defaults": basic infrastructure (certs, JSON schema, Ubuntu, package mirrors)
//   - "chrome": headless Chrome/Puppeteer browser testing (*.google.com, *.googleapis.com, *.gvt1.com)
//   - "clojure": Clojure/Clojars
//   - "containers": container registries (Docker, GHCR, etc.)
//   - "dart": Dart/Flutter ecosystem
//   - "deno": Deno runtime (deno.land, jsr.io, googleapis.deno.dev, fresh.deno.dev)
//   - "dotnet": .NET and NuGet ecosystem
//   - "elixir": Elixir/Hex
//   - "github": GitHub domains (*.githubusercontent.com, github.githubassets.com, etc.)
//   - "github-actions": GitHub Actions blob storage domains
//   - "go": Go ecosystem
//   - "haskell": Haskell ecosystem
//   - "java": Java/Maven/Gradle
//   - "kotlin": Kotlin/JetBrains
//   - "linux-distros": Linux distribution package repositories
//   - "node": Node.js/NPM/Yarn
//   - "perl": Perl/CPAN
//   - "php": PHP/Composer
//   - "playwright": Playwright testing framework
//   - "python": Python/PyPI/Conda
//   - "ruby": Ruby/RubyGems
//   - "rust": Rust/Cargo/Crates
//   - "scala": Scala/SBT
//   - "swift": Swift/CocoaPods
//   - "terraform": HashiCorp/Terraform
//   - "zig": Zig
func GetAllowedDomains(network *NetworkPermissions) []string {
	switch {
	case network == nil:
		domainsLog.Print("No network permissions specified, using defaults")
		return getEcosystemDomains("defaults")
	case len(network.Allowed) == 0:
		domainsLog.Print("Empty allowed list, denying all network access")
		return []string{} // deliberately non-nil
	}

	domainsLog.Printf("Processing %d allowed domains/ecosystems", len(network.Allowed))

	// A set takes care of entries that overlap between ecosystems.
	unique := make(map[string]bool)
	for _, entry := range network.Allowed {
		expansion := getEcosystemDomains(entry)
		if len(expansion) == 0 {
			// Not an ecosystem identifier (or an empty one): keep the entry verbatim.
			unique[entry] = true
			continue
		}
		domainsLog.Printf("Expanded ecosystem '%s' to %d domains", entry, len(expansion))
		for _, host := range expansion {
			unique[host] = true
		}
	}

	allowed := make([]string, 0, len(unique))
	for host := range unique {
		allowed = append(allowed, host)
	}
	sort.Strings(allowed)
	return allowed
}
// ecosystemPriority defines the order in which ecosystems are checked by GetDomainEcosystem.
// More specific sub-ecosystems are listed before their parent ecosystems so that domains
// shared between multiple ecosystems resolve deterministically to the most specific one.
// For example, "node-cdns" is listed before "node" so that cdn.jsdelivr.net returns "node-cdns".
// All known ecosystems are enumerated here; any ecosystem not in this list is checked last
// in sorted order (for forward-compatibility with new entries).
//
// The first matching entry wins, so the position of an entry is significant:
// moving one changes which ecosystem a shared domain is attributed to. The
// inline comments record the ordering constraints that must be preserved.
var ecosystemPriority = []string{
"node-cdns", // before "node" — more specific CDN sub-ecosystem
"rust", // before "python" — crates.io/index.crates.io/static.crates.io are native Rust domains
"clojure",
"containers",
"dart",
"defaults",
"dev-tools",
"deno", // before "node" — deno-specific domains take precedence over the broader node set
"dotnet",
"elixir",
"fonts", // before "chrome" — fonts.googleapis.com is a fonts domain, not a chrome domain
"github",
"github-actions",
"go",
"haskell",
"java", // before "chrome" — maven.google.com and dl.google.com are Java domains, not chrome domains
"chrome",
"kotlin",
"linux-distros",
"local",
"node",
"perl",
"php",
"playwright",
"python",
"ruby",
"scala",
"swift",
"terraform",
"zig",
// Checked last among the prioritized entries: its components are all listed above.
"default-safe-outputs", // compound: defaults + dev-tools + github + local
}
// GetDomainEcosystem returns the ecosystem identifier for a given domain, or empty string if not found.
// Ecosystems are checked in ecosystemPriority order so that the result is deterministic even when
// a domain appears in multiple ecosystems (e.g. cdn.jsdelivr.net is in both "node" and "node-cdns").
func GetDomainEcosystem(domain string) string {
checked := make(map[string]bool, len(ecosystemPriority))
// Check ecosystems in priority order first
for _, ecosystem := range ecosystemPriority {
checked[ecosystem] = true
domains := getEcosystemDomains(ecosystem)
for _, ecosystemDomain := range domains {
if matchesDomain(domain, ecosystemDomain) {
return ecosystem
}
}
}
// Fall back to any ecosystems not in the priority list, sorted for determinism
remaining := make([]string, 0)
for ecosystem := range ecosystemDomains {
if !checked[ecosystem] {
remaining = append(remaining, ecosystem)
}
}
sort.Strings(remaining)
for _, ecosystem := range remaining {
domains := getEcosystemDomains(ecosystem)
for _, ecosystemDomain := range domains {
if matchesDomain(domain, ecosystemDomain) {
return ecosystem
}
}
}
return "" // No ecosystem found
}
// matchesDomain reports whether domain is covered by pattern.
//
// Two pattern forms are supported:
//   - a plain hostname, which must equal domain;
//   - a wildcard of the form "*.example.com", which matches any subdomain of
//     example.com and also example.com itself.
//
// DNS names are case-insensitive, so both arguments are compared in lower
// case: "GitHub.com" matches "github.com" and "*.github.com".
func matchesDomain(domain, pattern string) bool {
	domain = strings.ToLower(domain)
	pattern = strings.ToLower(pattern)

	if domain == pattern {
		return true
	}

	suffix, isWildcard := strings.CutPrefix(pattern, "*.")
	if !isWildcard {
		return false
	}
	// The leading "." keeps "badexample.com" from matching "*.example.com".
	return domain == suffix || strings.HasSuffix(domain, "."+suffix)
}
// extractHTTPMCPDomains extracts domain names from HTTP MCP server URLs in the
// tools configuration, e.g. ["api.example.com", "mcp.tavily.com"].
//
// A tool contributes a domain when:
//   - it is the "github" tool configured with mode: remote, in which case the
//     hosted GitHub Copilot MCP domain (constants.GitHubCopilotMCPDomain) is used; or
//   - its config has a string "url" and either type: http or no string "type"
//     at all; the domain is then taken from that URL.
//
// Tools whose config is not a map (for example github: null) are ignored.
// The result is never nil and is sorted, so the output does not depend on
// Go's randomized map iteration order. Duplicates are kept; callers that need
// a set deduplicate themselves.
func extractHTTPMCPDomains(tools map[string]any) []string {
	domains := []string{}

	// Ranging over a nil map performs no iterations, so nil needs no special case.
	for toolName, toolConfig := range tools {
		configMap, ok := toolConfig.(map[string]any)
		if !ok {
			// Tool has no explicit config (e.g., github: null means local mode).
			continue
		}

		// GitHub MCP in remote mode has no url field: the URL is implicitly the
		// hosted GitHub Copilot MCP server.
		if toolName == "github" {
			if mode, isString := configMap["mode"].(string); isString && mode == "remote" {
				domainsLog.Printf("Detected GitHub MCP remote mode, adding %s to domains", constants.GitHubCopilotMCPDomain)
				domains = append(domains, constants.GitHubCopilotMCPDomain)
				continue
			}
		}

		rawURL, hasURL := configMap["url"].(string)
		if !hasURL {
			continue
		}
		// HTTP MCP servers have either type: http or just a url field.
		if mcpType, hasType := configMap["type"].(string); hasType && mcpType != "http" {
			continue
		}

		// e.g. "https://mcp.tavily.com/mcp/" -> "mcp.tavily.com"
		if domain := stringutil.ExtractDomainFromURL(rawURL); domain != "" {
			domainsLog.Printf("Extracted HTTP MCP domain '%s' from tool '%s'", domain, toolName)
			domains = append(domains, domain)
		}
	}

	sort.Strings(domains)
	return domains
}
// extractPlaywrightDomains returns the domains required for Playwright browser
// downloads when a "playwright" tool is configured, and an empty slice
// otherwise. These domains are needed when the Playwright MCP server
// initializes in the Docker container.
//
// The result is always a fresh, non-nil slice: callers get a copy of
// PlaywrightDomains, so modifying the returned slice cannot corrupt the
// package-level list.
func extractPlaywrightDomains(tools map[string]any) []string {
	// Looking up a key in a nil map is safe and simply reports "absent".
	if _, configured := tools["playwright"]; !configured {
		return []string{}
	}

	domainsLog.Printf("Detected Playwright tool, adding %d domains for browser downloads", len(PlaywrightDomains))

	domains := make([]string, len(PlaywrightDomains))
	copy(domains, PlaywrightDomains)
	return domains
}
// mergeDomainsWithNetworkToolsAndRuntimes builds the value for AWF's
// --allow-domains flag: a deduplicated, sorted, comma-separated list.
//
// It unions, in this order:
//   - defaultDomains;
//   - the expansion of network.Allowed, only when network is non-nil and the
//     list is non-empty;
//   - HTTP MCP server domains and Playwright download domains, when tools is
//     non-nil;
//   - ecosystem domains of the configured runtimes, when runtimes is non-nil.
func mergeDomainsWithNetworkToolsAndRuntimes(defaultDomains []string, network *NetworkPermissions, tools map[string]any, runtimes map[string]any) string {
	merged := make(map[string]bool)
	include := func(batch []string) {
		for _, host := range batch {
			merged[host] = true
		}
	}

	include(defaultDomains)

	// The guard matters: GetAllowedDomains would fall back to the "defaults"
	// ecosystem for a nil network, which must not happen here.
	if network != nil && len(network.Allowed) > 0 {
		include(GetAllowedDomains(network))
	}

	if tools != nil {
		include(extractHTTPMCPDomains(tools))
		// Lets browser binaries be downloaded when Playwright initializes.
		include(extractPlaywrightDomains(tools))
	}

	if runtimes != nil {
		include(getDomainsFromRuntimes(runtimes))
	}

	// Sort for consistent output, then join for the flag value.
	ordered := make([]string, 0, len(merged))
	for host := range merged {
		ordered = append(ordered, host)
	}
	sort.Strings(ordered)
	return strings.Join(ordered, ",")
}
// engineDefaultDomains maps each engine to its default required domains.
// Add new engines here to avoid adding new engine-specific domain functions.
// GetAllowedDomainsForEngine indexes this map directly; an engine without an
// entry therefore gets no default domains.
var engineDefaultDomains = map[constants.EngineName][]string{
constants.CopilotEngine: CopilotDefaultDomains,
constants.ClaudeEngine: ClaudeDefaultDomains,
constants.CodexEngine: CodexDefaultDomains,
constants.GeminiEngine: GeminiDefaultDomains,
}
// GetAllowedDomainsForEngine merges the engine's default domains with NetworkPermissions,
// HTTP MCP server domains, and runtime ecosystem domains.
// Returns a deduplicated, sorted, comma-separated string suitable for AWF's --allow-domains flag.
// Falls back to an empty default domain list for unknown engines.
//
// The engine argument is the key looked up in engineDefaultDomains; network,
// tools and runtimes are passed through unchanged to
// mergeDomainsWithNetworkToolsAndRuntimes.
func GetAllowedDomainsForEngine(engine constants.EngineName, network *NetworkPermissions, tools map[string]any, runtimes map[string]any) string {
// Indexing the map with an unknown engine yields a nil slice, i.e. no defaults.
return mergeDomainsWithNetworkToolsAndRuntimes(engineDefaultDomains[engine], network, tools, runtimes)
}
// GetCopilotAllowedDomainsWithToolsAndRuntimes merges Copilot default domains with NetworkPermissions,
// HTTP MCP server domains, and runtime ecosystem domains.
// Returns a deduplicated, sorted, comma-separated string suitable for AWF's --allow-domains flag.
// It is a convenience wrapper around GetAllowedDomainsForEngine with constants.CopilotEngine.
func GetCopilotAllowedDomainsWithToolsAndRuntimes(network *NetworkPermissions, tools map[string]any, runtimes map[string]any) string {
return GetAllowedDomainsForEngine(constants.CopilotEngine, network, tools, runtimes)
}
// GetThreatDetectionAllowedDomains returns the minimal set of domains allowed for a Copilot
// detection run. It loads the "threat-detection" ecosystem from ecosystem_domains.json, which
// includes only the Copilot API endpoints needed for read-only threat analysis. It intentionally
// excludes registry.npmjs.org and raw.githubusercontent.com (not needed when MCP servers are
// disabled and the CLI binary is pre-installed).
// Any additional user-specified network.allowed entries are merged in (typically empty for detection).
// Returns a deduplicated, sorted, comma-separated string suitable for AWF's --allow-domains flag.
//
// If the "threat-detection" ecosystem is missing from the loaded data,
// getEcosystemDomains yields an empty list and only the network.allowed
// entries (if any) end up in the result.
func GetThreatDetectionAllowedDomains(network *NetworkPermissions) string {
detectionDomains := getEcosystemDomains("threat-detection")
// Pass nil tools and runtimes: detection runs with no npm/runtime ecosystem, so
// ecosystem domain expansion is intentionally skipped.
return mergeDomainsWithNetworkToolsAndRuntimes(detectionDomains, network, nil, nil)
}
// GetCodexAllowedDomainsWithToolsAndRuntimes merges Codex default domains with NetworkPermissions,
// HTTP MCP server domains, and runtime ecosystem domains.
// Returns a deduplicated, sorted, comma-separated string suitable for AWF's --allow-domains flag.
// It is a convenience wrapper around GetAllowedDomainsForEngine with constants.CodexEngine.
func GetCodexAllowedDomainsWithToolsAndRuntimes(network *NetworkPermissions, tools map[string]any, runtimes map[string]any) string {
return GetAllowedDomainsForEngine(constants.CodexEngine, network, tools, runtimes)
}
// GetClaudeAllowedDomainsWithToolsAndRuntimes merges Claude default domains with NetworkPermissions,
// HTTP MCP server domains, and runtime ecosystem domains.
// Returns a deduplicated, sorted, comma-separated string suitable for AWF's --allow-domains flag.
// It is a convenience wrapper around GetAllowedDomainsForEngine with constants.ClaudeEngine.
func GetClaudeAllowedDomainsWithToolsAndRuntimes(network *NetworkPermissions, tools map[string]any, runtimes map[string]any) string {
return GetAllowedDomainsForEngine(constants.ClaudeEngine, network, tools, runtimes)
}
// GetGeminiAllowedDomainsWithToolsAndRuntimes merges Gemini default domains with NetworkPermissions,
// HTTP MCP server domains, and runtime ecosystem domains.
// Returns a deduplicated, sorted, comma-separated string suitable for AWF's --allow-domains flag.
// It is a convenience wrapper around GetAllowedDomainsForEngine with constants.GeminiEngine.
func GetGeminiAllowedDomainsWithToolsAndRuntimes(network *NetworkPermissions, tools map[string]any, runtimes map[string]any) string {
return GetAllowedDomainsForEngine(constants.GeminiEngine, network, tools, runtimes)
}
// GetBlockedDomains resolves network.Blocked into a concrete block-list.
//
// It returns an empty, non-nil slice when network is nil or nothing is
// blocked. Entries may be ecosystem identifiers (same syntax as the allowed
// list): an entry naming an ecosystem with at least one domain is replaced by
// that ecosystem's domains, any other entry is kept as a plain domain.
// The returned list is sorted and deduplicated.
func GetBlockedDomains(network *NetworkPermissions) []string {
	switch {
	case network == nil:
		domainsLog.Print("No network permissions specified, no blocked domains")
		return []string{}
	case len(network.Blocked) == 0:
		domainsLog.Print("Empty blocked list, no domains blocked")
		return []string{}
	}

	domainsLog.Printf("Processing %d blocked domains/ecosystems", len(network.Blocked))

	// A set takes care of entries that overlap between ecosystems.
	unique := make(map[string]bool)
	for _, entry := range network.Blocked {
		expansion := getEcosystemDomains(entry)
		if len(expansion) == 0 {
			// Not an ecosystem identifier (or an empty one): keep the entry verbatim.
			unique[entry] = true
			continue
		}
		domainsLog.Printf("Expanded ecosystem '%s' to %d domains", entry, len(expansion))
		for _, host := range expansion {
			unique[host] = true
		}
	}

	blocked := make([]string, 0, len(unique))
	for host := range unique {
		blocked = append(blocked, host)
	}
	sort.Strings(blocked)
	return blocked
}
// formatBlockedDomains renders the blocked domains as a comma-separated string
// suitable for AWF's --block-domains flag. It returns "" when network is nil
// or when no domains are blocked.
func formatBlockedDomains(network *NetworkPermissions) string {
	if network == nil {
		return ""
	}
	// strings.Join returns "" for an empty slice, which covers the
	// "nothing blocked" case without a separate length check.
	return strings.Join(GetBlockedDomains(network), ",")
}
// GetAPITargetDomains lists the domains to add to the allow-list when
// engine.api-target is set.
//
// The target itself is always included. When the target starts with "api."
// and what follows is still a multi-label hostname (contains a dot), that base
// hostname is included as well. For a GHES instance with api-target
// "api.acme.ghe.com" this yields ["api.acme.ghe.com", "acme.ghe.com"], so both
// API requests and the GitHub web UI pass the firewall without manual lock
// file edits. A target such as "api.com" yields only itself.
//
// Returns nil for an empty apiTarget.
func GetAPITargetDomains(apiTarget string) []string {
	if apiTarget == "" {
		return nil
	}

	// e.g. "api.acme.ghe.com" -> "acme.ghe.com"
	baseHost, isAPIHost := strings.CutPrefix(apiTarget, "api.")
	if isAPIHost && strings.Contains(baseHost, ".") {
		return []string{apiTarget, baseHost}
	}
	return []string{apiTarget}
}
// mergeAPITargetDomains adds the api-target domains (see GetAPITargetDomains)
// to an existing comma-separated domain string.
//
// When apiTarget contributes no domains (it is empty), domainsStr is returned
// exactly as given. Otherwise the existing entries are trimmed, blank entries
// are dropped, and the union is returned sorted, deduplicated and
// comma-separated.
func mergeAPITargetDomains(domainsStr string, apiTarget string) string {
	additions := GetAPITargetDomains(apiTarget)
	if len(additions) == 0 {
		return domainsStr
	}

	set := make(map[string]bool)
	for _, host := range additions {
		set[host] = true
	}
	for piece := range strings.SplitSeq(domainsStr, ",") {
		if host := strings.TrimSpace(piece); host != "" {
			set[host] = true
		}
	}

	merged := make([]string, 0, len(set))
	for host := range set {
		merged = append(merged, host)
	}
	sort.Strings(merged)
	return strings.Join(merged, ",")
}
// computeAllowedDomainsForSanitization computes the comma-separated allowed
// domains used for sanitization, mirroring what is handed to the firewall.
//
// The engine is taken from data.EngineConfig.ID when an engine config is
// present, otherwise from data.AI. For copilot, codex, claude and gemini the
// engine-specific helper is used (engine defaults + network + tools +
// runtimes); any other engine uses the network permissions alone. API target
// domains for Copilot and Gemini are merged in afterwards when set.
func (c *Compiler) computeAllowedDomainsForSanitization(data *WorkflowData) string {
	var engineID string
	switch {
	case data.EngineConfig != nil:
		engineID = data.EngineConfig.ID
	case data.AI != "":
		engineID = data.AI
	}

	// Tools and runtimes are included so the result matches what the actual
	// firewall receives at runtime.
	network, tools, runtimes := data.NetworkPermissions, data.Tools, data.Runtimes

	var allowed string
	switch engineID {
	case "copilot":
		allowed = GetCopilotAllowedDomainsWithToolsAndRuntimes(network, tools, runtimes)
	case "codex":
		allowed = GetCodexAllowedDomainsWithToolsAndRuntimes(network, tools, runtimes)
	case "claude":
		allowed = GetClaudeAllowedDomainsWithToolsAndRuntimes(network, tools, runtimes)
	case "gemini":
		allowed = GetGeminiAllowedDomainsWithToolsAndRuntimes(network, tools, runtimes)
	default:
		// Other engines: network permissions only.
		allowed = strings.Join(GetAllowedDomains(network), ",")
	}

	// Keep GH_AW_ALLOWED_DOMAINS in sync with --allow-domains: add the Copilot
	// API target (from engine.api-target or GITHUB_COPILOT_BASE_URL in engine.env).
	if target := GetCopilotAPITarget(data); target != "" {
		allowed = mergeAPITargetDomains(allowed, target)
	}

	// Same for the Gemini API target (from GEMINI_API_BASE_URL in engine.env or
	// the default generativelanguage.googleapis.com).
	if target := GetGeminiAPITarget(data, engineID); target != "" {
		allowed = mergeAPITargetDomains(allowed, target)
	}

	return allowed
}
// expandAllowedDomains turns a list of entries into a deduplicated, sorted list
// of concrete domains. An entry that names an ecosystem with at least one
// domain (e.g. "python", "node", "dev-tools") is replaced by that ecosystem's
// domains; any other entry is kept as-is. This is the same expansion that is
// applied to network.allowed. The result is never nil.
func expandAllowedDomains(entries []string) []string {
	unique := make(map[string]bool)
	for _, entry := range entries {
		expansion := getEcosystemDomains(entry)
		if len(expansion) == 0 {
			// Plain domain: keep verbatim.
			unique[entry] = true
			continue
		}
		for _, host := range expansion {
			unique[host] = true
		}
	}

	expanded := make([]string, 0, len(unique))
	for host := range unique {
		expanded = append(expanded, host)
	}
	sort.Strings(expanded)
	return expanded
}
// computeExpandedAllowedDomainsForSanitization computes the comma-separated
// allowed domains for URL sanitization.
//
// The result is the union of:
//   - the base set from computeAllowedDomainsForSanitization (engine defaults +
//     network.allowed + tools + runtimes);
//   - the expanded safe-outputs.allowed-domains entries, which accept ecosystem
//     identifiers with the same syntax as network.allowed;
//   - "localhost" and "github.com", which are always included.
//
// The output is sorted and deduplicated.
func (c *Compiler) computeExpandedAllowedDomainsForSanitization(data *WorkflowData) string {
	// Always allowed: localhost (local development URL references) and
	// github.com (GitHub page of the current repo).
	allowed := map[string]bool{
		"localhost":  true,
		"github.com": true,
	}

	if base := c.computeAllowedDomainsForSanitization(data); base != "" {
		for piece := range strings.SplitSeq(base, ",") {
			if host := strings.TrimSpace(piece); host != "" {
				allowed[host] = true
			}
		}
	}

	if safeOutputs := data.SafeOutputs; safeOutputs != nil && len(safeOutputs.AllowedDomains) > 0 {
		for _, host := range expandAllowedDomains(safeOutputs.AllowedDomains) {
			allowed[host] = true
		}
	}

	ordered := make([]string, 0, len(allowed))
	for host := range allowed {
		ordered = append(ordered, host)
	}
	sort.Strings(ordered)
	return strings.Join(ordered, ",")
}