-
Notifications
You must be signed in to change notification settings - Fork 49
T keithyao/batch purge Adds ABAC support for purge command with batched token requests #577
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
02e4cf1
69c5fbb
e666217
d6779aa
6b6b106
7db0aea
eb5ea3a
b857681
8ad2ac8
f9f0762
6aa0c97
b57476b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
keithy1012 marked this conversation as resolved.
Show resolved
Hide resolved
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,8 +7,10 @@ import ( | |
| "context" | ||
| "fmt" | ||
| "net/http" | ||
| "os" | ||
| "runtime" | ||
| "sort" | ||
| "strconv" | ||
| "strings" | ||
| "time" | ||
|
|
||
|
|
@@ -228,34 +230,75 @@ func purge(ctx context.Context, | |
| dryRun bool, | ||
| includeLocked bool) (deletedTagsCount int, deletedManifestsCount int, err error) { | ||
|
|
||
| // In order to print a summary of the deleted tags/manifests the counters get updated everytime a repo is purged. | ||
| for repoName, tagRegex := range tagFilters { | ||
| var singleDeletedTagsCount int | ||
| var manifestToTagsCountMap map[string]int | ||
|
|
||
| // Handle tag deletion based on mode | ||
| if untaggedOnly { | ||
| // Initialize empty map for untagged-only mode (no tag deletion) | ||
| manifestToTagsCountMap = make(map[string]int) | ||
| } else { | ||
| // Standard mode: delete matching tags first | ||
| singleDeletedTagsCount, manifestToTagsCountMap, err = purgeTags(ctx, acrClient, repoParallelism, loginURL, repoName, agoDuration, tagRegex, keep, filterTimeout, dryRun, includeLocked) | ||
| if err != nil { | ||
| return deletedTagsCount, deletedManifestsCount, fmt.Errorf("failed to purge tags: %w", err) | ||
| // Load ABAC batch size from environment variable | ||
| abacBatchSize := 10 // default | ||
| if envVal, exists := os.LookupEnv("ABAC_BATCH_SIZE"); exists { | ||
keithy1012 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| if parsed, err := strconv.Atoi(envVal); err == nil && parsed > 0 { | ||
| abacBatchSize = parsed | ||
| } | ||
| } | ||
|
|
||
| // Collect all repository names into a slice for batching | ||
| repos := make([]string, 0, len(tagFilters)) | ||
| for repoName := range tagFilters { | ||
| repos = append(repos, repoName) | ||
| } | ||
|
|
||
| // Process repositories in batches of abacBatchSize. | ||
| // For ABAC-enabled registries, we refresh the token per batch. | ||
| // For non-ABAC registries, the batching loop is harmless (no token refresh needed). | ||
| for i := 0; i < len(repos); i += abacBatchSize { | ||
| end := i + abacBatchSize | ||
| if end > len(repos) { | ||
| end = len(repos) | ||
| } | ||
| batch := repos[i:end] | ||
|
|
||
| // For ABAC registries, request a token that covers all repositories in this batch | ||
| if acrClient.IsAbac() { | ||
| if err := acrClient.RefreshTokenForAbac(ctx, batch); err != nil { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The token is only refreshed once at the start of the requests. This seems like it could be problematic as I presume these tokens time out. You should have some strategy to rotate this as time goes on.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Implemented
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Now the token is refreshed per batch. In practice we want to generally avoid doing the refreshes upfront and just refresh as needed on the token acquisition portion. So for example if you look at the getManifest calls you will see we call refreshAcrCLIClientToken first on those. What you could do is modify that function to refresh the token you have if its necessary dynamically at that stage.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Implemented |
||
| return deletedTagsCount, deletedManifestsCount, fmt.Errorf("failed to authorize ABAC repositories batch: %w", err) | ||
keithy1012 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } | ||
| } | ||
|
|
||
| singleDeletedManifestsCount := 0 | ||
| // If the untagged flag is set or untagged-only mode is enabled, delete manifests | ||
| if removeUntaggedManifests { | ||
| singleDeletedManifestsCount, err = purgeDanglingManifests(ctx, acrClient, repoParallelism, loginURL, repoName, agoDuration, keep, manifestToTagsCountMap, dryRun, includeLocked) | ||
| if err != nil { | ||
| return deletedTagsCount, deletedManifestsCount, fmt.Errorf("failed to purge manifests: %w", err) | ||
| // Process all repositories in this batch | ||
| for j, repoName := range batch { | ||
| // For ABAC registries, check if token expired and refresh for remaining repos in batch | ||
| if acrClient.IsAbac() && acrClient.IsTokenExpired() { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The token can fail at any moment, for example let's say there are a lot of images inside of a particular repository that need to be scanned. We could have the token expire mid iteration and thus that repo listing and exploration would fail. My suggestion would be to give the acquirer the ability to refresh itself, so for example I see that you are not storing the token anwyhere in this part of the code, instead its maintained inside of acrClient,
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm thinking about updating acrsdk.go to handle single repo token refresh: This allows SDK methods like GetAcrTags() to auto-refresh with single repo scope when the token expires mid-operation. But we should still keep the batch-level token refresh logic in purge.go for efficiency. The batch refresh at the start of each batch reduces the number of token requests when processing multiple repositories. And to coordinate between these two, we could add a flag (e.g., NeedsBatchRefresh()) that gets set to true when a single repo refresh occurs. Then in purge.go, after processing each repository, we can check this flag and proactively refresh for the remaining repositories in the batch if needed. What do you think? |
||
| remainingRepos := batch[j:] | ||
| fmt.Printf("ABAC token expired, refreshing for remaining repositories: %v\n", remainingRepos) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This currently prints the full remaining repository list on ABAC token refresh. Two concerns:
Could we switch default output to a count-only message (e.g., "refreshing token for N remaining repositories"), and only print repository names when a purge debug/verbose mode is enabled? |
||
| if err := acrClient.RefreshTokenForAbac(ctx, remainingRepos); err != nil { | ||
| return deletedTagsCount, deletedManifestsCount, fmt.Errorf("failed to refresh ABAC token: %w", err) | ||
| } | ||
| } | ||
| tagRegex := tagFilters[repoName] | ||
| var singleDeletedTagsCount int | ||
| var manifestToTagsCountMap map[string]int | ||
|
|
||
| // Handle tag deletion based on mode | ||
| if untaggedOnly { | ||
| // Initialize empty map for untagged-only mode (no tag deletion) | ||
| manifestToTagsCountMap = make(map[string]int) | ||
| } else { | ||
| // Standard mode: delete matching tags first | ||
| singleDeletedTagsCount, manifestToTagsCountMap, err = purgeTags(ctx, acrClient, repoParallelism, loginURL, repoName, agoDuration, tagRegex, keep, filterTimeout, dryRun, includeLocked) | ||
| if err != nil { | ||
| return deletedTagsCount, deletedManifestsCount, fmt.Errorf("failed to purge tags: %w", err) | ||
| } | ||
| } | ||
|
|
||
| singleDeletedManifestsCount := 0 | ||
| // If the untagged flag is set or untagged-only mode is enabled, delete manifests | ||
| if removeUntaggedManifests { | ||
| singleDeletedManifestsCount, err = purgeDanglingManifests(ctx, acrClient, repoParallelism, loginURL, repoName, agoDuration, keep, manifestToTagsCountMap, dryRun, includeLocked) | ||
| if err != nil { | ||
| return deletedTagsCount, deletedManifestsCount, fmt.Errorf("failed to purge manifests: %w", err) | ||
| } | ||
| } | ||
| // After every repository is purged the counters are updated. | ||
| deletedTagsCount += singleDeletedTagsCount | ||
| deletedManifestsCount += singleDeletedManifestsCount | ||
| } | ||
| // After every repository is purged the counters are updated. | ||
| deletedTagsCount += singleDeletedTagsCount | ||
| deletedManifestsCount += singleDeletedManifestsCount | ||
| } | ||
|
|
||
| return deletedTagsCount, deletedManifestsCount, nil | ||
|
|
||
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,6 +7,7 @@ package api | |
| import ( | ||
| "bytes" | ||
| "context" | ||
| "fmt" | ||
| "io/ioutil" | ||
| "strings" | ||
| "time" | ||
|
|
@@ -52,6 +53,10 @@ type AcrCLIClient struct { | |
| // accessTokenExp refers to the expiration time for the access token, it is in a unix time format represented by a | ||
| // 64 bit integer. | ||
| accessTokenExp int64 | ||
| // isAbac indicates whether this registry uses Attribute-Based Access Control (ABAC). | ||
| // ABAC registries require repository-level permissions instead of registry-wide wildcards. | ||
| // This is detected by checking if the refresh token contains the "aad_identity" claim. | ||
| isAbac bool | ||
| } | ||
|
|
||
| // LoginURL returns the FQDN for a registry. | ||
|
|
@@ -91,10 +96,29 @@ func newAcrCLIClientWithBasicAuth(loginURL string, username string, password str | |
| } | ||
|
|
||
| // newAcrCLIClientWithBearerAuth creates a client that uses bearer token authentication. | ||
| // It detects if the registry is ABAC-enabled by checking for the "aad_identity" claim in the refresh token. | ||
| // For ABAC registries, it only requests catalog access initially; repository access is requested per-batch. | ||
| // For non-ABAC registries, it requests the traditional wildcard scope for all repositories. | ||
| func newAcrCLIClientWithBearerAuth(loginURL string, refreshToken string) (AcrCLIClient, error) { | ||
| // Detect if this is an ABAC-enabled registry by checking for aad_identity claim | ||
| isAbac := hasAadIdentityClaim(refreshToken) | ||
|
|
||
| newAcrCLIClient := newAcrCLIClient(loginURL) | ||
| newAcrCLIClient.isAbac = isAbac | ||
|
|
||
| ctx := context.Background() | ||
| accessTokenResponse, err := newAcrCLIClient.AutorestClient.GetAcrAccessToken(ctx, loginURL, "registry:catalog:* repository:*:*", refreshToken) | ||
| var scope string | ||
| if isAbac { | ||
| // For ABAC registries, only request catalog access initially. | ||
| // Repository-level access will be requested on-demand per repository or batch. | ||
| // This is because ABAC registries cannot grant wildcard repository access. | ||
| scope = "registry:catalog:*" | ||
| } else { | ||
| // For non-ABAC registries, request full wildcard access to all repositories. | ||
| scope = "registry:catalog:* repository:*:*" | ||
| } | ||
|
|
||
| accessTokenResponse, err := newAcrCLIClient.AutorestClient.GetAcrAccessToken(ctx, loginURL, scope, refreshToken) | ||
| if err != nil { | ||
| return newAcrCLIClient, err | ||
| } | ||
|
|
@@ -154,6 +178,7 @@ func GetAcrCLIClientWithAuth(loginURL string, username string, password string, | |
| } | ||
|
|
||
| // refreshAcrCLIClientToken obtains a new token and gets its expiration time. | ||
| // This uses the wildcard scope and should only be called for non-ABAC registries. | ||
| func refreshAcrCLIClientToken(ctx context.Context, c *AcrCLIClient) error { | ||
| accessTokenResponse, err := c.AutorestClient.GetAcrAccessToken(ctx, c.loginURL, "repository:*:*", c.token.RefreshToken) | ||
| if err != nil { | ||
|
|
@@ -173,6 +198,72 @@ func refreshAcrCLIClientToken(ctx context.Context, c *AcrCLIClient) error { | |
| return nil | ||
| } | ||
|
|
||
| // hasAadIdentityClaim checks if a JWT token contains the "aad_identity" claim. | ||
| // The presence of this claim indicates that the registry is ABAC-enabled. | ||
| // ABAC (Attribute-Based Access Control) registries grant permissions at the repository level, | ||
| // not at the registry level, so wildcard scopes like "repository:*:*" will not work. | ||
| func hasAadIdentityClaim(tokenString string) bool { | ||
| parser := jwt.Parser{SkipClaimsValidation: true} | ||
| mapC := jwt.MapClaims{} | ||
| // We only need to check for the claim, not verify the signature | ||
| _, _, err := parser.ParseUnverified(tokenString, mapC) | ||
| if err != nil { | ||
| return false | ||
| } | ||
| _, ok := mapC["aad_identity"] | ||
| return ok | ||
| } | ||
|
|
||
| // RefreshTokenForAbac obtains a new access token scoped to specific repositories. | ||
| // This is used for ABAC-enabled registries where wildcard repository access is not allowed. | ||
| // The token will include permissions for all specified repositories. | ||
| // | ||
| // Parameters: | ||
| // - repositories: list of repository names to request access for | ||
| // | ||
| // The scope format is: "registry:catalog:* repository:<name>:pull repository:<name>:delete ..." | ||
| // This allows batching multiple repositories into a single token request for efficiency. | ||
| func (c *AcrCLIClient) RefreshTokenForAbac(ctx context.Context, repositories []string) error { | ||
| if c.token == nil { | ||
| return errors.New("no refresh token available for ABAC token refresh") | ||
| } | ||
|
|
||
| // Build the scope string for all requested repositories. | ||
| // Each repository needs pull, delete, and metadata permissions for purge operations. | ||
| // Format: "repository:repo1:pull,delete,metadata_read,metadata_write repository:repo2:pull,delete,metadata_read,metadata_write ..." | ||
| var scopeParts []string | ||
| for _, repo := range repositories { | ||
| scopeParts = append(scopeParts, fmt.Sprintf("repository:%s:pull,delete,metadata_read,metadata_write", repo)) | ||
| } | ||
| scope := strings.Join(scopeParts, " ") | ||
|
|
||
| accessTokenResponse, err := c.AutorestClient.GetAcrAccessToken(ctx, c.loginURL, scope, c.token.RefreshToken) | ||
| if err != nil { | ||
| return errors.Wrap(err, "failed to refresh token for ABAC repositories") | ||
| } | ||
|
|
||
| token := &adal.Token{ | ||
| AccessToken: *accessTokenResponse.AccessToken, | ||
| RefreshToken: c.token.RefreshToken, | ||
| } | ||
| c.token = token | ||
| c.AutorestClient.Authorizer = autorest.NewBearerAuthorizer(token) | ||
|
|
||
| exp, err := getExpiration(token.AccessToken) | ||
| if err != nil { | ||
| return err | ||
| } | ||
| c.accessTokenExp = exp | ||
|
|
||
| return nil | ||
| } | ||
|
|
||
| // IsAbac returns true if this client is connected to an ABAC-enabled registry. | ||
| // ABAC registries require repository-level token scopes instead of wildcard scopes. | ||
| func (c *AcrCLIClient) IsAbac() bool { | ||
| return c.isAbac | ||
| } | ||
|
|
||
| // getExpiration is used to obtain the expiration out of a jwt token. | ||
| func getExpiration(token string) (int64, error) { | ||
| parser := jwt.Parser{SkipClaimsValidation: true} | ||
|
|
@@ -198,6 +289,13 @@ func (c *AcrCLIClient) isExpired() bool { | |
| return (time.Now().Add(5 * time.Minute)).Unix() > c.accessTokenExp | ||
| } | ||
|
|
||
| // IsTokenExpired returns true when the token is expired or close to expiring. | ||
| // This is the public version of isExpired for use by callers that need to check | ||
| // token expiration before making batched ABAC token refresh requests. | ||
| func (c *AcrCLIClient) IsTokenExpired() bool { | ||
| return c.isExpired() | ||
| } | ||
|
|
||
| // GetAcrTags list the tags of a repository with their attributes. | ||
| func (c *AcrCLIClient) GetAcrTags(ctx context.Context, repoName string, orderBy string, last string) (*acrapi.RepositoryTagsType, error) { | ||
| if c.isExpired() { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This block also needs to be updated. Otherwise, when GetAcrTags detects that the token has expired, it will obtain an RBAC access token instead. This also applies to DeleteAcrTag There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Additional potential ABAC expiry edge case:
Even with ABAC batch refresh in Perhaps, you could make the SDK expiry refresh ABAC-aware (repo-scoped) as well? A targeted test in |
||
|
|
@@ -348,4 +446,11 @@ type AcrCLIClientInterface interface { | |
| GetAcrManifestAttributes(ctx context.Context, repoName string, reference string) (*acrapi.ManifestAttributes, error) | ||
| UpdateAcrTagAttributes(ctx context.Context, repoName string, reference string, value *acrapi.ChangeableAttributes) (*autorest.Response, error) | ||
| UpdateAcrManifestAttributes(ctx context.Context, repoName string, reference string, value *acrapi.ChangeableAttributes) (*autorest.Response, error) | ||
|
|
||
| // IsAbac returns true if the registry uses Attribute-Based Access Control. | ||
| IsAbac() bool | ||
| // IsTokenExpired returns true if the access token is expired or close to expiring. | ||
| IsTokenExpired() bool | ||
| // RefreshTokenForAbac refreshes the access token with scopes for specific repositories. | ||
| RefreshTokenForAbac(ctx context.Context, repositories []string) error | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.