From 684ebcb620475cd4882aa9dedbf2eabf6810f5cb Mon Sep 17 00:00:00 2001 From: "Aleksey @soar Smyrnov" Date: Tue, 17 Jun 2025 23:05:31 -0700 Subject: [PATCH 1/5] feat: Add search option --- auth/auth.go | 7 ++++ cmd/git-xargs.go | 2 + common/common.go | 10 +++++ config/config.go | 4 ++ io/validate-input.go | 2 +- main.go | 2 + repository/fetch-repos.go | 79 ++++++++++++++++++++++++++++++++++++++ repository/select-repos.go | 37 ++++++++++++++++-- types/types.go | 16 +++++++- 9 files changed, 153 insertions(+), 6 deletions(-) diff --git a/auth/auth.go b/auth/auth.go index 83b1be4..14ffe86 100644 --- a/auth/auth.go +++ b/auth/auth.go @@ -24,6 +24,11 @@ type githubRepositoriesService interface { ListByOrg(ctx context.Context, org string, opts *github.RepositoryListByOrgOptions) ([]*github.Repository, *github.Response, error) } +// The go-github package satisfies this Search service's interface in production +type githubSearchService interface { + Repositories(ctx context.Context, query string, opts *github.SearchOptions) (*github.RepositoriesSearchResult, *github.Response, error) +} + // GithubClient is the data structure that is common between production code and test code. 
In production code, // go-github satisfies the PullRequests and Repositories service interfaces, whereas in test the concrete // implementations for these same services are mocks that return a static slice of pointers to GitHub repositories, @@ -32,12 +37,14 @@ type githubRepositoriesService interface { type GithubClient struct { PullRequests githubPullRequestService Repositories githubRepositoriesService + Search githubSearchService } func NewClient(client *github.Client) GithubClient { return GithubClient{ PullRequests: client.PullRequests, Repositories: client.Repositories, + Search: client.Search, } } diff --git a/cmd/git-xargs.go b/cmd/git-xargs.go index 2286ecc..94e2ea8 100644 --- a/cmd/git-xargs.go +++ b/cmd/git-xargs.go @@ -36,6 +36,8 @@ func parseGitXargsConfig(c *cli.Context) (*config.GitXargsConfig, error) { config.TeamReviewers = c.StringSlice("team-reviewers") config.ReposFile = c.String("repos") config.GithubOrg = c.String("github-org") + config.GithubSearchQuery = c.String("github-search") + config.GithubSearchOrg = c.String("github-search-org") config.RepoSlice = c.StringSlice("repo") config.MaxConcurrentRepos = c.Int("max-concurrent-repos") config.SecondsToSleepBetweenPRs = c.Int("seconds-between-prs") diff --git a/common/common.go b/common/common.go index e731735..52e425a 100644 --- a/common/common.go +++ b/common/common.go @@ -30,6 +30,8 @@ const ( DefaultSecondsBetweenPRs = 1 DefaultMaxPullRequestRetries = 3 DefaultSecondsToWaitWhenRateLimited = 60 + GithubSearchQueryFlagName = "github-search" + GithubSearchOrgFlagName = "github-search-org" ) var ( @@ -120,4 +122,12 @@ var ( Name: KeepClonedRepositoriesFlagName, Usage: "By default, git-xargs deletes the cloned repositories from the temp directory after the command has finished running, to save space on your machine. 
Pass this flag to prevent git-xargs from deleting the cloned repositories.", } + GenericGithubSearchQueryFlag = cli.StringFlag{ + Name: GithubSearchQueryFlagName, + Usage: "GitHub search query to find repositories. For example: 'is:private' to find private repos, or 'language:go' to find Go repositories. See GitHub search syntax for more options.", + } + GenericGithubSearchOrgFlag = cli.StringFlag{ + Name: GithubSearchOrgFlagName, + Usage: "When used with --github-search, limits the search to repositories within the specified organization.", + } ) diff --git a/config/config.go b/config/config.go index 34b7858..7ce2752 100644 --- a/config/config.go +++ b/config/config.go @@ -28,6 +28,8 @@ type GitXargsConfig struct { TeamReviewers []string ReposFile string GithubOrg string + GithubSearchQuery string + GithubSearchOrg string RepoSlice []string RepoFromStdIn []string Args []string @@ -61,6 +63,8 @@ func NewGitXargsConfig() *GitXargsConfig { TeamReviewers: []string{}, ReposFile: "", GithubOrg: "", + GithubSearchQuery: "", + GithubSearchOrg: "", RepoSlice: []string{}, RepoFromStdIn: []string{}, Args: []string{}, diff --git a/io/validate-input.go b/io/validate-input.go index 6c722d6..8d7fd11 100644 --- a/io/validate-input.go +++ b/io/validate-input.go @@ -8,7 +8,7 @@ import ( // EnsureValidOptionsPassed checks that user has provided one valid method for selecting repos to operate on func EnsureValidOptionsPassed(config *config.GitXargsConfig) error { - if len(config.RepoSlice) < 1 && config.ReposFile == "" && config.GithubOrg == "" && len(config.RepoFromStdIn) == 0 { + if len(config.RepoSlice) < 1 && config.ReposFile == "" && config.GithubOrg == "" && config.GithubSearchQuery == "" && len(config.RepoFromStdIn) == 0 { return errors.WithStackTrace(types.NoRepoSelectionsMadeErr{}) } if config.BranchName == "" { diff --git a/main.go b/main.go index 7f4f656..fba5f6b 100644 --- a/main.go +++ b/main.go @@ -79,6 +79,8 @@ func setupApp() *cli.App { 
common.GenericMaxConcurrentClonesFlag, common.GenericNoSkipCIFlag, common.GenericKeepClonedRepositoriesFlag, + common.GenericGithubSearchQueryFlag, + common.GenericGithubSearchOrgFlag, } app.Action = cmd.RunGitXargs diff --git a/repository/fetch-repos.go b/repository/fetch-repos.go index bcf7596..5f1adad 100644 --- a/repository/fetch-repos.go +++ b/repository/fetch-repos.go @@ -131,3 +131,82 @@ func getReposByOrg(config *config.GitXargsConfig) ([]*github.Repository, error) return allRepos, nil } + +// getReposBySearch uses GitHub's search API to find repositories matching the given query +func getReposBySearch(config *config.GitXargsConfig) ([]*github.Repository, error) { + logger := logging.GetLogger("git-xargs") + + var allRepos []*github.Repository + + if config.GithubSearchQuery == "" { + return allRepos, errors.WithStackTrace(types.NoGithubSearchQuerySuppliedErr{}) + } + + // Build the search query + searchQuery := config.GithubSearchQuery + + // If a specific organization is provided, add it to the query + if config.GithubSearchOrg != "" { + searchQuery = fmt.Sprintf("%s org:%s", searchQuery, config.GithubSearchOrg) + } + + logger.WithFields(logrus.Fields{ + "Query": searchQuery, + }).Debug("Searching for repositories using GitHub Search API") + + opt := &github.SearchOptions{ + ListOptions: github.ListOptions{ + PerPage: 100, + }, + } + + for { + var reposToAdd []*github.Repository + result, resp, err := config.GithubClient.Search.Repositories(context.Background(), searchQuery, opt) + if err != nil { + return allRepos, errors.WithStackTrace(err) + } + + repos := result.Repositories + + // Filter out archived repos if --skip-archived-repos is passed + if config.SkipArchivedRepos { + for _, repo := range repos { + if repo.GetArchived() { + logger.WithFields(logrus.Fields{ + "Name": repo.GetFullName(), + }).Debug("Skipping archived repository from search results") + + // Track repos to skip because of archived status for our final run report + 
config.Stats.TrackSingle(stats.ReposArchivedSkipped, repo) + } else { + reposToAdd = append(reposToAdd, repo) + } + } + } else { + reposToAdd = repos + } + + allRepos = append(allRepos, reposToAdd...) + + if resp.NextPage == 0 { + break + } + opt.Page = resp.NextPage + } + + repoCount := len(allRepos) + + if repoCount == 0 { + return nil, errors.WithStackTrace(types.NoReposFoundFromSearchErr{Query: searchQuery}) + } + + logger.WithFields(logrus.Fields{ + "Repo count": repoCount, + "Query": searchQuery, + }).Debug("Fetched repos from GitHub Search API") + + config.Stats.TrackMultiple(stats.FetchedViaGithubAPI, allRepos) + + return allRepos, nil +} diff --git a/repository/select-repos.go b/repository/select-repos.go index e24513c..d6e1df4 100644 --- a/repository/select-repos.go +++ b/repository/select-repos.go @@ -21,15 +21,20 @@ const ( ExplicitReposOnCommandLine RepoSelectionCriteria = "repo-flag" ReposFilePath RepoSelectionCriteria = "repos-file" GithubOrganization RepoSelectionCriteria = "github-org" + GithubSearch RepoSelectionCriteria = "github-search" ) // getPreferredOrderOfRepoSelections codifies the order in which flags will be preferred when the user supplied more // than one: -// 1. --github-org is a string representing the GitHub org to page through via API for all repos. -// 2. --repos is a string representing a filepath to a repos file -// 3. --repo is a string slice flag that can be called multiple times -// 4. stdin allows you to pipe repos in from other CLI tools +// 1. --github-search is a string representing a GitHub search query to find repos via API +// 2. --github-org is a string representing the GitHub org to page through via API for all repos. +// 3. --repos is a string representing a filepath to a repos file +// 4. --repo is a string slice flag that can be called multiple times +// 5. 
stdin allows you to pipe repos in from other CLI tools func getPreferredOrderOfRepoSelections(config *config.GitXargsConfig) RepoSelectionCriteria { + if config.GithubSearchQuery != "" { + return GithubSearch + } if config.GithubOrg != "" { return GithubOrganization } @@ -72,6 +77,15 @@ func selectReposViaInput(config *config.GitXargsConfig) (*RepoSelection, error) GithubOrganizationName: config.GithubOrg, } switch getPreferredOrderOfRepoSelections(config) { + case GithubSearch: + config.Stats.SetSelectionMode(string(GithubSearch)) + + return &RepoSelection{ + SelectionType: GithubSearch, + AllowedRepos: []*types.AllowedRepo{}, + GithubOrganizationName: "", + }, nil + case ExplicitReposOnCommandLine: config.Stats.SetSelectionMode(string(ExplicitReposOnCommandLine)) @@ -200,6 +214,21 @@ func OperateOnRepos(config *config.GitXargsConfig) error { switch repoSelection.GetCriteria() { + case GithubSearch: + // If githubSearch is set, use the GitHub Search API to find matching repositories + reposFetchedFromSearch, err := getReposBySearch(config) + if err != nil { + logger.WithFields(logrus.Fields{ + "Error": err, + "Query": config.GithubSearchQuery, + }).Debug("Failure searching for repos using GitHub Search API") + return err + } + // We gather all the repos by searching them from the GitHub API + reposToIterate = reposFetchedFromSearch + + logger.Debugf("Using GitHub search query: %s as source of repositories. 
Searching through GitHub API for repos.", config.GithubSearchQuery) + case GithubOrganization: // If githubOrganization is set, the user did not provide a flat file or explicit repos via the -repo(s) flags, so we're just looking up all the GitHub // repos via their Organization name via the GitHub API diff --git a/types/types.go b/types/types.go index 88d61a3..46bea1b 100644 --- a/types/types.go +++ b/types/types.go @@ -67,7 +67,7 @@ func (NoGithubOrgSuppliedErr) Error() string { type NoRepoSelectionsMadeErr struct{} func (NoRepoSelectionsMadeErr) Error() string { - return fmt.Sprint("You must target some repos for processing either via stdin or by providing one of the --github-org, --repos, or --repo flags") + return fmt.Sprint("You must target some repos for processing either via stdin or by providing one of the --github-search, --github-org, --repos, or --repo flags") } type NoRepoFlagTargetsValid struct{} @@ -107,3 +107,17 @@ type NoGithubOauthTokenProvidedErr struct{} func (NoGithubOauthTokenProvidedErr) Error() string { return fmt.Sprintf("You must export a valid Github personal access token as GITHUB_OAUTH_TOKEN") } + +type NoGithubSearchQuerySuppliedErr struct{} + +func (NoGithubSearchQuerySuppliedErr) Error() string { + return fmt.Sprint("You must pass a valid GitHub search query via the --github-search flag") +} + +type NoReposFoundFromSearchErr struct { + Query string +} + +func (err NoReposFoundFromSearchErr) Error() string { + return fmt.Sprintf("No repos found for the search query: %s", err.Query) +} From 5da004b6bd00b8474db7b3677f6038862783114c Mon Sep 17 00:00:00 2001 From: "Aleksey @soar Smyrnov" Date: Tue, 17 Jun 2025 23:16:37 -0700 Subject: [PATCH 2/5] feat: Remove unnecessary flag --- cmd/git-xargs.go | 1 - common/common.go | 5 ----- config/config.go | 2 -- main.go | 1 - repository/fetch-repos.go | 4 ++-- 5 files changed, 2 insertions(+), 11 deletions(-) diff --git a/cmd/git-xargs.go b/cmd/git-xargs.go index 94e2ea8..bbb8de5 100644 --- 
a/cmd/git-xargs.go +++ b/cmd/git-xargs.go @@ -37,7 +37,6 @@ func parseGitXargsConfig(c *cli.Context) (*config.GitXargsConfig, error) { config.ReposFile = c.String("repos") config.GithubOrg = c.String("github-org") config.GithubSearchQuery = c.String("github-search") - config.GithubSearchOrg = c.String("github-search-org") config.RepoSlice = c.StringSlice("repo") config.MaxConcurrentRepos = c.Int("max-concurrent-repos") config.SecondsToSleepBetweenPRs = c.Int("seconds-between-prs") diff --git a/common/common.go b/common/common.go index 52e425a..e3bb66e 100644 --- a/common/common.go +++ b/common/common.go @@ -31,7 +31,6 @@ const ( DefaultMaxPullRequestRetries = 3 DefaultSecondsToWaitWhenRateLimited = 60 GithubSearchQueryFlagName = "github-search" - GithubSearchOrgFlagName = "github-search-org" ) var ( @@ -126,8 +125,4 @@ var ( Name: GithubSearchQueryFlagName, Usage: "GitHub search query to find repositories. For example: 'is:private' to find private repos, or 'language:go' to find Go repositories. 
See GitHub search syntax for more options.", } - GenericGithubSearchOrgFlag = cli.StringFlag{ - Name: GithubSearchOrgFlagName, - Usage: "When used with --github-search, limits the search to repositories within the specified organization.", - } ) diff --git a/config/config.go b/config/config.go index 7ce2752..2366d64 100644 --- a/config/config.go +++ b/config/config.go @@ -29,7 +29,6 @@ type GitXargsConfig struct { ReposFile string GithubOrg string GithubSearchQuery string - GithubSearchOrg string RepoSlice []string RepoFromStdIn []string Args []string @@ -64,7 +63,6 @@ func NewGitXargsConfig() *GitXargsConfig { ReposFile: "", GithubOrg: "", GithubSearchQuery: "", - GithubSearchOrg: "", RepoSlice: []string{}, RepoFromStdIn: []string{}, Args: []string{}, diff --git a/main.go b/main.go index fba5f6b..0bb93a3 100644 --- a/main.go +++ b/main.go @@ -80,7 +80,6 @@ func setupApp() *cli.App { common.GenericNoSkipCIFlag, common.GenericKeepClonedRepositoriesFlag, common.GenericGithubSearchQueryFlag, - common.GenericGithubSearchOrgFlag, } app.Action = cmd.RunGitXargs diff --git a/repository/fetch-repos.go b/repository/fetch-repos.go index 5f1adad..579487a 100644 --- a/repository/fetch-repos.go +++ b/repository/fetch-repos.go @@ -146,8 +146,8 @@ func getReposBySearch(config *config.GitXargsConfig) ([]*github.Repository, erro searchQuery := config.GithubSearchQuery // If a specific organization is provided, add it to the query - if config.GithubSearchOrg != "" { - searchQuery = fmt.Sprintf("%s org:%s", searchQuery, config.GithubSearchOrg) + if config.GithubOrg != "" { + searchQuery = fmt.Sprintf("%s org:%s", searchQuery, config.GithubOrg) } logger.WithFields(logrus.Fields{ From 54240c29040f285f18a1bc3a8c6a631d1a58a1f0 Mon Sep 17 00:00:00 2001 From: "Aleksey @soar Smyrnov" Date: Tue, 17 Jun 2025 23:39:21 -0700 Subject: [PATCH 3/5] feat: Add code search logic (WiP) --- auth/auth.go | 1 + common/common.go | 2 +- repository/fetch-repos.go | 164 ++++++++++++++++++++++++++++++++++++-- 
3 files changed, 161 insertions(+), 6 deletions(-) diff --git a/auth/auth.go b/auth/auth.go index 14ffe86..b8a10dd 100644 --- a/auth/auth.go +++ b/auth/auth.go @@ -27,6 +27,7 @@ type githubRepositoriesService interface { // The go-github package satisfies this Search service's interface in production type githubSearchService interface { Repositories(ctx context.Context, query string, opts *github.SearchOptions) (*github.RepositoriesSearchResult, *github.Response, error) + Code(ctx context.Context, query string, opts *github.SearchOptions) (*github.CodeSearchResult, *github.Response, error) } // GithubClient is the data structure that is common between production code and test code. In production code, diff --git a/common/common.go b/common/common.go index e3bb66e..1d72673 100644 --- a/common/common.go +++ b/common/common.go @@ -123,6 +123,6 @@ var ( } GenericGithubSearchQueryFlag = cli.StringFlag{ Name: GithubSearchQueryFlagName, - Usage: "GitHub search query to find repositories. For example: 'is:private' to find private repos, or 'language:go' to find Go repositories. See GitHub search syntax for more options.", + Usage: "GitHub search query to find repositories. Supports both repository search (e.g., 'language:go', 'is:private') and code search (e.g., 'path:Dockerfile', 'filename:package.json'). Code search will automatically extract unique repositories from matching files. 
See GitHub search syntax for more options.", } ) diff --git a/repository/fetch-repos.go b/repository/fetch-repos.go index 579487a..45555fe 100644 --- a/repository/fetch-repos.go +++ b/repository/fetch-repos.go @@ -3,6 +3,7 @@ package repository import ( "context" "fmt" + "strings" "github.com/gruntwork-io/git-xargs/auth" "github.com/gruntwork-io/git-xargs/config" @@ -136,11 +137,29 @@ func getReposByOrg(config *config.GitXargsConfig) ([]*github.Repository, error) func getReposBySearch(config *config.GitXargsConfig) ([]*github.Repository, error) { logger := logging.GetLogger("git-xargs") - var allRepos []*github.Repository - if config.GithubSearchQuery == "" { - return allRepos, errors.WithStackTrace(types.NoGithubSearchQuerySuppliedErr{}) + return nil, errors.WithStackTrace(types.NoGithubSearchQuerySuppliedErr{}) + } + + // Determine if this should be a code search or repository search + if isCodeSearchQuery(config.GithubSearchQuery) { + logger.WithFields(logrus.Fields{ + "Query": config.GithubSearchQuery, + }).Debug("Detected code search query, using GitHub Code Search API") + return getReposByCodeSearch(config) + } else { + logger.WithFields(logrus.Fields{ + "Query": config.GithubSearchQuery, + }).Debug("Detected repository search query, using GitHub Repository Search API") + return getReposByRepositorySearch(config) } +} + +// getReposByRepositorySearch uses GitHub's repository search API to find repositories matching the given query +func getReposByRepositorySearch(config *config.GitXargsConfig) ([]*github.Repository, error) { + logger := logging.GetLogger("git-xargs") + + var allRepos []*github.Repository // Build the search query searchQuery := config.GithubSearchQuery @@ -152,7 +171,7 @@ func getReposBySearch(config *config.GitXargsConfig) ([]*github.Repository, erro logger.WithFields(logrus.Fields{ "Query": searchQuery, - }).Debug("Searching for repositories using GitHub Search API") + }).Debug("Searching for repositories using GitHub Repository Search 
API") opt := &github.SearchOptions{ ListOptions: github.ListOptions{ @@ -204,9 +223,144 @@ func getReposBySearch(config *config.GitXargsConfig) ([]*github.Repository, erro logger.WithFields(logrus.Fields{ "Repo count": repoCount, "Query": searchQuery, - }).Debug("Fetched repos from GitHub Search API") + }).Debug("Fetched repos from GitHub Repository Search API") config.Stats.TrackMultiple(stats.FetchedViaGithubAPI, allRepos) return allRepos, nil } + +// getReposByCodeSearch uses GitHub's code search API to find repositories containing matching code +func getReposByCodeSearch(config *config.GitXargsConfig) ([]*github.Repository, error) { + logger := logging.GetLogger("git-xargs") + + var allRepos []*github.Repository + repoMap := make(map[string]*github.Repository) // To avoid duplicates + + if config.GithubSearchQuery == "" { + return allRepos, errors.WithStackTrace(types.NoGithubSearchQuerySuppliedErr{}) + } + + // Build the search query + searchQuery := config.GithubSearchQuery + + // If a specific organization is provided, add it to the query + if config.GithubOrg != "" { + searchQuery = fmt.Sprintf("%s org:%s", searchQuery, config.GithubOrg) + } + + logger.WithFields(logrus.Fields{ + "Query": searchQuery, + }).Debug("Searching for code using GitHub Code Search API") + + opt := &github.SearchOptions{ + ListOptions: github.ListOptions{ + PerPage: 100, + }, + } + + for { + result, resp, err := config.GithubClient.Search.Code(context.Background(), searchQuery, opt) + if err != nil { + return allRepos, errors.WithStackTrace(err) + } + + // Extract unique repositories from code search results + for _, codeResult := range result.CodeResults { + repo := codeResult.Repository + if repo != nil { + repoKey := repo.GetFullName() + + // Skip archived repos if --skip-archived-repos is passed + if config.SkipArchivedRepos && repo.GetArchived() { + logger.WithFields(logrus.Fields{ + "Name": repo.GetFullName(), + }).Debug("Skipping archived repository from code search results") 
+ + // Track repos to skip because of archived status for our final run report + config.Stats.TrackSingle(stats.ReposArchivedSkipped, repo) + continue + } + + // Add to map to avoid duplicates + repoMap[repoKey] = repo + } + } + + if resp.NextPage == 0 { + break + } + opt.Page = resp.NextPage + } + + // Convert map to slice + for _, repo := range repoMap { + allRepos = append(allRepos, repo) + } + + repoCount := len(allRepos) + + if repoCount == 0 { + return nil, errors.WithStackTrace(types.NoReposFoundFromSearchErr{Query: searchQuery}) + } + + logger.WithFields(logrus.Fields{ + "Repo count": repoCount, + "Query": searchQuery, + }).Debug("Fetched repos from GitHub Code Search API") + + config.Stats.TrackMultiple(stats.FetchedViaGithubAPI, allRepos) + + return allRepos, nil +} + +// isCodeSearchQuery determines if a query should use code search instead of repository search +// Code search queries typically contain file-specific qualifiers like path:, filename:, extension: +// or content search terms without repository-specific qualifiers +func isCodeSearchQuery(query string) bool { + codeSearchIndicators := []string{ + "path:", + "filename:", + "extension:", + "in:file", + "in:path", + } + + for _, indicator := range codeSearchIndicators { + if strings.Contains(query, indicator) { + return true + } + } + + // If the query doesn't contain typical repository search qualifiers and isn't obviously + // a repository search, it's likely a code search + repoSearchIndicators := []string{ + "language:", + "topic:", + "is:public", + "is:private", + "is:internal", + "archived:", + "fork:", + "mirror:", + "template:", + "stars:", + "forks:", + "size:", + "pushed:", + "created:", + "updated:", + } + + hasRepoIndicator := false + for _, indicator := range repoSearchIndicators { + if strings.Contains(query, indicator) { + hasRepoIndicator = true + break + } + } + + // If it has no repository indicators and contains text that could be code content, + // treat it as code search + 
return !hasRepoIndicator +} From c0501531d6e5360e1b43e183a16439e85906937c Mon Sep 17 00:00:00 2001 From: "Aleksey @soar Smyrnov" Date: Wed, 18 Jun 2025 00:25:59 -0700 Subject: [PATCH 4/5] feat: Split search logic between two args --- README.md | 18 +++++ cmd/git-xargs.go | 3 +- common/common.go | 13 ++-- config/config.go | 6 +- io/validate-input.go | 3 +- main.go | 3 +- repository/fetch-repos.go | 134 ++++++++++++++++++------------------- repository/select-repos.go | 88 +++++++++++++++++++----- types/types.go | 2 +- 9 files changed, 177 insertions(+), 93 deletions(-) diff --git a/README.md b/README.md index f48fbfb..399b199 100644 --- a/README.md +++ b/README.md @@ -426,6 +426,24 @@ echo "gruntwork-io/terragrunt gruntwork-io/terratest" | git-xargs \ "$(pwd)/scripts/update-copyright-year.sh" ``` +### Option #5: Select repos via the GitHub Search API + +The repository scope can be narrowed down by using the GitHub Search API. This allows you to select repositories based on various criteria, such as language, topics, other metadata, or file content. + +- `--github-repository-search` to select repositories based on the [GitHub Repository Search API](https://docs.github.com/en/search-github/searching-on-github/searching-for-repositories) +- `--github-code-search` to select repositories based on the [GitHub Code Search API](https://docs.github.com/en/search-github/github-code-search/understanding-github-code-search-syntax) +- `--github-org` can still be used together with either flag; it appends an `org:<your-github-org>` qualifier to the search query + +If both `--github-repository-search` and `--github-code-search` are provided, only repositories that match both queries are selected. + +``` +git-xargs \ + --github-org <your-github-org> \ + --github-repository-search "is:private language:go" \ + --github-code-search "filename:Dockerfile ubuntu" \ + "$(pwd)/scripts/update-copyright-year.sh" +``` + ## Notable flags `git-xargs` exposes several flags that allow you to customize its behavior to better suit your needs.
For the latest info on flags, you should run `git-xargs --help`. However, a couple of the flags are worth explaining more in depth here: diff --git a/cmd/git-xargs.go b/cmd/git-xargs.go index bbb8de5..369e18b 100644 --- a/cmd/git-xargs.go +++ b/cmd/git-xargs.go @@ -36,7 +36,8 @@ func parseGitXargsConfig(c *cli.Context) (*config.GitXargsConfig, error) { config.TeamReviewers = c.StringSlice("team-reviewers") config.ReposFile = c.String("repos") config.GithubOrg = c.String("github-org") - config.GithubSearchQuery = c.String("github-search") + config.GithubRepositorySearch = c.String("github-repository-search") + config.GithubCodeSearch = c.String("github-code-search") config.RepoSlice = c.StringSlice("repo") config.MaxConcurrentRepos = c.Int("max-concurrent-repos") config.SecondsToSleepBetweenPRs = c.Int("seconds-between-prs") diff --git a/common/common.go b/common/common.go index 1d72673..5feb45d 100644 --- a/common/common.go +++ b/common/common.go @@ -30,7 +30,8 @@ const ( DefaultSecondsBetweenPRs = 1 DefaultMaxPullRequestRetries = 3 DefaultSecondsToWaitWhenRateLimited = 60 - GithubSearchQueryFlagName = "github-search" + GithubRepositorySearchFlagName = "github-repository-search" + GithubCodeSearchFlagName = "github-code-search" ) var ( @@ -121,8 +122,12 @@ var ( Name: KeepClonedRepositoriesFlagName, Usage: "By default, git-xargs deletes the cloned repositories from the temp directory after the command has finished running, to save space on your machine. Pass this flag to prevent git-xargs from deleting the cloned repositories.", } - GenericGithubSearchQueryFlag = cli.StringFlag{ - Name: GithubSearchQueryFlagName, - Usage: "GitHub search query to find repositories. Supports both repository search (e.g., 'language:go', 'is:private') and code search (e.g., 'path:Dockerfile', 'filename:package.json'). Code search will automatically extract unique repositories from matching files. 
See GitHub search syntax for more options.", + GenericGithubRepositorySearchFlag = cli.StringFlag{ + Name: GithubRepositorySearchFlagName, + Usage: "GitHub repository search query to find repositories (e.g., 'language:go', 'is:private', 'topic:docker'). See GitHub repository search syntax for more options.", + } + GenericGithubCodeSearchFlag = cli.StringFlag{ + Name: GithubCodeSearchFlagName, + Usage: "GitHub code search query to find repositories containing matching code (e.g., 'path:Dockerfile', 'filename:package.json', 'extension:py print'). Repositories will be extracted from code search results. See GitHub code search syntax for more options.", } ) diff --git a/config/config.go b/config/config.go index 2366d64..beccf8b 100644 --- a/config/config.go +++ b/config/config.go @@ -28,7 +28,8 @@ type GitXargsConfig struct { TeamReviewers []string ReposFile string GithubOrg string - GithubSearchQuery string + GithubRepositorySearch string + GithubCodeSearch string RepoSlice []string RepoFromStdIn []string Args []string @@ -62,7 +63,8 @@ func NewGitXargsConfig() *GitXargsConfig { TeamReviewers: []string{}, ReposFile: "", GithubOrg: "", - GithubSearchQuery: "", + GithubRepositorySearch: "", + GithubCodeSearch: "", RepoSlice: []string{}, RepoFromStdIn: []string{}, Args: []string{}, diff --git a/io/validate-input.go b/io/validate-input.go index 8d7fd11..694d748 100644 --- a/io/validate-input.go +++ b/io/validate-input.go @@ -8,7 +8,8 @@ import ( // EnsureValidOptionsPassed checks that user has provided one valid method for selecting repos to operate on func EnsureValidOptionsPassed(config *config.GitXargsConfig) error { - if len(config.RepoSlice) < 1 && config.ReposFile == "" && config.GithubOrg == "" && config.GithubSearchQuery == "" && len(config.RepoFromStdIn) == 0 { + if len(config.RepoSlice) < 1 && config.ReposFile == "" && config.GithubOrg == "" && + config.GithubRepositorySearch == "" && config.GithubCodeSearch == "" && len(config.RepoFromStdIn) == 0 { return 
errors.WithStackTrace(types.NoRepoSelectionsMadeErr{}) } if config.BranchName == "" { diff --git a/main.go b/main.go index 0bb93a3..5fbcbaa 100644 --- a/main.go +++ b/main.go @@ -79,7 +79,8 @@ func setupApp() *cli.App { common.GenericMaxConcurrentClonesFlag, common.GenericNoSkipCIFlag, common.GenericKeepClonedRepositoriesFlag, - common.GenericGithubSearchQueryFlag, + common.GenericGithubRepositorySearchFlag, + common.GenericGithubCodeSearchFlag, } app.Action = cmd.RunGitXargs diff --git a/repository/fetch-repos.go b/repository/fetch-repos.go index 45555fe..797af58 100644 --- a/repository/fetch-repos.go +++ b/repository/fetch-repos.go @@ -3,7 +3,6 @@ package repository import ( "context" "fmt" - "strings" "github.com/gruntwork-io/git-xargs/auth" "github.com/gruntwork-io/git-xargs/config" @@ -137,22 +136,68 @@ func getReposByOrg(config *config.GitXargsConfig) ([]*github.Repository, error) func getReposBySearch(config *config.GitXargsConfig) ([]*github.Repository, error) { logger := logging.GetLogger("git-xargs") - if config.GithubSearchQuery == "" { - return nil, errors.WithStackTrace(types.NoGithubSearchQuerySuppliedErr{}) + // Handle different search scenarios + if config.GithubRepositorySearch != "" && config.GithubCodeSearch != "" { + // Both searches provided - return intersection + logger.Debug("Both repository and code search queries provided, finding intersection") + return getReposByIntersection(config) + } else if config.GithubRepositorySearch != "" { + // Only repository search + return getReposByRepositorySearch(config) + } else if config.GithubCodeSearch != "" { + // Only code search + return getReposByCodeSearch(config) } - // Determine if this should be a code search or repository search - if isCodeSearchQuery(config.GithubSearchQuery) { - logger.WithFields(logrus.Fields{ - "Query": config.GithubSearchQuery, - }).Debug("Detected code search query, using GitHub Code Search API") - return getReposByCodeSearch(config) - } else { - 
logger.WithFields(logrus.Fields{ - "Query": config.GithubSearchQuery, - }).Debug("Detected repository search query, using GitHub Repository Search API") - return getReposByRepositorySearch(config) + return nil, errors.WithStackTrace(types.NoGithubSearchQuerySuppliedErr{}) +} + +// getReposByIntersection finds repositories that match both repository and code search queries +func getReposByIntersection(config *config.GitXargsConfig) ([]*github.Repository, error) { + logger := logging.GetLogger("git-xargs") + + // Get repositories from repository search + repoSearchRepos, err := getReposByRepositorySearch(config) + if err != nil { + return nil, err + } + + // Get repositories from code search + codeSearchRepos, err := getReposByCodeSearch(config) + if err != nil { + return nil, err + } + + // Find intersection + repoMap := make(map[string]*github.Repository) + for _, repo := range repoSearchRepos { + repoMap[repo.GetFullName()] = repo + } + + var intersectionRepos []*github.Repository + for _, repo := range codeSearchRepos { + if _, found := repoMap[repo.GetFullName()]; found { + intersectionRepos = append(intersectionRepos, repo) + } + } + + repoCount := len(intersectionRepos) + if repoCount == 0 { + return nil, errors.WithStackTrace(types.NoReposFoundFromSearchErr{ + Query: fmt.Sprintf("intersection of repository search '%s' and code search '%s'", + config.GithubRepositorySearch, config.GithubCodeSearch), + }) } + + logger.WithFields(logrus.Fields{ + "Repo count": repoCount, + "Repository Query": config.GithubRepositorySearch, + "Code Query": config.GithubCodeSearch, + }).Debug("Found intersection of repository and code search results") + + config.Stats.TrackMultiple(stats.FetchedViaGithubAPI, intersectionRepos) + + return intersectionRepos, nil } // getReposByRepositorySearch uses GitHub's repository search API to find repositories matching the given query @@ -161,8 +206,12 @@ func getReposByRepositorySearch(config *config.GitXargsConfig) ([]*github.Reposi var 
allRepos []*github.Repository + if config.GithubRepositorySearch == "" { + return nil, errors.WithStackTrace(types.NoGithubSearchQuerySuppliedErr{}) + } + // Build the search query - searchQuery := config.GithubSearchQuery + searchQuery := config.GithubRepositorySearch // If a specific organization is provided, add it to the query if config.GithubOrg != "" { @@ -237,12 +286,12 @@ func getReposByCodeSearch(config *config.GitXargsConfig) ([]*github.Repository, var allRepos []*github.Repository repoMap := make(map[string]*github.Repository) // To avoid duplicates - if config.GithubSearchQuery == "" { + if config.GithubCodeSearch == "" { return allRepos, errors.WithStackTrace(types.NoGithubSearchQuerySuppliedErr{}) } // Build the search query - searchQuery := config.GithubSearchQuery + searchQuery := config.GithubCodeSearch // If a specific organization is provided, add it to the query if config.GithubOrg != "" { @@ -313,54 +362,3 @@ func getReposByCodeSearch(config *config.GitXargsConfig) ([]*github.Repository, return allRepos, nil } - -// isCodeSearchQuery determines if a query should use code search instead of repository search -// Code search queries typically contain file-specific qualifiers like path:, filename:, extension: -// or content search terms without repository-specific qualifiers -func isCodeSearchQuery(query string) bool { - codeSearchIndicators := []string{ - "path:", - "filename:", - "extension:", - "in:file", - "in:path", - } - - for _, indicator := range codeSearchIndicators { - if strings.Contains(query, indicator) { - return true - } - } - - // If the query doesn't contain typical repository search qualifiers and isn't obviously - // a repository search, it's likely a code search - repoSearchIndicators := []string{ - "language:", - "topic:", - "is:public", - "is:private", - "is:internal", - "archived:", - "fork:", - "mirror:", - "template:", - "stars:", - "forks:", - "size:", - "pushed:", - "created:", - "updated:", - } - - hasRepoIndicator := 
false - for _, indicator := range repoSearchIndicators { - if strings.Contains(query, indicator) { - hasRepoIndicator = true - break - } - } - - // If it has no repository indicators and contains text that could be code content, - // treat it as code search - return !hasRepoIndicator -} diff --git a/repository/select-repos.go b/repository/select-repos.go index d6e1df4..72a544e 100644 --- a/repository/select-repos.go +++ b/repository/select-repos.go @@ -21,19 +21,29 @@ const ( ExplicitReposOnCommandLine RepoSelectionCriteria = "repo-flag" ReposFilePath RepoSelectionCriteria = "repos-file" GithubOrganization RepoSelectionCriteria = "github-org" - GithubSearch RepoSelectionCriteria = "github-search" + GithubRepositorySearch RepoSelectionCriteria = "github-repository-search" + GithubCodeSearch RepoSelectionCriteria = "github-code-search" + GithubCombinedSearch RepoSelectionCriteria = "github-combined-search" ) // getPreferredOrderOfRepoSelections codifies the order in which flags will be preferred when the user supplied more // than one: -// 1. --github-search is a string representing a GitHub search query to find repos via API -// 2. --github-org is a string representing the GitHub org to page through via API for all repos. -// 3. --repos is a string representing a filepath to a repos file -// 4. --repo is a string slice flag that can be called multiple times -// 5. stdin allows you to pipe repos in from other CLI tools +// 1. Both --github-repository-search and --github-code-search (intersection of both) +// 2. --github-repository-search for repository-based searches +// 3. --github-code-search for code-based searches +// 4. --github-org is a string representing the GitHub org to page through via API for all repos. +// 5. --repos is a string representing a filepath to a repos file +// 6. --repo is a string slice flag that can be called multiple times +// 7. 
stdin allows you to pipe repos in from other CLI tools func getPreferredOrderOfRepoSelections(config *config.GitXargsConfig) RepoSelectionCriteria { - if config.GithubSearchQuery != "" { - return GithubSearch + if config.GithubRepositorySearch != "" && config.GithubCodeSearch != "" { + return GithubCombinedSearch + } + if config.GithubRepositorySearch != "" { + return GithubRepositorySearch + } + if config.GithubCodeSearch != "" { + return GithubCodeSearch } if config.GithubOrg != "" { return GithubOrganization @@ -77,11 +87,29 @@ func selectReposViaInput(config *config.GitXargsConfig) (*RepoSelection, error) GithubOrganizationName: config.GithubOrg, } switch getPreferredOrderOfRepoSelections(config) { - case GithubSearch: - config.Stats.SetSelectionMode(string(GithubSearch)) + case GithubRepositorySearch: + config.Stats.SetSelectionMode(string(GithubRepositorySearch)) + + return &RepoSelection{ + SelectionType: GithubRepositorySearch, + AllowedRepos: []*types.AllowedRepo{}, + GithubOrganizationName: "", + }, nil + + case GithubCodeSearch: + config.Stats.SetSelectionMode(string(GithubCodeSearch)) + + return &RepoSelection{ + SelectionType: GithubCodeSearch, + AllowedRepos: []*types.AllowedRepo{}, + GithubOrganizationName: "", + }, nil + + case GithubCombinedSearch: + config.Stats.SetSelectionMode(string(GithubCombinedSearch)) return &RepoSelection{ - SelectionType: GithubSearch, + SelectionType: GithubCombinedSearch, AllowedRepos: []*types.AllowedRepo{}, GithubOrganizationName: "", }, nil @@ -214,20 +242,50 @@ func OperateOnRepos(config *config.GitXargsConfig) error { switch repoSelection.GetCriteria() { - case GithubSearch: - // If githubSearch is set, use the GitHub Search API to find matching repositories + case GithubRepositorySearch: + // If githubRepositorySearch is set, use the GitHub Search API to find matching repositories + reposFetchedFromSearch, err := getReposBySearch(config) + if err != nil { + logger.WithFields(logrus.Fields{ + "Error": err, + 
"Query": config.GithubRepositorySearch, + }).Debug("Failure searching for repos using GitHub Search API") + return err + } + // We gather all the repos by searching them from the GitHub API + reposToIterate = reposFetchedFromSearch + + logger.Debugf("Using GitHub repository search query: %s as source of repositories. Searching through GitHub API for repos.", config.GithubRepositorySearch) + + case GithubCodeSearch: + // If githubCodeSearch is set, use the GitHub Search API to find matching repositories + reposFetchedFromSearch, err := getReposBySearch(config) + if err != nil { + logger.WithFields(logrus.Fields{ + "Error": err, + "Query": config.GithubCodeSearch, + }).Debug("Failure searching for repos using GitHub Search API") + return err + } + // We gather all the repos by searching them from the GitHub API + reposToIterate = reposFetchedFromSearch + + logger.Debugf("Using GitHub code search query: %s as source of repositories. Searching through GitHub API for repos.", config.GithubCodeSearch) + + case GithubCombinedSearch: + // If githubCombinedSearch is set, use the GitHub Search API to find matching repositories reposFetchedFromSearch, err := getReposBySearch(config) if err != nil { logger.WithFields(logrus.Fields{ "Error": err, - "Query": config.GithubSearchQuery, + "Query": config.GithubRepositorySearch + " " + config.GithubCodeSearch, }).Debug("Failure searching for repos using GitHub Search API") return err } // We gather all the repos by searching them from the GitHub API reposToIterate = reposFetchedFromSearch - logger.Debugf("Using GitHub search query: %s as source of repositories. Searching through GitHub API for repos.", config.GithubSearchQuery) + logger.Debugf("Using GitHub combined search query: %s as source of repositories. 
Searching through GitHub API for repos.", config.GithubRepositorySearch+" "+config.GithubCodeSearch) case GithubOrganization: // If githubOrganization is set, the user did not provide a flat file or explicit repos via the -repo(s) flags, so we're just looking up all the GitHub diff --git a/types/types.go b/types/types.go index 46bea1b..0ae0340 100644 --- a/types/types.go +++ b/types/types.go @@ -67,7 +67,7 @@ func (NoGithubOrgSuppliedErr) Error() string { type NoRepoSelectionsMadeErr struct{} func (NoRepoSelectionsMadeErr) Error() string { - return fmt.Sprint("You must target some repos for processing either via stdin or by providing one of the --github-search, --github-org, --repos, or --repo flags") + return fmt.Sprint("You must target some repos for processing either via stdin or by providing one of the --github-repository-search, --github-code-search, --github-org, --repos, or --repo flags") } type NoRepoFlagTargetsValid struct{} From b3f37d1ad9a7f67d80570cc530f15de497acf3ec Mon Sep 17 00:00:00 2001 From: "Aleksey @soar Smyrnov" Date: Thu, 19 Jun 2025 17:46:48 -0700 Subject: [PATCH 5/5] fix: Populate URLs from code search results properly --- repository/fetch-repos.go | 89 ++++++++++++++++++++++++++++++++++++--- types/types.go | 12 ++++++ 2 files changed, 95 insertions(+), 6 deletions(-) diff --git a/repository/fetch-repos.go b/repository/fetch-repos.go index 797af58..8b2459e 100644 --- a/repository/fetch-repos.go +++ b/repository/fetch-repos.go @@ -3,6 +3,7 @@ package repository import ( "context" "fmt" + "strings" "github.com/gruntwork-io/git-xargs/auth" "github.com/gruntwork-io/git-xargs/config" @@ -207,7 +208,7 @@ func getReposByRepositorySearch(config *config.GitXargsConfig) ([]*github.Reposi var allRepos []*github.Repository if config.GithubRepositorySearch == "" { - return nil, errors.WithStackTrace(types.NoGithubSearchQuerySuppliedErr{}) + return nil, errors.WithStackTrace(types.NoGithubRepositorySearchQuerySuppliedErr{}) } // Build the search query @@ 
-248,11 +249,33 @@ func getReposByRepositorySearch(config *config.GitXargsConfig) ([]*github.Reposi // Track repos to skip because of archived status for our final run report config.Stats.TrackSingle(stats.ReposArchivedSkipped, repo) } else { - reposToAdd = append(reposToAdd, repo) + // Ensure complete repository data before adding + completeRepo, err := ensureCompleteRepositoryData(config, repo) + if err != nil { + logger.WithFields(logrus.Fields{ + "Repo": repo.GetFullName(), + "Error": err, + }).Debug("Error fetching complete repository data") + // Continue with original repo data + completeRepo = repo + } + reposToAdd = append(reposToAdd, completeRepo) } } } else { - reposToAdd = repos + // Ensure complete repository data for all repos + for _, repo := range repos { + completeRepo, err := ensureCompleteRepositoryData(config, repo) + if err != nil { + logger.WithFields(logrus.Fields{ + "Repo": repo.GetFullName(), + "Error": err, + }).Debug("Error fetching complete repository data") + // Continue with original repo data + completeRepo = repo + } + reposToAdd = append(reposToAdd, completeRepo) + } } allRepos = append(allRepos, reposToAdd...) 
@@ -287,7 +310,7 @@ func getReposByCodeSearch(config *config.GitXargsConfig) ([]*github.Repository, repoMap := make(map[string]*github.Repository) // To avoid duplicates if config.GithubCodeSearch == "" { - return allRepos, errors.WithStackTrace(types.NoGithubSearchQuerySuppliedErr{}) + return allRepos, errors.WithStackTrace(types.NoGithubCodeSearchQuerySuppliedErr{}) } // Build the search query @@ -342,9 +365,18 @@ func getReposByCodeSearch(config *config.GitXargsConfig) ([]*github.Repository, opt.Page = resp.NextPage } - // Convert map to slice + // Convert map to slice and ensure complete repository data for _, repo := range repoMap { - allRepos = append(allRepos, repo) + completeRepo, err := ensureCompleteRepositoryData(config, repo) + if err != nil { + logger.WithFields(logrus.Fields{ + "Repo": repo.GetFullName(), + "Error": err, + }).Debug("Error fetching complete repository data") + // Continue with original repo data + completeRepo = repo + } + allRepos = append(allRepos, completeRepo) } repoCount := len(allRepos) @@ -362,3 +394,48 @@ func getReposByCodeSearch(config *config.GitXargsConfig) ([]*github.Repository, return allRepos, nil } + +// ensureCompleteRepositoryData fetches complete repository information using GitHub's Get Repository API +// This is needed because search results may not include all fields (like CloneURL) that are required +func ensureCompleteRepositoryData(config *config.GitXargsConfig, repo *github.Repository) (*github.Repository, error) { + if repo == nil || repo.FullName == nil { + return repo, nil + } + + // Check if we already have the essential fields - if so, no need to make additional API call + if repo.CloneURL != nil && repo.HTMLURL != nil && repo.URL != nil { + return repo, nil + } + + logger := logging.GetLogger("git-xargs") + + // Parse owner and repo name from FullName + parts := strings.Split(*repo.FullName, "/") + if len(parts) != 2 { + logger.WithFields(logrus.Fields{ + "FullName": *repo.FullName, + }).Debug("Invalid 
repository FullName format, skipping complete data fetch") + return repo, nil + } + + owner := parts[0] + repoName := parts[1] + + // Fetch complete repository data + completeRepo, _, err := config.GithubClient.Repositories.Get(context.Background(), owner, repoName) + if err != nil { + logger.WithFields(logrus.Fields{ + "Owner": owner, + "Repo": repoName, + "Error": err, + }).Debug("Failed to fetch complete repository data, using search result as-is") + return repo, nil // Return original repo if we can't fetch complete data + } + + logger.WithFields(logrus.Fields{ + "Owner": owner, + "Repo": repoName, + }).Debug("Fetched complete repository data via Get Repository API") + + return completeRepo, nil +} diff --git a/types/types.go b/types/types.go index 0ae0340..dee2de9 100644 --- a/types/types.go +++ b/types/types.go @@ -121,3 +121,15 @@ type NoReposFoundFromSearchErr struct { func (err NoReposFoundFromSearchErr) Error() string { return fmt.Sprintf("No repos found for the search query: %s", err.Query) } + +type NoGithubRepositorySearchQuerySuppliedErr struct{} + +func (NoGithubRepositorySearchQuerySuppliedErr) Error() string { + return fmt.Sprint("You must pass a valid GitHub repository search query via the --github-repository-search flag") +} + +type NoGithubCodeSearchQuerySuppliedErr struct{} + +func (NoGithubCodeSearchQuerySuppliedErr) Error() string { + return fmt.Sprint("You must pass a valid GitHub code search query via the --github-code-search flag") +}