-
Notifications
You must be signed in to change notification settings - Fork 127
[RB] Optimize git operations run by the CLI #11432
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -253,18 +253,23 @@ func parseRemote(s string) (*gitRemote, error) { | |
|
|
||
| } | ||
|
|
||
| // determineDefaultBranch parses `remoteData` (the output from `git ls-remote --symref origin`) | ||
| // determineDefaultBranch parses the output from `git ls-remote --symref origin` | ||
| // and returns the HEAD branch for the repo (often `main` or `master). | ||
| // | ||
| // We expect `remoteData` to contain a string looking like | ||
| // We expect the git data to contain a string looking like | ||
| // `ref: refs/heads/main HEAD` | ||
| // and this function would return `main`. | ||
| func determineDefaultBranch(remoteData string) (string, error) { | ||
| func determineDefaultBranch(remoteName string) (string, error) { | ||
| defaultBranch := os.Getenv("GIT_REPO_DEFAULT_BRANCH") | ||
| if defaultBranch != "" { | ||
| return defaultBranch, nil | ||
| } | ||
|
|
||
| remoteData, err := runGit("ls-remote", "--symref", remoteName) | ||
| if err != nil { | ||
| return "", status.WrapErrorf(err, "git ls-remote --symref %s", remoteName) | ||
| } | ||
|
|
||
| re := regexp.MustCompile(`ref: refs/heads/(\S+)\s+HEAD`) | ||
| match := re.FindStringSubmatch(remoteData) | ||
| if len(match) > 1 { | ||
|
|
@@ -332,27 +337,14 @@ func Config() (*RepoConfig, error) { | |
| fetchURL := remote.url | ||
| log.Debugf("Using fetch URL: %s", fetchURL) | ||
|
|
||
| // Fetching remote data from GitHub can be slow. If we don't need the data | ||
| // because the user has passed in enough information manually, skip the fetch. | ||
| shouldFetchBaseRef := *runFromBranch == "" && *runFromCommit == "" | ||
| shouldFetchDefaultBranch := os.Getenv("GIT_REPO_DEFAULT_BRANCH") == "" | ||
| shouldFetchRemote := shouldFetchBaseRef || shouldFetchDefaultBranch | ||
| var remoteData string | ||
| if shouldFetchRemote { | ||
| remoteData, err = runGit("ls-remote", "--symref", remote.name) | ||
| if err != nil { | ||
| return nil, status.WrapErrorf(err, "git remote show %s", remote.name) | ||
| } | ||
| } | ||
|
|
||
| branch, commit, err := getBaseBranchAndCommit(remoteData) | ||
| defaultBranch, err := determineDefaultBranch(remote.name) | ||
| if err != nil { | ||
| return nil, status.WrapError(err, "get base branch and commit") | ||
| return nil, status.WrapError(err, "get default branch") | ||
| } | ||
|
|
||
| defaultBranch, err := determineDefaultBranch(remoteData) | ||
| branch, commit, err := getBaseBranchAndCommit(remote.name, defaultBranch) | ||
| if err != nil { | ||
| log.Warnf("Failed to fetch default branch: %s", err) | ||
| return nil, status.WrapError(err, "get base branch and commit") | ||
| } | ||
|
|
||
| repoConfig := &RepoConfig{ | ||
|
|
@@ -373,11 +365,8 @@ func Config() (*RepoConfig, error) { | |
| return repoConfig, nil | ||
| } | ||
|
|
||
| // getBaseBranchAndCommit returns the git branch and commit that the remote run | ||
| // should be based off | ||
| // | ||
| // remoteData is the output from `git remote show origin` | ||
| func getBaseBranchAndCommit(remoteData string) (branch string, commit string, err error) { | ||
| // getBaseBranchAndCommit returns the git branch and commit that should be fetched on the remote runner. | ||
| func getBaseBranchAndCommit(remoteName string, defaultBranch string) (branch string, commit string, err error) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The func comment is outdated now that remoteData is no longer a param Also I'm a little confused what this means I'm confused what it means for the remote run to be "based off" a commit - is it meant to be the merge-base commit between the default branch and the current branch tip?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated the comment - LMK if that's clearer |
||
| branch = *runFromBranch | ||
| commit = *runFromCommit | ||
| if branch != "" || commit != "" { | ||
|
|
@@ -389,20 +378,17 @@ func getBaseBranchAndCommit(remoteData string) (branch string, commit string, er | |
| return "", "", fmt.Errorf("get current ref: %w", err) | ||
| } | ||
|
|
||
| currentBranchExistsRemotely := branchExistsRemotely(remoteData, currentBranch) | ||
| currentBranchExistsRemotely, err := branchTrackedRemotely(remoteName, currentBranch) | ||
| if err != nil { | ||
| log.Warnf("Failed to check if branch %s exists remotely. Falling back to running on default branch: %s", currentBranch, err) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I guess if we fail to check whether the remote branch exists, there's probably a deeper error going on (e.g. no network connection or GitHub is down?) What do you think about just returning the error here instead?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This can fail if there is no local tracking for the branch, for example if there's a shallow clone |
||
| } | ||
| if currentBranchExistsRemotely { | ||
| currentCommitHash, err := getHeadCommitForLocalBranch("HEAD") | ||
| if err != nil { | ||
| return "", "", status.WrapError(err, "get current commit hash") | ||
| } | ||
| currentCommitHash = strings.TrimSuffix(currentCommitHash, "\n") | ||
|
|
||
| remoteCommitOutput, err := runGit("branch", "-r", "--contains", currentCommitHash) | ||
| if err != nil { | ||
| return "", "", status.WrapError(err, fmt.Sprintf("check if commit %s exists remotely", currentCommitHash)) | ||
| } | ||
| currentCommitExistsRemotely := strings.Contains(remoteCommitOutput, fmt.Sprintf("origin/%s", currentBranch)) | ||
| currentCommitExistsRemotely := commitTrackedInRemoteBranch(remoteName, currentBranch, "HEAD") | ||
| if currentCommitExistsRemotely { | ||
| currentCommitHash, err := getHeadCommitForLocalBranch("HEAD") | ||
| if err != nil { | ||
| return "", "", status.WrapError(err, "get current commit hash") | ||
| } | ||
| branch = currentBranch | ||
| commit = currentCommitHash | ||
| } | ||
|
|
@@ -412,10 +398,6 @@ func getBaseBranchAndCommit(remoteData string) (branch string, commit string, er | |
| // not be able to fetch it. In this case, use the default branch for the repo. | ||
| // Your local changes will be applied as a patchset to the remote runner. | ||
| if branch == "" || commit == "" { | ||
| defaultBranch, err := determineDefaultBranch(remoteData) | ||
| if err != nil { | ||
| return "", "", status.WrapError(err, "get default branch") | ||
| } | ||
| branch = defaultBranch | ||
|
|
||
| defaultBranchCommitHash, err := getHeadCommitForLocalBranch(defaultBranch) | ||
|
|
@@ -451,32 +433,35 @@ func getCurrentRef() (string, error) { | |
| return strings.TrimSpace(matches[1]), nil | ||
| } | ||
|
|
||
| // branchExistsRemotely parses `remoteData` (the output from “git ls-remote --symref origin) | ||
| // and returns whether `branch` is tracked remotely. | ||
| // branchTrackedRemotely returns whether the given branch exists remotely, as reflected in | ||
| // the local git state. | ||
| // | ||
| // If the branch is tracked remotely, we expect `remoteData` to contain a string looking like | ||
| // `abc123 refs/heads/my_branch` | ||
| func branchExistsRemotely(remoteData string, branch string) bool { | ||
| regex := fmt.Sprintf("\\brefs/heads/%s\\b", regexp.QuoteMeta(branch)) | ||
| re := regexp.MustCompile(regex) | ||
| return re.MatchString(remoteData) | ||
| // This will return false if there is a shallow clone and data for the requested branch | ||
| // was not fetched. | ||
| // This can be incorrect if the branch has been deleted remotely and the local | ||
| // git state hasn't been updated, though this case should be rare. | ||
| func branchTrackedRemotely(remoteName string, branch string) (bool, error) { | ||
| ref := fmt.Sprintf("refs/remotes/%s/%s", remoteName, branch) | ||
| _, err := runGit("show-ref", "--verify", ref) | ||
| if err != nil { | ||
| if strings.Contains(err.Error(), "not a valid ref") { | ||
| return false, nil | ||
| } | ||
| return false, status.WrapErrorf(err, "git show-ref --verify %s", ref) | ||
| } | ||
| return true, nil | ||
| } | ||
|
|
||
| // getHeadCommitForRemoteBranch parses `remoteData` (the output from “git ls-remote --symref origin) | ||
| // and returns the commit at HEAD for the remote branch. | ||
| // commitTrackedInRemoteBranch returns whether the given commit is tracked in the remote branch. | ||
| // It is used as a proxy for whether the commit exists remotely, and can be fetched on the remote runner. | ||
| // | ||
| // We expect `remoteData` to contain a string looking like | ||
| // | ||
| // `abc123 refs/heads/my_branch` | ||
| // and this function would return `abc123`. | ||
| func getHeadCommitForRemoteBranch(remoteData string, branch string) (string, error) { | ||
| regex := `\n(\S+)\s+refs/heads/` + branch + `\n` | ||
| re := regexp.MustCompile(regex) | ||
| match := re.FindStringSubmatch(remoteData) | ||
| if len(match) > 1 { | ||
| return match[1], nil | ||
| } | ||
| return "", status.NotFoundErrorf("failed to get HEAD commit for remote branch %s from:\n%s", branch, remoteData) | ||
| // This will return false if there is a shallow clone and data for the requested branch | ||
| // was not fetched. | ||
| // This can be incorrect if the branch has been deleted remotely and the local git state hasn't been updated, though this case should be rare. | ||
| func commitTrackedInRemoteBranch(remoteName, branch, commit string) bool { | ||
| remoteTrackingRef := fmt.Sprintf("refs/remotes/%s/%s", remoteName, branch) | ||
| _, err := runGit("merge-base", "--is-ancestor", commit, remoteTrackingRef) | ||
| return err == nil | ||
| } | ||
|
|
||
| // getHeadCommitForLocalBranch returns the commit at HEAD for the local branch. | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Config()now always callsdetermineDefaultBranch(remote.name), which runsgit ls-remote --symref(the slow command noted in the PR). This happens even when--run_from_branch/--run_from_commitare provided andgetBaseBranchAndCommitwould early-return, so it can negate the intended optimization and also makes default-branch lookup failures fatal. Consider resolving the default branch lazily only when it’s actually needed (e.g. when falling back to the default branch and/or whenGIT_REPO_DEFAULT_BRANCHmust be populated), and preserve the previous non-fatal behavior if default-branch detection fails.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I actually think it's worth it to always fetch the default branch, because we use it for fallback snapshot behavior. The performance wins of having a fallback snapshot probably outweigh the cost of the git operation