From 27691ca27e0be0195e30450cc0316c2cc090dfca Mon Sep 17 00:00:00 2001 From: Denton Liu Date: Wed, 29 Apr 2020 08:22:23 -0400 Subject: [PATCH 001/447] lib-submodule-update: add space after function name In the shell scripts in this codebase, the usual style is to include a space between the function name and the (). Add these missing spaces to conform to the usual style of the code. Signed-off-by: Denton Liu Signed-off-by: Junio C Hamano --- t/lib-submodule-update.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/t/lib-submodule-update.sh b/t/lib-submodule-update.sh index 1dd17fc03e1203..122554acd65ebf 100755 --- a/t/lib-submodule-update.sh +++ b/t/lib-submodule-update.sh @@ -183,7 +183,7 @@ test_git_directory_is_unchanged () { ) } -test_git_directory_exists() { +test_git_directory_exists () { test -e ".git/modules/$1" && if test -f sub1/.git then @@ -309,7 +309,7 @@ test_submodule_content () { # Internal function; use test_submodule_switch() or # test_submodule_forced_switch() instead. -test_submodule_switch_common() { +test_submodule_switch_common () { command="$1" ######################### Appearing submodule ######################### # Switching to a commit letting a submodule appear creates empty dir ... @@ -629,7 +629,7 @@ test_submodule_forced_switch () { # Internal function; use test_submodule_switch_recursing_with_args() or # test_submodule_forced_switch_recursing_with_args() instead. 
-test_submodule_recursing_with_args_common() { +test_submodule_recursing_with_args_common () { command="$1" ######################### Appearing submodule ######################### From add245234820be6cec3f54174d44f738aa1a80f9 Mon Sep 17 00:00:00 2001 From: Denton Liu Date: Wed, 29 Apr 2020 08:22:24 -0400 Subject: [PATCH 002/447] lib-submodule-update: consolidate --recurse-submodules Both test_submodule_switch_recursing_with_args() and test_submodule_forced_switch_recursing_with_args() call the internal function test_submodule_recursing_with_args_common() with the final argument of `--recurse-submodules`. Consolidate this duplication by appending the argument in test_submodule_recursing_with_args_common(). Signed-off-by: Denton Liu Signed-off-by: Junio C Hamano --- t/lib-submodule-update.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/t/lib-submodule-update.sh b/t/lib-submodule-update.sh index 122554acd65ebf..bb36287803504d 100755 --- a/t/lib-submodule-update.sh +++ b/t/lib-submodule-update.sh @@ -630,7 +630,7 @@ test_submodule_forced_switch () { # Internal function; use test_submodule_switch_recursing_with_args() or # test_submodule_forced_switch_recursing_with_args() instead. test_submodule_recursing_with_args_common () { - command="$1" + command="$1 --recurse-submodules" ######################### Appearing submodule ######################### # Switching to a commit letting a submodule appear checks it out ... @@ -809,7 +809,7 @@ test_submodule_recursing_with_args_common () { # test_submodule_switch_recursing_with_args "$GIT_COMMAND" test_submodule_switch_recursing_with_args () { cmd_args="$1" - command="git $cmd_args --recurse-submodules" + command="git $cmd_args" test_submodule_recursing_with_args_common "$command" RESULTDS=success @@ -927,7 +927,7 @@ test_submodule_switch_recursing_with_args () { # away local changes in the superproject is allowed. 
test_submodule_forced_switch_recursing_with_args () { cmd_args="$1" - command="git $cmd_args --recurse-submodules" + command="git $cmd_args" test_submodule_recursing_with_args_common "$command" RESULT=success From 1fe10844cad39db18de18412c928a0e69b0067ae Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 4 May 2020 19:13:35 -0600 Subject: [PATCH 003/447] commit-graph.c: extract 'refs_cb_data' In subsequent patches, we are going to update a progress meter when 'add_ref_to_set()' is called, and need a convenient way to pass a 'struct progress *' in from the caller. Introduce 'refs_cb_data' as a catch-all for parameters that 'add_ref_to_set' may need, and wrap the existing single parameter in that struct. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- commit-graph.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 6dc777e2f395ea..00da281f390eba 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1318,13 +1318,17 @@ static void compute_bloom_filters(struct write_commit_graph_context *ctx) stop_progress(&progress); } +struct refs_cb_data { + struct oidset *commits; +}; + static int add_ref_to_set(const char *refname, const struct object_id *oid, int flags, void *cb_data) { - struct oidset *commits = (struct oidset *)cb_data; + struct refs_cb_data *data = (struct refs_cb_data *)cb_data; - oidset_insert(commits, oid); + oidset_insert(data->commits, oid); return 0; } @@ -1333,9 +1337,13 @@ int write_commit_graph_reachable(struct object_directory *odb, const struct split_commit_graph_opts *split_opts) { struct oidset commits = OIDSET_INIT; + struct refs_cb_data data; int result; - for_each_ref(add_ref_to_set, &commits); + memset(&data, 0, sizeof(data)); + data.commits = &commits; + + for_each_ref(add_ref_to_set, &data); result = write_commit_graph(odb, NULL, &commits, flags, split_opts); From d5546726fb30b25b48f8475446261f997609ff1b Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Mon, 11 May 2020 11:56:15 +0000 Subject: [PATCH 004/447] line-log: remove unused fields from 'struct line_log_data' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the unused fields 'status', 'arg_alloc', 'arg_nr' and 'args' from 'struct line_log_data'. They were already part of the struct when it was introduced in commit 12da1d1f6 (Implement line-history search (git log -L), 2013-03-28), but as far as I can tell none of them have ever been actually used. Signed-off-by: SZEDER Gábor Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- line-log.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/line-log.h b/line-log.h index 8ee7a2bd4a1866..882c5055bb8001 100644 --- a/line-log.h +++ b/line-log.h @@ -46,10 +46,7 @@ void sort_and_merge_range_set(struct range_set *); struct line_log_data { struct line_log_data *next; char *path; - char status; struct range_set ranges; - int arg_alloc, arg_nr; - const char **args; struct diff_filepair *pair; struct diff_ranges diff; }; From 48da94ba374e0dca8d3a70c617060d94ce242c78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Mon, 11 May 2020 11:56:16 +0000 Subject: [PATCH 005/447] t4211-line-log: add tests for parent oids MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit None of the tests in 't4211-line-log.sh' really check which parent object IDs are shown in the output, either implicitly as part of "Merge: ..." lines [1] or explicitly via the '%p' or '%P' format specifiers in a custom pretty format. Add two tests to 't4211-line-log.sh' to check which parent object IDs are shown, one without and one with explicitly requested parent rewriting, IOW without and with the '--parents' option. 
The test without '--parents' is marked as failing, because without that option parent rewriting should not be performed, and thus the parent object ID should be that of the immediate parent, just like in case of a pathspec-limited history traversal without parent rewriting. The current line-level log implementation, however, performs parent rewriting unconditionally and without a possibility to turn it off, and, consequently, it shows the object ID of the most recent ancestor that modified the given line range. In both of these new tests we only really care about the object IDs of the listed commits and their parents, but not the diffs of the line ranges; the diffs have already been thoroughly checked in the previous tests. [1] While one of the tests ('-M -L ':f:b.c' parallel-change') does list a merge commit, both of its parents happen to modify the given line range and are listed as well, so the implications of parent rewriting remained hidden and untested. Signed-off-by: SZEDER Gábor Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- t/t4211-line-log.sh | 68 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/t/t4211-line-log.sh b/t/t4211-line-log.sh index cda58186c2d2cc..ea4a9398365953 100755 --- a/t/t4211-line-log.sh +++ b/t/t4211-line-log.sh @@ -215,4 +215,72 @@ test_expect_success 'fancy rename following #2' ' test_cmp expect actual ' +# Create the following linear history, where each commit does what its +# subject line promises: +# +# * 66c6410 Modify func2() in file.c +# * 50834e5 Modify other-file +# * fe5851c Modify func1() in file.c +# * 8c7c7dd Add other-file +# * d5f4417 Add func1() and func2() in file.c +test_expect_success 'setup for checking line-log and parent oids' ' + git checkout --orphan parent-oids && + git reset --hard && + + cat >file.c <<-\EOF && + int func1() + { + return F1; + } + + int func2() + { + return F2; + } + EOF + git add file.c && + test_tick && + git commit -m "Add func1() 
and func2() in file.c" && + + echo 1 >other-file && + git add other-file && + git commit -m "Add other-file" && + + sed -e "s/F1/F1 + 1/" file.c >tmp && + mv tmp file.c && + git commit -a -m "Modify func1() in file.c" && + + echo 2 >other-file && + git commit -a -m "Modify other-file" && + + sed -e "s/F2/F2 + 2/" file.c >tmp && + mv tmp file.c && + git commit -a -m "Modify func2() in file.c" && + + head_oid=$(git rev-parse --short HEAD) && + prev_oid=$(git rev-parse --short HEAD^) && + root_oid=$(git rev-parse --short HEAD~4) +' + +# Parent oid should be from immediate parent. +test_expect_failure 'parent oids without parent rewriting' ' + cat >expect <<-EOF && + $head_oid $prev_oid Modify func2() in file.c + $root_oid Add func1() and func2() in file.c + EOF + git log --format="%h %p %s" --no-patch -L:func2:file.c >actual && + test_cmp expect actual +' + +# Parent oid should be from the most recent ancestor touching func2(), +# i.e. in this case from the root commit. +test_expect_success 'parent oids with parent rewriting' ' + cat >expect <<-EOF && + $head_oid $root_oid Modify func2() in file.c + $root_oid Add func1() and func2() in file.c + EOF + git log --format="%h %p %s" --no-patch -L:func2:file.c --parents >actual && + test_cmp expect actual +' + test_done From 3cb9d2b6f9fd2dcb17f5534fd1536682e76f734a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Mon, 11 May 2020 11:56:17 +0000 Subject: [PATCH 006/447] line-log: more responsive, incremental 'git log -L' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current line-level log implementation performs a preprocessing step in prepare_revision_walk(), during which the line_log_filter() function filters and rewrites history to keep only commits modifying the given line range. This preprocessing affects both responsiveness and correctness: - Git doesn't produce any output during this preprocessing step. 
Checking whether a commit modified the given line range is somewhat expensive, so depending on the size of the given revision range this preprocessing can result in a significant delay before the first commit is shown. - Limiting the number of displayed commits (e.g. 'git log -3 -L...') doesn't limit the amount of work during preprocessing, because that limit is applied during history traversal. Alas, by that point this expensive preprocessing step has already churned through the whole revision range to find all commits modifying the revision range, even though only a few of them need to be shown. - It rewrites parents, with no way to turn it off. Without the user explicitly requesting parent rewriting any parent object ID shown should be that of the immediate parent, just like in case of a pathspec-limited history traversal without parent rewriting. However, after that preprocessing step rewrote history, the subsequent "regular" history traversal (i.e. get_revision() in a loop) only sees commits modifying the given line range. Consequently, it can only show the object ID of the last ancestor that modified the given line range (which might happen to be the immediate parent, but many-many times it isn't). This patch addresses both the correctness and, at least for the common case, the responsiveness issues by integrating line-level log filtering into the regular revision walking machinery: - Make process_ranges_arbitrary_commit(), the static function in 'line-log.c' deciding whether a commit modifies the given line range, public by removing the static keyword and adding the 'line_log_' prefix, so it can be called from other parts of the revision walking machinery. - If the user didn't explicitly ask for parent rewriting (which, I believe, is the most common case): - Call this now-public function during regular history traversal, namely from get_commit_action() to ignore any commits not modifying the given line range. 
Note that while this check is relatively expensive, it must be performed before other, much cheaper conditions, because the tracked line range must be adjusted even when the commit will end up being ignored by other conditions. - Skip the line_log_filter() call, i.e. the expensive preprocessing step, in prepare_revision_walk(), because, thanks to the above points, the revision walking machinery is now able to filter out commits not modifying the given line range while traversing history. This way the regular history traversal sees the unmodified history, and is therefore able to print the object ids of the immediate parents of the listed commits. The eliminated preprocessing step can greatly reduce the delay before the first commit is shown, see the numbers below. - However, if the user did explicitly ask for parent rewriting via '--parents' or a similar option, then stick with the current implementation for now, i.e. perform that expensive filtering and history rewriting in the preprocessing step just like we did before, leaving the initial delay as long as it was. I tried to integrate line-level log filtering with parent rewriting into the regular history traversal, but, unfortunately, several subtleties resisted... :) Maybe someday we'll figure out how to do that, but until then at least the simple and common (i.e. without parent rewriting) 'git log -L:func:file' commands can benefit from the reduced delay. This change makes the failing 'parent oids without parent rewriting' test in 't4211-line-log.sh' succeed. The reduced delay is most noticeable when there's a commit modifying the line range near the tip of a large-ish revision range: # no parent rewriting requested, no commit-graph present $ time git --no-pager log -L:read_alternate_refs:sha1-file.c -1 v2.23.0 Before: real 0m9.570s user 0m9.494s sys 0m0.076s After: real 0m0.718s user 0m0.674s sys 0m0.044s A significant part of the remaining delay is spent reading and parsing commit objects in limit_list(). 
With the help of the commit-graph we can eliminate most of that reading and parsing overhead, so here are the timing results of the same command as above, but this time using the commit-graph: Before: real 0m8.874s user 0m8.816s sys 0m0.057s After: real 0m0.107s user 0m0.091s sys 0m0.013s The next patch will further reduce the remaining delay. To be clear: this patch doesn't actually optimize the line-level log, but merely moves most of the work from the preprocessing step to the history traversal, so the commits modifying the line range can be shown as soon as they are processed, and the traversal can be terminated as soon as the given number of commits are shown. Consequently, listing the full history of a line range, potentially all the way to the root commit, will take the same time as before (but at least the user might start reading the output earlier). Furthermore, if the most recent commit modifying the line range is far away from the starting revision, then that initial delay will still be significant. Additional testing by Derrick Stolee: In the Linux kernel repository, the MAINTAINERS file was changed ~3,500 times across the ~915,000 commits. In addition to that edit frequency, the file itself is quite large (~18,700 lines). This means that a significant portion of the computation is taken up by computing the patch-diff of the file. This patch improves the real time it takes to output the first result quite a bit: Command: git log -L 100,200:MAINTAINERS -n 1 >/dev/null Before: 3.88 s After: 0.71 s If we drop the "-n 1" in the command, then there is no change in end-to-end process time. This is because the command still needs to walk the entire commit history, which negates the point of this patch. This is expected. As a note for future reference, the ~4.3 seconds in the old code spends ~2.6 seconds computing the patch-diffs, and the rest of the time is spent walking commits and computing diffs for which paths changed at each commit. 
The changed-path Bloom filters could improve the end-to-end computation time (i.e. no "-n 1" in the command). Signed-off-by: SZEDER Gábor Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- line-log.c | 4 ++-- line-log.h | 2 ++ revision.c | 27 ++++++++++++++++++++++++++- t/t4211-line-log.sh | 2 +- 4 files changed, 31 insertions(+), 4 deletions(-) diff --git a/line-log.c b/line-log.c index 9010e00950b379..520ee715bcdb02 100644 --- a/line-log.c +++ b/line-log.c @@ -1227,7 +1227,7 @@ static int process_ranges_merge_commit(struct rev_info *rev, struct commit *comm /* NEEDSWORK leaking like a sieve */ } -static int process_ranges_arbitrary_commit(struct rev_info *rev, struct commit *commit) +int line_log_process_ranges_arbitrary_commit(struct rev_info *rev, struct commit *commit) { struct line_log_data *range = lookup_line_range(rev, commit); int changed = 0; @@ -1270,7 +1270,7 @@ int line_log_filter(struct rev_info *rev) while (list) { struct commit_list *to_free = NULL; commit = list->item; - if (process_ranges_arbitrary_commit(rev, commit)) { + if (line_log_process_ranges_arbitrary_commit(rev, commit)) { *pp = list; pp = &list->next; } else diff --git a/line-log.h b/line-log.h index 882c5055bb8001..82ae8d98a403bb 100644 --- a/line-log.h +++ b/line-log.h @@ -54,6 +54,8 @@ struct line_log_data { void line_log_init(struct rev_info *rev, const char *prefix, struct string_list *args); int line_log_filter(struct rev_info *rev); +int line_log_process_ranges_arbitrary_commit(struct rev_info *rev, + struct commit *commit); int line_log_print(struct rev_info *rev, struct commit *commit); diff --git a/revision.c b/revision.c index f78c636e4d022d..3228db9af6d606 100644 --- a/revision.c +++ b/revision.c @@ -39,6 +39,8 @@ static const char *term_good; implement_shared_commit_slab(revision_sources, char *); +static inline int want_ancestry(const struct rev_info *revs); + void show_object_with_name(FILE *out, struct object *obj, const char *name) { const char *p; @@ 
-3511,7 +3513,14 @@ int prepare_revision_walk(struct rev_info *revs) sort_in_topological_order(&revs->commits, revs->sort_order); } else if (revs->topo_order) init_topo_walk(revs); - if (revs->line_level_traverse) + if (revs->line_level_traverse && want_ancestry(revs)) + /* + * At the moment we can only do line-level log with parent + * rewriting by performing this expensive pre-filtering step. + * If parent rewriting is not requested, then we rather + * perform the line-level log filtering during the regular + * history traversal. + */ line_log_filter(revs); if (revs->simplify_merges) simplify_merges(revs); @@ -3722,6 +3731,22 @@ enum commit_action get_commit_action(struct rev_info *revs, struct commit *commi return commit_ignore; if (commit->object.flags & UNINTERESTING) return commit_ignore; + if (revs->line_level_traverse && !want_ancestry(revs)) { + /* + * In case of line-level log with parent rewriting + * prepare_revision_walk() already took care of all line-level + * log filtering, and there is nothing left to do here. + * + * If parent rewriting was not requested, then this is the + * place to perform the line-level log filtering. Notably, + * this check, though expensive, must come before the other, + * cheaper filtering conditions, because the tracked line + * ranges must be adjusted even when the commit will end up + * being ignored based on other conditions. + */ + if (!line_log_process_ranges_arbitrary_commit(revs, commit)) + return commit_ignore; + } if (revs->min_age != -1 && comparison_date(revs, commit) > revs->min_age) return commit_ignore; diff --git a/t/t4211-line-log.sh b/t/t4211-line-log.sh index ea4a9398365953..1428eae26299d6 100755 --- a/t/t4211-line-log.sh +++ b/t/t4211-line-log.sh @@ -263,7 +263,7 @@ test_expect_success 'setup for checking line-log and parent oids' ' ' # Parent oid should be from immediate parent. 
-test_expect_failure 'parent oids without parent rewriting' ' +test_expect_success 'parent oids without parent rewriting' ' cat >expect <<-EOF && $head_oid $prev_oid Modify func2() in file.c $root_oid Add func1() and func2() in file.c From 002933f3fe2b016022ebbbbb359f6aeba58309a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Mon, 11 May 2020 11:56:18 +0000 Subject: [PATCH 007/447] line-log: try to use generation number-based topo-ordering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous patch made it possible to perform line-level filtering during history traversal instead of in an expensive preprocessing step, but it still requires some simpler preprocessing steps, notably topo-ordering. However, nowadays we have commit-graphs storing generation numbers, which make it possible to incrementally traverse the history in topological order, without the preparatory limit_list() and sort_in_topological_order() steps; see b45424181e (revision.c: generation-based topo-order algorithm, 2018-11-01). This patch combines the two, so we can do both the topo-ordering and the line-level filtering during history traversal, eliminating even those simpler preprocessing steps, and thus further reducing the delay before showing the first commit modifying the given line range. The 'revs->limited' flag plays the central role in this, because, due to limitations of the current implementation, the generation number-based topo-ordering is only enabled when this flag remains unset. Line-level log, however, always sets this flag in setup_revisions() ever since the feature was introduced in 12da1d1f6f (Implement line-history search (git log -L), 2013-03-28). The reason for setting 'limited' is unclear, though, because the line-level log itself doesn't directly depend on it, and it doesn't affect how the limit_list() function limits the revision range. 
However, there is an indirect dependency: the line-level log requires topo-ordering, and the "traditional" sort_in_topological_order() requires an already limited commit list since e6c3505b44 (Make sure we generate the whole commit list before trying to sort it topologically, 2005-07-06). The new, generation numbers-based topo-ordering doesn't require a limited commit list anymore. So don't set 'revs->limited' for line-level log, unless it is really necessary, namely: - The user explicitly requested parent rewriting, because that is still done in the line_log_filter() preprocessing step (see previous patch), which requires sort_in_topological_order() and in turn limit_list() as well. - A commit-graph file is not available or it doesn't yet contain generation numbers. In these cases we had to fall back on sort_in_topological_order() and in turn limit_list(). The existing condition with generation_numbers_enabled() has already ensured that the 'limited' flag is set in these cases; this patch just makes sure that the line-level log sets 'revs->topo_order' before that condition. While the reduced delay before showing the first commit is measurable in git.git, it takes a bigger repository to make it clearly noticeable. In both cases below the line ranges were chosen so that they were modified rather close to the starting revisions, so the effect of this change is most noticeable. 
# git.git $ time git --no-pager log -L:read_alternate_refs:sha1-file.c -1 v2.23.0 Before: real 0m0.107s user 0m0.091s sys 0m0.013s After: real 0m0.058s user 0m0.050s sys 0m0.005s # linux.git $ time git --no-pager log \ -L:build_restore_work_registers:arch/mips/mm/tlbex.c -1 v5.2 Before: real 0m1.129s user 0m1.061s sys 0m0.069s After: real 0m0.096s user 0m0.087s sys 0m0.009s Additional testing by Derrick Stolee: Since this patch improves the performance for the first result, I repeated the experiment from the previous patch on the Linux kernel repository, reporting real time here: Command: git log -L 100,200:MAINTAINERS -n 1 >/dev/null Before: 0.71 s After: 0.05 s Now, we have dropped the full topo-order of all ~910,000 commits before reporting the first result. The remaining performance improvements then are: 1. Update the parent-rewriting logic to be incremental similar to how "git log --graph" behaves. 2. Use changed-path Bloom filters to reduce the time spent in the tree-diff to see if the path(s) changed. 
Signed-off-by: SZEDER Gábor Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- revision.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/revision.c b/revision.c index 3228db9af6d606..3356ede9a20bd3 100644 --- a/revision.c +++ b/revision.c @@ -2790,6 +2790,12 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s if (revs->diffopt.objfind) revs->simplify_history = 0; + if (revs->line_level_traverse) { + if (want_ancestry(revs)) + revs->limited = 1; + revs->topo_order = 1; + } + if (revs->topo_order && !generation_numbers_enabled(the_repository)) revs->limited = 1; @@ -2809,11 +2815,6 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s revs->diffopt.abbrev = revs->abbrev; - if (revs->line_level_traverse) { - revs->limited = 1; - revs->topo_order = 1; - } - diff_setup_done(&revs->diffopt); grep_commit_pattern_type(GREP_PATTERN_TYPE_UNSPECIFIED, From f32dde8c12d941065be848a9f66239df96bde216 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 11 May 2020 11:56:19 +0000 Subject: [PATCH 008/447] line-log: integrate with changed-path Bloom filters The previous changes to the line-log machinery focused on making the first result appear faster. This was achieved by no longer walking the entire commit history before returning the early results. There is still another way to improve the performance: walk most commits much faster. Let's use the changed-path Bloom filters to reduce time spent computing diffs. Since the line-log computation requires opening blobs and checking the content-diff, there is still a lot of necessary computation that cannot be replaced with changed-path Bloom filters. The part that we can reduce is most effective when checking the history of a file that is deep in several directories and those directories are modified frequently. 
In this case, the computation to check if a commit is TREESAME to its first parent takes a large fraction of the time. That is ripe for improvement with changed-path Bloom filters. We must ensure that prepare_to_use_bloom_filters() is called in revision.c so that the bloom_filter_settings are loaded into the struct rev_info from the commit-graph. Of course, some cases are still forbidden, but in the line-log case the pathspec is provided in a different way than normal. Since multiple paths and segments could be requested, we compute the struct bloom_key data dynamically during the commit walk. This could likely be improved, but adds code complexity that is not valuable at this time. There are two cases to care about: merge commits and "ordinary" commits. Merge commits have multiple parents, but if we are TREESAME to our first parent in every range, then pass the blame for all ranges to the first parent. Ordinary commits have the same condition, but each is done slightly differently in the process_ranges_[merge|ordinary]_commit() methods. By checking if the changed-path Bloom filter can guarantee TREESAME, we can avoid that tree-diff cost. If the filter says "probably changed", then we need to run the tree-diff and then the blob-diff if there was a real edit. The Linux kernel repository is a good testing ground for the performance improvements claimed here. There are two different cases to test. The first is the "entire history" case, where we output the entire history to /dev/null to see how long it would take to compute the full line-log history. The second is the "first result" case, where we find how long it takes to show the first value, which is an indicator of how quickly a user would see responses when waiting at a terminal. 
To test, I selected the paths that were changed most frequently in the top 10,000 commits using this command (stolen from StackOverflow [1]): git log --pretty=format: --name-only -n 10000 | sort | \ uniq -c | sort -rg | head -10 which results in 121 MAINTAINERS 63 fs/namei.c 60 arch/x86/kvm/cpuid.c 59 fs/io_uring.c 58 arch/x86/kvm/vmx/vmx.c 51 arch/x86/kvm/x86.c 45 arch/x86/kvm/svm.c 42 fs/btrfs/disk-io.c 42 Documentation/scsi/index.rst (along with a bogus first result). It appears that the path arch/x86/kvm/svm.c was renamed, so we ignore that entry. This leaves the following results for the real command time: | | Entire History | First Result | | Path | Before | After | Before | After | |------------------------------|--------|--------|--------|--------| | MAINTAINERS | 4.26 s | 3.87 s | 0.41 s | 0.39 s | | fs/namei.c | 1.99 s | 0.99 s | 0.42 s | 0.21 s | | arch/x86/kvm/cpuid.c | 5.28 s | 1.12 s | 0.16 s | 0.09 s | | fs/io_uring.c | 4.34 s | 0.99 s | 0.94 s | 0.27 s | | arch/x86/kvm/vmx/vmx.c | 5.01 s | 1.34 s | 0.21 s | 0.12 s | | arch/x86/kvm/x86.c | 2.24 s | 1.18 s | 0.21 s | 0.14 s | | fs/btrfs/disk-io.c | 1.82 s | 1.01 s | 0.06 s | 0.05 s | | Documentation/scsi/index.rst | 3.30 s | 0.89 s | 1.46 s | 0.03 s | It is worth noting that the least speedup comes for the MAINTAINERS file which is * edited frequently, * low in the directory hierarchy, and * quite a large file. All of those points lead to spending more time doing the blob diff and less time doing the tree diff. Still, we see some improvement in that case and significant improvement in other cases. A 2-4x speedup is likely the more typical case as opposed to the small 5% change for that file. 
Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- bloom.c | 5 +++++ bloom.h | 1 + line-log.c | 39 ++++++++++++++++++++++++++++++++++++++- revision.c | 5 ++++- 4 files changed, 48 insertions(+), 2 deletions(-) diff --git a/bloom.c b/bloom.c index e2ede44126c7fd..c38d1cff0c6c62 100644 --- a/bloom.c +++ b/bloom.c @@ -138,6 +138,11 @@ void fill_bloom_key(const char *data, key->hashes[i] = hash0 + i * hash1; } +void clear_bloom_key(struct bloom_key *key) +{ + FREE_AND_NULL(key->hashes); +} + void add_key_to_filter(const struct bloom_key *key, struct bloom_filter *filter, const struct bloom_filter_settings *settings) diff --git a/bloom.h b/bloom.h index a51e3715296ebe..d0c69172e6773f 100644 --- a/bloom.h +++ b/bloom.h @@ -72,6 +72,7 @@ void fill_bloom_key(const char *data, size_t len, struct bloom_key *key, const struct bloom_filter_settings *settings); +void clear_bloom_key(struct bloom_key *key); void add_key_to_filter(const struct bloom_key *key, struct bloom_filter *filter, diff --git a/line-log.c b/line-log.c index 520ee715bcdb02..7dc411da8f6a2a 100644 --- a/line-log.c +++ b/line-log.c @@ -15,6 +15,7 @@ #include "userdiff.h" #include "line-log.h" #include "argv-array.h" +#include "bloom.h" static void range_set_grow(struct range_set *rs, size_t extra) { @@ -1146,6 +1147,37 @@ int line_log_print(struct rev_info *rev, struct commit *commit) return 1; } +static int bloom_filter_check(struct rev_info *rev, + struct commit *commit, + struct line_log_data *range) +{ + struct bloom_filter *filter; + struct bloom_key key; + int result = 0; + + if (!commit->parents) + return 1; + + if (!rev->bloom_filter_settings || + !(filter = get_bloom_filter(rev->repo, commit, 0))) + return 1; + + if (!range) + return 0; + + while (!result && range) { + fill_bloom_key(range->path, strlen(range->path), &key, rev->bloom_filter_settings); + + if (bloom_filter_contains(filter, &key, rev->bloom_filter_settings)) + result = 1; + + clear_bloom_key(&key); + range = range->next; + } 
+ + return result; +} + static int process_ranges_ordinary_commit(struct rev_info *rev, struct commit *commit, struct line_log_data *range) { @@ -1159,6 +1191,7 @@ static int process_ranges_ordinary_commit(struct rev_info *rev, struct commit *c queue_diffs(range, &rev->diffopt, &queue, commit, parent); changed = process_all_files(&parent_range, rev, &queue, range); + if (parent) add_line_range(rev, parent, parent_range); free_line_log_data(parent_range); @@ -1233,7 +1266,11 @@ int line_log_process_ranges_arbitrary_commit(struct rev_info *rev, struct commit int changed = 0; if (range) { - if (!commit->parents || !commit->parents->next) + if (commit->parents && !bloom_filter_check(rev, commit, range)) { + struct line_log_data *prange = line_log_data_copy(range); + add_line_range(rev, commit->parents->item, prange); + clear_commit_line_range(rev, commit); + } else if (!commit->parents || !commit->parents->next) changed = process_ranges_ordinary_commit(rev, commit, range); else changed = process_ranges_merge_commit(rev, commit, range); diff --git a/revision.c b/revision.c index 3356ede9a20bd3..cbf4b61aa67e65 100644 --- a/revision.c +++ b/revision.c @@ -689,6 +689,9 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs) if (!revs->bloom_filter_settings) return; + if (!revs->pruning.pathspec.nr) + return; + pi = &revs->pruning.pathspec.items[0]; last_index = pi->len - 1; @@ -3501,7 +3504,7 @@ int prepare_revision_walk(struct rev_info *revs) FOR_EACH_OBJECT_PROMISOR_ONLY); } - if (revs->pruning.pathspec.nr == 1 && !revs->reflog_info) + if (!revs->reflog_info) prepare_to_use_bloom_filter(revs); if (revs->no_walk != REVISION_WALK_NO_WALK_UNSORTED) commit_list_sort_by_date(&revs->commits); From 373e9bd66e2ba468f490e5e4ec1ccbe47853f8cd Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Mon, 11 May 2020 10:43:09 -0700 Subject: [PATCH 009/447] t5551: test that GIT_TRACE_CURL redacts password Verify that when GIT_TRACE_CURL is set, Git prints out "Authorization: Basic 
<redacted>" instead of the base64-encoded authorization details. Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- t/t5551-http-fetch-smart.sh | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/t/t5551-http-fetch-smart.sh b/t/t5551-http-fetch-smart.sh index 6788aefaceb8b0..acc8473a72a8e7 100755 --- a/t/t5551-http-fetch-smart.sh +++ b/t/t5551-http-fetch-smart.sh @@ -185,6 +185,18 @@ test_expect_success 'redirects send auth to new location' ' expect_askpass both user@host auth/smart/repo.git ' +test_expect_success 'GIT_TRACE_CURL redacts auth details' ' + rm -rf redact-auth trace && + set_askpass user@host pass@host && + GIT_TRACE_CURL="$(pwd)/trace" git clone --bare "$HTTPD_URL/auth/smart/repo.git" redact-auth && + expect_askpass both user@host && + + # Ensure that there is no "Basic" followed by a base64 string, but that + # the auth details are redacted + ! grep "Authorization: Basic [0-9a-zA-Z+/]" trace && + grep "Authorization: Basic <redacted>" trace +' + test_expect_success 'disable dumb http on server' ' git --git-dir="$HTTPD_DOCUMENT_ROOT_PATH/repo.git" \ config http.getanyfile false From 7167a62b9e2f648adc11411446f876f2458722a5 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Mon, 11 May 2020 10:43:10 -0700 Subject: [PATCH 010/447] http, imap-send: stop using CURLOPT_VERBOSE Whenever GIT_CURL_VERBOSE is set, teach Git to behave as if GIT_TRACE_CURL=1 and GIT_TRACE_CURL_NO_DATA=1 is set, instead of setting CURLOPT_VERBOSE. This is to prevent inadvertent revelation of sensitive data. In particular, GIT_CURL_VERBOSE redacts neither the "Authorization" header nor any cookies specified by GIT_REDACT_COOKIES. Unifying the tracing mechanism also has the future benefit that any improvements to the tracing mechanism will benefit both users of GIT_CURL_VERBOSE and GIT_TRACE_CURL, and we do not need to remember to implement any improvement twice. 
Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- Documentation/git.txt | 2 -- http.c | 8 +++++++- http.h | 7 +++++++ imap-send.c | 2 +- t/t5551-http-fetch-smart.sh | 24 ++++++++++++++++++++++++ t/t5581-http-curl-verbose.sh | 2 +- trace.c | 20 ++++++++++++++++---- trace.h | 6 ++++++ 8 files changed, 62 insertions(+), 9 deletions(-) diff --git a/Documentation/git.txt b/Documentation/git.txt index 9d6769e95ab1d3..427ea707014958 100644 --- a/Documentation/git.txt +++ b/Documentation/git.txt @@ -721,8 +721,6 @@ of clones and fetches. Enables a curl full trace dump of all incoming and outgoing data, including descriptive information, of the git transport protocol. This is similar to doing curl `--trace-ascii` on the command line. - This option overrides setting the `GIT_CURL_VERBOSE` environment - variable. See `GIT_TRACE` for available trace output options. `GIT_TRACE_CURL_NO_DATA`:: diff --git a/http.c b/http.c index 62aa995245324d..4882c9f5b26897 100644 --- a/http.c +++ b/http.c @@ -804,6 +804,12 @@ static int curl_trace(CURL *handle, curl_infotype type, char *data, size_t size, return 0; } +void http_trace_curl_no_data(void) +{ + trace_override_envvar(&trace_curl, "1"); + trace_curl_data = 0; +} + void setup_curl_trace(CURL *handle) { if (!trace_want(&trace_curl)) @@ -993,7 +999,7 @@ static CURL *get_curl_handle(void) warning(_("Protocol restrictions not supported with cURL < 7.19.4")); #endif if (getenv("GIT_CURL_VERBOSE")) - curl_easy_setopt(result, CURLOPT_VERBOSE, 1L); + http_trace_curl_no_data(); setup_curl_trace(result); if (getenv("GIT_TRACE_CURL_NO_DATA")) trace_curl_data = 0; diff --git a/http.h b/http.h index 5e0ad724f92f3c..faf8cbb0d10e44 100644 --- a/http.h +++ b/http.h @@ -252,6 +252,13 @@ int finish_http_object_request(struct http_object_request *freq); void abort_http_object_request(struct http_object_request *freq); void release_http_object_request(struct http_object_request *freq); +/* + * Instead of using environment variables to 
determine if curl tracing happens, + * behave as if GIT_TRACE_CURL=1 and GIT_TRACE_CURL_NO_DATA=1 is set. Call this + * before calling setup_curl_trace(). + */ +void http_trace_curl_no_data(void); + /* setup routine for curl_easy_setopt CURLOPT_DEBUGFUNCTION */ void setup_curl_trace(CURL *handle); #endif /* HTTP_H */ diff --git a/imap-send.c b/imap-send.c index 6c54d8c29d64c0..52737546f38b65 100644 --- a/imap-send.c +++ b/imap-send.c @@ -1464,7 +1464,7 @@ static CURL *setup_curl(struct imap_server_conf *srvc, struct credential *cred) curl_easy_setopt(curl, CURLOPT_UPLOAD, 1L); if (0 < verbosity || getenv("GIT_CURL_VERBOSE")) - curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L); + http_trace_curl_no_data(); setup_curl_trace(curl); return curl; diff --git a/t/t5551-http-fetch-smart.sh b/t/t5551-http-fetch-smart.sh index acc8473a72a8e7..be01cf7bb24516 100755 --- a/t/t5551-http-fetch-smart.sh +++ b/t/t5551-http-fetch-smart.sh @@ -197,6 +197,18 @@ test_expect_success 'GIT_TRACE_CURL redacts auth details' ' grep "Authorization: Basic " trace ' +test_expect_success 'GIT_CURL_VERBOSE redacts auth details' ' + rm -rf redact-auth trace && + set_askpass user@host pass@host && + GIT_CURL_VERBOSE=1 git clone --bare "$HTTPD_URL/auth/smart/repo.git" redact-auth 2>trace && + expect_askpass both user@host && + + # Ensure that there is no "Basic" followed by a base64 string, but that + # the auth details are redacted + ! grep "Authorization: Basic [0-9a-zA-Z+/]" trace && + grep "Authorization: Basic " trace +' + test_expect_success 'disable dumb http on server' ' git --git-dir="$HTTPD_DOCUMENT_ROOT_PATH/repo.git" \ config http.getanyfile false @@ -454,6 +466,18 @@ test_expect_success 'GIT_REDACT_COOKIES redacts cookies' ' ! 
grep "Cookie:.*Bar=2" err ' +test_expect_success 'GIT_REDACT_COOKIES redacts cookies when GIT_CURL_VERBOSE=1' ' + rm -rf clone && + echo "Set-Cookie: Foo=1" >cookies && + echo "Set-Cookie: Bar=2" >>cookies && + GIT_CURL_VERBOSE=1 GIT_REDACT_COOKIES=Bar,Baz \ + git -c "http.cookieFile=$(pwd)/cookies" clone \ + $HTTPD_URL/smart/repo.git clone 2>err && + grep "Cookie:.*Foo=1" err && + grep "Cookie:.*Bar=" err && + ! grep "Cookie:.*Bar=2" err +' + test_expect_success 'GIT_REDACT_COOKIES handles empty values' ' rm -rf clone && echo "Set-Cookie: Foo=" >cookies && diff --git a/t/t5581-http-curl-verbose.sh b/t/t5581-http-curl-verbose.sh index 5129b0724f7038..927aad08209825 100755 --- a/t/t5581-http-curl-verbose.sh +++ b/t/t5581-http-curl-verbose.sh @@ -20,7 +20,7 @@ test_expect_success 'failure in git-upload-pack is shown' ' test_might_fail env GIT_CURL_VERBOSE=1 \ git clone "$HTTPD_URL/error_git_upload_pack/smart/repo.git" \ 2>curl_log && - grep "< HTTP/1.1 500 Intentional Breakage" curl_log + grep "<= Recv header: HTTP/1.1 500 Intentional Breakage" curl_log ' test_done diff --git a/trace.c b/trace.c index b3ef0e627f8cec..f726686fd92f0b 100644 --- a/trace.c +++ b/trace.c @@ -29,7 +29,7 @@ struct trace_key trace_perf_key = TRACE_KEY_INIT(PERFORMANCE); struct trace_key trace_setup_key = TRACE_KEY_INIT(SETUP); /* Get a trace file descriptor from "key" env variable. */ -static int get_trace_fd(struct trace_key *key) +static int get_trace_fd(struct trace_key *key, const char *override_envvar) { const char *trace; @@ -37,7 +37,7 @@ static int get_trace_fd(struct trace_key *key) if (key->initialized) return key->fd; - trace = getenv(key->key); + trace = override_envvar ? 
override_envvar : getenv(key->key); if (!trace || !strcmp(trace, "") || !strcmp(trace, "0") || !strcasecmp(trace, "false")) @@ -68,6 +68,18 @@ static int get_trace_fd(struct trace_key *key) return key->fd; } +void trace_override_envvar(struct trace_key *key, const char *value) +{ + trace_disable(key); + key->initialized = 0; + + /* + * Invoke get_trace_fd() to initialize key using the given value + * instead of the value of the environment variable. + */ + get_trace_fd(key, value); +} + void trace_disable(struct trace_key *key) { if (key->need_close) @@ -112,7 +124,7 @@ static int prepare_trace_line(const char *file, int line, static void trace_write(struct trace_key *key, const void *buf, unsigned len) { - if (write_in_full(get_trace_fd(key), buf, len) < 0) { + if (write_in_full(get_trace_fd(key, NULL), buf, len) < 0) { warning("unable to write trace for %s: %s", key->key, strerror(errno)); trace_disable(key); @@ -383,7 +395,7 @@ void trace_repo_setup(const char *prefix) int trace_want(struct trace_key *key) { - return !!get_trace_fd(key); + return !!get_trace_fd(key, NULL); } #if defined(HAVE_CLOCK_GETTIME) && defined(HAVE_CLOCK_MONOTONIC) diff --git a/trace.h b/trace.h index 9826618b331af6..0dbbad0e41cb07 100644 --- a/trace.h +++ b/trace.h @@ -101,6 +101,12 @@ void trace_repo_setup(const char *prefix); */ int trace_want(struct trace_key *key); +/** + * Enables or disables tracing for the specified key, as if the environment + * variable was set to the given value. + */ +void trace_override_envvar(struct trace_key *key, const char *value); + /** * Disables tracing for the specified key, even if the environment variable * was set. 
From 2dfdd705ffd8c6dd1ef75abdfb64e1a08ad35a26 Mon Sep 17 00:00:00 2001 From: Ben Keene Date: Tue, 12 May 2020 13:15:59 +0000 Subject: [PATCH 011/447] git-p4.py: fix --prepare-p4-only error with multiple commits When using git p4 submit with the --prepare-p4-only option, the program should prepare a single p4 changelist and notify the user that more commits are pending and then stop processing. A bug has been introduced by the p4-changelist hook feature that causes the program to continue to try and process all pending changelists at the same time. The function applyCommit returns True when applying the commit was successful and the program should continue. However, when the optional flag --prepare-p4-only is set, the program should stop after the first application. Change the logic in the run method for P4Submit to check for the flag --prepare-p4-only after successfully completing the applyCommit method. Be aware - this change will fix the existing test error in t9807.23 for --prepare-p4-only. However there is insufficient coverage for this flag. If more than 1 commit is pending submission to P4, the method will properly prepare the P4 changelist, however it will still exit the application with an exitcode of 1. The current documentation does not define what the exit code should be in this condition. 
(See: https://git-scm.com/docs/git-p4#Documentation/git-p4.txt---prepare-p4-only) Signed-off-by: Ben Keene Signed-off-by: Junio C Hamano --- git-p4.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/git-p4.py b/git-p4.py index b8b2a1679e7336..c4a4012bcc1778 100755 --- a/git-p4.py +++ b/git-p4.py @@ -2537,11 +2537,12 @@ def run(self, args): ok = self.applyCommit(commit) if ok: applied.append(commit) - else: - if self.prepare_p4_only and i < last: - print("Processing only the first commit due to option" \ - " --prepare-p4-only") + if self.prepare_p4_only: + if i < last: + print("Processing only the first commit due to option" \ + " --prepare-p4-only") break + else: if i < last: # prompt for what to do, or use the option/variable if self.conflict_behavior == "ask": From 39f4919dc50ff05bcebd0d3c89b718aa2d46bd67 Mon Sep 17 00:00:00 2001 From: Emily Shaffer Date: Tue, 12 May 2020 16:42:12 -0700 Subject: [PATCH 012/447] help: add shell-path to --build-options It may be useful to know which shell Git was built to try to point to, in the event that shell-based Git commands are failing. $SHELL_PATH is set during the build and used to launch the manpage viewer, as well as by git-compat-util.h, and it's used during tests. 'git version --build-options' is encouraged for use in bug reports, so it makes sense to include this information there. Signed-off-by: Emily Shaffer Signed-off-by: Junio C Hamano --- help.c | 1 + 1 file changed, 1 insertion(+) diff --git a/help.c b/help.c index 1de9c0d589cd9b..44cee69c11c683 100644 --- a/help.c +++ b/help.c @@ -641,6 +641,7 @@ void get_version_info(struct strbuf *buf, int show_build_options) strbuf_addstr(buf, "no commit associated with this build\n"); strbuf_addf(buf, "sizeof-long: %d\n", (int)sizeof(long)); strbuf_addf(buf, "sizeof-size_t: %d\n", (int)sizeof(size_t)); + strbuf_addf(buf, "shell-path: %s\n", SHELL_PATH); /* NEEDSWORK: also save and output GIT-BUILD_OPTIONS? 
*/ } } From 4a4804edf4d5f96407fc46eda802a3b29d991e33 Mon Sep 17 00:00:00 2001 From: Emily Shaffer Date: Tue, 12 May 2020 16:42:13 -0700 Subject: [PATCH 013/447] bugreport: include user interactive shell It's possible a user may complain about the way that Git interacts with their interactive shell, e.g. autocompletion or shell prompt. In that case, it's useful for us to know which shell they're using interactively. Signed-off-by: Emily Shaffer Signed-off-by: Junio C Hamano --- Documentation/git-bugreport.txt | 1 + bugreport.c | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/Documentation/git-bugreport.txt b/Documentation/git-bugreport.txt index 7fe9aef34ea86a..4afc48c4526330 100644 --- a/Documentation/git-bugreport.txt +++ b/Documentation/git-bugreport.txt @@ -29,6 +29,7 @@ The following information is captured automatically: - uname sysname, release, version, and machine strings - Compiler-specific info string - A list of enabled hooks + - $SHELL This tool is invoked via the typical Git setup process, which means that in some cases, it might not be able to launch - for example, if a relevant config file diff --git a/bugreport.c b/bugreport.c index aa8a489c35e8f4..28f4568b01f28b 100644 --- a/bugreport.c +++ b/bugreport.c @@ -9,6 +9,7 @@ static void get_system_info(struct strbuf *sys_info) { struct utsname uname_info; + char *shell = NULL; /* get git version from native cmd */ strbuf_addstr(sys_info, _("git version:\n")); @@ -29,8 +30,13 @@ static void get_system_info(struct strbuf *sys_info) strbuf_addstr(sys_info, _("compiler info: ")); get_compiler_info(sys_info); + strbuf_addstr(sys_info, _("libc info: ")); get_libc_info(sys_info); + + shell = getenv("SHELL"); + strbuf_addf(sys_info, "$SHELL (typically, interactive shell): %s\n", + shell ? shell : ""); } static void get_populated_hooks(struct strbuf *hook_info, int nongit) From a1142963710fd71e5e6e0feb0c4020e977301af7 Mon Sep 17 00:00:00 2001 From: "brian m. 
carlson" Date: Wed, 13 May 2020 00:53:41 +0000 Subject: [PATCH 014/447] t1050: match object ID paths in a hash-insensitive way The pattern here looking for failures is specific to SHA-1. Let's create a variable that matches the regex or glob pattern for a path within the objects directory. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- t/t1050-large.sh | 2 +- t/test-lib.sh | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/t/t1050-large.sh b/t/t1050-large.sh index 184b479a2111a8..7f88ea07c27819 100755 --- a/t/t1050-large.sh +++ b/t/t1050-large.sh @@ -64,7 +64,7 @@ test_expect_success 'add a large file or two' ' test $count = 1 && cnt=$(git show-index <"$idx" | wc -l) && test $cnt = 2 && - for l in .git/objects/??/?????????????????????????????????????? + for l in .git/objects/$OIDPATH_REGEX do test_path_is_file "$l" || continue bad=t diff --git a/t/test-lib.sh b/t/test-lib.sh index baf94546da10b3..77e9a60fcbc123 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -1428,6 +1428,7 @@ test_oid_init ZERO_OID=$(test_oid zero) OID_REGEX=$(echo $ZERO_OID | sed -e 's/0/[0-9a-f]/g') +OIDPATH_REGEX=$(test_oid_to_path $ZERO_OID | sed -e 's/0/[0-9a-f]/g') EMPTY_TREE=$(test_oid empty_tree) EMPTY_BLOB=$(test_oid empty_blob) _z40=$ZERO_OID From d335ce8f24e335ffda911f01a9569f7132e64cdb Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 13 May 2020 15:59:33 -0600 Subject: [PATCH 015/447] commit-graph.c: show progress of finding reachable commits When 'git commit-graph write --reachable' is invoked, the commit-graph machinery calls 'for_each_ref()' to discover the set of reachable commits. Right now the 'add_ref_to_set' callback is not doing anything other than adding an OID to the set of known-reachable OIDs. In a subsequent commit, 'add_ref_to_set' will presumptively peel references. This operation should be fast for repositories with an up-to-date '$GIT_DIR/packed-refs', but may be slow in the general case. 
So that it doesn't appear that 'git commit-graph write' is idling with '--reachable' in the slow case, add a progress meter to provide some output in the meantime. In general, we don't expect a progress meter to appear at all, since peeling references with a 'packed-refs' file is quick. If it's slow and we do show a progress meter, the subsequent 'fill_oids_from_commits()' will be fast, since all of the calls to 'lookup_commit_reference_gently()' will be no-ops. Both progress meters are delayed, so it is unlikely that more than one will appear. In either case, this intermediate state will go away in a handful of patches, at which point there will be at most one progress meter. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- commit-graph.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/commit-graph.c b/commit-graph.c index 00da281f390eba..bd421c4f06381f 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1320,6 +1320,7 @@ static void compute_bloom_filters(struct write_commit_graph_context *ctx) struct refs_cb_data { struct oidset *commits; + struct progress *progress; }; static int add_ref_to_set(const char *refname, @@ -1329,6 +1330,9 @@ static int add_ref_to_set(const char *refname, struct refs_cb_data *data = (struct refs_cb_data *)cb_data; oidset_insert(data->commits, oid); + + display_progress(data->progress, oidset_size(data->commits)); + return 0; } @@ -1342,12 +1346,17 @@ int write_commit_graph_reachable(struct object_directory *odb, memset(&data, 0, sizeof(data)); data.commits = &commits; + if (flags & COMMIT_GRAPH_WRITE_PROGRESS) + data.progress = start_delayed_progress( + _("Collecting referenced commits"), 0); for_each_ref(add_ref_to_set, &data); result = write_commit_graph(odb, NULL, &commits, flags, split_opts); oidset_clear(&commits); + if (data.progress) + stop_progress(&data.progress); return result; } From 630cd5194e6fcf928290a5c0f85e4094559197c8 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 13 May 2020 15:59:37 
-0600 Subject: [PATCH 016/447] commit-graph.c: peel refs in 'add_ref_to_set' While iterating references (to discover the set of commits to write to the commit-graph with 'git commit-graph write --reachable'), 'add_ref_to_set' can save 'fill_oids_from_commits()' some time by peeling the references beforehand. Move peeling out of 'fill_oids_from_commits()' and into 'add_ref_to_set()' to use 'peel_ref()' instead of 'deref_tag()'. Doing so allows the commit-graph machinery to use the peeled value from '$GIT_DIR/packed-refs' instead of having to load and parse tags. While we're at it, discard non-commit objects reachable from ref tips. This would be done automatically by 'fill_oids_from_commits()', but such functionality will be removed in a subsequent patch after the call to 'lookup_commit_reference_gently' is dropped (at which point a non-commit object in the commits oidset will become an error). Suggested-by: Jeff King Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- commit-graph.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/commit-graph.c b/commit-graph.c index bd421c4f06381f..6098ecd5752ea4 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1327,9 +1327,13 @@ static int add_ref_to_set(const char *refname, const struct object_id *oid, int flags, void *cb_data) { + struct object_id peeled; struct refs_cb_data *data = (struct refs_cb_data *)cb_data; - oidset_insert(data->commits, oid); + if (!peel_ref(refname, &peeled)) + oid = &peeled; + if (oid_object_info(the_repository, oid, NULL) == OBJ_COMMIT) + oidset_insert(data->commits, oid); display_progress(data->progress, oidset_size(data->commits)); From b8615c3c63246ae4340239fbad2e6b64cbeb0fa5 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Wed, 13 May 2020 00:53:42 +0000 Subject: [PATCH 017/447] Documentation: document v1 protocol object-format capability Document a capability that indicates which hash algorithms are in use by both sides of a remote connection. 
Use the term "object-format", since this is the term used for the repository extension as well. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- Documentation/technical/protocol-capabilities.txt | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Documentation/technical/protocol-capabilities.txt b/Documentation/technical/protocol-capabilities.txt index 2b267c0da6b2d8..36ccd14f97ed46 100644 --- a/Documentation/technical/protocol-capabilities.txt +++ b/Documentation/technical/protocol-capabilities.txt @@ -176,6 +176,21 @@ agent strings are purely informative for statistics and debugging purposes, and MUST NOT be used to programmatically assume the presence or absence of particular features. +object-format +------------- + +This capability, which takes a hash algorithm as an argument, indicates +that the server supports the given hash algorithms. It may be sent +multiple times; if so, the first one given is the one used in the ref +advertisement. + +When provided by the client, this indicates that it intends to use the +given hash algorithm to communicate. The algorithm provided must be one +that the server supports. + +If this capability is not provided, it is assumed that the only +supported algorithm is SHA-1. + symref ------ From 51ca7f89f8de97388e4f7997ba07b71dffdb9178 Mon Sep 17 00:00:00 2001 From: Denton Liu Date: Mon, 18 May 2020 11:47:18 -0400 Subject: [PATCH 018/447] remote-curl: fix typo Signed-off-by: Denton Liu Signed-off-by: Junio C Hamano --- remote-curl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/remote-curl.c b/remote-curl.c index 1c9aa3d0ab978c..6844708f382761 100644 --- a/remote-curl.c +++ b/remote-curl.c @@ -643,7 +643,7 @@ static size_t rpc_out(void *ptr, size_t eltsize, return 0; } /* - * If avail is non-zerp, the line length for the flush still + * If avail is non-zero, the line length for the flush still * hasn't been fully sent. Proceed with sending the line * length. 
*/ From 04cc91abcbeea60f0ef25c041b412480cd2b1afe Mon Sep 17 00:00:00 2001 From: Denton Liu Date: Mon, 18 May 2020 11:47:19 -0400 Subject: [PATCH 019/447] remote-curl: remove label indentation In the codebase, labels are aligned to the leftmost column. Remove the space-indentation from `free_specs:` to conform to this. Signed-off-by: Denton Liu Signed-off-by: Junio C Hamano --- remote-curl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/remote-curl.c b/remote-curl.c index 6844708f382761..da3e07184aed3c 100644 --- a/remote-curl.c +++ b/remote-curl.c @@ -1276,7 +1276,7 @@ static void parse_push(struct strbuf *buf) if (ret) exit(128); /* error already reported */ - free_specs: +free_specs: argv_array_clear(&specs); } From fa8953cb401d2c73c280eb6c70e7a7918dbe8bfb Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 18 May 2020 13:27:09 -0600 Subject: [PATCH 020/447] builtin/commit-graph.c: extract 'read_one_commit()' With either '--stdin-commits' or '--stdin-packs', the commit-graph builtin will read line-delimited input, and interpret it either as a series of commit OIDs, or pack names. In a subsequent commit, we will begin handling '--stdin-commits' differently by processing each line as it comes in, instead of in one shot at the end. To make adequate room for this additional logic, split the '--stdin-commits' case from '--stdin-packs' by only storing the input when '--stdin-packs' is given. In the case of '--stdin-commits', feed each line to a new 'read_one_commit' helper, which (for now) will merely call 'parse_oid_hex'. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/commit-graph.c | 58 +++++++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index 15fe60317c7846..a5c2332a86ee7b 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -138,12 +138,24 @@ static int write_option_parse_split(const struct option *opt, const char *arg, return 0; } +static int read_one_commit(struct oidset *commits, const char *hash) +{ + struct object_id oid; + const char *end; + + if (parse_oid_hex(hash, &oid, &end)) + return error(_("unexpected non-hex object ID: %s"), hash); + + oidset_insert(commits, &oid); + return 0; +} + static int graph_write(int argc, const char **argv) { - struct string_list *pack_indexes = NULL; + struct string_list pack_indexes = STRING_LIST_INIT_NODUP; + struct strbuf buf = STRBUF_INIT; struct oidset commits = OIDSET_INIT; struct object_directory *odb = NULL; - struct string_list lines; int result = 0; enum commit_graph_write_flags flags = 0; @@ -209,44 +221,32 @@ static int graph_write(int argc, const char **argv) return 0; } - string_list_init(&lines, 0); - if (opts.stdin_packs || opts.stdin_commits) { - struct strbuf buf = STRBUF_INIT; - + if (opts.stdin_packs) { while (strbuf_getline(&buf, stdin) != EOF) - string_list_append(&lines, strbuf_detach(&buf, NULL)); - - if (opts.stdin_packs) - pack_indexes = &lines; - if (opts.stdin_commits) { - struct string_list_item *item; - oidset_init(&commits, lines.nr); - for_each_string_list_item(item, &lines) { - struct object_id oid; - const char *end; - - if (parse_oid_hex(item->string, &oid, &end)) { - error(_("unexpected non-hex object ID: " - "%s"), item->string); - return 1; - } - - oidset_insert(&commits, &oid); + string_list_append(&pack_indexes, + strbuf_detach(&buf, NULL)); + } else if (opts.stdin_commits) { + oidset_init(&commits, 0); + flags |= COMMIT_GRAPH_WRITE_CHECK_OIDS; + + while 
(strbuf_getline(&buf, stdin) != EOF) { + if (read_one_commit(&commits, buf.buf)) { + result = 1; + goto cleanup; } - flags |= COMMIT_GRAPH_WRITE_CHECK_OIDS; } - - UNLEAK(buf); } if (write_commit_graph(odb, - pack_indexes, + opts.stdin_packs ? &pack_indexes : NULL, opts.stdin_commits ? &commits : NULL, flags, &split_opts)) result = 1; - UNLEAK(lines); +cleanup: + string_list_clear(&pack_indexes, 0); + strbuf_release(&buf); return result; } From 5b6653e523cd2a0357924caef339adfeab12903b Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 13 May 2020 15:59:44 -0600 Subject: [PATCH 021/447] builtin/commit-graph.c: dereference tags in builtin When given a list of commits, the commit-graph machinery calls 'lookup_commit_reference_gently()' on each element in the set and treats the resulting set of OIDs as the base over which to close for reachability. In an earlier collection of commits, the 'git commit-graph write --reachable' case made the inner-most call to 'lookup_commit_reference_gently()' by peeling references before they were passed over to the commit-graph internals. Do the analog for 'git commit-graph write --stdin-commits' by calling 'lookup_commit_reference_gently()' outside of the commit-graph machinery, making the inner-most call a noop. Since this may incur additional processing time, surround 'read_one_commit' with a progress meter to provide output to the caller. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/commit-graph.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index a5c2332a86ee7b..97eb3b72d67da4 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -6,6 +6,7 @@ #include "repository.h" #include "commit-graph.h" #include "object-store.h" +#include "progress.h" static char const * const builtin_commit_graph_usage[] = { N_("git commit-graph verify [--object-dir ] [--shallow] [--[no-]progress]"), @@ -138,15 +139,24 @@ static int write_option_parse_split(const struct option *opt, const char *arg, return 0; } -static int read_one_commit(struct oidset *commits, const char *hash) +static int read_one_commit(struct oidset *commits, struct progress *progress, + const char *hash) { + struct commit *result; struct object_id oid; const char *end; if (parse_oid_hex(hash, &oid, &end)) return error(_("unexpected non-hex object ID: %s"), hash); - oidset_insert(commits, &oid); + result = lookup_commit_reference_gently(the_repository, &oid, 1); + if (result) + oidset_insert(commits, &result->object.oid); + else + return error(_("invalid commit object id: %s"), hash); + + display_progress(progress, oidset_size(commits)); + return 0; } @@ -158,6 +168,7 @@ static int graph_write(int argc, const char **argv) struct object_directory *odb = NULL; int result = 0; enum commit_graph_write_flags flags = 0; + struct progress *progress = NULL; static struct option builtin_commit_graph_write_options[] = { OPT_STRING(0, "object-dir", &opts.obj_dir, @@ -228,13 +239,18 @@ static int graph_write(int argc, const char **argv) } else if (opts.stdin_commits) { oidset_init(&commits, 0); flags |= COMMIT_GRAPH_WRITE_CHECK_OIDS; + if (opts.progress) + progress = start_delayed_progress( + _("Collecting commits from input"), 0); while (strbuf_getline(&buf, stdin) != EOF) { - if (read_one_commit(&commits, buf.buf)) { + if 
(read_one_commit(&commits, progress, buf.buf)) { result = 1; goto cleanup; } } + + } if (write_commit_graph(odb, @@ -247,6 +263,8 @@ static int graph_write(int argc, const char **argv) cleanup: string_list_clear(&pack_indexes, 0); strbuf_release(&buf); + if (progress) + stop_progress(&progress); return result; } From 0ec2d0ff07f125b7dcf0fde2b508fa8d6d35e939 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 13 May 2020 15:59:47 -0600 Subject: [PATCH 022/447] commit-graph.c: simplify 'fill_oids_from_commits' In the previous handful of commits, both 'git commit-graph write --reachable' and '--stdin-commits' learned to peel tags down to the commits which they refer to before passing them into the commit-graph internals. This makes the call to 'lookup_commit_reference_gently()' inside of 'fill_oids_from_commits()' a noop, since all OIDs are commits by that point. As such, remove the call entirely, as well as the progress meter, which has been split and moved out to the callers in the aforementioned earlier commits. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- commit-graph.c | 33 +++------------------------------ 1 file changed, 3 insertions(+), 30 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 6098ecd5752ea4..aed03f4b2f7ec2 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1412,46 +1412,19 @@ static int fill_oids_from_packs(struct write_commit_graph_context *ctx, static int fill_oids_from_commits(struct write_commit_graph_context *ctx, struct oidset *commits) { - uint32_t i = 0; - struct strbuf progress_title = STRBUF_INIT; struct oidset_iter iter; struct object_id *oid; if (!oidset_size(commits)) return 0; - if (ctx->report_progress) { - strbuf_addf(&progress_title, - Q_("Finding commits for commit graph from %d ref", - "Finding commits for commit graph from %d refs", - oidset_size(commits)), - oidset_size(commits)); - ctx->progress = start_delayed_progress( - progress_title.buf, - oidset_size(commits)); - } - oidset_iter_init(commits, &iter); while ((oid = oidset_iter_next(&iter))) { - struct commit *result; - - display_progress(ctx->progress, ++i); - - result = lookup_commit_reference_gently(ctx->r, oid, 1); - if (result) { - ALLOC_GROW(ctx->oids.list, ctx->oids.nr + 1, ctx->oids.alloc); - oidcpy(&ctx->oids.list[ctx->oids.nr], &(result->object.oid)); - ctx->oids.nr++; - } else if (ctx->check_oids) { - error(_("invalid commit object id: %s"), - oid_to_hex(oid)); - return -1; - } + ALLOC_GROW(ctx->oids.list, ctx->oids.nr + 1, ctx->oids.alloc); + oidcpy(&ctx->oids.list[ctx->oids.nr], oid); + ctx->oids.nr++; } - stop_progress(&ctx->progress); - strbuf_release(&progress_title); - return 0; } From 1f1304d4976f54b1afa5d71b55e41c6c1b5f9ac1 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 13 May 2020 15:59:51 -0600 Subject: [PATCH 023/447] t5318: reorder test below 'graph_read_expect' In the subsequent commit, we will introduce a dependency on 'graph_read_expect' from t5318.7. 
Preemptively move it below 'graph_read_expect()'s definition so that the test can call it. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- t/t5318-commit-graph.sh | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 39e2918a324895..89020d3d441341 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -42,15 +42,6 @@ test_expect_success 'create commits and repack' ' git repack ' -test_expect_success 'exit with correct error on bad input to --stdin-commits' ' - cd "$TRASH_DIRECTORY/full" && - echo HEAD | test_expect_code 1 git commit-graph write --stdin-commits 2>stderr && - test_i18ngrep "unexpected non-hex object ID: HEAD" stderr && - # valid tree OID, but not a commit OID - git rev-parse HEAD^{tree} | test_expect_code 1 git commit-graph write --stdin-commits 2>stderr && - test_i18ngrep "invalid commit object id" stderr -' - graph_git_two_modes() { git -c core.commitGraph=true $1 >output git -c core.commitGraph=false $1 >expect @@ -91,6 +82,15 @@ graph_read_expect() { test_cmp expect output } +test_expect_success 'exit with correct error on bad input to --stdin-commits' ' + cd "$TRASH_DIRECTORY/full" && + echo HEAD | test_expect_code 1 git commit-graph write --stdin-commits 2>stderr && + test_i18ngrep "unexpected non-hex object ID: HEAD" stderr && + # valid tree OID, but not a commit OID + git rev-parse HEAD^{tree} | test_expect_code 1 git commit-graph write --stdin-commits 2>stderr && + test_i18ngrep "invalid commit object id" stderr +' + test_expect_success 'write graph' ' cd "$TRASH_DIRECTORY/full" && git commit-graph write && From 2f00c355cb79ee86bddc9f2fef91ac380a6023fc Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 13 May 2020 15:59:55 -0600 Subject: [PATCH 024/447] commit-graph: drop COMMIT_GRAPH_WRITE_CHECK_OIDS flag Since 7c5c9b9c57 (commit-graph: error out on invalid commit oids in 'write --stdin-commits', 2019-08-05), the 
commit-graph builtin dies on receiving non-commit OIDs as input to '--stdin-commits'. This behavior can be cumbersome to work around in, say, the case of piping 'git for-each-ref' to 'git commit-graph write --stdin-commits' if the caller does not want to cull out non-commits themselves. In this situation, it would be ideal if 'git commit-graph write' wrote the graph containing the inputs that did pertain to commits, and silently ignored the remainder of the input. Some options have been proposed to the effect of '--[no-]check-oids' which would allow callers to have the commit-graph builtin do just that. After some discussion, it is difficult to imagine a caller who wouldn't want to pass '--no-check-oids', suggesting that we should get rid of the behavior of complaining about non-commit inputs altogether. If callers do wish to retain this behavior, they can easily work around this change by doing the following: git for-each-ref --format='%(objectname) %(objecttype) %(*objecttype)' | awk ' !/commit/ { print "not-a-commit:"$1 } /commit/ { print $1 } ' | git commit-graph write --stdin-commits To make it so that valid OIDs that refer to non-existent objects are indeed an error after loosening the error handling, perform an extra lookup to make sure that object indeed exists before sending it to the commit-graph internals. Helped-by: Jeff King Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/git-commit-graph.txt | 6 ++++-- builtin/commit-graph.c | 15 ++++++++------- commit-graph.c | 2 -- commit-graph.h | 4 +--- t/t5318-commit-graph.sh | 15 +++++++++++---- 5 files changed, 24 insertions(+), 18 deletions(-) diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt index 53a650225a8b4d..fcac7d12e1c296 100644 --- a/Documentation/git-commit-graph.txt +++ b/Documentation/git-commit-graph.txt @@ -47,8 +47,10 @@ with `--stdin-commits` or `--reachable`.) 
+ With the `--stdin-commits` option, generate the new commit graph by walking commits starting at the commits specified in stdin as a list -of OIDs in hex, one OID per line. (Cannot be combined with -`--stdin-packs` or `--reachable`.) +of OIDs in hex, one OID per line. OIDs that resolve to non-commits +(either directly, or by peeling tags) are silently ignored. OIDs that +are malformed, or do not exist generate an error. (Cannot be combined +with `--stdin-packs` or `--reachable`.) + With the `--reachable` option, generate the new commit graph by walking commits starting at all refs. (Cannot be combined with `--stdin-commits` diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index 97eb3b72d67da4..75455da138d5f6 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -7,6 +7,7 @@ #include "commit-graph.h" #include "object-store.h" #include "progress.h" +#include "tag.h" static char const * const builtin_commit_graph_usage[] = { N_("git commit-graph verify [--object-dir ] [--shallow] [--[no-]progress]"), @@ -142,18 +143,19 @@ static int write_option_parse_split(const struct option *opt, const char *arg, static int read_one_commit(struct oidset *commits, struct progress *progress, const char *hash) { - struct commit *result; + struct object *result; struct object_id oid; const char *end; if (parse_oid_hex(hash, &oid, &end)) return error(_("unexpected non-hex object ID: %s"), hash); - result = lookup_commit_reference_gently(the_repository, &oid, 1); - if (result) - oidset_insert(commits, &result->object.oid); - else - return error(_("invalid commit object id: %s"), hash); + result = deref_tag(the_repository, parse_object(the_repository, &oid), + NULL, 0); + if (!result) + return error(_("invalid object: %s"), hash); + else if (object_as_type(the_repository, result, OBJ_COMMIT, 1)) + oidset_insert(commits, &result->oid); display_progress(progress, oidset_size(commits)); @@ -238,7 +240,6 @@ static int graph_write(int argc, const char **argv) 
strbuf_detach(&buf, NULL)); } else if (opts.stdin_commits) { oidset_init(&commits, 0); - flags |= COMMIT_GRAPH_WRITE_CHECK_OIDS; if (opts.progress) progress = start_delayed_progress( _("Collecting commits from input"), 0); diff --git a/commit-graph.c b/commit-graph.c index aed03f4b2f7ec2..5df3e08718bdb9 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -880,7 +880,6 @@ struct write_commit_graph_context { unsigned append:1, report_progress:1, split:1, - check_oids:1, changed_paths:1, order_by_pack:1; @@ -2002,7 +2001,6 @@ int write_commit_graph(struct object_directory *odb, ctx->append = flags & COMMIT_GRAPH_WRITE_APPEND ? 1 : 0; ctx->report_progress = flags & COMMIT_GRAPH_WRITE_PROGRESS ? 1 : 0; ctx->split = flags & COMMIT_GRAPH_WRITE_SPLIT ? 1 : 0; - ctx->check_oids = flags & COMMIT_GRAPH_WRITE_CHECK_OIDS ? 1 : 0; ctx->split_opts = split_opts; ctx->changed_paths = flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS ? 1 : 0; ctx->total_bloom_filter_data_size = 0; diff --git a/commit-graph.h b/commit-graph.h index 4212766a4f0507..3ba0da1e5f4738 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -91,9 +91,7 @@ enum commit_graph_write_flags { COMMIT_GRAPH_WRITE_APPEND = (1 << 0), COMMIT_GRAPH_WRITE_PROGRESS = (1 << 1), COMMIT_GRAPH_WRITE_SPLIT = (1 << 2), - /* Make sure that each OID in the input is a valid commit OID. 
*/ - COMMIT_GRAPH_WRITE_CHECK_OIDS = (1 << 3), - COMMIT_GRAPH_WRITE_BLOOM_FILTERS = (1 << 4), + COMMIT_GRAPH_WRITE_BLOOM_FILTERS = (1 << 3), }; enum commit_graph_split_flags { diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 89020d3d441341..e77244c39bf65b 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -84,11 +84,18 @@ graph_read_expect() { test_expect_success 'exit with correct error on bad input to --stdin-commits' ' cd "$TRASH_DIRECTORY/full" && - echo HEAD | test_expect_code 1 git commit-graph write --stdin-commits 2>stderr && + # invalid, non-hex OID + echo HEAD >in && + test_expect_code 1 git commit-graph write --stdin-commits <in 2>stderr && test_i18ngrep "unexpected non-hex object ID: HEAD" stderr && - # valid tree OID, but not a commit OID - git rev-parse HEAD^{tree} | test_expect_code 1 git commit-graph write --stdin-commits 2>stderr && - test_i18ngrep "invalid commit object id" stderr + # non-existent OID + echo $ZERO_OID >in && + test_expect_code 1 git commit-graph write --stdin-commits <in 2>stderr && + test_i18ngrep "invalid object" stderr && + # valid commit and tree OID + git rev-parse HEAD HEAD^{tree} >in && + git commit-graph write --stdin-commits Date: Fri, 15 May 2020 12:04:42 +0200 Subject: [PATCH 025/447] upload-pack: remove unused 'wants' from upload_pack_data As we cleanup 'upload-pack.c' by using 'struct upload_pack_data' more thoroughly, let's remove 'struct object_array wants' from 'struct upload_pack_data', as it appears to be unused.
Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- upload-pack.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/upload-pack.c b/upload-pack.c index 0478bff3e7fd11..9aaf886828db79 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -1131,7 +1131,6 @@ void upload_pack(struct upload_pack_options *options) } struct upload_pack_data { - struct object_array wants; struct string_list wanted_refs; struct oid_array haves; @@ -1157,14 +1156,12 @@ struct upload_pack_data { static void upload_pack_data_init(struct upload_pack_data *data) { - struct object_array wants = OBJECT_ARRAY_INIT; struct string_list wanted_refs = STRING_LIST_INIT_DUP; struct oid_array haves = OID_ARRAY_INIT; struct object_array shallows = OBJECT_ARRAY_INIT; struct string_list deepen_not = STRING_LIST_INIT_DUP; memset(data, 0, sizeof(*data)); - data->wants = wants; data->wanted_refs = wanted_refs; data->haves = haves; data->shallows = shallows; @@ -1174,7 +1171,6 @@ static void upload_pack_data_init(struct upload_pack_data *data) static void upload_pack_data_clear(struct upload_pack_data *data) { - object_array_clear(&data->wants); string_list_clear(&data->wanted_refs, 1); oid_array_clear(&data->haves); object_array_clear(&data->shallows); From 389f161ab295e9aad227ba9b13cc7b61b272dda8 Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Fri, 15 May 2020 12:04:43 +0200 Subject: [PATCH 026/447] upload-pack: move {want,have}_obj to upload_pack_data As we cleanup 'upload-pack.c' by using 'struct upload_pack_data' more thoroughly, let's move the want_obj and have_obj object arrays into 'struct upload_pack_data'. These object arrays are used by both upload_pack() and upload_pack_v2(), for example when these functions call create_pack_file(). We are going to use 'struct upload_pack_data' in upload_pack() in a followup commit. 
Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- upload-pack.c | 48 +++++++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/upload-pack.c b/upload-pack.c index 9aaf886828db79..e1b10522f75ca3 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -1132,6 +1132,8 @@ void upload_pack(struct upload_pack_options *options) struct upload_pack_data { struct string_list wanted_refs; + struct object_array want_obj; + struct object_array have_obj; struct oid_array haves; struct object_array shallows; @@ -1157,12 +1159,16 @@ struct upload_pack_data { static void upload_pack_data_init(struct upload_pack_data *data) { struct string_list wanted_refs = STRING_LIST_INIT_DUP; + struct object_array want_obj = OBJECT_ARRAY_INIT; + struct object_array have_obj = OBJECT_ARRAY_INIT; struct oid_array haves = OID_ARRAY_INIT; struct object_array shallows = OBJECT_ARRAY_INIT; struct string_list deepen_not = STRING_LIST_INIT_DUP; memset(data, 0, sizeof(*data)); data->wanted_refs = wanted_refs; + data->want_obj = want_obj; + data->have_obj = have_obj; data->haves = haves; data->shallows = shallows; data->deepen_not = deepen_not; @@ -1172,6 +1178,8 @@ static void upload_pack_data_init(struct upload_pack_data *data) static void upload_pack_data_clear(struct upload_pack_data *data) { string_list_clear(&data->wanted_refs, 1); + object_array_clear(&data->want_obj); + object_array_clear(&data->have_obj); oid_array_clear(&data->haves); object_array_clear(&data->shallows); string_list_clear(&data->deepen_not, 0); @@ -1256,19 +1264,18 @@ static int parse_have(const char *line, struct oid_array *haves) } static void process_args(struct packet_reader *request, - struct upload_pack_data *data, - struct object_array *want_obj) + struct upload_pack_data *data) { while (packet_reader_read(request) == PACKET_READ_NORMAL) { const char *arg = request->line; const char *p; /* process want */ - if (parse_want(&data->writer, arg, want_obj)) + if 
(parse_want(&data->writer, arg, &data->want_obj)) continue; if (allow_ref_in_want && parse_want_ref(&data->writer, arg, &data->wanted_refs, - want_obj)) + &data->want_obj)) continue; /* process have line */ if (parse_have(arg, &data->haves)) @@ -1399,17 +1406,16 @@ static int send_acks(struct packet_writer *writer, struct oid_array *acks, return 0; } -static int process_haves_and_send_acks(struct upload_pack_data *data, - struct object_array *have_obj, - struct object_array *want_obj) +static int process_haves_and_send_acks(struct upload_pack_data *data) { struct oid_array common = OID_ARRAY_INIT; int ret = 0; - process_haves(&data->haves, &common, have_obj); + process_haves(&data->haves, &common, &data->have_obj); if (data->done) { ret = 1; - } else if (send_acks(&data->writer, &common, have_obj, want_obj)) { + } else if (send_acks(&data->writer, &common, + &data->have_obj, &data->want_obj)) { packet_writer_delim(&data->writer); ret = 1; } else { @@ -1441,8 +1447,7 @@ static void send_wanted_ref_info(struct upload_pack_data *data) packet_writer_delim(&data->writer); } -static void send_shallow_info(struct upload_pack_data *data, - struct object_array *want_obj) +static void send_shallow_info(struct upload_pack_data *data) { /* No shallow info needs to be sent */ if (!data->depth && !data->deepen_rev_list && !data->shallows.nr && @@ -1455,10 +1460,10 @@ static void send_shallow_info(struct upload_pack_data *data, data->deepen_rev_list, data->deepen_since, &data->deepen_not, data->deepen_relative, - &data->shallows, want_obj) && + &data->shallows, &data->want_obj) && is_repository_shallow(the_repository)) deepen(&data->writer, INFINITE_DEPTH, data->deepen_relative, - &data->shallows, want_obj); + &data->shallows, &data->want_obj); packet_delim(1); } @@ -1475,8 +1480,6 @@ int upload_pack_v2(struct repository *r, struct argv_array *keys, { enum fetch_state state = FETCH_PROCESS_ARGS; struct upload_pack_data data; - struct object_array have_obj = OBJECT_ARRAY_INIT; - 
struct object_array want_obj = OBJECT_ARRAY_INIT; clear_object_flags(ALL_FLAGS); @@ -1488,9 +1491,9 @@ int upload_pack_v2(struct repository *r, struct argv_array *keys, while (state != FETCH_DONE) { switch (state) { case FETCH_PROCESS_ARGS: - process_args(request, &data, &want_obj); + process_args(request, &data); - if (!want_obj.nr) { + if (!data.want_obj.nr) { /* * Request didn't contain any 'want' lines, * guess they didn't want anything. @@ -1510,18 +1513,19 @@ int upload_pack_v2(struct repository *r, struct argv_array *keys, } break; case FETCH_SEND_ACKS: - if (process_haves_and_send_acks(&data, &have_obj, - &want_obj)) + if (process_haves_and_send_acks(&data)) state = FETCH_SEND_PACK; else state = FETCH_DONE; break; case FETCH_SEND_PACK: send_wanted_ref_info(&data); - send_shallow_info(&data, &want_obj); + send_shallow_info(&data); packet_writer_write(&data.writer, "packfile\n"); - create_pack_file(&have_obj, &want_obj, &data.filter_options); + create_pack_file(&data.have_obj, + &data.want_obj, + &data.filter_options); state = FETCH_DONE; break; case FETCH_DONE: @@ -1530,8 +1534,6 @@ int upload_pack_v2(struct repository *r, struct argv_array *keys, } upload_pack_data_clear(&data); - object_array_clear(&have_obj); - object_array_clear(&want_obj); return 0; } From e8498322fa033b6ed5320119a82da0bc50ca18ca Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Fri, 15 May 2020 12:04:44 +0200 Subject: [PATCH 027/447] upload-pack: move 'struct upload_pack_data' around As we cleanup 'upload-pack.c' by using 'struct upload_pack_data' more thoroughly, let's move 'struct upload_pack_data' and the related upload_pack_data_init() and upload_pack_data_clear() functions towards the beginning of the file, so that this struct and its related functions can then be used by upload_pack() in a followup commit. 
Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- upload-pack.c | 112 +++++++++++++++++++++++++------------------------- 1 file changed, 56 insertions(+), 56 deletions(-) diff --git a/upload-pack.c b/upload-pack.c index e1b10522f75ca3..9aeb3477c9d1a8 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -72,6 +72,62 @@ static int allow_ref_in_want; static int allow_sideband_all; +struct upload_pack_data { + struct string_list wanted_refs; + struct object_array want_obj; + struct object_array have_obj; + struct oid_array haves; + + struct object_array shallows; + struct string_list deepen_not; + int depth; + timestamp_t deepen_since; + int deepen_rev_list; + int deepen_relative; + + struct list_objects_filter_options filter_options; + + struct packet_writer writer; + + unsigned stateless_rpc : 1; + + unsigned use_thin_pack : 1; + unsigned use_ofs_delta : 1; + unsigned no_progress : 1; + unsigned use_include_tag : 1; + unsigned done : 1; +}; + +static void upload_pack_data_init(struct upload_pack_data *data) +{ + struct string_list wanted_refs = STRING_LIST_INIT_DUP; + struct object_array want_obj = OBJECT_ARRAY_INIT; + struct object_array have_obj = OBJECT_ARRAY_INIT; + struct oid_array haves = OID_ARRAY_INIT; + struct object_array shallows = OBJECT_ARRAY_INIT; + struct string_list deepen_not = STRING_LIST_INIT_DUP; + + memset(data, 0, sizeof(*data)); + data->wanted_refs = wanted_refs; + data->want_obj = want_obj; + data->have_obj = have_obj; + data->haves = haves; + data->shallows = shallows; + data->deepen_not = deepen_not; + packet_writer_init(&data->writer, 1); +} + +static void upload_pack_data_clear(struct upload_pack_data *data) +{ + string_list_clear(&data->wanted_refs, 1); + object_array_clear(&data->want_obj); + object_array_clear(&data->have_obj); + oid_array_clear(&data->haves); + object_array_clear(&data->shallows); + string_list_clear(&data->deepen_not, 0); + list_objects_filter_release(&data->filter_options); +} + static void 
reset_timeout(void) { alarm(timeout); @@ -1130,62 +1186,6 @@ void upload_pack(struct upload_pack_options *options) list_objects_filter_release(&filter_options); } -struct upload_pack_data { - struct string_list wanted_refs; - struct object_array want_obj; - struct object_array have_obj; - struct oid_array haves; - - struct object_array shallows; - struct string_list deepen_not; - int depth; - timestamp_t deepen_since; - int deepen_rev_list; - int deepen_relative; - - struct list_objects_filter_options filter_options; - - struct packet_writer writer; - - unsigned stateless_rpc : 1; - - unsigned use_thin_pack : 1; - unsigned use_ofs_delta : 1; - unsigned no_progress : 1; - unsigned use_include_tag : 1; - unsigned done : 1; -}; - -static void upload_pack_data_init(struct upload_pack_data *data) -{ - struct string_list wanted_refs = STRING_LIST_INIT_DUP; - struct object_array want_obj = OBJECT_ARRAY_INIT; - struct object_array have_obj = OBJECT_ARRAY_INIT; - struct oid_array haves = OID_ARRAY_INIT; - struct object_array shallows = OBJECT_ARRAY_INIT; - struct string_list deepen_not = STRING_LIST_INIT_DUP; - - memset(data, 0, sizeof(*data)); - data->wanted_refs = wanted_refs; - data->want_obj = want_obj; - data->have_obj = have_obj; - data->haves = haves; - data->shallows = shallows; - data->deepen_not = deepen_not; - packet_writer_init(&data->writer, 1); -} - -static void upload_pack_data_clear(struct upload_pack_data *data) -{ - string_list_clear(&data->wanted_refs, 1); - object_array_clear(&data->want_obj); - object_array_clear(&data->have_obj); - oid_array_clear(&data->haves); - object_array_clear(&data->shallows); - string_list_clear(&data->deepen_not, 0); - list_objects_filter_release(&data->filter_options); -} - static int parse_want(struct packet_writer *writer, const char *line, struct object_array *want_obj) { From ebf8ebcc56d5f47c531bce0e6238244752d91888 Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Fri, 15 May 2020 12:04:45 +0200 Subject: [PATCH 
028/447] upload-pack: use 'struct upload_pack_data' in upload_pack() As we cleanup 'upload-pack.c' by using 'struct upload_pack_data' more thoroughly, let's use 'struct upload_pack_data' in upload_pack(). This will make it possible in followup commits to remove a lot of static variables and local variables that have the same name and purpose as fields in 'struct upload_pack_data'. This will also make upload_pack() work in a more similar way as upload_pack_v2(). Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- upload-pack.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/upload-pack.c b/upload-pack.c index 9aeb3477c9d1a8..cb336c5713958e 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -1144,18 +1144,17 @@ static int upload_pack_config(const char *var, const char *value, void *unused) void upload_pack(struct upload_pack_options *options) { struct string_list symref = STRING_LIST_INIT_DUP; - struct object_array want_obj = OBJECT_ARRAY_INIT; struct packet_reader reader; - struct list_objects_filter_options filter_options; + struct upload_pack_data data; stateless_rpc = options->stateless_rpc; timeout = options->timeout; daemon_mode = options->daemon_mode; - memset(&filter_options, 0, sizeof(filter_options)); - git_config(upload_pack_config, NULL); + upload_pack_data_init(&data); + head_ref_namespaced(find_symref, &symref); if (options->advertise_refs || !stateless_rpc) { @@ -1169,21 +1168,24 @@ void upload_pack(struct upload_pack_options *options) for_each_namespaced_ref(check_ref, NULL); } string_list_clear(&symref, 1); - if (options->advertise_refs) - return; - packet_reader_init(&reader, 0, NULL, 0, - PACKET_READ_CHOMP_NEWLINE | - PACKET_READ_DIE_ON_ERR_PACKET); + if (!options->advertise_refs) { + packet_reader_init(&reader, 0, NULL, 0, + PACKET_READ_CHOMP_NEWLINE | + PACKET_READ_DIE_ON_ERR_PACKET); - receive_needs(&reader, &want_obj, &filter_options); - if (want_obj.nr) { - struct object_array 
have_obj = OBJECT_ARRAY_INIT; - get_common_commits(&reader, &have_obj, &want_obj); - create_pack_file(&have_obj, &want_obj, &filter_options); + receive_needs(&reader, &data.want_obj, &data.filter_options); + if (data.want_obj.nr) { + get_common_commits(&reader, + &data.have_obj, + &data.want_obj); + create_pack_file(&data.have_obj, + &data.want_obj, + &data.filter_options); + } } - list_objects_filter_release(&filter_options); + upload_pack_data_clear(&data); } static int parse_want(struct packet_writer *writer, const char *line, From 079776950561befa3040ebad789da2c3fab918ef Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Fri, 15 May 2020 12:04:46 +0200 Subject: [PATCH 029/447] upload-pack: pass upload_pack_data to get_common_commits() As we cleanup 'upload-pack.c' by using 'struct upload_pack_data' more thoroughly, let's pass 'struct upload_pack_data' to get_common_commits(), so that this function and the functions it calls can use all the fields of that struct in followup commits. 
Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- upload-pack.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/upload-pack.c b/upload-pack.c index cb336c5713958e..7953a33189f685 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -414,9 +414,8 @@ static int ok_to_give_up(const struct object_array *have_obj, min_generation); } -static int get_common_commits(struct packet_reader *reader, - struct object_array *have_obj, - struct object_array *want_obj) +static int get_common_commits(struct upload_pack_data *data, + struct packet_reader *reader) { struct object_id oid; char last_hex[GIT_MAX_HEXSZ + 1]; @@ -432,12 +431,14 @@ static int get_common_commits(struct packet_reader *reader, reset_timeout(); if (packet_reader_read(reader) != PACKET_READ_NORMAL) { - if (multi_ack == 2 && got_common - && !got_other && ok_to_give_up(have_obj, want_obj)) { + if (multi_ack == 2 + && got_common + && !got_other + && ok_to_give_up(&data->have_obj, &data->want_obj)) { sent_ready = 1; packet_write_fmt(1, "ACK %s ready\n", last_hex); } - if (have_obj->nr == 0 || multi_ack) + if (data->have_obj.nr == 0 || multi_ack) packet_write_fmt(1, "NAK\n"); if (no_done && sent_ready) { @@ -451,10 +452,11 @@ static int get_common_commits(struct packet_reader *reader, continue; } if (skip_prefix(reader->line, "have ", &arg)) { - switch (got_oid(arg, &oid, have_obj)) { + switch (got_oid(arg, &oid, &data->have_obj)) { case -1: /* they have what we do not */ got_other = 1; - if (multi_ack && ok_to_give_up(have_obj, want_obj)) { + if (multi_ack + && ok_to_give_up(&data->have_obj, &data->want_obj)) { const char *hex = oid_to_hex(&oid); if (multi_ack == 2) { sent_ready = 1; @@ -470,14 +472,14 @@ static int get_common_commits(struct packet_reader *reader, packet_write_fmt(1, "ACK %s common\n", last_hex); else if (multi_ack) packet_write_fmt(1, "ACK %s continue\n", last_hex); - else if (have_obj->nr == 1) + else if (data->have_obj.nr == 1) 
packet_write_fmt(1, "ACK %s\n", last_hex); break; } continue; } if (!strcmp(reader->line, "done")) { - if (have_obj->nr > 0) { + if (data->have_obj.nr > 0) { if (multi_ack) packet_write_fmt(1, "ACK %s\n", last_hex); return 0; @@ -1176,9 +1178,7 @@ void upload_pack(struct upload_pack_options *options) receive_needs(&reader, &data.want_obj, &data.filter_options); if (data.want_obj.nr) { - get_common_commits(&reader, - &data.have_obj, - &data.want_obj); + get_common_commits(&data, &reader); create_pack_file(&data.have_obj, &data.want_obj, &data.filter_options); From d92ae2c0905c55f83830acec41ef7938200fa171 Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Fri, 15 May 2020 12:04:47 +0200 Subject: [PATCH 030/447] upload-pack: pass upload_pack_data to receive_needs() As we cleanup 'upload-pack.c' by using 'struct upload_pack_data' more thoroughly, let's pass 'struct upload_pack_data' to receive_needs(), so that this function and the functions it calls can use all the fields of that struct in followup commits. 
Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- upload-pack.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/upload-pack.c b/upload-pack.c index 7953a33189f685..94bf9cd0881b90 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -907,9 +907,8 @@ static int process_deepen_not(const char *line, struct string_list *deepen_not, return 0; } -static void receive_needs(struct packet_reader *reader, - struct object_array *want_obj, - struct list_objects_filter_options *filter_options) +static void receive_needs(struct upload_pack_data *data, + struct packet_reader *reader) { struct object_array shallows = OBJECT_ARRAY_INIT; struct string_list deepen_not = STRING_LIST_INIT_DUP; @@ -944,8 +943,8 @@ static void receive_needs(struct packet_reader *reader, if (skip_prefix(reader->line, "filter ", &arg)) { if (!filter_capability_requested) die("git upload-pack: filtering capability not negotiated"); - list_objects_filter_die_if_populated(filter_options); - parse_list_objects_filter(filter_options, arg); + list_objects_filter_die_if_populated(&data->filter_options); + parse_list_objects_filter(&data->filter_options, arg); continue; } @@ -990,7 +989,7 @@ static void receive_needs(struct packet_reader *reader, if (!((allow_unadvertised_object_request & ALLOW_ANY_SHA1) == ALLOW_ANY_SHA1 || is_our_ref(o))) has_non_tip = 1; - add_object_array(o, NULL, want_obj); + add_object_array(o, NULL, &data->want_obj); } } @@ -1002,7 +1001,7 @@ static void receive_needs(struct packet_reader *reader, * by another process that handled the initial request. 
*/ if (has_non_tip) - check_non_tip(want_obj, &writer); + check_non_tip(&data->want_obj, &writer); if (!use_sideband && daemon_mode) no_progress = 1; @@ -1012,7 +1011,7 @@ static void receive_needs(struct packet_reader *reader, if (send_shallow_list(&writer, depth, deepen_rev_list, deepen_since, &deepen_not, deepen_relative, &shallows, - want_obj)) + &data->want_obj)) packet_flush(1); object_array_clear(&shallows); } @@ -1176,7 +1175,7 @@ void upload_pack(struct upload_pack_options *options) PACKET_READ_CHOMP_NEWLINE | PACKET_READ_DIE_ON_ERR_PACKET); - receive_needs(&reader, &data.want_obj, &data.filter_options); + receive_needs(&data, &reader); if (data.want_obj.nr) { get_common_commits(&data, &reader); create_pack_file(&data.have_obj, From 4ace0283b7dbfef1030d1998345a50af9946ea88 Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Fri, 15 May 2020 12:04:48 +0200 Subject: [PATCH 031/447] upload-pack: use upload_pack_data writer in receive_needs() As we cleanup 'upload-pack.c' by using 'struct upload_pack_data' more thoroughly, let's use the 'struct packet_writer writer' field from 'struct upload_pack_data' in receive_needs(), instead of a local 'struct packet_writer writer' variable. 
Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- upload-pack.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/upload-pack.c b/upload-pack.c index 94bf9cd0881b90..399ec60ade2504 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -917,10 +917,8 @@ static void receive_needs(struct upload_pack_data *data, timestamp_t deepen_since = 0; int deepen_rev_list = 0; int deepen_relative = 0; - struct packet_writer writer; shallow_nr = 0; - packet_writer_init(&writer, 1); for (;;) { struct object *o; const char *features; @@ -978,7 +976,7 @@ static void receive_needs(struct upload_pack_data *data, o = parse_object(the_repository, &oid_buf); if (!o) { - packet_writer_error(&writer, + packet_writer_error(&data->writer, "upload-pack: not our ref %s", oid_to_hex(&oid_buf)); die("git upload-pack: not our ref %s", @@ -1001,7 +999,7 @@ static void receive_needs(struct upload_pack_data *data, * by another process that handled the initial request. */ if (has_non_tip) - check_non_tip(&data->want_obj, &writer); + check_non_tip(&data->want_obj, &data->writer); if (!use_sideband && daemon_mode) no_progress = 1; @@ -1009,7 +1007,7 @@ static void receive_needs(struct upload_pack_data *data, if (depth == 0 && !deepen_rev_list && shallows.nr == 0) return; - if (send_shallow_list(&writer, depth, deepen_rev_list, deepen_since, + if (send_shallow_list(&data->writer, depth, deepen_rev_list, deepen_since, &deepen_not, deepen_relative, &shallows, &data->want_obj)) packet_flush(1); From 438528f611d7bf199d68584e514a304fa4153fbb Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Fri, 15 May 2020 12:04:49 +0200 Subject: [PATCH 032/447] upload-pack: move symref to upload_pack_data As we cleanup 'upload-pack.c' by using 'struct upload_pack_data' more thoroughly, we are passing around that struct to many functions, so let's also pass 'struct string_list symref' around at the same time by moving it from a local variable in upload_pack() into a field of 'struct 
upload_pack_data'. Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- upload-pack.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/upload-pack.c b/upload-pack.c index 399ec60ade2504..c7e35a7fc9eef4 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -73,6 +73,7 @@ static int allow_ref_in_want; static int allow_sideband_all; struct upload_pack_data { + struct string_list symref; struct string_list wanted_refs; struct object_array want_obj; struct object_array have_obj; @@ -100,6 +101,7 @@ struct upload_pack_data { static void upload_pack_data_init(struct upload_pack_data *data) { + struct string_list symref = STRING_LIST_INIT_DUP; struct string_list wanted_refs = STRING_LIST_INIT_DUP; struct object_array want_obj = OBJECT_ARRAY_INIT; struct object_array have_obj = OBJECT_ARRAY_INIT; @@ -108,6 +110,7 @@ static void upload_pack_data_init(struct upload_pack_data *data) struct string_list deepen_not = STRING_LIST_INIT_DUP; memset(data, 0, sizeof(*data)); + data->symref = symref; data->wanted_refs = wanted_refs; data->want_obj = want_obj; data->have_obj = have_obj; @@ -119,6 +122,7 @@ static void upload_pack_data_init(struct upload_pack_data *data) static void upload_pack_data_clear(struct upload_pack_data *data) { + string_list_clear(&data->symref, 1); string_list_clear(&data->wanted_refs, 1); object_array_clear(&data->want_obj); object_array_clear(&data->have_obj); @@ -1142,7 +1146,6 @@ static int upload_pack_config(const char *var, const char *value, void *unused) void upload_pack(struct upload_pack_options *options) { - struct string_list symref = STRING_LIST_INIT_DUP; struct packet_reader reader; struct upload_pack_data data; @@ -1154,19 +1157,18 @@ void upload_pack(struct upload_pack_options *options) upload_pack_data_init(&data); - head_ref_namespaced(find_symref, &symref); + head_ref_namespaced(find_symref, &data.symref); if (options->advertise_refs || !stateless_rpc) { reset_timeout(); - head_ref_namespaced(send_ref, 
&symref); - for_each_namespaced_ref(send_ref, &symref); + head_ref_namespaced(send_ref, &data.symref); + for_each_namespaced_ref(send_ref, &data.symref); advertise_shallow_grafts(1); packet_flush(1); } else { head_ref_namespaced(check_ref, NULL); for_each_namespaced_ref(check_ref, NULL); } - string_list_clear(&symref, 1); if (!options->advertise_refs) { packet_reader_init(&reader, 0, NULL, 0, From 762f92767c5d1f1d708f6656363f26bfb700f83a Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Fri, 15 May 2020 12:04:50 +0200 Subject: [PATCH 033/447] upload-pack: pass upload_pack_data to send_ref() As we cleanup 'upload-pack.c' by using 'struct upload_pack_data' more thoroughly, let's pass that struct to send_ref(), so that this function, and the functions it calls, can use all the fields of the struct in followup commits. Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- upload-pack.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/upload-pack.c b/upload-pack.c index c7e35a7fc9eef4..bc259f17138f15 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -1059,6 +1059,7 @@ static int send_ref(const char *refname, const struct object_id *oid, " deepen-relative no-progress include-tag multi_ack_detailed"; const char *refname_nons = strip_namespace(refname); struct object_id peeled; + struct upload_pack_data *data = cb_data; if (mark_our_ref(refname_nons, refname, oid)) return 0; @@ -1066,7 +1067,7 @@ static int send_ref(const char *refname, const struct object_id *oid, if (capabilities) { struct strbuf symref_info = STRBUF_INIT; - format_symref_info(&symref_info, cb_data); + format_symref_info(&symref_info, &data->symref); packet_write_fmt(1, "%s %s%c%s%s%s%s%s%s agent=%s\n", oid_to_hex(oid), refname_nons, 0, capabilities, @@ -1161,8 +1162,8 @@ void upload_pack(struct upload_pack_options *options) if (options->advertise_refs || !stateless_rpc) { reset_timeout(); - head_ref_namespaced(send_ref, &data.symref); - 
for_each_namespaced_ref(send_ref, &data.symref); + head_ref_namespaced(send_ref, &data); + for_each_namespaced_ref(send_ref, &data); advertise_shallow_grafts(1); packet_flush(1); } else { From b08c97423fa3f55c1b40eb4b3604c4f990dd955b Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Fri, 15 May 2020 12:04:51 +0200 Subject: [PATCH 034/447] upload-pack: pass upload_pack_data to check_non_tip() As we cleanup 'upload-pack.c' by using 'struct upload_pack_data' more thoroughly, let's pass that struct to check_non_tip(), so that this function and the functions it calls, can use all the fields of the struct in followup commits. Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- upload-pack.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/upload-pack.c b/upload-pack.c index bc259f17138f15..680c38cc134f16 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -654,8 +654,7 @@ static int has_unreachable(struct object_array *src) return 1; } -static void check_non_tip(struct object_array *want_obj, - struct packet_writer *writer) +static void check_non_tip(struct upload_pack_data *data) { int i; @@ -666,16 +665,16 @@ static void check_non_tip(struct object_array *want_obj, */ if (!stateless_rpc && !(allow_unadvertised_object_request & ALLOW_REACHABLE_SHA1)) goto error; - if (!has_unreachable(want_obj)) + if (!has_unreachable(&data->want_obj)) /* All the non-tip ones are ancestors of what we advertised */ return; error: /* Pick one of them (we know there at least is one) */ - for (i = 0; i < want_obj->nr; i++) { - struct object *o = want_obj->objects[i].item; + for (i = 0; i < data->want_obj.nr; i++) { + struct object *o = data->want_obj.objects[i].item; if (!is_our_ref(o)) { - packet_writer_error(writer, + packet_writer_error(&data->writer, "upload-pack: not our ref %s", oid_to_hex(&o->oid)); die("git upload-pack: not our ref %s", @@ -1003,7 +1002,7 @@ static void receive_needs(struct upload_pack_data *data, * by another process 
that handled the initial request. */ if (has_non_tip) - check_non_tip(&data->want_obj, &data->writer); + check_non_tip(data); if (!use_sideband && daemon_mode) no_progress = 1; From df654abcccc31aed529a83dd9a0865dc93e79eeb Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Fri, 15 May 2020 12:04:52 +0200 Subject: [PATCH 035/447] upload-pack: remove static variable 'stateless_rpc' As we cleanup 'upload-pack.c' by using 'struct upload_pack_data' more thoroughly, let's remove the 'stateless_rpc' static variable, as we can now use the field of 'struct upload_pack_data' with the same name instead. Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- upload-pack.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/upload-pack.c b/upload-pack.c index 680c38cc134f16..4ac40c5b043ef7 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -63,7 +63,6 @@ static int keepalive = 5; * otherwise maximum packet size (up to 65520 bytes). */ static int use_sideband; -static int stateless_rpc; static const char *pack_objects_hook; static int filter_capability_requested; @@ -449,7 +448,7 @@ static int get_common_commits(struct upload_pack_data *data, packet_write_fmt(1, "ACK %s\n", last_hex); return 0; } - if (stateless_rpc) + if (data->stateless_rpc) exit(0); got_common = 0; got_other = 0; @@ -663,7 +662,8 @@ static void check_non_tip(struct upload_pack_data *data) * uploadpack.allowReachableSHA1InWant, * non-tip requests can never happen. */ - if (!stateless_rpc && !(allow_unadvertised_object_request & ALLOW_REACHABLE_SHA1)) + if (!data->stateless_rpc + && !(allow_unadvertised_object_request & ALLOW_REACHABLE_SHA1)) goto error; if (!has_unreachable(&data->want_obj)) /* All the non-tip ones are ancestors of what we advertised */ @@ -1074,7 +1074,7 @@ static int send_ref(const char *refname, const struct object_id *oid, " allow-tip-sha1-in-want" : "", (allow_unadvertised_object_request & ALLOW_REACHABLE_SHA1) ? 
" allow-reachable-sha1-in-want" : "", - stateless_rpc ? " no-done" : "", + data->stateless_rpc ? " no-done" : "", symref_info.buf, allow_filter ? " filter" : "", git_user_agent_sanitized()); @@ -1149,7 +1149,6 @@ void upload_pack(struct upload_pack_options *options) struct packet_reader reader; struct upload_pack_data data; - stateless_rpc = options->stateless_rpc; timeout = options->timeout; daemon_mode = options->daemon_mode; @@ -1157,9 +1156,11 @@ void upload_pack(struct upload_pack_options *options) upload_pack_data_init(&data); + data.stateless_rpc = options->stateless_rpc; + head_ref_namespaced(find_symref, &data.symref); - if (options->advertise_refs || !stateless_rpc) { + if (options->advertise_refs || !data.stateless_rpc) { reset_timeout(); head_ref_namespaced(send_ref, &data); for_each_namespaced_ref(send_ref, &data); From c9f0325905f93323f52a56d1e2b3349489740ed4 Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Fri, 15 May 2020 12:04:53 +0200 Subject: [PATCH 036/447] upload-pack: pass upload_pack_data to create_pack_file() As we cleanup 'upload-pack.c' by using 'struct upload_pack_data' more thoroughly, let's pass that struct to create_pack_file(), so that this function, and the function it calls, can use all the fields of the struct. 
Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- upload-pack.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/upload-pack.c b/upload-pack.c index 4ac40c5b043ef7..93cf4b1fe53cd1 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -161,9 +161,7 @@ static int write_one_shallow(const struct commit_graft *graft, void *cb_data) return 0; } -static void create_pack_file(const struct object_array *have_obj, - const struct object_array *want_obj, - struct list_objects_filter_options *filter_options) +static void create_pack_file(struct upload_pack_data *pack_data) { struct child_process pack_objects = CHILD_PROCESS_INIT; char data[8193], progress[128]; @@ -200,9 +198,9 @@ static void create_pack_file(const struct object_array *have_obj, argv_array_push(&pack_objects.args, "--delta-base-offset"); if (use_include_tag) argv_array_push(&pack_objects.args, "--include-tag"); - if (filter_options->choice) { + if (pack_data->filter_options.choice) { const char *spec = - expand_list_objects_filter_spec(filter_options); + expand_list_objects_filter_spec(&pack_data->filter_options); if (pack_objects.use_shell) { struct strbuf buf = STRBUF_INIT; sq_quote_buf(&buf, spec); @@ -226,13 +224,13 @@ static void create_pack_file(const struct object_array *have_obj, if (shallow_nr) for_each_commit_graft(write_one_shallow, pipe_fd); - for (i = 0; i < want_obj->nr; i++) + for (i = 0; i < pack_data->want_obj.nr; i++) fprintf(pipe_fd, "%s\n", - oid_to_hex(&want_obj->objects[i].item->oid)); + oid_to_hex(&pack_data->want_obj.objects[i].item->oid)); fprintf(pipe_fd, "--not\n"); - for (i = 0; i < have_obj->nr; i++) + for (i = 0; i < pack_data->have_obj.nr; i++) fprintf(pipe_fd, "%s\n", - oid_to_hex(&have_obj->objects[i].item->oid)); + oid_to_hex(&pack_data->have_obj.objects[i].item->oid)); for (i = 0; i < extra_edge_obj.nr; i++) fprintf(pipe_fd, "%s\n", oid_to_hex(&extra_edge_obj.objects[i].item->oid)); @@ -1179,9 +1177,7 @@ void 
upload_pack(struct upload_pack_options *options) receive_needs(&data, &reader); if (data.want_obj.nr) { get_common_commits(&data, &reader); - create_pack_file(&data.have_obj, - &data.want_obj, - &data.filter_options); + create_pack_file(&data); } } @@ -1525,9 +1521,7 @@ int upload_pack_v2(struct repository *r, struct argv_array *keys, send_shallow_info(&data); packet_writer_write(&data.writer, "packfile\n"); - create_pack_file(&data.have_obj, - &data.want_obj, - &data.filter_options); + create_pack_file(&data); state = FETCH_DONE; break; case FETCH_DONE: From 7a516764a335412f0a96bc536cf1b408515e5ddc Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Fri, 15 May 2020 12:04:54 +0200 Subject: [PATCH 037/447] upload-pack: use upload_pack_data fields in receive_needs() As we cleanup 'upload-pack.c' by using 'struct upload_pack_data' more thoroughly, let's use fields from this struct in receive_needs(), instead of local variables with the same name and purpose. Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- upload-pack.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/upload-pack.c b/upload-pack.c index 93cf4b1fe53cd1..401c9e6c4b7015 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -911,13 +911,7 @@ static int process_deepen_not(const char *line, struct string_list *deepen_not, static void receive_needs(struct upload_pack_data *data, struct packet_reader *reader) { - struct object_array shallows = OBJECT_ARRAY_INIT; - struct string_list deepen_not = STRING_LIST_INIT_DUP; - int depth = 0; int has_non_tip = 0; - timestamp_t deepen_since = 0; - int deepen_rev_list = 0; - int deepen_relative = 0; shallow_nr = 0; for (;;) { @@ -930,13 +924,13 @@ static void receive_needs(struct upload_pack_data *data, if (packet_reader_read(reader) != PACKET_READ_NORMAL) break; - if (process_shallow(reader->line, &shallows)) + if (process_shallow(reader->line, &data->shallows)) continue; - if (process_deepen(reader->line, 
&depth)) + if (process_deepen(reader->line, &data->depth)) continue; - if (process_deepen_since(reader->line, &deepen_since, &deepen_rev_list)) + if (process_deepen_since(reader->line, &data->deepen_since, &data->deepen_rev_list)) continue; - if (process_deepen_not(reader->line, &deepen_not, &deepen_rev_list)) + if (process_deepen_not(reader->line, &data->deepen_not, &data->deepen_rev_list)) continue; if (skip_prefix(reader->line, "filter ", &arg)) { @@ -953,7 +947,7 @@ static void receive_needs(struct upload_pack_data *data, "expected to get object ID, not '%s'", reader->line); if (parse_feature_request(features, "deepen-relative")) - deepen_relative = 1; + data->deepen_relative = 1; if (parse_feature_request(features, "multi_ack_detailed")) multi_ack = 2; else if (parse_feature_request(features, "multi_ack")) @@ -1005,14 +999,18 @@ static void receive_needs(struct upload_pack_data *data, if (!use_sideband && daemon_mode) no_progress = 1; - if (depth == 0 && !deepen_rev_list && shallows.nr == 0) + if (data->depth == 0 && !data->deepen_rev_list && data->shallows.nr == 0) return; - if (send_shallow_list(&data->writer, depth, deepen_rev_list, deepen_since, - &deepen_not, deepen_relative, &shallows, + if (send_shallow_list(&data->writer, + data->depth, + data->deepen_rev_list, + data->deepen_since, + &data->deepen_not, + data->deepen_relative, + &data->shallows, &data->want_obj)) packet_flush(1); - object_array_clear(&shallows); } /* return non-zero if the ref is hidden, otherwise 0 */ From aba8187e4d6a1c517e327a1512589e540943e4c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= Date: Mon, 18 May 2020 11:44:15 -0700 Subject: [PATCH 038/447] t/helper: teach test-regex to report pattern errors (like REG_ILLSEQ) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 7187c7bbb8 (t4210: skip i18n tests that don't work on FreeBSD, 2019-11-27) adds a REG_ILLSEQ prerequisite to avoid failures from the tests 
added in 4e2443b181 (log tests: test regex backends in "--encode=" tests, 2019-06-28), but hardcodes it to be only enabled in FreeBSD. Instead of hardcoding the affected platform, teach the test-regex helper how to validate a pattern and report back, so it can be used to detect the same issue in other affected systems (like DragonFlyBSD or macOS). While at it, refactor the tool so it can report back the source of the errors it finds, and can be invoked also in a --silent mode, when needed, for backward compatibility. A missing flag has been added and the code reformatted, as well as updates to the way the parameters are handled, for consistency. To minimize changes, it is assumed the regcomp error is of the right type since we control the only caller, and is also assumed to affect both basic and extended syntax (only basic is tested, but both behave the same in all three affected platforms since they use the same function). Based-on-patch-by: Junio C Hamano Signed-off-by: Carlo Marcelo Arenas Belón Signed-off-by: Junio C Hamano --- t/helper/test-regex.c | 94 ++++++++++++++++++++++++++++++------------- 1 file changed, 66 insertions(+), 28 deletions(-) diff --git a/t/helper/test-regex.c b/t/helper/test-regex.c index 10284cc56fa9f6..d6f28ca8d148d9 100644 --- a/t/helper/test-regex.c +++ b/t/helper/test-regex.c @@ -1,5 +1,4 @@ #include "test-tool.h" -#include "git-compat-util.h" #include "gettext.h" struct reg_flag { @@ -8,12 +7,13 @@ struct reg_flag { }; static struct reg_flag reg_flags[] = { - { "EXTENDED", REG_EXTENDED }, - { "NEWLINE", REG_NEWLINE }, - { "ICASE", REG_ICASE }, - { "NOTBOL", REG_NOTBOL }, + { "EXTENDED", REG_EXTENDED }, + { "NEWLINE", REG_NEWLINE }, + { "ICASE", REG_ICASE }, + { "NOTBOL", REG_NOTBOL }, + { "NOTEOL", REG_NOTEOL }, #ifdef REG_STARTEND - { "STARTEND", REG_STARTEND }, + { "STARTEND", REG_STARTEND }, #endif { NULL, 0 } }; @@ -41,36 +41,74 @@ int cmd__regex(int argc, const char **argv) { const char *pat; const char *str; - int flags = 0; 
+ int ret, silent = 0, flags = 0; regex_t r; regmatch_t m[1]; - - if (argc == 2 && !strcmp(argv[1], "--bug")) - return test_regex_bug(); - else if (argc < 3) - usage("test-tool regex --bug\n" - "test-tool regex []"); + char errbuf[64]; argv++; - pat = *argv++; - str = *argv++; - while (*argv) { - struct reg_flag *rf; - for (rf = reg_flags; rf->name; rf++) - if (!strcmp(*argv, rf->name)) { - flags |= rf->flag; - break; - } - if (!rf->name) - die("do not recognize %s", *argv); + argc--; + + if (!argc) + goto usage; + + if (!strcmp(*argv, "--bug")) { + if (argc == 1) + return test_regex_bug(); + else + goto usage; + } + if (!strcmp(*argv, "--silent")) { + silent = 1; argv++; + argc--; + } + if (!argc) + goto usage; + + pat = *argv++; + if (argc == 1) + str = NULL; + else { + str = *argv++; + while (*argv) { + struct reg_flag *rf; + for (rf = reg_flags; rf->name; rf++) + if (!strcmp(*argv, rf->name)) { + flags |= rf->flag; + break; + } + if (!rf->name) + die("do not recognize flag %s", *argv); + argv++; + } } git_setup_gettext(); - if (regcomp(&r, pat, flags)) - die("failed regcomp() for pattern '%s'", pat); - if (regexec(&r, str, 1, m, 0)) - return 1; + ret = regcomp(&r, pat, flags); + if (ret) { + if (silent) + return ret; + + regerror(ret, &r, errbuf, sizeof(errbuf)); + die("failed regcomp() for pattern '%s' (%s)", pat, errbuf); + } + if (!str) + return 0; + + ret = regexec(&r, str, 1, m, 0); + if (ret) { + if (silent || ret == REG_NOMATCH) + return ret; + + regerror(ret, &r, errbuf, sizeof(errbuf)); + die("failed regexec() for subject '%s' (%s)", str, errbuf); + } return 0; +usage: + usage("\ttest-tool regex --bug\n" + "\ttest-tool regex [--silent] \n" + "\ttest-tool regex [--silent] []"); + return -1; } From c4c2a96ec73775b1a4d4d850bb9ae7f50bc6912e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= Date: Mon, 18 May 2020 11:44:16 -0700 Subject: [PATCH 039/447] t4210: detect REG_ILLSEQ dynamically and skip affected tests MIME-Version: 
1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 7187c7bbb8 (t4210: skip i18n tests that don't work on FreeBSD, 2019-11-27) adds a REG_ILLSEQ prerequisite, and to do that copies the common branch in test-lib and expands it to include it in a special case for FreeBSD. Instead, test for it using a previously added extension to test-tool and use that, together with a function that identifies when regcomp/regexec will be called with broken patterns to avoid any test that would otherwise rely on undefined behaviour. The description of the first test which wasn't accurate has been corrected, and the test rearranged for clarity, including a helper function that avoids overly long lines. Only the affected engines will have their tests suppressed, also including "fixed" if the PCRE optimization that uses LIBPCRE2 since b65abcafc7 (grep: use PCRE v2 for optimized fixed-string search, 2019-07-01) is not available. Helped-by: Eric Sunshine Signed-off-by: Carlo Marcelo Arenas Belón Signed-off-by: Junio C Hamano --- t/t4210-log-i18n.sh | 77 ++++++++++++++++++++++++++++++++++----------- t/test-lib.sh | 6 ---- 2 files changed, 59 insertions(+), 24 deletions(-) diff --git a/t/t4210-log-i18n.sh b/t/t4210-log-i18n.sh index c3792081e627ff..d2dfcf164e25b8 100755 --- a/t/t4210-log-i18n.sh +++ b/t/t4210-log-i18n.sh @@ -10,6 +10,13 @@ latin1_e=$(printf '\351') # invalid UTF-8 invalid_e=$(printf '\303\50)') # ")" at end to close opening "(" +have_reg_illseq= +if test_have_prereq GETTEXT_LOCALE && + ! 
LC_ALL=$is_IS_locale test-tool regex --silent $latin1_e +then + have_reg_illseq=1 +fi + test_expect_success 'create commits in different encodings' ' test_tick && cat >msg <<-EOF && @@ -51,43 +58,77 @@ test_expect_success !MINGW 'log --grep does not find non-reencoded values (utf8) test_must_be_empty actual ' -test_expect_success !MINGW 'log --grep does not find non-reencoded values (latin1)' ' +test_expect_success 'log --grep does not find non-reencoded values (latin1)' ' git log --encoding=ISO-8859-1 --format=%s --grep=$utf8_e >actual && test_must_be_empty actual ' +triggers_undefined_behaviour () { + local engine=$1 + + case $engine in + fixed) + if test -n "$have_reg_illseq" && + ! test_have_prereq LIBPCRE2 + then + return 0 + fi + ;; + basic|extended) + if test -n "$have_reg_illseq" + then + return 0 + fi + ;; + esac + return 1 +} + +mismatched_git_log () { + local pattern=$1 + + LC_ALL=$is_IS_locale git log --encoding=ISO-8859-1 --format=%s \ + --grep=$pattern +} + for engine in fixed basic extended perl do prereq= if test $engine = "perl" then - prereq="PCRE" - else - prereq="" + prereq=PCRE fi force_regex= if test $engine != "fixed" then - force_regex=.* + force_regex='.*' fi - test_expect_success !MINGW,!REGEX_ILLSEQ,GETTEXT_LOCALE,$prereq "-c grep.patternType=$engine log --grep does not find non-reencoded values (latin1 + locale)" " - cat >expect <<-\EOF && - latin1 - utf8 - EOF - LC_ALL=\"$is_IS_locale\" git -c grep.patternType=$engine log --encoding=ISO-8859-1 --format=%s --grep=\"$force_regex$latin1_e\" >actual && - test_cmp expect actual - " - test_expect_success !MINGW,GETTEXT_LOCALE,$prereq "-c grep.patternType=$engine log --grep does not find non-reencoded values (latin1 + locale)" " - LC_ALL=\"$is_IS_locale\" git -c grep.patternType=$engine log --encoding=ISO-8859-1 --format=%s --grep=\"$force_regex$utf8_e\" >actual && - test_must_be_empty actual + test_expect_success $prereq "config grep.patternType=$engine" " + git config grep.patternType 
$engine " - test_expect_success !MINGW,!REGEX_ILLSEQ,GETTEXT_LOCALE,$prereq "-c grep.patternType=$engine log --grep does not die on invalid UTF-8 value (latin1 + locale + invalid needle)" " - LC_ALL=\"$is_IS_locale\" git -c grep.patternType=$engine log --encoding=ISO-8859-1 --format=%s --grep=\"$force_regex$invalid_e\" >actual && + test_expect_success GETTEXT_LOCALE,$prereq "log --grep does not find non-reencoded values (latin1 + locale)" " + mismatched_git_log '$force_regex$utf8_e' >actual && test_must_be_empty actual " + + if ! triggers_undefined_behaviour $engine + then + test_expect_success !MINGW,GETTEXT_LOCALE,$prereq "log --grep searches in log output encoding (latin1 + locale)" " + cat >expect <<-\EOF && + latin1 + utf8 + EOF + mismatched_git_log '$force_regex$latin1_e' >actual && + test_cmp expect actual + " + + test_expect_success GETTEXT_LOCALE,$prereq "log --grep does not die on invalid UTF-8 value (latin1 + locale + invalid needle)" " + mismatched_git_log '$force_regex$invalid_e' >actual && + test_must_be_empty actual + " + fi done test_done diff --git a/t/test-lib.sh b/t/test-lib.sh index 0ea1e5a05edd86..81473fea1dafaf 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -1454,12 +1454,6 @@ case $uname_s in test_set_prereq SED_STRIPS_CR test_set_prereq GREP_STRIPS_CR ;; -FreeBSD) - test_set_prereq REGEX_ILLSEQ - test_set_prereq POSIXPERM - test_set_prereq BSLASHPSPEC - test_set_prereq EXECKEEPSPID - ;; *) test_set_prereq POSIXPERM test_set_prereq BSLASHPSPEC From 8777616e4db2868609bc42a4c66cc69d23532dbc Mon Sep 17 00:00:00 2001 From: Andrew Ng Date: Tue, 19 May 2020 14:05:35 +0100 Subject: [PATCH 040/447] merge: optimization to skip evaluate_result for single strategy For a merge with a single strategy, the result of evaluate_result() is effectively not used and therefore is not needed, so avoid altogether. On Windows, this optimization can halve the time required to perform a recursive merge of a single commit with the LLVM repo. 
Signed-off-by: Andrew Ng Signed-off-by: Junio C Hamano --- builtin/merge.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/merge.c b/builtin/merge.c index d127d2225f897f..826e2c26560b32 100644 --- a/builtin/merge.c +++ b/builtin/merge.c @@ -1629,7 +1629,7 @@ int cmd_merge(int argc, const char **argv, const char *prefix) } merge_was_ok = 1; } - cnt = evaluate_result(); + cnt = (use_strategies_nr > 1) ? evaluate_result() : 0; if (best_cnt <= 0 || cnt <= best_cnt) { best_strategy = use_strategies[i]->name; best_cnt = cnt; From dde72f94bcba8f84f4ea6523b67302df6638c9c0 Mon Sep 17 00:00:00 2001 From: Denton Liu Date: Tue, 19 May 2020 06:53:56 -0400 Subject: [PATCH 041/447] transport: extract common fetch_pack() call In the switch statement, the difference between the `protocol_v2` and `protocol_v{1,0}` arms is a preparatory call to die_if_server_options() in the latter. The fetch_pack() call is identical in both arms. However, since this fetch_pack() call has so many parameters, it is not immediately obvious that the call is identical in both cases. Extract the common fetch_pack() call out of the switch statement so that code duplication is reduced and the logic is more clear for future readers. While we're at it, rewrite the switch statement as an if-else tower for increased clarity. Signed-off-by: Denton Liu Signed-off-by: Junio C Hamano --- transport.c | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/transport.c b/transport.c index 15f5ba4e8f22c6..431a93caef710f 100644 --- a/transport.c +++ b/transport.c @@ -369,24 +369,15 @@ static int fetch_refs_via_pack(struct transport *transport, refs_tmp = handshake(transport, 0, NULL, must_list_refs); } - switch (data->version) { - case protocol_v2: - refs = fetch_pack(&args, data->fd, - refs_tmp ? 
refs_tmp : transport->remote_refs, - to_fetch, nr_heads, &data->shallow, - &transport->pack_lockfile, data->version); - break; - case protocol_v1: - case protocol_v0: - die_if_server_options(transport); - refs = fetch_pack(&args, data->fd, - refs_tmp ? refs_tmp : transport->remote_refs, - to_fetch, nr_heads, &data->shallow, - &transport->pack_lockfile, data->version); - break; - case protocol_unknown_version: + if (data->version == protocol_unknown_version) BUG("unknown protocol version"); - } + else if (data->version <= protocol_v1) + die_if_server_options(transport); + + refs = fetch_pack(&args, data->fd, + refs_tmp ? refs_tmp : transport->remote_refs, + to_fetch, nr_heads, &data->shallow, + &transport->pack_lockfile, data->version); close(data->fd[0]); close(data->fd[1]); From 101736a14c9ad734fe24efed7513189849cd22eb Mon Sep 17 00:00:00 2001 From: Denton Liu Date: Tue, 19 May 2020 06:53:57 -0400 Subject: [PATCH 042/447] pkt-line: extern packet_length() In a future commit, we will be manually processing packets and we will need to access the length header. In order to simplify this, extern packet_length() so that the logic can be reused. Change the function parameter from `const char *linelen` to `const char lenbuf_hex[4]`. Even though these two types behave identically as function parameters, use the array notation to semantically indicate exactly what this function is expecting as an argument. Also, rename it from linelen to lenbuf_hex as the former sounds like it should be an integral type which is misleading. 
Signed-off-by: Denton Liu Signed-off-by: Junio C Hamano --- pkt-line.c | 6 +++--- pkt-line.h | 9 +++++++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/pkt-line.c b/pkt-line.c index a0e87b1e81408e..3beab1dc6b9847 100644 --- a/pkt-line.c +++ b/pkt-line.c @@ -306,10 +306,10 @@ static int get_packet_data(int fd, char **src_buf, size_t *src_size, return ret; } -static int packet_length(const char *linelen) +int packet_length(const char lenbuf_hex[4]) { - int val = hex2chr(linelen); - return (val < 0) ? val : (val << 8) | hex2chr(linelen + 2); + int val = hex2chr(lenbuf_hex); + return (val < 0) ? val : (val << 8) | hex2chr(lenbuf_hex + 2); } enum packet_read_status packet_read_with_status(int fd, char **src_buffer, diff --git a/pkt-line.h b/pkt-line.h index fef3a0d792d31b..a72af9112ba1ce 100644 --- a/pkt-line.h +++ b/pkt-line.h @@ -74,6 +74,15 @@ int write_packetized_from_buf(const char *src_in, size_t len, int fd_out); int packet_read(int fd, char **src_buffer, size_t *src_len, char *buffer, unsigned size, int options); +/* + * Convert a four hex digit packet line length header into its numeric + * representation. + * + * If lenbuf_hex contains non-hex characters, return -1. Otherwise, return the + * numeric value of the length header. + */ +int packet_length(const char lenbuf_hex[4]); + /* * Read a packetized line into a buffer like the 'packet_read()' function but * returns an 'enum packet_read_status' which indicates the status of the read. 
From d1eb22da09e0ffebbd6c16ef1d0e952ec7b88b8d Mon Sep 17 00:00:00 2001 From: Han-Wen Nienhuys Date: Wed, 20 May 2020 17:36:07 +0000 Subject: [PATCH 043/447] refs.h: clarify reflog iteration order Signed-off-by: Han-Wen Nienhuys Signed-off-by: Junio C Hamano --- refs.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/refs.h b/refs.h index a92d2c74c8306a..e010f8aec28aa4 100644 --- a/refs.h +++ b/refs.h @@ -432,19 +432,35 @@ int delete_refs(const char *msg, struct string_list *refnames, int refs_delete_reflog(struct ref_store *refs, const char *refname); int delete_reflog(const char *refname); -/* iterate over reflog entries */ +/* + * Callback to process a reflog entry found by the iteration functions (see + * below) + */ typedef int each_reflog_ent_fn( struct object_id *old_oid, struct object_id *new_oid, const char *committer, timestamp_t timestamp, int tz, const char *msg, void *cb_data); +/* Iterate over reflog entries in the log for `refname`. */ + +/* oldest entry first */ int refs_for_each_reflog_ent(struct ref_store *refs, const char *refname, each_reflog_ent_fn fn, void *cb_data); + +/* youngest entry first */ int refs_for_each_reflog_ent_reverse(struct ref_store *refs, const char *refname, each_reflog_ent_fn fn, void *cb_data); + +/* + * Iterate over reflog entries in the log for `refname` in the main ref store. + */ + +/* oldest entry first */ int for_each_reflog_ent(const char *refname, each_reflog_ent_fn fn, void *cb_data); + +/* youngest entry first */ int for_each_reflog_ent_reverse(const char *refname, each_reflog_ent_fn fn, void *cb_data); /* From cdb73ca56f21835b93345442a96c97f6a0589ef7 Mon Sep 17 00:00:00 2001 From: Han-Wen Nienhuys Date: Wed, 20 May 2020 17:36:08 +0000 Subject: [PATCH 044/447] t: use update-ref and show-ref for reading/writing refs Reading and writing .git/refs/* assumes that refs are stored in the 'files' ref backend. 
Signed-off-by: Han-Wen Nienhuys Signed-off-by: Junio C Hamano --- t/t0002-gitfile.sh | 2 +- t/t1400-update-ref.sh | 32 ++++++++++++++++---------------- t/t1506-rev-parse-diagnosis.sh | 2 +- t/t6050-replace.sh | 2 +- t/t9020-remote-svn.sh | 4 ++-- 5 files changed, 21 insertions(+), 21 deletions(-) diff --git a/t/t0002-gitfile.sh b/t/t0002-gitfile.sh index 0aa9908ea12d75..960ed150cb5905 100755 --- a/t/t0002-gitfile.sh +++ b/t/t0002-gitfile.sh @@ -62,7 +62,7 @@ test_expect_success 'check commit-tree' ' ' test_expect_success 'check rev-list' ' - echo $SHA >"$REAL/HEAD" && + git update-ref "HEAD" "$SHA" && test "$SHA" = "$(git rev-list HEAD)" ' diff --git a/t/t1400-update-ref.sh b/t/t1400-update-ref.sh index e1197ac8189b94..27171f82612916 100755 --- a/t/t1400-update-ref.sh +++ b/t/t1400-update-ref.sh @@ -37,15 +37,15 @@ test_expect_success setup ' test_expect_success "create $m" ' git update-ref $m $A && - test $A = $(cat .git/$m) + test $A = $(git show-ref -s --verify $m) ' test_expect_success "create $m with oldvalue verification" ' git update-ref $m $B $A && - test $B = $(cat .git/$m) + test $B = $(git show-ref -s --verify $m) ' test_expect_success "fail to delete $m with stale ref" ' test_must_fail git update-ref -d $m $A && - test $B = "$(cat .git/$m)" + test $B = "$(git show-ref -s --verify $m)" ' test_expect_success "delete $m" ' test_when_finished "rm -f .git/$m" && @@ -56,7 +56,7 @@ test_expect_success "delete $m" ' test_expect_success "delete $m without oldvalue verification" ' test_when_finished "rm -f .git/$m" && git update-ref $m $A && - test $A = $(cat .git/$m) && + test $A = $(git show-ref -s --verify $m) && git update-ref -d $m && test_path_is_missing .git/$m ' @@ -69,15 +69,15 @@ test_expect_success "fail to create $n" ' test_expect_success "create $m (by HEAD)" ' git update-ref HEAD $A && - test $A = $(cat .git/$m) + test $A = $(git show-ref -s --verify $m) ' test_expect_success "create $m (by HEAD) with oldvalue verification" ' git update-ref HEAD $B 
$A && - test $B = $(cat .git/$m) + test $B = $(git show-ref -s --verify $m) ' test_expect_success "fail to delete $m (by HEAD) with stale ref" ' test_must_fail git update-ref -d HEAD $A && - test $B = $(cat .git/$m) + test $B = $(git show-ref -s --verify $m) ' test_expect_success "delete $m (by HEAD)" ' test_when_finished "rm -f .git/$m" && @@ -178,14 +178,14 @@ test_expect_success '--no-create-reflog overrides core.logAllRefUpdates=always' test_expect_success "create $m (by HEAD)" ' git update-ref HEAD $A && - test $A = $(cat .git/$m) + test $A = $(git show-ref -s --verify $m) ' test_expect_success 'pack refs' ' git pack-refs --all ' test_expect_success "move $m (by HEAD)" ' git update-ref HEAD $B $A && - test $B = $(cat .git/$m) + test $B = $(git show-ref -s --verify $m) ' test_expect_success "delete $m (by HEAD) should remove both packed and loose $m" ' test_when_finished "rm -f .git/$m" && @@ -255,7 +255,7 @@ test_expect_success '(not) change HEAD with wrong SHA1' ' ' test_expect_success "(not) changed .git/$m" ' test_when_finished "rm -f .git/$m" && - ! test $B = $(cat .git/$m) + ! 
test $B = $(git show-ref -s --verify $m) ' rm -f .git/logs/refs/heads/master @@ -263,19 +263,19 @@ test_expect_success "create $m (logged by touch)" ' test_config core.logAllRefUpdates false && GIT_COMMITTER_DATE="2005-05-26 23:30" \ git update-ref --create-reflog HEAD $A -m "Initial Creation" && - test $A = $(cat .git/$m) + test $A = $(git show-ref -s --verify $m) ' test_expect_success "update $m (logged by touch)" ' test_config core.logAllRefUpdates false && GIT_COMMITTER_DATE="2005-05-26 23:31" \ git update-ref HEAD $B $A -m "Switch" && - test $B = $(cat .git/$m) + test $B = $(git show-ref -s --verify $m) ' test_expect_success "set $m (logged by touch)" ' test_config core.logAllRefUpdates false && GIT_COMMITTER_DATE="2005-05-26 23:41" \ git update-ref HEAD $A && - test $A = $(cat .git/$m) + test $A = $(git show-ref -s --verify $m) ' test_expect_success 'empty directory removal' ' @@ -319,19 +319,19 @@ test_expect_success "create $m (logged by config)" ' test_config core.logAllRefUpdates true && GIT_COMMITTER_DATE="2005-05-26 23:32" \ git update-ref HEAD $A -m "Initial Creation" && - test $A = $(cat .git/$m) + test $A = $(git show-ref -s --verify $m) ' test_expect_success "update $m (logged by config)" ' test_config core.logAllRefUpdates true && GIT_COMMITTER_DATE="2005-05-26 23:33" \ git update-ref HEAD'" $B $A "'-m "Switch" && - test $B = $(cat .git/$m) + test $B = $(git show-ref -s --verify $m) ' test_expect_success "set $m (logged by config)" ' test_config core.logAllRefUpdates true && GIT_COMMITTER_DATE="2005-05-26 23:43" \ git update-ref HEAD $A && - test $A = $(cat .git/$m) + test $A = $(git show-ref -s --verify $m) ' cat >expect <expect && git rev-parse foobar -- >actual && diff --git a/t/t6050-replace.sh b/t/t6050-replace.sh index e7e64e085ddcfe..c80dc10b8f1258 100755 --- a/t/t6050-replace.sh +++ b/t/t6050-replace.sh @@ -135,7 +135,7 @@ test_expect_success 'tag replaced commit' ' test_expect_success '"git fsck" works' ' git fsck master >fsck_master.out 
&& test_i18ngrep "dangling commit $R" fsck_master.out && - test_i18ngrep "dangling tag $(cat .git/refs/tags/mytag)" fsck_master.out && + test_i18ngrep "dangling tag $(git show-ref -s refs/tags/mytag)" fsck_master.out && test -z "$(git fsck)" ' diff --git a/t/t9020-remote-svn.sh b/t/t9020-remote-svn.sh index 6fca08e5e35bd3..9fcfa969a9b460 100755 --- a/t/t9020-remote-svn.sh +++ b/t/t9020-remote-svn.sh @@ -48,8 +48,8 @@ test_expect_success REMOTE_SVN 'simple fetch' ' ' test_debug ' - cat .git/refs/svn/svnsim/master - cat .git/refs/remotes/svnsim/master + git show-ref -s refs/svn/svnsim/master + git show-ref -s refs/remotes/svnsim/master ' test_expect_success REMOTE_SVN 'repeated fetch, nothing shall change' ' From 84ee4ca10d7f598542eb2c1ea2cdf6199f9bdee1 Mon Sep 17 00:00:00 2001 From: Han-Wen Nienhuys Date: Wed, 20 May 2020 17:36:09 +0000 Subject: [PATCH 045/447] refs: improve documentation for ref iterator Document some of the flag options in refs_ref_iterator_begin, and explain how ref_iterator_advance_fn should handle them. Signed-off-by: Han-Wen Nienhuys Signed-off-by: Junio C Hamano --- refs/refs-internal.h | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/refs/refs-internal.h b/refs/refs-internal.h index ff2436c0fb706d..4271362d26458f 100644 --- a/refs/refs-internal.h +++ b/refs/refs-internal.h @@ -347,9 +347,13 @@ int is_empty_ref_iterator(struct ref_iterator *ref_iterator); /* * Return an iterator that goes over each reference in `refs` for * which the refname begins with prefix. If trim is non-zero, then - * trim that many characters off the beginning of each refname. flags - * can be DO_FOR_EACH_INCLUDE_BROKEN to include broken references in - * the iteration. The output is ordered by refname. + * trim that many characters off the beginning of each refname. + * The output is ordered by refname. The following flags are supported: + * + * DO_FOR_EACH_INCLUDE_BROKEN: include broken references in + * the iteration. 
+ * + * DO_FOR_EACH_PER_WORKTREE_ONLY: only produce REF_TYPE_PER_WORKTREE refs. */ struct ref_iterator *refs_ref_iterator_begin( struct ref_store *refs, @@ -438,6 +442,14 @@ void base_ref_iterator_free(struct ref_iterator *iter); /* Virtual function declarations for ref_iterators: */ +/* + * backend-specific implementation of ref_iterator_advance. For symrefs, the + * function should set REF_ISSYMREF, and it should also dereference the symref + * to provide the OID referent. If DO_FOR_EACH_INCLUDE_BROKEN is set, symrefs + * with non-existent referents and refs pointing to non-existent object names + * should also be returned. If DO_FOR_EACH_PER_WORKTREE_ONLY, only + * REF_TYPE_PER_WORKTREE refs should be returned. + */ typedef int ref_iterator_advance_fn(struct ref_iterator *ref_iterator); typedef int ref_iterator_peel_fn(struct ref_iterator *ref_iterator, From 3697caf4b962f65430fdcd6f0d41b7fe1f579e18 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Wed, 20 May 2020 19:15:33 -0700 Subject: [PATCH 046/447] config: let feature.experimental imply protocol.version=2 Git 2.26 used protocol v2 as its default protocol, but soon after release, users noticed that the protocol v2 negotiation code was prone to fail when fetching from some remotes that are far ahead of others (such as linux-next.git versus Linus's linux.git). That has been fixed by 0b07eecf6ed (Merge branch 'jt/v2-fetch-nego-fix', 2020-05-01), but to be cautious, we are using protocol v0 as the default in 2.27 to buy some time for any other unanticipated issues to surface. To that end, let's ensure that users requesting the bleeding edge using the feature.experimental flag *do* get protocol v2. This way, we can gain experience with a wider audience for the new protocol version and be more confident when it is time to enable it by default for all users in some future Git version. 
Implementation note: this isn't with the rest of the feature.experimental options in repo-settings.c because those are tied to a repository object, whereas this code path is used for operations like "git ls-remote" that do not require a repository. Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- Documentation/config/feature.txt | 4 ++++ Documentation/config/protocol.txt | 3 ++- protocol.c | 4 ++++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/Documentation/config/feature.txt b/Documentation/config/feature.txt index 4e3a5c0cebc90d..28c33602d527fa 100644 --- a/Documentation/config/feature.txt +++ b/Documentation/config/feature.txt @@ -22,6 +22,10 @@ existing commit-graph file(s). Occasionally, these files will merge and the write may take longer. Having an updated commit-graph file helps performance of many Git commands, including `git merge-base`, `git push -f`, and `git log --graph`. ++ +* `protocol.version=2` speeds up fetches from repositories with many refs by +allowing the client to specify which refs to list before the server lists +them. feature.manyFiles:: Enable config options that optimize for repos with many files in the diff --git a/Documentation/config/protocol.txt b/Documentation/config/protocol.txt index 0b40141613e3d3..c46e9b3d00a97e 100644 --- a/Documentation/config/protocol.txt +++ b/Documentation/config/protocol.txt @@ -48,7 +48,8 @@ protocol.version:: If set, clients will attempt to communicate with a server using the specified protocol version. If the server does not support it, communication falls back to version 0. - If unset, the default is `0`. + If unset, the default is `0`, unless `feature.experimental` + is enabled, in which case the default is `2`. 
Supported versions: + -- diff --git a/protocol.c b/protocol.c index d390391ebac80a..d1dd3424bbaa56 100644 --- a/protocol.c +++ b/protocol.c @@ -17,6 +17,7 @@ static enum protocol_version parse_protocol_version(const char *value) enum protocol_version get_protocol_version_config(void) { const char *value; + int val; const char *git_test_k = "GIT_TEST_PROTOCOL_VERSION"; const char *git_test_v; @@ -30,6 +31,9 @@ enum protocol_version get_protocol_version_config(void) return version; } + if (!git_config_get_bool("feature.experimental", &val) && val) + return protocol_v2; + git_test_v = getenv(git_test_k); if (git_test_v && *git_test_v) { enum protocol_version env = parse_protocol_version(git_test_v); From 81861288a987c6e05526526fa5dc74d2e2b80a5a Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Thu, 21 May 2020 02:07:11 +0000 Subject: [PATCH 047/447] builtin/checkout: simplify metadata initialization When we call init_checkout_metadata in reset_tree, we want to pass the object ID of the commit in question so that it can be passed to filters, or if there is no commit, the tree. We anticipated this latter case, which can occur elsewhere in the checkout code, but it cannot occur here. The only case in which we do not have a commit object is when invoking git switch with --orphan. Moreover, we can only hit this code path without a commit object additionally with either --force or --discard-changes. In such a case, there is no point initializing the checkout metadata with a commit or tree because (a) there is no commit, only the empty tree, and (b) we will never use the data, since no files will be smudged when checking out a branch with no files. Pass the all-zeros object ID in this case, since we just need some value which is a valid pointer. Signed-off-by: brian m. 
carlson Reviewed-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/checkout.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/builtin/checkout.c b/builtin/checkout.c index 8bc94d392b8362..c88e651a6d170f 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -621,9 +621,7 @@ static int reset_tree(struct tree *tree, const struct checkout_opts *o, opts.src_index = &the_index; opts.dst_index = &the_index; init_checkout_metadata(&opts.meta, info->refname, - info->commit ? &info->commit->object.oid : - is_null_oid(&info->oid) ? &tree->object.oid : - &info->oid, + info->commit ? &info->commit->object.oid : &null_oid, NULL); parse_tree(tree); init_tree_desc(&tree_desc, tree->buffer, tree->size); From 8d3e33dadd359495e43cb65dfadd775987e3da26 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Thu, 21 May 2020 02:07:12 +0000 Subject: [PATCH 048/447] t2060: add a test for switch with --orphan and --discard-changes We have several code paths in the checkout code which are traversed only in this case, due to switch having different defaults from checkout. Let's add a test that the combination of options works and produces the expected behavior. Signed-off-by: brian m. 
carlson Reviewed-by: Derrick Stolee Signed-off-by: Junio C Hamano --- t/t2060-switch.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/t/t2060-switch.sh b/t/t2060-switch.sh index f9efa29dfb8c7a..2c1b8c0d6d2246 100755 --- a/t/t2060-switch.sh +++ b/t/t2060-switch.sh @@ -68,6 +68,14 @@ test_expect_success 'new orphan branch from empty' ' test_cmp expected tracked-files ' +test_expect_success 'orphan branch works with --discard-changes' ' + test_when_finished git switch master && + echo foo >foo.txt && + git switch --discard-changes --orphan new-orphan2 && + git ls-files >tracked-files && + test_must_be_empty tracked-files +' + test_expect_success 'switching ignores file of same branch name' ' test_when_finished git switch master && : >first-branch && From 2c31a7aa4459f0036aad76e4a0f2c0ddaebb4fc3 Mon Sep 17 00:00:00 2001 From: Jiuyang Xie Date: Thu, 21 May 2020 19:32:38 +0800 Subject: [PATCH 049/447] doc: fix wrong 4-byte length of pkt-line message The first four bytes of the line, the pkt-len, indicates the total length of the pkt-line in hexadecimal. Fix wrong pkt-len headers of some pkt-line messages in `http-protocol.txt` and `pack-protocol.txt`. 
Reviewed-by: Denton Liu Signed-off-by: Jiuyang Xie Signed-off-by: Junio C Hamano --- Documentation/technical/http-protocol.txt | 2 +- Documentation/technical/pack-protocol.txt | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/technical/http-protocol.txt b/Documentation/technical/http-protocol.txt index 9c5b6f0facbf41..51a79e63de9b77 100644 --- a/Documentation/technical/http-protocol.txt +++ b/Documentation/technical/http-protocol.txt @@ -216,7 +216,7 @@ smart server reply: S: 001e# service=git-upload-pack\n S: 0000 S: 004895dcfa3633004da0049d3d0fa03f80589cbcaf31 refs/heads/maint\0multi_ack\n - S: 0042d049f6c27a2244e12041955e262a404c7faba355 refs/heads/master\n + S: 003fd049f6c27a2244e12041955e262a404c7faba355 refs/heads/master\n S: 003c2cb58b79488a98d2721cea644875a8dd0026b115 refs/tags/v1.0\n S: 003fa3c2e2402b99163d1d59756e5f207ae21cccba4c refs/tags/v1.0^{}\n S: 0000 diff --git a/Documentation/technical/pack-protocol.txt b/Documentation/technical/pack-protocol.txt index d5ce4eea8a19b9..a4573d12ce8222 100644 --- a/Documentation/technical/pack-protocol.txt +++ b/Documentation/technical/pack-protocol.txt @@ -96,7 +96,7 @@ Basically what the Git client is doing to connect to an 'upload-pack' process on the server side over the Git protocol is this: $ echo -e -n \ - "0039git-upload-pack /schacon/gitbook.git\0host=example.com\0" | + "003agit-upload-pack /schacon/gitbook.git\0host=example.com\0" | nc -v example.com 9418 @@ -171,9 +171,9 @@ with a version number (if "version=1" is sent as an Extra Parameter), and a listing of each reference it has (all branches and tags) along with the object name that each reference currently points to. 
- $ echo -e -n "0044git-upload-pack /schacon/gitbook.git\0host=example.com\0\0version=1\0" | + $ echo -e -n "0045git-upload-pack /schacon/gitbook.git\0host=example.com\0\0version=1\0" | nc -v example.com 9418 - 000aversion 1 + 000eversion 1 00887217a7c7e582c46cec22a130adf4b9d7d950fba0 HEAD\0multi_ack thin-pack side-band side-band-64k ofs-delta shallow no-progress include-tag 00441d3fcd5ced445d1abc402225c0b8a1299641f497 refs/heads/integration From 86715592fd319a6e04cbb9c84415c0a052ca4344 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Thu, 21 May 2020 11:52:04 +0200 Subject: [PATCH 050/447] fsck: fix a typo in a comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported-by: Junio C Hamano Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- fsck.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fsck.c b/fsck.c index b9b3350f762fa4..bc71b0f35bd7a2 100644 --- a/fsck.c +++ b/fsck.c @@ -598,7 +598,7 @@ static int verify_ordered(unsigned mode1, const char *name1, /* * There can be non-consecutive duplicates due to the implicitly - * add slash, e.g.: + * added slash, e.g.: * * foo * foo.bar From fc12aa7bfd50fd175a0760b446de1e4514a69d46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Thu, 21 May 2020 11:52:28 +0200 Subject: [PATCH 051/447] t1450: increase test coverage of in-tree d/f detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Exercise the case of putting a conflict candidate file name back on the stack because a matching directory might yet come up later. Do that by factoring out the test code into a function to allow for more concise notation in the form of parameters indicating names of trees (with trailing slash) and blobs (without trailing slash) in no particular order (they are sorted by git mktree). Then add the new test case as a second function call. 
Fix a typo in the test title while at it ("dublicate"). Reported-by: Derrick Stolee Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- t/t1450-fsck.sh | 42 +++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh index c43bc6870a8680..a006ef1ee132ea 100755 --- a/t/t1450-fsck.sh +++ b/t/t1450-fsck.sh @@ -234,21 +234,33 @@ test_expect_success 'tree object with duplicate entries' ' test_i18ngrep "error in tree .*contains duplicate file entries" out ' -test_expect_success 'tree object with dublicate names' ' - test_when_finished "remove_object \$blob" && - test_when_finished "remove_object \$tree" && - test_when_finished "remove_object \$badtree" && - blob=$(echo blob | git hash-object -w --stdin) && - printf "100644 blob %s\t%s\n" $blob x.2 >tree && - tree=$(git mktree badtree && - printf "100644 blob %s\t%s\n" $blob x >>badtree && - printf "040000 tree %s\t%s\n" $tree x >>badtree && - badtree=$(git mktree out && - test_i18ngrep "$badtree" out && - test_i18ngrep "error in tree .*contains duplicate file entries" out -' +check_duplicate_names () { + expect=$1 && + shift && + names=$@ && + test_expect_$expect "tree object with duplicate names: $names" ' + test_when_finished "remove_object \$blob" && + test_when_finished "remove_object \$tree" && + test_when_finished "remove_object \$badtree" && + blob=$(echo blob | git hash-object -w --stdin) && + printf "100644 blob %s\t%s\n" $blob x.2 >tree && + tree=$(git mktree badtree && + badtree=$(git mktree out && + test_i18ngrep "$badtree" out && + test_i18ngrep "error in tree .*contains duplicate file entries" out + ' +} + +check_duplicate_names success x x.1 x/ +check_duplicate_names success x x.1.2 x.1/ x/ test_expect_success 'unparseable tree object' ' test_oid_cache <<-\EOF && From 3d71b1cf6031e5dc4381a067fba16167d14f567a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Thu, 21 May 2020 11:52:43 +0200 Subject: 
[PATCH 052/447] t1450: demonstrate undetected in-tree d/f conflict MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- t/t1450-fsck.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh index a006ef1ee132ea..7e70a253f86489 100755 --- a/t/t1450-fsck.sh +++ b/t/t1450-fsck.sh @@ -261,6 +261,7 @@ check_duplicate_names () { check_duplicate_names success x x.1 x/ check_duplicate_names success x x.1.2 x.1/ x/ +check_duplicate_names failure x x.1 x.1.2 x/ test_expect_success 'unparseable tree object' ' test_oid_cache <<-\EOF && From fe747043dc96fd5c151496f0e45dbf80de07793d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Thu, 21 May 2020 11:52:54 +0200 Subject: [PATCH 053/447] fsck: detect more in-tree d/f conflicts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the conflict candidate file name from the top of the stack is not a prefix of the current candidate directory then we can discard it as no matching directory can come up later. But we are not done checking the candidate directory -- the stack might still hold a matching file name, so stay in the loop and check the next candidate file name. 
Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- fsck.c | 2 +- t/t1450-fsck.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fsck.c b/fsck.c index bc71b0f35bd7a2..2095491735b792 100644 --- a/fsck.c +++ b/fsck.c @@ -620,7 +620,7 @@ static int verify_ordered(unsigned mode1, const char *name1, if (!f_name) break; if (!skip_prefix(name2, f_name, &p)) - break; + continue; if (!*p) return TREE_HAS_DUPS; if (is_less_than_slash(*p)) { diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh index 7e70a253f86489..6555a62099028b 100755 --- a/t/t1450-fsck.sh +++ b/t/t1450-fsck.sh @@ -261,7 +261,7 @@ check_duplicate_names () { check_duplicate_names success x x.1 x/ check_duplicate_names success x x.1.2 x.1/ x/ -check_duplicate_names failure x x.1 x.1.2 x/ +check_duplicate_names success x x.1 x.1.2 x/ test_expect_success 'unparseable tree object' ' test_oid_cache <<-\EOF && From fffd0cf520718fcd0315bc5e0ee7a6d4762b96b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Skytt=C3=A4?= Date: Thu, 21 May 2020 21:35:59 +0300 Subject: [PATCH 054/447] completion: don't override given stash subcommand with -p MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit df70b190 (completion: make stash -p and alias for stash push -p, 2018-04-20) wanted to make sure "git stash -p " offers the same completion as "git stash push -p ", but it did so by forcing the $subcommand to be "push" whenever the "-p" option is found on the command line. This harms any subcommand that can take the "-p" option---even when the subcommand is explicitly given, e.g. "git stash show -p", the code added by the change would overwrite the $subcommand the user gave us. Fix it by making sure that the defaulting to "push" happens only when there is no $subcommand given yet. 
Signed-off-by: Ville Skyttä Signed-off-by: Junio C Hamano --- contrib/completion/git-completion.bash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash index adb6516b6d1b1e..75724caafc55a4 100644 --- a/contrib/completion/git-completion.bash +++ b/contrib/completion/git-completion.bash @@ -2776,7 +2776,7 @@ _git_stash () local save_opts='--all --keep-index --no-keep-index --quiet --patch --include-untracked' local subcommands='push list show apply clear drop pop create branch' local subcommand="$(__git_find_on_cmdline "$subcommands save")" - if [ -n "$(__git_find_on_cmdline "-p")" ]; then + if [ -z "$subcommand" -a -n "$(__git_find_on_cmdline "-p")" ]; then subcommand="push" fi if [ -z "$subcommand" ]; then From 4d9005ff5d1de0b4e82402320aab4ff27aa23b43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= Date: Wed, 20 May 2020 16:26:27 -0700 Subject: [PATCH 055/447] bisect--helper: avoid segfault with bad syntax in `start --term-*` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 06f5608c14 (bisect--helper: `bisect_start` shell function partially in C, 2019-01-02) adds a lax parser for `git bisect start` which could result in a segfault under a bad syntax call for start with custom terms. Detect if there are enough arguments left in the command line to use for --term-{old,good,new,bad} and abort with the same syntax error the original implementation will show if not. While at it, remove an unnecessary (and incomplete) check for unknown arguments and make sure to add a test to avoid regressions. 
Signed-off-by: Carlo Marcelo Arenas Belón Acked-by: Christian Couder Signed-off-by: Junio C Hamano --- builtin/bisect--helper.c | 13 +++++++++---- t/t6030-bisect-porcelain.sh | 2 ++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/builtin/bisect--helper.c b/builtin/bisect--helper.c index 22e669e3b170f9..63b3a8389ccd69 100644 --- a/builtin/bisect--helper.c +++ b/builtin/bisect--helper.c @@ -452,9 +452,12 @@ static int bisect_start(struct bisect_terms *terms, int no_checkout, no_checkout = 1; } else if (!strcmp(arg, "--term-good") || !strcmp(arg, "--term-old")) { + i++; + if (argc <= i) + return error(_("'' is not a valid term")); must_write_terms = 1; free((void *) terms->term_good); - terms->term_good = xstrdup(argv[++i]); + terms->term_good = xstrdup(argv[i]); } else if (skip_prefix(arg, "--term-good=", &arg) || skip_prefix(arg, "--term-old=", &arg)) { must_write_terms = 1; @@ -462,16 +465,18 @@ static int bisect_start(struct bisect_terms *terms, int no_checkout, terms->term_good = xstrdup(arg); } else if (!strcmp(arg, "--term-bad") || !strcmp(arg, "--term-new")) { + i++; + if (argc <= i) + return error(_("'' is not a valid term")); must_write_terms = 1; free((void *) terms->term_bad); - terms->term_bad = xstrdup(argv[++i]); + terms->term_bad = xstrdup(argv[i]); } else if (skip_prefix(arg, "--term-bad=", &arg) || skip_prefix(arg, "--term-new=", &arg)) { must_write_terms = 1; free((void *) terms->term_bad); terms->term_bad = xstrdup(arg); - } else if (starts_with(arg, "--") && - !one_of(arg, "--term-good", "--term-bad", NULL)) { + } else if (starts_with(arg, "--")) { return error(_("unrecognized option: '%s'"), arg); } else { char *commit_id = xstrfmt("%s^{commit}", arg); diff --git a/t/t6030-bisect-porcelain.sh b/t/t6030-bisect-porcelain.sh index 55835ee4a47158..1ebf5307697cbc 100755 --- a/t/t6030-bisect-porcelain.sh +++ b/t/t6030-bisect-porcelain.sh @@ -858,7 +858,9 @@ test_expect_success 'bisect cannot mix terms' ' test_expect_success 'bisect terms 
rejects invalid terms' ' git bisect reset && + test_must_fail git bisect start --term-good && test_must_fail git bisect start --term-good invalid..term && + test_must_fail git bisect start --term-bad && test_must_fail git bisect terms --term-bad invalid..term && test_must_fail git bisect terms --term-good bad && test_must_fail git bisect terms --term-good old && From 16ab794b8257dd08906994d5cccacfa3886aa543 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sun, 24 May 2020 09:22:51 +0200 Subject: [PATCH 056/447] checkout: add tests for -b and --track MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Test git checkout -b with and without --track and demonstrate unexpected error messages when it's given an extra (i.e. unsupported) path argument. In both cases it reports: $ git checkout -b foo origin/master bar fatal: 'bar' is not a commit and a branch 'foo' cannot be created from it The problem is that the start point we gave for the new branch is "origin/master" and "bar" is just some extra argument -- it could even be a valid commit, which would make the message even more confusing. We have more fitting error messages in git checkout, but get confused; use the text of the right ones in the tests. 
Reported-by: Dana Dahlstrom Original-test-by: Jeff King Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- t/t2018-checkout-branch.sh | 10 ++++++++++ t/t2027-checkout-track.sh | 24 ++++++++++++++++++++++++ 2 files changed, 34 insertions(+) create mode 100755 t/t2027-checkout-track.sh diff --git a/t/t2018-checkout-branch.sh b/t/t2018-checkout-branch.sh index 21583154d8e0d0..b166cb302f94f6 100755 --- a/t/t2018-checkout-branch.sh +++ b/t/t2018-checkout-branch.sh @@ -260,4 +260,14 @@ test_expect_success 'checkout -b to a new branch preserves mergeable changes des test_cmp expect actual ' +test_expect_success 'checkout -b rejects an invalid start point' ' + test_must_fail git checkout -b branch4 file1 2>err && + test_i18ngrep "is not a commit" err +' + +test_expect_failure 'checkout -b rejects an extra path argument' ' + test_must_fail git checkout -b branch5 branch1 file1 2>err && + test_i18ngrep "Cannot update paths and switch to branch" err +' + test_done diff --git a/t/t2027-checkout-track.sh b/t/t2027-checkout-track.sh new file mode 100755 index 00000000000000..d0b41d7cd08ccc --- /dev/null +++ b/t/t2027-checkout-track.sh @@ -0,0 +1,24 @@ +#!/bin/sh + +test_description='tests for git branch --track' + +. ./test-lib.sh + +test_expect_success 'setup' ' + test_commit one && + test_commit two +' + +test_expect_success 'checkout --track -b creates a new tracking branch' ' + git checkout --track -b branch1 master && + test $(git rev-parse --abbrev-ref HEAD) = branch1 && + test $(git config --get branch.branch1.remote) = . 
&& + test $(git config --get branch.branch1.merge) = refs/heads/master +' + +test_expect_failure 'checkout --track -b rejects an extra path argument' ' + test_must_fail git checkout --track -b branch2 master one.t 2>err && + test_i18ngrep "cannot be used with updating paths" err +' + +test_done From bb2198fb91ada94cfc6f8ec81b9dadcf3959fe10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sun, 24 May 2020 09:23:00 +0200 Subject: [PATCH 057/447] checkout: improve error messages for -b with extra argument MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we try to create a branch "foo" based on "origin/master" and give git checkout -b an extra unsupported argument "bar", it confusingly reports: $ git checkout -b foo origin/master bar fatal: 'bar' is not a commit and a branch 'foo' cannot be created from it $ git checkout --track -b foo origin/master bar fatal: 'bar' is not a commit and a branch 'foo' cannot be created from it That's wrong, because it very well understands that "origin/master" is supposed to be the start point for the new branch and not "bar". Check if we got a commit and show more fitting messages in that case instead: $ git checkout -b foo origin/master bar fatal: Cannot update paths and switch to branch 'foo' at the same time. $ git checkout --track -b foo origin/master bar fatal: '--track' cannot be used with updating paths Original-patch-by: Jeff King Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- builtin/checkout.c | 2 +- t/t2018-checkout-branch.sh | 2 +- t/t2027-checkout-track.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/builtin/checkout.c b/builtin/checkout.c index e9d111bb8360d1..24336e10173362 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -1689,7 +1689,7 @@ static int checkout_main(int argc, const char **argv, const char *prefix, * Try to give more helpful suggestion. * new_branch && argc > 1 will be caught later. 
*/ - if (opts->new_branch && argc == 1) + if (opts->new_branch && argc == 1 && !new_branch_info.commit) die(_("'%s' is not a commit and a branch '%s' cannot be created from it"), argv[0], opts->new_branch); diff --git a/t/t2018-checkout-branch.sh b/t/t2018-checkout-branch.sh index b166cb302f94f6..5f761bc616ee58 100755 --- a/t/t2018-checkout-branch.sh +++ b/t/t2018-checkout-branch.sh @@ -265,7 +265,7 @@ test_expect_success 'checkout -b rejects an invalid start point' ' test_i18ngrep "is not a commit" err ' -test_expect_failure 'checkout -b rejects an extra path argument' ' +test_expect_success 'checkout -b rejects an extra path argument' ' test_must_fail git checkout -b branch5 branch1 file1 2>err && test_i18ngrep "Cannot update paths and switch to branch" err ' diff --git a/t/t2027-checkout-track.sh b/t/t2027-checkout-track.sh index d0b41d7cd08ccc..bcba1bf90c090a 100755 --- a/t/t2027-checkout-track.sh +++ b/t/t2027-checkout-track.sh @@ -16,7 +16,7 @@ test_expect_success 'checkout --track -b creates a new tracking branch' ' test $(git config --get branch.branch1.merge) = refs/heads/master ' -test_expect_failure 'checkout --track -b rejects an extra path argument' ' +test_expect_success 'checkout --track -b rejects an extra path argument' ' test_must_fail git checkout --track -b branch2 master one.t 2>err && test_i18ngrep "cannot be used with updating paths" err ' From d63ae319622f5cb4d2c6663dd63fd566050f5810 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= Date: Fri, 22 May 2020 11:42:14 -0700 Subject: [PATCH 058/447] t5608: avoid say() and use "skip_all" instead for consistency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Printing a message directly to stdout could affect TAP processing and is not really needed, as there is a standard way to skip all tests that could be used instead, while printing an equivalent message. 
While at it, update the message to better reflect that since a85efb5985 (t5608-clone-2gb.sh: turn GIT_TEST_CLONE_2GB into a bool, 2019-11-22), the enabling variable should be a recognized boolean (e.g. true, false, 1, 0) and get rid of the prerequisite that used to guard all the tests, since "skip_all" is just much faster and idempotent. Helped-by: Johannes Schindelin Signed-off-by: Carlo Marcelo Arenas Belón Signed-off-by: Junio C Hamano --- t/t5608-clone-2gb.sh | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/t/t5608-clone-2gb.sh b/t/t5608-clone-2gb.sh index eee0842888414b..4c476d2fa18a61 100755 --- a/t/t5608-clone-2gb.sh +++ b/t/t5608-clone-2gb.sh @@ -5,12 +5,11 @@ test_description='Test cloning a repository larger than 2 gigabyte' if ! test_bool_env GIT_TEST_CLONE_2GB false then - say 'Skipping expensive 2GB clone test; enable it with GIT_TEST_CLONE_2GB=t' -else - test_set_prereq CLONE_2GB + skip_all='expensive 2GB clone test; enable with GIT_TEST_CLONE_2GB=true' + test_done fi -test_expect_success CLONE_2GB 'setup' ' +test_expect_success 'setup' ' git config pack.compression 0 && git config pack.depth 0 && @@ -38,13 +37,13 @@ test_expect_success CLONE_2GB 'setup' ' ' -test_expect_success CLONE_2GB 'clone - bare' ' +test_expect_success 'clone - bare' ' git clone --bare --no-hardlinks . 
clone-bare ' -test_expect_success CLONE_2GB 'clone - with worktree, file:// protocol' ' +test_expect_success 'clone - with worktree, file:// protocol' ' git clone "file://$(pwd)" clone-wt From c28ded83fc95be8157c851c8be179733a7d4b137 Mon Sep 17 00:00:00 2001 From: Laurent Arnoud Date: Fri, 22 May 2020 12:46:18 +0200 Subject: [PATCH 059/447] diff: add config option relative MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `diff.relative` boolean option set to `true` shows only changes in the current directory/value specified by the `path` argument of the `relative` option and shows pathnames relative to the aforementioned directory. Teach `--no-relative` to override earlier `--relative` Add for git-format-patch(1) options documentation `--relative` and `--no-relative` Signed-off-by: Laurent Arnoud Acked-by: Đoàn Trần Công Danh Signed-off-by: Junio C Hamano --- Documentation/config/diff.txt | 4 ++ Documentation/diff-options.txt | 5 ++- diff.c | 11 +++-- t/t4014-format-patch.sh | 13 ++++++ t/t4045-diff-relative.sh | 82 ++++++++++++++++++++++++++++++++-- 5 files changed, 108 insertions(+), 7 deletions(-) diff --git a/Documentation/config/diff.txt b/Documentation/config/diff.txt index ff09f1cf737c06..c3ae136eba6de1 100644 --- a/Documentation/config/diff.txt +++ b/Documentation/config/diff.txt @@ -105,6 +105,10 @@ diff.mnemonicPrefix:: diff.noprefix:: If set, 'git diff' does not show any source or destination prefix. +diff.relative:: + If set to 'true', 'git diff' does not show changes outside of the directory + and show pathnames relative to the current directory. + diff.orderFile:: File indicating how to order files within a diff. See the '-O' option to linkgit:git-diff[1] for details. 
diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt index bb31f0c42b3f8a..7987d72b0212e1 100644 --- a/Documentation/diff-options.txt +++ b/Documentation/diff-options.txt @@ -643,15 +643,18 @@ ifndef::git-format-patch[] -R:: Swap two inputs; that is, show differences from index or on-disk file to tree contents. +endif::git-format-patch[] --relative[=<path>]:: +--no-relative:: When run from a subdirectory of the project, it can be told to exclude changes outside the directory and show pathnames relative to it with this option. When you are not in a subdirectory (e.g. in a bare repository), you can name which subdirectory to make the output relative to by giving a <path> as an argument. -endif::git-format-patch[] + `--no-relative` can be used to countermand both `diff.relative` config + option and previous `--relative`. -a:: --text:: diff --git a/diff.c b/diff.c index d1ad6a3c4ad0bc..863da896c00faf 100644 --- a/diff.c +++ b/diff.c @@ -48,6 +48,7 @@ static const char *diff_order_file_cfg; int diff_auto_refresh_index = 1; static int diff_mnemonic_prefix; static int diff_no_prefix; +static int diff_relative; static int diff_stat_graph_width; static int diff_dirstat_permille_default = 30; static struct diff_options default_diff_options; @@ -386,6 +387,10 @@ int git_diff_ui_config(const char *var, const char *value, void *cb) diff_no_prefix = git_config_bool(var, value); return 0; } + if (!strcmp(var, "diff.relative")) { + diff_relative = git_config_bool(var, value); + return 0; + } if (!strcmp(var, "diff.statgraphwidth")) { diff_stat_graph_width = git_config_int(var, value); return 0; @@ -4538,6 +4543,7 @@ void repo_diff_setup(struct repository *r, struct diff_options *options) options->interhunkcontext = diff_interhunk_context_default; options->ws_error_highlight = ws_error_highlight_default; options->flags.rename_empty = 1; + options->flags.relative_name = diff_relative; options->objfind = NULL; /* pathchange left =NULL by default */ @@ -5195,8 +5201,7 @@ 
static int diff_opt_relative(const struct option *opt, { struct diff_options *options = opt->value; - BUG_ON_OPT_NEG(unset); - options->flags.relative_name = 1; + options->flags.relative_name = !unset; if (arg) options->prefix = arg; return 0; @@ -5492,7 +5497,7 @@ static void prep_parse_options(struct diff_options *options) OPT_GROUP(N_("Other diff options")), OPT_CALLBACK_F(0, "relative", options, N_(""), N_("when run from subdir, exclude changes outside and show relative paths"), - PARSE_OPT_NONEG | PARSE_OPT_OPTARG, + PARSE_OPT_OPTARG, diff_opt_relative), OPT_BOOL('a', "text", &options->flags.text, N_("treat all files as text")), diff --git a/t/t4014-format-patch.sh b/t/t4014-format-patch.sh index db7e733af9e5be..575e079cc26162 100755 --- a/t/t4014-format-patch.sh +++ b/t/t4014-format-patch.sh @@ -1602,6 +1602,19 @@ test_expect_success 'format patch ignores color.ui' ' test_cmp expect actual ' +test_expect_success 'format patch respects diff.relative' ' + rm -rf subdir && + mkdir subdir && + echo other content >subdir/file2 && + git add subdir/file2 && + git commit -F msg && + test_unconfig diff.relative && + git format-patch --relative=subdir --stdout -1 >expect && + test_config diff.relative true && + git -C subdir format-patch --stdout -1 >actual && + test_cmp expect actual +' + test_expect_success 'cover letter with invalid --cover-from-description and config' ' test_config branch.rebuild-1.description "config subject diff --git a/t/t4045-diff-relative.sh b/t/t4045-diff-relative.sh index 258808708e1093..7be1de736d86c9 100755 --- a/t/t4045-diff-relative.sh +++ b/t/t4045-diff-relative.sh @@ -8,7 +8,8 @@ test_expect_success 'setup' ' echo content >file1 && mkdir subdir && echo other content >subdir/file2 && - blob=$(git hash-object subdir/file2) && + blob_file1=$(git hash-object file1) && + blob_file2=$(git hash-object subdir/file2) && git add . 
&& git commit -m one ' @@ -18,7 +19,7 @@ check_diff () { shift expect=$1 shift - short_blob=$(git rev-parse --short $blob) + short_blob=$(git rev-parse --short $blob_file2) cat >expected <<-EOF diff --git a/$expect b/$expect new file mode 100644 @@ -70,7 +71,7 @@ check_raw () { expect=$1 shift cat >expected <<-EOF - :000000 100644 $ZERO_OID $blob A $expect + :000000 100644 $ZERO_OID $blob_file2 A $expect EOF test_expect_success "--raw $*" " git -C '$dir' diff --no-abbrev --raw $* HEAD^ >actual && @@ -86,4 +87,79 @@ do check_$type . dir/file2 --relative=sub done +check_diff_relative_option () { + dir=$1 + shift + expect=$1 + shift + relative_opt=$1 + shift + test_expect_success "config diff.relative $relative_opt -p $*" " + short_blob=\$(git rev-parse --short $blob_file2) && + cat >expected <<-EOF && + diff --git a/$expect b/$expect + new file mode 100644 + index 0000000..\$short_blob + --- /dev/null + +++ b/$expect + @@ -0,0 +1 @@ + +other content + EOF + test_config -C $dir diff.relative $relative_opt && + git -C '$dir' diff -p $* HEAD^ >actual && + test_cmp expected actual + " +} + +check_diff_no_relative_option () { + dir=$1 + shift + expect=$1 + shift + relative_opt=$1 + shift + test_expect_success "config diff.relative $relative_opt -p $*" " + short_blob_file1=\$(git rev-parse --short $blob_file1) && + short_blob_file2=\$(git rev-parse --short $blob_file2) && + cat >expected <<-EOF && + diff --git a/file1 b/file1 + new file mode 100644 + index 0000000..\$short_blob_file1 + --- /dev/null + +++ b/file1 + @@ -0,0 +1 @@ + +content + diff --git a/$expect b/$expect + new file mode 100644 + index 0000000..\$short_blob_file2 + --- /dev/null + +++ b/$expect + @@ -0,0 +1 @@ + +other content + EOF + test_config -C $dir diff.relative $relative_opt && + git -C '$dir' diff -p $* HEAD^ >actual && + test_cmp expected actual + " +} + +check_diff_no_relative_option . subdir/file2 false +check_diff_no_relative_option . 
subdir/file2 true --no-relative +check_diff_no_relative_option . subdir/file2 false --no-relative +check_diff_no_relative_option subdir subdir/file2 false +check_diff_no_relative_option subdir subdir/file2 true --no-relative +check_diff_no_relative_option subdir subdir/file2 false --no-relative + +check_diff_relative_option . file2 false --relative=subdir/ +check_diff_relative_option . file2 false --relative=subdir +check_diff_relative_option . file2 true --relative=subdir/ +check_diff_relative_option . file2 true --relative=subdir +check_diff_relative_option subdir file2 false --relative +check_diff_relative_option subdir file2 true --relative +check_diff_relative_option subdir file2 true +check_diff_relative_option subdir file2 false --no-relative --relative +check_diff_relative_option subdir file2 true --no-relative --relative +check_diff_relative_option . file2 false --no-relative --relative=subdir +check_diff_relative_option . file2 true --no-relative --relative=subdir + test_done From 74b082ad34fe2c727c676dac5c33d5e1e5f5ca56 Mon Sep 17 00:00:00 2001 From: Denton Liu Date: Tue, 19 May 2020 06:53:58 -0400 Subject: [PATCH 060/447] remote-curl: error on incomplete packet Currently, remote-curl acts as a proxy and blindly forwards packets between an HTTP server and fetch-pack. In the case of a stateless RPC connection where the connection is terminated with a partially written packet, remote-curl will blindly send the partially written packet before waiting on more input from fetch-pack. Meanwhile, fetch-pack will read the partial packet and continue reading, expecting more input. This results in a deadlock between the two processes. For a stateless connection, inspect packets before sending them and error out if a packet line packet is incomplete. 
Helped-by: Jeff King Signed-off-by: Denton Liu Signed-off-by: Junio C Hamano --- remote-curl.c | 59 ++++++++++++++++++- t/lib-httpd.sh | 2 + t/lib-httpd/apache.conf | 8 +++ .../incomplete-body-upload-pack-v2-http.sh | 3 + .../incomplete-length-upload-pack-v2-http.sh | 3 + t/t5702-protocol-v2.sh | 34 +++++++++++ 6 files changed, 106 insertions(+), 3 deletions(-) create mode 100644 t/lib-httpd/incomplete-body-upload-pack-v2-http.sh create mode 100644 t/lib-httpd/incomplete-length-upload-pack-v2-http.sh diff --git a/remote-curl.c b/remote-curl.c index da3e07184aed3c..e020140092dc56 100644 --- a/remote-curl.c +++ b/remote-curl.c @@ -679,9 +679,53 @@ static curlioerr rpc_ioctl(CURL *handle, int cmd, void *clientp) } #endif +struct check_pktline_state { + char len_buf[4]; + int len_filled; + int remaining; +}; + +static void check_pktline(struct check_pktline_state *state, const char *ptr, size_t size) +{ + while (size) { + if (!state->remaining) { + int digits_remaining = 4 - state->len_filled; + if (digits_remaining > size) + digits_remaining = size; + memcpy(&state->len_buf[state->len_filled], ptr, digits_remaining); + state->len_filled += digits_remaining; + ptr += digits_remaining; + size -= digits_remaining; + + if (state->len_filled == 4) { + state->remaining = packet_length(state->len_buf); + if (state->remaining < 0) { + die(_("remote-curl: bad line length character: %.4s"), state->len_buf); + } else if (state->remaining < 4) { + state->remaining = 0; + } else { + state->remaining -= 4; + } + state->len_filled = 0; + } + } + + if (state->remaining) { + int remaining = state->remaining; + if (remaining > size) + remaining = size; + ptr += remaining; + size -= remaining; + state->remaining -= remaining; + } + } +} + struct rpc_in_data { struct rpc_state *rpc; struct active_request_slot *slot; + int check_pktline; + struct check_pktline_state pktline_state; }; /* @@ -702,6 +746,8 @@ static size_t rpc_in(char *ptr, size_t eltsize, return size; if (size) 
data->rpc->any_written = 1; + if (data->check_pktline) + check_pktline(&data->pktline_state, ptr, size); write_or_die(data->rpc->in, ptr, size); return size; } @@ -778,7 +824,7 @@ static curl_off_t xcurl_off_t(size_t len) * If flush_received is true, do not attempt to read any more; just use what's * in rpc->buf. */ -static int post_rpc(struct rpc_state *rpc, int flush_received) +static int post_rpc(struct rpc_state *rpc, int stateless_connect, int flush_received) { struct active_request_slot *slot; struct curl_slist *headers = http_copy_default_headers(); @@ -920,6 +966,8 @@ static int post_rpc(struct rpc_state *rpc, int flush_received) curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, rpc_in); rpc_in_data.rpc = rpc; rpc_in_data.slot = slot; + rpc_in_data.check_pktline = stateless_connect; + memset(&rpc_in_data.pktline_state, 0, sizeof(rpc_in_data.pktline_state)); curl_easy_setopt(slot->curl, CURLOPT_FILE, &rpc_in_data); curl_easy_setopt(slot->curl, CURLOPT_FAILONERROR, 0); @@ -936,6 +984,11 @@ static int post_rpc(struct rpc_state *rpc, int flush_received) if (!rpc->any_written) err = -1; + if (rpc_in_data.pktline_state.len_filled) + err = error(_("%d bytes of length header were received"), rpc_in_data.pktline_state.len_filled); + if (rpc_in_data.pktline_state.remaining) + err = error(_("%d bytes of body are still expected"), rpc_in_data.pktline_state.remaining); + curl_slist_free_all(headers); free(gzip_body); return err; @@ -985,7 +1038,7 @@ static int rpc_service(struct rpc_state *rpc, struct discovery *heads, break; rpc->pos = 0; rpc->len = n; - err |= post_rpc(rpc, 0); + err |= post_rpc(rpc, 0, 0); } close(client.in); @@ -1342,7 +1395,7 @@ static int stateless_connect(const char *service_name) BUG("The entire rpc->buf should be larger than LARGE_PACKET_MAX"); if (status == PACKET_READ_EOF) break; - if (post_rpc(&rpc, status == PACKET_READ_FLUSH)) + if (post_rpc(&rpc, 1, status == PACKET_READ_FLUSH)) /* We would have an err here */ break; /* Reset the buffer 
for next request */ diff --git a/t/lib-httpd.sh b/t/lib-httpd.sh index 1449ee95e9eaa0..d2edfa4c503af0 100644 --- a/t/lib-httpd.sh +++ b/t/lib-httpd.sh @@ -129,6 +129,8 @@ install_script () { prepare_httpd() { mkdir -p "$HTTPD_DOCUMENT_ROOT_PATH" cp "$TEST_PATH"/passwd "$HTTPD_ROOT_PATH" + install_script incomplete-length-upload-pack-v2-http.sh + install_script incomplete-body-upload-pack-v2-http.sh install_script broken-smart-http.sh install_script error-smart-http.sh install_script error.sh diff --git a/t/lib-httpd/apache.conf b/t/lib-httpd/apache.conf index 994e5290d63b0f..afa91e38b0e213 100644 --- a/t/lib-httpd/apache.conf +++ b/t/lib-httpd/apache.conf @@ -117,6 +117,8 @@ Alias /auth/dumb/ www/auth/dumb/ SetEnv GIT_EXEC_PATH ${GIT_EXEC_PATH} SetEnv GIT_HTTP_EXPORT_ALL +ScriptAlias /smart/incomplete_length/git-upload-pack incomplete-length-upload-pack-v2-http.sh/ +ScriptAlias /smart/incomplete_body/git-upload-pack incomplete-body-upload-pack-v2-http.sh/ ScriptAliasMatch /error_git_upload_pack/(.*)/git-upload-pack error.sh/ ScriptAliasMatch /smart_*[^/]*/(.*) ${GIT_EXEC_PATH}/git-http-backend/$1 ScriptAlias /broken_smart/ broken-smart-http.sh/ @@ -126,6 +128,12 @@ ScriptAliasMatch /one_time_perl/(.*) apply-one-time-perl.sh/$1 Options FollowSymlinks + + Options ExecCGI + + + Options ExecCGI + Options ExecCGI diff --git a/t/lib-httpd/incomplete-body-upload-pack-v2-http.sh b/t/lib-httpd/incomplete-body-upload-pack-v2-http.sh new file mode 100644 index 00000000000000..90e73ef8d559a5 --- /dev/null +++ b/t/lib-httpd/incomplete-body-upload-pack-v2-http.sh @@ -0,0 +1,3 @@ +printf "Content-Type: text/%s\n" "application/x-git-upload-pack-result" +echo +printf "%s%s" "0079" "45" diff --git a/t/lib-httpd/incomplete-length-upload-pack-v2-http.sh b/t/lib-httpd/incomplete-length-upload-pack-v2-http.sh new file mode 100644 index 00000000000000..dce552e3482398 --- /dev/null +++ b/t/lib-httpd/incomplete-length-upload-pack-v2-http.sh @@ -0,0 +1,3 @@ +printf "Content-Type: text/%s\n" 
"application/x-git-upload-pack-result" +echo +printf "%s" "00" diff --git a/t/t5702-protocol-v2.sh b/t/t5702-protocol-v2.sh index 5039e66dc47c0e..4eb81ba2d49abe 100755 --- a/t/t5702-protocol-v2.sh +++ b/t/t5702-protocol-v2.sh @@ -586,6 +586,40 @@ test_expect_success 'clone with http:// using protocol v2' ' ! grep "Send header: Transfer-Encoding: chunked" log ' +test_expect_success 'clone repository with http:// using protocol v2 with incomplete pktline length' ' + test_when_finished "rm -f log" && + + git init "$HTTPD_DOCUMENT_ROOT_PATH/incomplete_length" && + test_commit -C "$HTTPD_DOCUMENT_ROOT_PATH/incomplete_length" file && + + test_must_fail env GIT_TRACE_PACKET="$(pwd)/log" GIT_TRACE_CURL="$(pwd)/log" git -c protocol.version=2 \ + clone "$HTTPD_URL/smart/incomplete_length" incomplete_length_child 2>err && + + # Client requested to use protocol v2 + grep "Git-Protocol: version=2" log && + # Server responded using protocol v2 + grep "git< version 2" log && + # Client reported appropriate failure + test_i18ngrep "bytes of length header were received" err +' + +test_expect_success 'clone repository with http:// using protocol v2 with incomplete pktline body' ' + test_when_finished "rm -f log" && + + git init "$HTTPD_DOCUMENT_ROOT_PATH/incomplete_body" && + test_commit -C "$HTTPD_DOCUMENT_ROOT_PATH/incomplete_body" file && + + test_must_fail env GIT_TRACE_PACKET="$(pwd)/log" GIT_TRACE_CURL="$(pwd)/log" git -c protocol.version=2 \ + clone "$HTTPD_URL/smart/incomplete_body" incomplete_body_child 2>err && + + # Client requested to use protocol v2 + grep "Git-Protocol: version=2" log && + # Server responded using protocol v2 + grep "git< version 2" log && + # Client reported appropriate failure + test_i18ngrep "bytes of body are still expected" err +' + test_expect_success 'clone big repository with http:// using protocol v2' ' test_when_finished "rm -f log" && From 0181b600a6deb66a346dbcbe8300cf9e2467ebbb Mon Sep 17 00:00:00 2001 From: Denton Liu Date: Tue, 19 May 
2020 06:53:59 -0400 Subject: [PATCH 061/447] pkt-line: define PACKET_READ_RESPONSE_END In a future commit, we will use PACKET_READ_RESPONSE_END to separate messages proxied by remote-curl. To prepare for this, add the PACKET_READ_RESPONSE_END enum value. In switch statements that need a case added, die() or BUG() when a PACKET_READ_RESPONSE_END is unexpected. Otherwise, mirror how PACKET_READ_DELIM is implemented (especially in cases where packets are being forwarded). Signed-off-by: Denton Liu Signed-off-by: Junio C Hamano --- connect.c | 2 ++ pkt-line.c | 11 +++++++++++ pkt-line.h | 2 ++ remote-curl.c | 2 ++ serve.c | 2 ++ t/helper/test-pkt-line.c | 4 ++++ 6 files changed, 23 insertions(+) diff --git a/connect.c b/connect.c index 23013c634436ad..11c6ec70a0c571 100644 --- a/connect.c +++ b/connect.c @@ -127,6 +127,7 @@ enum protocol_version discover_version(struct packet_reader *reader) die_initial_contact(0); case PACKET_READ_FLUSH: case PACKET_READ_DELIM: + case PACKET_READ_RESPONSE_END: version = protocol_v0; break; case PACKET_READ_NORMAL: @@ -310,6 +311,7 @@ struct ref **get_remote_heads(struct packet_reader *reader, state = EXPECTING_DONE; break; case PACKET_READ_DELIM: + case PACKET_READ_RESPONSE_END: die(_("invalid packet")); } diff --git a/pkt-line.c b/pkt-line.c index 3beab1dc6b9847..8f9bc68ee28ef4 100644 --- a/pkt-line.c +++ b/pkt-line.c @@ -99,6 +99,13 @@ void packet_delim(int fd) die_errno(_("unable to write delim packet")); } +void packet_response_end(int fd) +{ + packet_trace("0002", 4, 1); + if (write_in_full(fd, "0002", 4) < 0) + die_errno(_("unable to write stateless separator packet")); +} + int packet_flush_gently(int fd) { packet_trace("0000", 4, 1); @@ -337,6 +344,10 @@ enum packet_read_status packet_read_with_status(int fd, char **src_buffer, packet_trace("0001", 4, 0); *pktlen = 0; return PACKET_READ_DELIM; + } else if (len == 2) { + packet_trace("0002", 4, 0); + *pktlen = 0; + return PACKET_READ_RESPONSE_END; } else if (len < 4) { 
die(_("protocol error: bad line length %d"), len); } diff --git a/pkt-line.h b/pkt-line.h index a72af9112ba1ce..5b373fe4cdaae2 100644 --- a/pkt-line.h +++ b/pkt-line.h @@ -22,6 +22,7 @@ */ void packet_flush(int fd); void packet_delim(int fd); +void packet_response_end(int fd); void packet_write_fmt(int fd, const char *fmt, ...) __attribute__((format (printf, 2, 3))); void packet_buf_flush(struct strbuf *buf); void packet_buf_delim(struct strbuf *buf); @@ -94,6 +95,7 @@ enum packet_read_status { PACKET_READ_NORMAL, PACKET_READ_FLUSH, PACKET_READ_DELIM, + PACKET_READ_RESPONSE_END, }; enum packet_read_status packet_read_with_status(int fd, char **src_buffer, size_t *src_len, char *buffer, diff --git a/remote-curl.c b/remote-curl.c index e020140092dc56..d02cb547e97ee5 100644 --- a/remote-curl.c +++ b/remote-curl.c @@ -601,6 +601,8 @@ static int rpc_read_from_out(struct rpc_state *rpc, int options, case PACKET_READ_FLUSH: memcpy(buf - 4, "0000", 4); break; + case PACKET_READ_RESPONSE_END: + die(_("remote server sent stateless separator")); } } diff --git a/serve.c b/serve.c index 317256c1a493c4..c046926ba141c7 100644 --- a/serve.c +++ b/serve.c @@ -217,6 +217,8 @@ static int process_request(void) state = PROCESS_REQUEST_DONE; break; + case PACKET_READ_RESPONSE_END: + BUG("unexpected stateless separator packet"); } } diff --git a/t/helper/test-pkt-line.c b/t/helper/test-pkt-line.c index 12ca698e17a1d5..69152958e58eaf 100644 --- a/t/helper/test-pkt-line.c +++ b/t/helper/test-pkt-line.c @@ -46,6 +46,9 @@ static void unpack(void) case PACKET_READ_DELIM: printf("0001\n"); break; + case PACKET_READ_RESPONSE_END: + printf("0002\n"); + break; } } } @@ -75,6 +78,7 @@ static void unpack_sideband(void) case PACKET_READ_FLUSH: return; case PACKET_READ_DELIM: + case PACKET_READ_RESPONSE_END: break; } } From b0df0c16ead4c5512d506dcbbdf31194d992803c Mon Sep 17 00:00:00 2001 From: Denton Liu Date: Tue, 19 May 2020 06:54:00 -0400 Subject: [PATCH 062/447] stateless-connect: send response 
end packet Currently, remote-curl acts as a proxy and blindly forwards packets between an HTTP server and fetch-pack. In the case of a stateless RPC connection where the connection is terminated before the transaction is complete, remote-curl will blindly forward the packets before waiting on more input from fetch-pack. Meanwhile, fetch-pack will read the transaction and continue reading, expecting more input to continue the transaction. This results in a deadlock between the two processes. This can be seen in the following command which does not terminate: $ git -c protocol.version=2 clone https://github.com/git/git.git --shallow-since=20151012 Cloning into 'git'... whereas the v1 version does terminate as expected: $ git -c protocol.version=1 clone https://github.com/git/git.git --shallow-since=20151012 Cloning into 'git'... fatal: the remote end hung up unexpectedly Instead of blindly forwarding packets, make remote-curl insert a response end packet after proxying the responses from the remote server when using stateless_connect(). On the RPC client side, ensure that each response ends as described. A separate control packet is chosen because we need to be able to differentiate between what the remote server sends and remote-curl's control packets. By ensuring in the remote-curl code that a server cannot send response end packets, we prevent a malicious server from being able to perform a denial of service attack in which they spoof a response end packet and cause the described deadlock to happen. 
Reported-by: Force Charlie Helped-by: Jeff King Signed-off-by: Denton Liu Signed-off-by: Junio C Hamano --- Documentation/gitremote-helpers.txt | 4 +++- Documentation/technical/protocol-v2.txt | 2 ++ builtin/fetch-pack.c | 2 +- connect.c | 16 +++++++++++++++- connect.h | 4 ++++ fetch-pack.c | 13 +++++++++++++ remote-curl.c | 5 +++++ remote.h | 3 ++- t/t5702-protocol-v2.sh | 13 +++++++++++++ transport.c | 3 ++- 10 files changed, 60 insertions(+), 5 deletions(-) diff --git a/Documentation/gitremote-helpers.txt b/Documentation/gitremote-helpers.txt index f48a031dc346a3..93baeeb0295824 100644 --- a/Documentation/gitremote-helpers.txt +++ b/Documentation/gitremote-helpers.txt @@ -405,7 +405,9 @@ Supported if the helper has the "connect" capability. trying to fall back). After line feed terminating the positive (empty) response, the output of the service starts. Messages (both request and response) must consist of zero or more - PKT-LINEs, terminating in a flush packet. The client must not + PKT-LINEs, terminating in a flush packet. Response messages will + then have a response end packet after the flush packet to + indicate the end of a response. The client must not expect the server to store any state in between request-response pairs. After the connection ends, the remote helper exits. 
+ diff --git a/Documentation/technical/protocol-v2.txt b/Documentation/technical/protocol-v2.txt index 7e3766cafb39cc..3996d708916298 100644 --- a/Documentation/technical/protocol-v2.txt +++ b/Documentation/technical/protocol-v2.txt @@ -33,6 +33,8 @@ In protocol v2 these special packets will have the following semantics: * '0000' Flush Packet (flush-pkt) - indicates the end of a message * '0001' Delimiter Packet (delim-pkt) - separates sections of a message + * '0002' Message Packet (response-end-pkt) - indicates the end of a response + for stateless connections Initial Client Request ---------------------- diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c index 47711000725b4f..94b0c89b8241f6 100644 --- a/builtin/fetch-pack.c +++ b/builtin/fetch-pack.c @@ -224,7 +224,7 @@ int cmd_fetch_pack(int argc, const char **argv, const char *prefix) version = discover_version(&reader); switch (version) { case protocol_v2: - get_remote_refs(fd[1], &reader, &ref, 0, NULL, NULL); + get_remote_refs(fd[1], &reader, &ref, 0, NULL, NULL, args.stateless_rpc); break; case protocol_v1: case protocol_v0: diff --git a/connect.c b/connect.c index 11c6ec70a0c571..0df45a110888e5 100644 --- a/connect.c +++ b/connect.c @@ -406,10 +406,21 @@ static int process_ref_v2(const char *line, struct ref ***list) return ret; } +void check_stateless_delimiter(int stateless_rpc, + struct packet_reader *reader, + const char *error) +{ + if (!stateless_rpc) + return; /* not in stateless mode, no delimiter expected */ + if (packet_reader_read(reader) != PACKET_READ_RESPONSE_END) + die("%s", error); +} + struct ref **get_remote_refs(int fd_out, struct packet_reader *reader, struct ref **list, int for_push, const struct argv_array *ref_prefixes, - const struct string_list *server_options) + const struct string_list *server_options, + int stateless_rpc) { int i; *list = NULL; @@ -446,6 +457,9 @@ struct ref **get_remote_refs(int fd_out, struct packet_reader *reader, if (reader->status != 
PACKET_READ_FLUSH) die(_("expected flush after ref listing")); + check_stateless_delimiter(stateless_rpc, reader, + _("expected response end packet after ref listing")); + return list; } diff --git a/connect.h b/connect.h index 5f2382e0186804..235bc66254d4a0 100644 --- a/connect.h +++ b/connect.h @@ -22,4 +22,8 @@ int server_supports_v2(const char *c, int die_on_error); int server_supports_feature(const char *c, const char *feature, int die_on_error); +void check_stateless_delimiter(int stateless_rpc, + struct packet_reader *reader, + const char *error); + #endif diff --git a/fetch-pack.c b/fetch-pack.c index 7eaa19d7c17abe..d8bbf45ee27a73 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -1451,6 +1451,13 @@ enum fetch_state { FETCH_DONE, }; +static void do_check_stateless_delimiter(const struct fetch_pack_args *args, + struct packet_reader *reader) +{ + check_stateless_delimiter(args->stateless_rpc, reader, + _("git fetch-pack: expected response end packet")); +} + static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, int fd[2], const struct ref *orig_ref, @@ -1535,6 +1542,10 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, /* Process ACKs/NAKs */ switch (process_acks(negotiator, &reader, &common)) { case READY: + /* + * Don't check for response delimiter; get_pack() will + * read the rest of this response. 
+ */ state = FETCH_GET_PACK; break; case COMMON_FOUND: @@ -1542,6 +1553,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, seen_ack = 1; /* fallthrough */ case NO_COMMON_FOUND: + do_check_stateless_delimiter(args, &reader); state = FETCH_SEND_REQUEST; break; } @@ -1561,6 +1573,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, process_section_header(&reader, "packfile", 0); if (get_pack(args, fd, pack_lockfile, sought, nr_sought)) die(_("git fetch-pack: fetch failed.")); + do_check_stateless_delimiter(args, &reader); state = FETCH_DONE; break; diff --git a/remote-curl.c b/remote-curl.c index d02cb547e97ee5..75532a8baea8f2 100644 --- a/remote-curl.c +++ b/remote-curl.c @@ -703,6 +703,8 @@ static void check_pktline(struct check_pktline_state *state, const char *ptr, si state->remaining = packet_length(state->len_buf); if (state->remaining < 0) { die(_("remote-curl: bad line length character: %.4s"), state->len_buf); + } else if (state->remaining == 2) { + die(_("remote-curl: unexpected response end packet")); } else if (state->remaining < 4) { state->remaining = 0; } else { @@ -991,6 +993,9 @@ static int post_rpc(struct rpc_state *rpc, int stateless_connect, int flush_rece if (rpc_in_data.pktline_state.remaining) err = error(_("%d bytes of body are still expected"), rpc_in_data.pktline_state.remaining); + if (stateless_connect) + packet_response_end(rpc->in); + curl_slist_free_all(headers); free(gzip_body); return err; diff --git a/remote.h b/remote.h index 11d8719b587767..5cc26c1b3b3e1f 100644 --- a/remote.h +++ b/remote.h @@ -179,7 +179,8 @@ struct ref **get_remote_heads(struct packet_reader *reader, struct ref **get_remote_refs(int fd_out, struct packet_reader *reader, struct ref **list, int for_push, const struct argv_array *ref_prefixes, - const struct string_list *server_options); + const struct string_list *server_options, + int stateless_rpc); int resolve_remote_symref(struct ref *ref, struct ref *list); diff --git 
a/t/t5702-protocol-v2.sh b/t/t5702-protocol-v2.sh index 4eb81ba2d49abe..8da65e60deea33 100755 --- a/t/t5702-protocol-v2.sh +++ b/t/t5702-protocol-v2.sh @@ -620,6 +620,19 @@ test_expect_success 'clone repository with http:// using protocol v2 with incomp test_i18ngrep "bytes of body are still expected" err ' +test_expect_success 'clone with http:// using protocol v2 and invalid parameters' ' + test_when_finished "rm -f log" && + + test_must_fail env GIT_TRACE_PACKET="$(pwd)/log" GIT_TRACE_CURL="$(pwd)/log" \ + git -c protocol.version=2 \ + clone --shallow-since=20151012 "$HTTPD_URL/smart/http_parent" http_child_invalid && + + # Client requested to use protocol v2 + grep "Git-Protocol: version=2" log && + # Server responded using protocol v2 + grep "git< version 2" log +' + test_expect_success 'clone big repository with http:// using protocol v2' ' test_when_finished "rm -f log" && diff --git a/transport.c b/transport.c index 431a93caef710f..7d50c502adfb6c 100644 --- a/transport.c +++ b/transport.c @@ -297,7 +297,8 @@ static struct ref *handshake(struct transport *transport, int for_push, if (must_list_refs) get_remote_refs(data->fd[1], &reader, &refs, for_push, ref_prefixes, - transport->server_options); + transport->server_options, + transport->stateless_rpc); break; case protocol_v1: case protocol_v0: From 1c966423263cf77bb3fd2d87df4537d31853b58f Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Fri, 22 May 2020 02:25:02 +0200 Subject: [PATCH 063/447] sparse: allow '{ 0 }' to be used without warnings In standard C, '{ 0 }' can be used as a universal zero-initializer. However, Sparse complains if this is used on a type where the first member (possibly nested) is a pointer since Sparse purposely wants to warn when '0' is used to initialize a pointer type. Legitimately, it's desirable to be able to use '{ 0 }' as an idiom without these warnings [1,2]. 
To allow this, an option has now been added to Sparse: 537e3e2dae univ-init: conditionally accept { 0 } without warnings So, add this option to the SP_EXTRA_FLAGS variable. Note: The option has just been added to Sparse. So, to benefit from this patch now, you need to use the latest Sparse source from kernel.org. The option will simply be ignored by older versions of Sparse. [1] https://lore.kernel.org/r/e6796c60-a870-e761-3b07-b680f934c537@ramsayjones.plus.com [2] https://lore.kernel.org/r/xmqqd07xem9l.fsf@gitster.c.googlers.com Signed-off-by: Luc Van Oostenryck Signed-off-by: Junio C Hamano --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9804a0758b2458..c58b781105496c 100644 --- a/Makefile +++ b/Makefile @@ -1188,7 +1188,7 @@ PTHREAD_CFLAGS = # For the 'sparse' target SPARSE_FLAGS ?= -SP_EXTRA_FLAGS = +SP_EXTRA_FLAGS = -Wno-universal-initializer # For the 'coccicheck' target; setting SPATCH_BATCH_SIZE higher will # usually result in less CPU usage at the cost of higher peak memory. From 92315e50b275deee8e84d28ee1ff1ad555a5de36 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:58:49 +0000 Subject: [PATCH 064/447] connect: have ref processing code take struct packet_reader In a future patch, we'll want to access multiple members from struct packet_reader when parsing references. Therefore, have the ref parsing code take pointers to struct reader instead of having to pass multiple arguments to each function. Rename the len variable to "linelen" to make it clearer what the variable does in light of the variable change. Signed-off-by: brian m.
carlson Signed-off-by: Junio C Hamano --- connect.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/connect.c b/connect.c index 23013c634436ad..ccc5274189da55 100644 --- a/connect.c +++ b/connect.c @@ -204,17 +204,19 @@ static void annotate_refs_with_symref_info(struct ref *ref) string_list_clear(&symref, 0); } -static void process_capabilities(const char *line, int *len) +static void process_capabilities(struct packet_reader *reader, int *linelen) { + const char *line = reader->line; int nul_location = strlen(line); - if (nul_location == *len) + if (nul_location == *linelen) return; server_capabilities_v1 = xstrdup(line + nul_location + 1); - *len = nul_location; + *linelen = nul_location; } -static int process_dummy_ref(const char *line) +static int process_dummy_ref(const struct packet_reader *reader) { + const char *line = reader->line; struct object_id oid; const char *name; @@ -234,9 +236,11 @@ static void check_no_capabilities(const char *line, int len) line + strlen(line)); } -static int process_ref(const char *line, int len, struct ref ***list, - unsigned int flags, struct oid_array *extra_have) +static int process_ref(const struct packet_reader *reader, int len, + struct ref ***list, unsigned int flags, + struct oid_array *extra_have) { + const char *line = reader->line; struct object_id old_oid; const char *name; @@ -260,9 +264,10 @@ static int process_ref(const char *line, int len, struct ref ***list, return 1; } -static int process_shallow(const char *line, int len, +static int process_shallow(const struct packet_reader *reader, int len, struct oid_array *shallow_points) { + const char *line = reader->line; const char *arg; struct object_id old_oid; @@ -315,20 +320,20 @@ struct ref **get_remote_heads(struct packet_reader *reader, switch (state) { case EXPECTING_FIRST_REF: - process_capabilities(reader->line, &len); - if (process_dummy_ref(reader->line)) { + process_capabilities(reader, &len); + if 
(process_dummy_ref(reader)) { state = EXPECTING_SHALLOW; break; } state = EXPECTING_REF; /* fallthrough */ case EXPECTING_REF: - if (process_ref(reader->line, len, &list, flags, extra_have)) + if (process_ref(reader, len, &list, flags, extra_have)) break; state = EXPECTING_SHALLOW; /* fallthrough */ case EXPECTING_SHALLOW: - if (process_shallow(reader->line, len, shallow_points)) + if (process_shallow(reader, len, shallow_points)) break; die(_("protocol error: unexpected '%s'"), reader->line); case EXPECTING_DONE: From 14570dc67d2a500dfb9f33a7445bdbd6133af4ac Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:58:50 +0000 Subject: [PATCH 065/447] wrapper: add function to compare strings with different NUL termination When parsing capabilities for the pack protocol, there are times we'll want to compare the value of a capability to a NUL-terminated string. Since the data we're reading will be space-terminated, not NUL-terminated, we need a function that compares the two strings, but also checks that they're the same length. Otherwise, if we used strncmp to compare these strings, we might accidentally accept a parameter that was a prefix of the expected value. Add a function, xstrncmpz, that takes a NUL-terminated string and a non-NUL-terminated string, plus a length, and compares them, ensuring that they are the same length. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- git-compat-util.h | 6 ++++++ wrapper.c | 8 ++++++++ 2 files changed, 14 insertions(+) diff --git a/git-compat-util.h b/git-compat-util.h index 8ba576e81e3e52..a3dcfc8613a821 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -868,6 +868,12 @@ char *xgetcwd(void); FILE *fopen_for_writing(const char *path); FILE *fopen_or_warn(const char *path, const char *mode); +/* + * Like strncmp, but only return zero if s is NUL-terminated and exactly len + * characters long. If it is not, consider it greater than t. 
+ */ +int xstrncmpz(const char *s, const char *t, size_t len); + /* * FREE_AND_NULL(ptr) is like free(ptr) followed by ptr = NULL. Note * that ptr is used twice, so don't pass e.g. ptr++. diff --git a/wrapper.c b/wrapper.c index 3a1c0e052677dc..4ff4a9c3db0e1c 100644 --- a/wrapper.c +++ b/wrapper.c @@ -105,6 +105,14 @@ char *xstrndup(const char *str, size_t len) return xmemdupz(str, p ? p - str : len); } +int xstrncmpz(const char *s, const char *t, size_t len) +{ + int res = strncmp(s, t, len); + if (res) + return res; + return s[len] == '\0' ? 0 : 1; +} + void *xrealloc(void *ptr, size_t size) { void *ret; From bf30dbf82611e3bb7e2bbac1d38bb38ad10c8636 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:58:51 +0000 Subject: [PATCH 066/447] remote: advertise the object-format capability on the server side Advertise the current hash algorithm in use by using the object-format capability as part of the ref advertisement. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- builtin/receive-pack.c | 1 + upload-pack.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index d37ab776b3f593..a4159b559e9679 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -248,6 +248,7 @@ static void show_ref(const char *path, const struct object_id *oid) strbuf_addf(&cap, " push-cert=%s", push_cert_nonce); if (advertise_push_options) strbuf_addstr(&cap, " push-options"); + strbuf_addf(&cap, " object-format=%s", the_hash_algo->name); strbuf_addf(&cap, " agent=%s", git_user_agent_sanitized()); packet_write_fmt(1, "%s %s%c%s\n", oid_to_hex(oid), path, 0, cap.buf); diff --git a/upload-pack.c b/upload-pack.c index 902d0ad5e157fd..df6cb51db7c7b7 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -1005,7 +1005,7 @@ static int send_ref(const char *refname, const struct object_id *oid, struct strbuf symref_info = STRBUF_INIT; format_symref_info(&symref_info, cb_data); - 
packet_write_fmt(1, "%s %s%c%s%s%s%s%s%s agent=%s\n", + packet_write_fmt(1, "%s %s%c%s%s%s%s%s%s object-format=%s agent=%s\n", oid_to_hex(oid), refname_nons, 0, capabilities, (allow_unadvertised_object_request & ALLOW_TIP_SHA1) ? @@ -1015,6 +1015,7 @@ static int send_ref(const char *refname, const struct object_id *oid, stateless_rpc ? " no-done" : "", symref_info.buf, allow_filter ? " filter" : "", + the_hash_algo->name, git_user_agent_sanitized()); strbuf_release(&symref_info); } else { From 2c6a403d96cd5d31f1638679502f06e2b953647f Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:58:52 +0000 Subject: [PATCH 067/447] connect: add function to parse multiple v1 capability values In a capability response, we can have multiple symref entries. In the future, we will also allow for multiple hash algorithms to be specified. To avoid duplication, expand the parse_feature_value function to take an optional offset where the parsing should begin next time. Add a wrapper function that allows us to query the next server feature value, and use it in the existing symref parsing code. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- connect.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/connect.c b/connect.c index ccc5274189da55..2b55a32d4dc10d 100644 --- a/connect.c +++ b/connect.c @@ -18,7 +18,8 @@ static char *server_capabilities_v1; static struct argv_array server_capabilities_v2 = ARGV_ARRAY_INIT; -static const char *parse_feature_value(const char *, const char *, int *); +static const char *parse_feature_value(const char *, const char *, int *, int *); +static const char *next_server_feature_value(const char *feature, int *len, int *offset); static int check_ref(const char *name, unsigned int flags) { @@ -180,17 +181,16 @@ static void parse_one_symref_info(struct string_list *symref, const char *val, i static void annotate_refs_with_symref_info(struct ref *ref) { struct string_list symref = STRING_LIST_INIT_DUP; - const char *feature_list = server_capabilities_v1; + int offset = 0; - while (feature_list) { + while (1) { int len; const char *val; - val = parse_feature_value(feature_list, "symref", &len); + val = next_server_feature_value("symref", &len, &offset); if (!val) break; parse_one_symref_info(&symref, val, len); - feature_list = val + 1; } string_list_sort(&symref); @@ -452,7 +452,7 @@ struct ref **get_remote_refs(int fd_out, struct packet_reader *reader, return list; } -static const char *parse_feature_value(const char *feature_list, const char *feature, int *lenp) +static const char *parse_feature_value(const char *feature_list, const char *feature, int *lenp, int *offset) { int len; @@ -460,6 +460,8 @@ static const char *parse_feature_value(const char *feature_list, const char *fea return NULL; len = strlen(feature); + if (offset) + feature_list += *offset; while (*feature_list) { const char *found = strstr(feature_list, feature); if (!found) @@ -474,9 +476,14 @@ static const char *parse_feature_value(const char *feature_list, const char *fea } /* feature with a value (e.g., 
"agent=git/1.2.3") */ else if (*value == '=') { + int end; + value++; + end = strcspn(value, " \t\n"); if (lenp) - *lenp = strcspn(value, " \t\n"); + *lenp = end; + if (offset) + *offset = value + end - feature_list; return value; } /* @@ -491,12 +498,17 @@ static const char *parse_feature_value(const char *feature_list, const char *fea int parse_feature_request(const char *feature_list, const char *feature) { - return !!parse_feature_value(feature_list, feature, NULL); + return !!parse_feature_value(feature_list, feature, NULL, NULL); +} + +static const char *next_server_feature_value(const char *feature, int *len, int *offset) +{ + return parse_feature_value(server_capabilities_v1, feature, len, offset); } const char *server_feature_value(const char *feature, int *len) { - return parse_feature_value(server_capabilities_v1, feature, len); + return parse_feature_value(server_capabilities_v1, feature, len, NULL); } int server_supports(const char *feature) From 1349ffed6dfa8ddcf9f48ede3b9cfd16fdde16fc Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:58:53 +0000 Subject: [PATCH 068/447] connect: add function to fetch value of a v2 server capability So far in protocol v2, all of our server capabilities that have values have not had values that we've been interested in parsing. For example, we receive but ignore the agent value. However, in a future commit, we're going to want to parse out the value of a server capability. To make this easy, add a function, server_feature_v2, that can fetch the value provided as part of the server capability. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- connect.c | 15 +++++++++++++++ connect.h | 1 + 2 files changed, 16 insertions(+) diff --git a/connect.c b/connect.c index 2b55a32d4dc10d..ad0e4e8e564a3c 100644 --- a/connect.c +++ b/connect.c @@ -84,6 +84,21 @@ int server_supports_v2(const char *c, int die_on_error) return 0; } +int server_feature_v2(const char *c, const char **v) +{ + int i; + + for (i = 0; i < server_capabilities_v2.argc; i++) { + const char *out; + if (skip_prefix(server_capabilities_v2.argv[i], c, &out) && + (*out == '=')) { + *v = out + 1; + return 1; + } + } + return 0; +} + int server_supports_feature(const char *c, const char *feature, int die_on_error) { diff --git a/connect.h b/connect.h index 5f2382e0186804..4d76a6017deafa 100644 --- a/connect.h +++ b/connect.h @@ -19,6 +19,7 @@ struct packet_reader; enum protocol_version discover_version(struct packet_reader *reader); int server_supports_v2(const char *c, int die_on_error); +int server_feature_v2(const char *c, const char **v); int server_supports_feature(const char *c, const char *feature, int die_on_error); From 9a9f0d3fc0888599723812be62fa2d7b3cc4d2d6 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:58:54 +0000 Subject: [PATCH 069/447] pkt-line: add a member for hash algorithm Add a member for the hash algorithm currently in use to the packet reader so it can parse references correctly. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- pkt-line.c | 1 + pkt-line.h | 3 +++ 2 files changed, 4 insertions(+) diff --git a/pkt-line.c b/pkt-line.c index a0e87b1e81408e..a4aea075decca2 100644 --- a/pkt-line.c +++ b/pkt-line.c @@ -479,6 +479,7 @@ void packet_reader_init(struct packet_reader *reader, int fd, reader->buffer_size = sizeof(packet_buffer); reader->options = options; reader->me = "git"; + reader->hash_algo = &hash_algos[GIT_HASH_SHA1]; } enum packet_read_status packet_reader_read(struct packet_reader *reader) diff --git a/pkt-line.h b/pkt-line.h index fef3a0d792d31b..4cd9435e9ad841 100644 --- a/pkt-line.h +++ b/pkt-line.h @@ -166,6 +166,9 @@ struct packet_reader { unsigned use_sideband : 1; const char *me; + + /* hash algorithm in use */ + const struct git_hash_algo *hash_algo; }; /* From 7c97af4d64100bf9ce4b335ee91e743378e2e181 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:58:55 +0000 Subject: [PATCH 070/447] transport: add a hash algorithm member When connecting to a remote system, we need to know what hash algorithm it will be using to talk to us. Add a hash_algo member to struct transport and add a function to read this data from the transport object. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- transport.c | 8 ++++++++ transport.h | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/transport.c b/transport.c index 15f5ba4e8f22c6..b43d985f90f926 100644 --- a/transport.c +++ b/transport.c @@ -311,6 +311,7 @@ static struct ref *handshake(struct transport *transport, int for_push, BUG("unknown protocol version"); } data->got_remote_heads = 1; + transport->hash_algo = reader.hash_algo; if (reader.line_peeked) BUG("buffer must be empty at the end of handshake()"); @@ -996,9 +997,16 @@ struct transport *transport_get(struct remote *remote, const char *url) ret->smart_options->receivepack = remote->receivepack; } + ret->hash_algo = &hash_algos[GIT_HASH_SHA1]; + return ret; } +const struct git_hash_algo *transport_get_hash_algo(struct transport *transport) +{ + return transport->hash_algo; +} + int transport_set_option(struct transport *transport, const char *name, const char *value) { diff --git a/transport.h b/transport.h index 4298c855be66bb..2a9f96c05a664c 100644 --- a/transport.h +++ b/transport.h @@ -115,6 +115,8 @@ struct transport { struct git_transport_options *smart_options; enum transport_family family; + + const struct git_hash_algo *hash_algo; }; #define TRANSPORT_PUSH_ALL (1<<0) @@ -243,6 +245,12 @@ int transport_push(struct repository *repo, const struct ref *transport_get_remote_refs(struct transport *transport, const struct argv_array *ref_prefixes); +/* + * Fetch the hash algorithm used by a remote. + * + * This can only be called after fetching the remote refs. + */ +const struct git_hash_algo *transport_get_hash_algo(struct transport *transport); int transport_fetch_refs(struct transport *transport, struct ref *refs); void transport_unlock_pack(struct transport *transport); int transport_disconnect(struct transport *transport); From 122037c2edec0c2bbcbfe52679fddc438165ab54 Mon Sep 17 00:00:00 2001 From: "brian m. 
carlson" Date: Mon, 25 May 2020 19:58:56 +0000 Subject: [PATCH 071/447] connect: add function to detect supported v1 hash functions Add a function, server_supports_hash, to see if the remote server supports a particular hash algorithm when speaking protocol v1. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- connect.c | 22 ++++++++++++++++++++++ connect.h | 1 + 2 files changed, 23 insertions(+) diff --git a/connect.c b/connect.c index ad0e4e8e564a3c..eaa13b41bbeb9d 100644 --- a/connect.c +++ b/connect.c @@ -511,6 +511,28 @@ static const char *parse_feature_value(const char *feature_list, const char *fea return NULL; } +int server_supports_hash(const char *desired, int *feature_supported) +{ + int offset = 0; + int len; + const char *hash; + + hash = next_server_feature_value("object-format", &len, &offset); + if (feature_supported) + *feature_supported = !!hash; + if (!hash) { + hash = hash_algos[GIT_HASH_SHA1].name; + len = strlen(hash); + } + while (hash) { + if (!xstrncmpz(desired, hash, len)) + return 1; + + hash = next_server_feature_value("object-format", &len, &offset); + } + return 0; +} + int parse_feature_request(const char *feature_list, const char *feature) { return !!parse_feature_value(feature_list, feature, NULL, NULL); diff --git a/connect.h b/connect.h index 4d76a6017deafa..fc75d6a45773af 100644 --- a/connect.h +++ b/connect.h @@ -18,6 +18,7 @@ int url_is_local_not_ssh(const char *url); struct packet_reader; enum protocol_version discover_version(struct packet_reader *reader); +int server_supports_hash(const char *desired, int *feature_supported); int server_supports_v2(const char *c, int die_on_error); int server_feature_v2(const char *c, const char **v); int server_supports_feature(const char *c, const char *feature, From 82db03abbbce3cfe28f42a16a16c2b5723974954 Mon Sep 17 00:00:00 2001 From: "brian m. 
carlson" Date: Mon, 25 May 2020 19:58:57 +0000 Subject: [PATCH 072/447] send-pack: detect when the server doesn't support our hash Detect when the server doesn't support our hash algorithm and abort. If the server does support our hash, advertise it as part of our capabilities. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- send-pack.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/send-pack.c b/send-pack.c index d1b7edc9957874..fb037568a946a3 100644 --- a/send-pack.c +++ b/send-pack.c @@ -362,6 +362,7 @@ int send_pack(struct send_pack_args *args, int atomic_supported = 0; int use_push_options = 0; int push_options_supported = 0; + int object_format_supported = 0; unsigned cmds_sent = 0; int ret; struct async demux; @@ -388,6 +389,9 @@ int send_pack(struct send_pack_args *args, if (server_supports("push-options")) push_options_supported = 1; + if (!server_supports_hash(the_hash_algo->name, &object_format_supported)) + die(_("the receiving end does not support this repository's hash algorithm")); + if (args->push_cert != SEND_PACK_PUSH_CERT_NEVER) { int len; push_cert_nonce = server_feature_value("push-cert", &len); @@ -428,6 +432,8 @@ int send_pack(struct send_pack_args *args, strbuf_addstr(&cap_buf, " atomic"); if (use_push_options) strbuf_addstr(&cap_buf, " push-options"); + if (object_format_supported) + strbuf_addf(&cap_buf, " object-format=%s", the_hash_algo->name); if (agent_supported) strbuf_addf(&cap_buf, " agent=%s", git_user_agent_sanitized()); From 84eca27aebeb11ce96441f957f2595c28d89fe36 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:58:58 +0000 Subject: [PATCH 073/447] connect: make parse_feature_value extern We're going to be using this function in other files, so no longer mark this function static. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- connect.c | 3 +-- connect.h | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/connect.c b/connect.c index eaa13b41bbeb9d..397fad7e32c37d 100644 --- a/connect.c +++ b/connect.c @@ -18,7 +18,6 @@ static char *server_capabilities_v1; static struct argv_array server_capabilities_v2 = ARGV_ARRAY_INIT; -static const char *parse_feature_value(const char *, const char *, int *, int *); static const char *next_server_feature_value(const char *feature, int *len, int *offset); static int check_ref(const char *name, unsigned int flags) @@ -467,7 +466,7 @@ struct ref **get_remote_refs(int fd_out, struct packet_reader *reader, return list; } -static const char *parse_feature_value(const char *feature_list, const char *feature, int *lenp, int *offset) +const char *parse_feature_value(const char *feature_list, const char *feature, int *lenp, int *offset) { int len; diff --git a/connect.h b/connect.h index fc75d6a45773af..ace074dcb6a024 100644 --- a/connect.h +++ b/connect.h @@ -19,6 +19,7 @@ struct packet_reader; enum protocol_version discover_version(struct packet_reader *reader); int server_supports_hash(const char *desired, int *feature_supported); +const char *parse_feature_value(const char *feature_list, const char *feature, int *lenp, int *offset); int server_supports_v2(const char *c, int die_on_error); int server_feature_v2(const char *c, const char **v); int server_supports_feature(const char *c, const char *feature, From 48bf1415896db0f890530dcd182f33c2f0df10a8 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:58:59 +0000 Subject: [PATCH 074/447] fetch-pack: detect when the server doesn't support our hash Detect when the server doesn't support our hash algorithm and abort. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- fetch-pack.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fetch-pack.c b/fetch-pack.c index f73a2ce6cba2af..1d277190e7c546 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -1039,6 +1039,8 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args, print_verbose(args, _("Server supports %s"), "deepen-relative"); else if (args->deepen_relative) die(_("Server does not support --deepen")); + if (!server_supports_hash(the_hash_algo->name, NULL)) + die(_("Server does not support this repository's object format")); if (!args->no_dependents) { mark_complete_and_common_ref(negotiator, args, &ref); From 7c601dc333b6cd86a84e77f41c968a3bb773ba36 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:59:00 +0000 Subject: [PATCH 075/447] connect: detect algorithm when fetching refs If we're fetching refs, detect the hash algorithm and parse the refs using that algorithm. As mentioned in the documentation, if multiple versions of the object-format capability are provided, we use the first. No known implementation supports multiple algorithms now, but they may in the future. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- connect.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/connect.c b/connect.c index 397fad7e32c37d..915f1736a029c3 100644 --- a/connect.c +++ b/connect.c @@ -220,12 +220,25 @@ static void annotate_refs_with_symref_info(struct ref *ref) static void process_capabilities(struct packet_reader *reader, int *linelen) { + const char *feat_val; + int feat_len; const char *line = reader->line; int nul_location = strlen(line); if (nul_location == *linelen) return; server_capabilities_v1 = xstrdup(line + nul_location + 1); *linelen = nul_location; + + feat_val = server_feature_value("object-format", &feat_len); + if (feat_val) { + char *hash_name = xstrndup(feat_val, feat_len); + int hash_algo = hash_algo_by_name(hash_name); + if (hash_algo != GIT_HASH_UNKNOWN) + reader->hash_algo = &hash_algos[hash_algo]; + free(hash_name); + } else { + reader->hash_algo = &hash_algos[GIT_HASH_SHA1]; + } } static int process_dummy_ref(const struct packet_reader *reader) @@ -234,7 +247,7 @@ static int process_dummy_ref(const struct packet_reader *reader) struct object_id oid; const char *name; - if (parse_oid_hex(line, &oid, &name)) + if (parse_oid_hex_algop(line, &oid, &name, reader->hash_algo)) return 0; if (*name != ' ') return 0; @@ -258,7 +271,7 @@ static int process_ref(const struct packet_reader *reader, int len, struct object_id old_oid; const char *name; - if (parse_oid_hex(line, &old_oid, &name)) + if (parse_oid_hex_algop(line, &old_oid, &name, reader->hash_algo)) return 0; if (*name != ' ') return 0; @@ -270,7 +283,7 @@ static int process_ref(const struct packet_reader *reader, int len, die(_("protocol error: unexpected capabilities^{}")); } else if (check_ref(name, flags)) { struct ref *ref = alloc_ref(name); - oidcpy(&ref->old_oid, &old_oid); + memcpy(ref->old_oid.hash, old_oid.hash, reader->hash_algo->rawsz); **list = ref; *list = &ref->next; } @@ -288,7 +301,7 @@ static int 
process_shallow(const struct packet_reader *reader, int len, if (!skip_prefix(line, "shallow ", &arg)) return 0; - if (get_oid_hex(arg, &old_oid)) + if (get_oid_hex_algop(arg, &old_oid, reader->hash_algo)) die(_("protocol error: expected shallow sha-1, got '%s'"), arg); if (!shallow_points) die(_("repository on the other end cannot be shallow")); From bb095d087557b9ded2245270d3fac64ddc774af6 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:59:01 +0000 Subject: [PATCH 076/447] builtin/receive-pack: detect when the server doesn't support our hash Detect when the server doesn't support our hash algorithm and abort. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- builtin/receive-pack.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index a4159b559e9679..0da8ca513402a4 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -1624,6 +1624,8 @@ static struct command *read_head_info(struct packet_reader *reader, linelen = strlen(reader->line); if (linelen < reader->pktlen) { const char *feature_list = reader->line + linelen + 1; + const char *hash = NULL; + int len = 0; if (parse_feature_request(feature_list, "report-status")) report_status = 1; if (parse_feature_request(feature_list, "side-band-64k")) @@ -1636,6 +1638,13 @@ static struct command *read_head_info(struct packet_reader *reader, if (advertise_push_options && parse_feature_request(feature_list, "push-options")) use_push_options = 1; + hash = parse_feature_value(feature_list, "object-format", &len, NULL); + if (!hash) { + hash = hash_algos[GIT_HASH_SHA1].name; + len = strlen(hash); + } + if (xstrncmpz(the_hash_algo->name, hash, len)) + die("error: unsupported object format '%s'", hash); } if (!strcmp(reader->line, "push-cert")) { From 452e35684f97be1ef3b1fa608b8ec7fbaebf18a6 Mon Sep 17 00:00:00 2001 From: "brian m. 
carlson" Date: Mon, 25 May 2020 19:59:02 +0000 Subject: [PATCH 077/447] docs: update remote helper docs for object-format extensions Update the remote helper docs to document the object-format extensions we will implement in remote-curl and the transport helper code shortly. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- Documentation/gitremote-helpers.txt | 33 +++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/Documentation/gitremote-helpers.txt b/Documentation/gitremote-helpers.txt index f48a031dc346a3..26f32e44210503 100644 --- a/Documentation/gitremote-helpers.txt +++ b/Documentation/gitremote-helpers.txt @@ -238,6 +238,9 @@ the remote repository. `--signed-tags=verbatim` to linkgit:git-fast-export[1]. In the absence of this capability, Git will use `--signed-tags=warn-strip`. +'object-format':: + This indicates that the helper is able to interact with the remote + side using an explicit hash algorithm extension. COMMANDS @@ -257,12 +260,14 @@ Support for this command is mandatory. 'list':: Lists the refs, one per line, in the format "<value> <name> [<attr> ...]". The value may be a hex sha1 hash, "@<dest>" for - a symref, or "?" to indicate that the helper could not get the - value of the ref. A space-separated list of attributes follows - the name; unrecognized attributes are ignored. The list ends - with a blank line. + a symref, ":<keyword> <value>" for a key-value pair, or + "?" to indicate that the helper could not get the value of the + ref. A space-separated list of attributes follows the name; + unrecognized attributes are ignored. The list ends with a + blank line. + See REF LIST ATTRIBUTES for a list of currently defined attributes. +See REF LIST KEYWORDS for a list of currently defined keywords. + Supported if the helper has the "fetch" or "import" capability. @@ -430,6 +435,18 @@ attributes are defined. This ref is unchanged since the last import or fetch, although the helper cannot necessarily determine what value that produced.
+REF LIST KEYWORDS +----------------- + +The 'list' command may produce a list of key-value pairs. +The following keys are defined. + +'object-format':: + The refs are using the given hash algorithm. This keyword is only + used if the server and client both support the object-format + extension. + + OPTIONS ------- @@ -514,6 +531,14 @@ set by Git if the remote helper has the 'option' capability. transaction. If successful, all refs will be updated, or none will. If the remote side does not support this capability, the push will fail. +'option object-format' {'true'|algorithm}:: + If 'true', indicate that the caller wants hash algorithm information + to be passed back from the remote. This mode is used when fetching + refs. ++ +If set to an algorithm, indicate that the caller wants to interact with +the remote side using that algorithm. + SEE ALSO -------- linkgit:git-remote[1] From 8b85ee4f47aadc23c8806bff5540baf819bbde95 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:59:03 +0000 Subject: [PATCH 078/447] transport-helper: implement object-format extensions Implement the object-format extensions that let us determine the hash algorithm in use when pushing or pulling data. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- transport-helper.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/transport-helper.c b/transport-helper.c index a46afcb69db615..ae33b0eea7f725 100644 --- a/transport-helper.c +++ b/transport-helper.c @@ -32,7 +32,8 @@ struct helper_data { signed_tags : 1, check_connectivity : 1, no_disconnect_req : 1, - no_private_update : 1; + no_private_update : 1, + object_format : 1; /* * As an optimization, the transport code may invoke fetch before @@ -207,6 +208,8 @@ static struct child_process *get_helper(struct transport *transport) data->import_marks = xstrdup(arg); } else if (starts_with(capname, "no-private-update")) { data->no_private_update = 1; + } else if (starts_with(capname, "object-format")) { + data->object_format = 1; } else if (mandatory) { die(_("unknown mandatory capability %s; this remote " "helper probably needs newer version of Git"), @@ -1103,6 +1106,12 @@ static struct ref *get_refs_list_using_list(struct transport *transport, data->get_refs_list_called = 1; helper = get_helper(transport); + if (data->object_format) { + write_str_in_full(helper->in, "option object-format\n"); + if (recvline(data, &buf) || strcmp(buf.buf, "ok")) + exit(128); + } + if (data->push && for_push) write_str_in_full(helper->in, "list for-push\n"); else @@ -1115,6 +1124,17 @@ static struct ref *get_refs_list_using_list(struct transport *transport, if (!*buf.buf) break; + else if (buf.buf[0] == ':') { + const char *value; + if (skip_prefix(buf.buf, ":object-format ", &value)) { + int algo = hash_algo_by_name(value); + if (algo == GIT_HASH_UNKNOWN) + die(_("unsupported object format '%s'"), + value); + transport->hash_algo = &hash_algos[algo]; + } + continue; + } eov = strchr(buf.buf, ' '); if (!eov) @@ -1127,7 +1147,7 @@ static struct ref *get_refs_list_using_list(struct transport *transport, if (buf.buf[0] == '@') (*tail)->symref = xstrdup(buf.buf + 1); else if (buf.buf[0] != '?') - 
get_oid_hex(buf.buf, &(*tail)->old_oid); + get_oid_hex_algop(buf.buf, &(*tail)->old_oid, transport->hash_algo); if (eon) { if (has_attribute(eon + 1, "unchanged")) { (*tail)->status |= REF_STATUS_UPTODATE; From 7f60501775b2a0e0dcabb98fde3eb46fd980a8cc Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:59:04 +0000 Subject: [PATCH 079/447] remote-curl: implement object-format extensions Implement the object-format extensions that let us determine the hash algorithm in use when pushing, pulling, and fetching. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- remote-curl.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/remote-curl.c b/remote-curl.c index 1c9aa3d0ab978c..3ed0dfec1bf058 100644 --- a/remote-curl.c +++ b/remote-curl.c @@ -41,7 +41,9 @@ struct options { deepen_relative : 1, from_promisor : 1, no_dependents : 1, - atomic : 1; + atomic : 1, + object_format : 1; + const struct git_hash_algo *hash_algo; }; static struct options options; static struct string_list cas_options = STRING_LIST_INIT_DUP; @@ -190,6 +192,16 @@ static int set_option(const char *name, const char *value) } else if (!strcmp(name, "filter")) { options.filter = xstrdup(value); return 0; + } else if (!strcmp(name, "object-format")) { + int algo; + options.object_format = 1; + if (strcmp(value, "true")) { + algo = hash_algo_by_name(value); + if (algo == GIT_HASH_UNKNOWN) + die("unknown object format '%s'", value); + options.hash_algo = &hash_algos[algo]; + } + return 0; } else { return 1 /* unsupported */; } @@ -231,6 +243,7 @@ static struct ref *parse_git_refs(struct discovery *heads, int for_push) case protocol_v0: get_remote_heads(&reader, &list, for_push ? 
REF_NORMAL : 0, NULL, &heads->shallow); + options.hash_algo = reader.hash_algo; break; case protocol_unknown_version: BUG("unknown protocol version"); @@ -509,6 +522,9 @@ static struct ref *get_refs(int for_push) static void output_refs(struct ref *refs) { struct ref *posn; + if (options.object_format && options.hash_algo) { + printf(":object-format %s\n", options.hash_algo->name); + } for (posn = refs; posn; posn = posn->next) { if (posn->symref) printf("@%s %s\n", posn->symref, posn->name); @@ -1439,6 +1455,7 @@ int cmd_main(int argc, const char **argv) printf("option\n"); printf("push\n"); printf("check-connectivity\n"); + printf("object-format\n"); printf("\n"); fflush(stdout); } else if (skip_prefix(buf.buf, "stateless-connect ", &arg)) { From b65dc2cebd6ac7d869895297ed5061af15428544 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:59:05 +0000 Subject: [PATCH 080/447] builtin/clone: initialize hash algorithm properly When performing a clone, we don't know what hash algorithm the other end will support. Currently, we don't support fetching data belonging to a different algorithm, so we must know what algorithm the remote side is using in order to properly initialize the repository. We can know that only after fetching the refs, so if the remote side has any references, use that information to reinitialize the repository with the correct hash algorithm information. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- builtin/clone.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/builtin/clone.c b/builtin/clone.c index cb48a291caf9a3..f27d38bc8e50d6 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -1217,6 +1217,15 @@ int cmd_clone(int argc, const char **argv, const char *prefix) refs = transport_get_remote_refs(transport, &ref_prefixes); if (refs) { + int hash_algo = hash_algo_by_ptr(transport_get_hash_algo(transport)); + + /* + * Now that we know what algorithm the remote side is using, + * let's set ours to the same thing. + */ + initialize_repository_version(hash_algo); + repo_set_hash_algo(the_repository, hash_algo); + mapped_refs = wanted_peer_refs(refs, &remote->fetch); /* * transport_get_remote_refs() may return refs with null sha-1 From 9dc78c20dc2aa5ed073a95dc35880d940658a5be Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:59:06 +0000 Subject: [PATCH 081/447] t5562: pass object-format in synthesized test data Ensure that we pass the object-format capability in the synthesized test data so that this test works with algorithms other than SHA-1. In addition, add an additional test using the old data for when we're using SHA-1 so that we can be sure that we preserve backwards compatibility with servers not offering the object-format capability. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- t/t5562-http-backend-content-length.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/t/t5562-http-backend-content-length.sh b/t/t5562-http-backend-content-length.sh index 3f4ac71f83b571..c6ec625497db44 100755 --- a/t/t5562-http-backend-content-length.sh +++ b/t/t5562-http-backend-content-length.sh @@ -46,6 +46,7 @@ ssize_b100dots() { } test_expect_success 'setup' ' + test_oid_init && HTTP_CONTENT_ENCODING="identity" && export HTTP_CONTENT_ENCODING && git config http.receivepack true && @@ -62,8 +63,8 @@ test_expect_success 'setup' ' test_copy_bytes 10 fetch_body.trunc && hash_next=$(git commit-tree -p HEAD -m next HEAD^{tree}) && { - printf "%s %s refs/heads/newbranch\\0report-status\\n" \ - "$ZERO_OID" "$hash_next" | packetize && + printf "%s %s refs/heads/newbranch\\0report-status object-format=%s\\n" \ + "$ZERO_OID" "$hash_next" "$(test_oid algo)" | packetize && printf 0000 && echo "$hash_next" | git pack-objects --stdout } >push_body && From 4b831208bb365fedd0cce286e0c27627598d9393 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:59:07 +0000 Subject: [PATCH 082/447] fetch-pack: parse and advertise the object-format capability Parse the server's object-format capability and respond accordingly, dying if there is a mismatch. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- fetch-pack.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/fetch-pack.c b/fetch-pack.c index 1d277190e7c546..d5a919f2aaa4d5 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -1179,6 +1179,7 @@ static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out, int sideband_all, int seen_ack) { int ret = 0; + const char *hash_name; struct strbuf req_buf = STRBUF_INIT; if (server_supports_v2("fetch", 1)) @@ -1193,6 +1194,17 @@ static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out, args->server_options->items[i].string); } + if (server_feature_v2("object-format", &hash_name)) { + int hash_algo = hash_algo_by_name(hash_name); + if (hash_algo_by_ptr(the_hash_algo) != hash_algo) + die(_("mismatched algorithms: client %s; server %s"), + the_hash_algo->name, hash_name); + packet_write_fmt(fd_out, "object-format=%s", the_hash_algo->name); + } else if (hash_algo_by_ptr(the_hash_algo) != GIT_HASH_SHA1) { + die(_("the server does not support algorithm '%s'"), + the_hash_algo->name); + } + packet_buf_delim(&req_buf); if (args->use_thin_pack) packet_buf_write(&req_buf, "thin-pack"); From d553acebeeda584e0e749cfe04dad450c667de90 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:59:08 +0000 Subject: [PATCH 083/447] setup: set the_repository's hash algo when checking format When we're checking the repository's format, set the hash algorithm at the same time. This ensures that we perform a suitable initialization early enough to avoid confusing any parts of the code. If we defer until later, we can end up with portions of the code which are confused about the hash algorithm, resulting in segfaults when working with SHA-256 repositories. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- setup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.c b/setup.c index 65fe5ecefbe196..019a1c6367b02e 100644 --- a/setup.c +++ b/setup.c @@ -1273,6 +1273,7 @@ void check_repository_format(struct repository_format *fmt) fmt = &repo_fmt; check_repository_format_gently(get_git_dir(), fmt, NULL); startup_info->have_repository = 1; + repo_set_hash_algo(the_repository, fmt->hash_algo); clear_repository_format(&repo_fmt); } From 49c9a2ffe59b2e89ed4fd3f90b3dbc5d67760d74 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:59:09 +0000 Subject: [PATCH 084/447] t3200: mark assertion with SHA1 prerequisite One of the test assertions in this test checks that git branch -m works even without a .git/config file. However, if the repository requires configuration extensions, such as because it uses a non-SHA-1 algorithm, this assertion will fail. Mark the assertion as requiring SHA-1. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- t/t3200-branch.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/t3200-branch.sh b/t/t3200-branch.sh index 411a70b0ce966f..2a3fedc6b0f1f6 100755 --- a/t/t3200-branch.sh +++ b/t/t3200-branch.sh @@ -402,7 +402,7 @@ EOF mv .git/config .git/config-saved -test_expect_success 'git branch -m q q2 without config should succeed' ' +test_expect_success SHA1 'git branch -m q q2 without config should succeed' ' git branch -m q q2 && git branch -m q2 q ' From 629dffc461f3631bb7acfe905d805caa38b49dfa Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:59:10 +0000 Subject: [PATCH 085/447] packfile: compute and use the index CRC offset Both v2 pack index files and the v3 format specified as part of the NewHash work have similar data starting at the CRC table. Much of the existing code wants to read either this table or the offset entries following it, and in doing so computes the offset each time. 
In order to share as much code between v2 and v3, compute the offset of the CRC table and store it when the pack is opened. Use this value to compute offsets to not only the CRC table, but to the offset entries beyond it. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- builtin/index-pack.c | 6 +----- object-store.h | 1 + packfile.c | 1 + 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/builtin/index-pack.c b/builtin/index-pack.c index f176dd28c870d5..7bea1fba52a9d3 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -1555,13 +1555,9 @@ static void read_v2_anomalous_offsets(struct packed_git *p, { const uint32_t *idx1, *idx2; uint32_t i; - const uint32_t hashwords = the_hash_algo->rawsz / sizeof(uint32_t); /* The address of the 4-byte offset table */ - idx1 = (((const uint32_t *)p->index_data) - + 2 /* 8-byte header */ - + 256 /* fan out */ - + hashwords * p->num_objects /* object ID table */ + idx1 = (((const uint32_t *)((const uint8_t *)p->index_data + p->crc_offset)) + p->num_objects /* CRC32 table */ ); diff --git a/object-store.h b/object-store.h index d1e490f2035d2f..f439d47af81d1b 100644 --- a/object-store.h +++ b/object-store.h @@ -70,6 +70,7 @@ struct packed_git { size_t index_size; uint32_t num_objects; uint32_t num_bad_objects; + uint32_t crc_offset; unsigned char *bad_object_sha1; int index_version; time_t mtime; diff --git a/packfile.c b/packfile.c index f4e752996dbcca..6ab5233613e241 100644 --- a/packfile.c +++ b/packfile.c @@ -178,6 +178,7 @@ int load_idx(const char *path, const unsigned int hashsz, void *idx_map, */ (sizeof(off_t) <= 4)) return error("pack too large for current definition of off_t in %s", path); + p->crc_offset = 8 + 4 * 256 + nr * hashsz; } p->index_version = version; From 1610dda8ae5190438b3de205c2a0f87dfe878ca3 Mon Sep 17 00:00:00 2001 From: "brian m. 
carlson" Date: Mon, 25 May 2020 19:59:11 +0000 Subject: [PATCH 086/447] t5302: modernize test formatting Our style these days is to place the description and the opening quote of the body on the same line as test_expect_success (if it fits), to place the trailing quote on a line by itself after the body, and to use tabs. Since we're going to be making several significant changes to this test, modernize the style to aid in readability of the subsequent patches. This patch should have no functional change. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- t/t5302-pack-index.sh | 356 +++++++++++++++++++++--------------------- 1 file changed, 182 insertions(+), 174 deletions(-) diff --git a/t/t5302-pack-index.sh b/t/t5302-pack-index.sh index ad07f2f7fc268d..8981c9b90ebe15 100755 --- a/t/t5302-pack-index.sh +++ b/t/t5302-pack-index.sh @@ -7,65 +7,65 @@ test_description='pack index with 64-bit offsets and object CRC' . ./test-lib.sh test_expect_success 'setup' ' - test_oid_init && - rawsz=$(test_oid rawsz) && - rm -rf .git && - git init && - git config pack.threads 1 && - i=1 && - while test $i -le 100 - do - iii=$(printf '%03i' $i) - test-tool genrandom "bar" 200 > wide_delta_$iii && - test-tool genrandom "baz $iii" 50 >> wide_delta_$iii && - test-tool genrandom "foo"$i 100 > deep_delta_$iii && - test-tool genrandom "foo"$(expr $i + 1) 100 >> deep_delta_$iii && - test-tool genrandom "foo"$(expr $i + 2) 100 >> deep_delta_$iii && - echo $iii >file_$iii && - test-tool genrandom "$iii" 8192 >>file_$iii && - git update-index --add file_$iii deep_delta_$iii wide_delta_$iii && - i=$(expr $i + 1) || return 1 - done && - { echo 101 && test-tool genrandom 100 8192; } >file_101 && - git update-index --add file_101 && - tree=$(git write-tree) && - commit=$(git commit-tree $tree obj-list && - git update-ref HEAD $commit + test_oid_init && + rawsz=$(test_oid rawsz) && + rm -rf .git && + git init && + git config pack.threads 1 && + i=1 && + while test $i -le 100 + do + 
iii=$(printf '%03i' $i) + test-tool genrandom "bar" 200 > wide_delta_$iii && + test-tool genrandom "baz $iii" 50 >> wide_delta_$iii && + test-tool genrandom "foo"$i 100 > deep_delta_$iii && + test-tool genrandom "foo"$(expr $i + 1) 100 >> deep_delta_$iii && + test-tool genrandom "foo"$(expr $i + 2) 100 >> deep_delta_$iii && + echo $iii >file_$iii && + test-tool genrandom "$iii" 8192 >>file_$iii && + git update-index --add file_$iii deep_delta_$iii wide_delta_$iii && + i=$(expr $i + 1) || return 1 + done && + { echo 101 && test-tool genrandom 100 8192; } >file_101 && + git update-index --add file_101 && + tree=$(git write-tree) && + commit=$(git commit-tree $tree obj-list && + git update-ref HEAD $commit ' -test_expect_success \ - 'pack-objects with index version 1' \ - 'pack1=$(git pack-objects --index-version=1 test-1 &1) || ! (echo "$msg" | grep "pack too large .* off_t") @@ -91,21 +91,21 @@ else say "# skipping tests concerning 64-bit offsets" fi -test_expect_success OFF64_T \ - 'index v2: verify a pack with some 64-bit offsets' \ - 'git verify-pack -v "test-3-${pack3}.pack"' +test_expect_success OFF64_T 'index v2: verify a pack with some 64-bit offsets' ' + git verify-pack -v "test-3-${pack3}.pack" +' -test_expect_success OFF64_T \ - '64-bit offsets: should be different from previous index v2 results' \ - '! cmp "test-2-${pack2}.idx" "test-3-${pack3}.idx"' +test_expect_success OFF64_T '64-bit offsets: should be different from previous index v2 results' ' + ! 
cmp "test-2-${pack2}.idx" "test-3-${pack3}.idx" +' -test_expect_success OFF64_T \ - 'index v2: force some 64-bit offsets with index-pack' \ - 'git index-pack --index-version=2,0x40000 -o 3.idx "test-1-${pack1}.pack"' +test_expect_success OFF64_T 'index v2: force some 64-bit offsets with index-pack' ' + git index-pack --index-version=2,0x40000 -o 3.idx "test-1-${pack1}.pack" +' -test_expect_success OFF64_T \ - '64-bit offsets: index-pack result should match pack-objects one' \ - 'cmp "test-3-${pack3}.idx" "3.idx"' +test_expect_success OFF64_T '64-bit offsets: index-pack result should match pack-objects one' ' + cmp "test-3-${pack3}.idx" "3.idx" +' test_expect_success OFF64_T 'index-pack --verify on 64-bit offset v2 (cheat)' ' # This cheats by knowing which lower offset should still be encoded @@ -120,135 +120,143 @@ test_expect_success OFF64_T 'index-pack --verify on 64-bit offset v2' ' # returns the object number for given object in given pack index index_obj_nr() { - idx_file=$1 - object_sha1=$2 - nr=0 - git show-index < $idx_file | - while read offs sha1 extra - do - nr=$(($nr + 1)) - test "$sha1" = "$object_sha1" || continue - echo "$(($nr - 1))" - break - done + idx_file=$1 + object_sha1=$2 + nr=0 + git show-index < $idx_file | + while read offs sha1 extra + do + nr=$(($nr + 1)) + test "$sha1" = "$object_sha1" || continue + echo "$(($nr - 1))" + break + done } # returns the pack offset for given object as found in given pack index index_obj_offset() { - idx_file=$1 - object_sha1=$2 - git show-index < $idx_file | grep $object_sha1 | - ( read offs extra && echo "$offs" ) + idx_file=$1 + object_sha1=$2 + git show-index < $idx_file | grep $object_sha1 | + ( read offs extra && echo "$offs" ) } -test_expect_success \ - '[index v1] 1) stream pack to repository' \ - 'git index-pack --index-version=1 --stdin < "test-1-${pack1}.pack" && - git prune-packed && - git count-objects | ( read nr rest && test "$nr" -eq 1 ) && - cmp "test-1-${pack1}.pack" 
".git/objects/pack/pack-${pack1}.pack" && - cmp "test-1-${pack1}.idx" ".git/objects/pack/pack-${pack1}.idx"' +test_expect_success '[index v1] 1) stream pack to repository' ' + git index-pack --index-version=1 --stdin < "test-1-${pack1}.pack" && + git prune-packed && + git count-objects | ( read nr rest && test "$nr" -eq 1 ) && + cmp "test-1-${pack1}.pack" ".git/objects/pack/pack-${pack1}.pack" && + cmp "test-1-${pack1}.idx" ".git/objects/pack/pack-${pack1}.idx" +' test_expect_success \ - '[index v1] 2) create a stealth corruption in a delta base reference' \ - '# This test assumes file_101 is a delta smaller than 16 bytes. - # It should be against file_100 but we substitute its base for file_099 - sha1_101=$(git hash-object file_101) && - sha1_099=$(git hash-object file_099) && - offs_101=$(index_obj_offset 1.idx $sha1_101) && - nr_099=$(index_obj_nr 1.idx $sha1_099) && - chmod +w ".git/objects/pack/pack-${pack1}.pack" && - recordsz=$((rawsz + 4)) && - dd of=".git/objects/pack/pack-${pack1}.pack" seek=$(($offs_101 + 1)) \ - if=".git/objects/pack/pack-${pack1}.idx" \ - skip=$((4 + 256 * 4 + $nr_099 * recordsz)) \ - bs=1 count=$rawsz conv=notrunc && - git cat-file blob $sha1_101 > file_101_foo1' + '[index v1] 2) create a stealth corruption in a delta base reference' ' + # This test assumes file_101 is a delta smaller than 16 bytes. 
+ # It should be against file_100 but we substitute its base for file_099 + sha1_101=$(git hash-object file_101) && + sha1_099=$(git hash-object file_099) && + offs_101=$(index_obj_offset 1.idx $sha1_101) && + nr_099=$(index_obj_nr 1.idx $sha1_099) && + chmod +w ".git/objects/pack/pack-${pack1}.pack" && + recordsz=$((rawsz + 4)) && + dd of=".git/objects/pack/pack-${pack1}.pack" seek=$(($offs_101 + 1)) \ + if=".git/objects/pack/pack-${pack1}.idx" \ + skip=$((4 + 256 * 4 + $nr_099 * recordsz)) \ + bs=1 count=$rawsz conv=notrunc && + git cat-file blob $sha1_101 > file_101_foo1 +' test_expect_success \ - '[index v1] 3) corrupted delta happily returned wrong data' \ - 'test -f file_101_foo1 && ! cmp file_101 file_101_foo1' + '[index v1] 3) corrupted delta happily returned wrong data' ' + test -f file_101_foo1 && ! cmp file_101 file_101_foo1 +' test_expect_success \ - '[index v1] 4) confirm that the pack is actually corrupted' \ - 'test_must_fail git fsck --full $commit' + '[index v1] 4) confirm that the pack is actually corrupted' ' + test_must_fail git fsck --full $commit +' test_expect_success \ - '[index v1] 5) pack-objects happily reuses corrupted data' \ - 'pack4=$(git pack-objects test-4 file_101_foo2' + '[index v2] 2) create a stealth corruption in a delta base reference' ' + # This test assumes file_101 is a delta smaller than 16 bytes. 
+ # It should be against file_100 but we substitute its base for file_099 + sha1_101=$(git hash-object file_101) && + sha1_099=$(git hash-object file_099) && + offs_101=$(index_obj_offset 1.idx $sha1_101) && + nr_099=$(index_obj_nr 1.idx $sha1_099) && + chmod +w ".git/objects/pack/pack-${pack1}.pack" && + dd of=".git/objects/pack/pack-${pack1}.pack" seek=$(($offs_101 + 1)) \ + if=".git/objects/pack/pack-${pack1}.idx" \ + skip=$((8 + 256 * 4 + $nr_099 * rawsz)) \ + bs=1 count=$rawsz conv=notrunc && + git cat-file blob $sha1_101 > file_101_foo2 +' test_expect_success \ - '[index v2] 3) corrupted delta happily returned wrong data' \ - 'test -f file_101_foo2 && ! cmp file_101 file_101_foo2' + '[index v2] 3) corrupted delta happily returned wrong data' ' + test -f file_101_foo2 && ! cmp file_101 file_101_foo2 +' test_expect_success \ - '[index v2] 4) confirm that the pack is actually corrupted' \ - 'test_must_fail git fsck --full $commit' + '[index v2] 4) confirm that the pack is actually corrupted' ' + test_must_fail git fsck --full $commit +' test_expect_success \ - '[index v2] 5) pack-objects refuses to reuse corrupted data' \ - 'test_must_fail git pack-objects test-5 /dev/null || exit 1 - done /dev/null || exit 1 + done wrong-tag <wrong-tag <err && - grep "^warning:.* expected .tagger. line" err + tag=$(git hash-object -t tag -w --stdin err && + grep "^warning:.* expected .tagger. line" err ' test_expect_success 'index-pack --fsck-objects also warns upon missing tagger in tag' ' - git index-pack --fsck-objects tag-test-${pack1}.pack 2>err && - grep "^warning:.* expected .tagger. line" err + git index-pack --fsck-objects tag-test-${pack1}.pack 2>err && + grep "^warning:.* expected .tagger. line" err ' test_done From 88a09a557c7571760b3ded75764bf3b1fd0e8bf0 Mon Sep 17 00:00:00 2001 From: "brian m. 
carlson" Date: Mon, 25 May 2020 19:59:12 +0000 Subject: [PATCH 087/447] builtin/show-index: provide options to determine hash algo show-index is capable of reading any possible index file whether or not the index is inside a repository. However, because our index files lack metadata about the hash algorithm in use, it's not possible to autodetect the algorithm that a particular index file is using. In order to allow us to read index files of any algorithm, let's set up the .git directory gently so that we default to the algorithm for the current repository, and add an --object-format option to allow users to override this setting and continue to run show-index outside of a repository altogether. Let's also document this new option so that people can find it and use it. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- Documentation/git-show-index.txt | 11 ++++++++++- builtin/show-index.c | 29 ++++++++++++++++++++++++----- git.c | 2 +- 3 files changed, 35 insertions(+), 7 deletions(-) diff --git a/Documentation/git-show-index.txt b/Documentation/git-show-index.txt index 424e4ba84cf9b0..39b1d8eaa1456f 100644 --- a/Documentation/git-show-index.txt +++ b/Documentation/git-show-index.txt @@ -9,7 +9,7 @@ git-show-index - Show packed archive index SYNOPSIS -------- [verse] -'git show-index' +'git show-index' [--object-format=<hash-algorithm>] DESCRIPTION @@ -36,6 +36,15 @@ Note that you can get more information on a packfile by calling linkgit:git-verify-pack[1]. However, as this command considers only the index file itself, it's both faster and more flexible. +OPTIONS +------- + +--object-format=<hash-algorithm>:: + Specify the given object format (hash algorithm) for the index file. The + valid values are 'sha1' and (if enabled) 'sha256'. The default is the + algorithm for the current repository (set by `extensions.objectFormat`), or + 'sha1' if no value is set or outside a repository.
+ GIT --- Part of the linkgit:git[1] suite diff --git a/builtin/show-index.c b/builtin/show-index.c index 0826f6a5a2c220..8106b03a6b3243 100644 --- a/builtin/show-index.c +++ b/builtin/show-index.c @@ -1,9 +1,12 @@ #include "builtin.h" #include "cache.h" #include "pack.h" +#include "parse-options.h" -static const char show_index_usage[] = -"git show-index"; +static const char *const show_index_usage[] = { + "git show-index [--object-format=]", + NULL +}; int cmd_show_index(int argc, const char **argv, const char *prefix) { @@ -11,10 +14,26 @@ int cmd_show_index(int argc, const char **argv, const char *prefix) unsigned nr; unsigned int version; static unsigned int top_index[256]; - const unsigned hashsz = the_hash_algo->rawsz; + unsigned hashsz; + const char *hash_name = NULL; + int hash_algo; + const struct option show_index_options[] = { + OPT_STRING(0, "object-format", &hash_name, N_("hash-algorithm"), + N_("specify the hash algorithm to use")), + OPT_END() + }; + + argc = parse_options(argc, argv, prefix, show_index_options, show_index_usage, 0); + + if (hash_name) { + hash_algo = hash_algo_by_name(hash_name); + if (hash_algo == GIT_HASH_UNKNOWN) + die(_("Unknown hash algorithm")); + repo_set_hash_algo(the_repository, hash_algo); + } + + hashsz = the_hash_algo->rawsz; - if (argc != 1) - usage(show_index_usage); if (fread(top_index, 2 * 4, 1, stdin) != 1) die("unable to read header"); if (top_index[0] == htonl(PACK_IDX_SIGNATURE)) { diff --git a/git.c b/git.c index 2e4efb4ff08f16..e53e8159a2a63f 100644 --- a/git.c +++ b/git.c @@ -573,7 +573,7 @@ static struct cmd_struct commands[] = { { "shortlog", cmd_shortlog, RUN_SETUP_GENTLY | USE_PAGER }, { "show", cmd_show, RUN_SETUP }, { "show-branch", cmd_show_branch, RUN_SETUP }, - { "show-index", cmd_show_index }, + { "show-index", cmd_show_index, RUN_SETUP_GENTLY }, { "show-ref", cmd_show_ref, RUN_SETUP }, { "sparse-checkout", cmd_sparse_checkout, RUN_SETUP | NEED_WORK_TREE }, { "stage", cmd_add, RUN_SETUP | 
NEED_WORK_TREE }, From 059d8066f843a09c615be2b7c7e42fd393852ed6 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:59:13 +0000 Subject: [PATCH 088/447] t1302: expect repo format version 1 for SHA-256 When using SHA-256, we need to take advantage of the extensions section in the config file, so we need to use repository format version 1. Update the test to look for the correct value. Note that test_oid produces a value without a trailing newline, so use echo to ensure we print a trailing newline to compare it correctly against the actual results. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- t/t1302-repo-version.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/t/t1302-repo-version.sh b/t/t1302-repo-version.sh index ce4cff13bbced5..d60c042ce88b53 100755 --- a/t/t1302-repo-version.sh +++ b/t/t1302-repo-version.sh @@ -8,6 +8,10 @@ test_description='Test repository version check' . ./test-lib.sh test_expect_success 'setup' ' + test_oid_cache <<-\EOF && + version sha1:0 + version sha256:1 + EOF cat >test.patch <<-\EOF && diff --git a/test.txt b/test.txt new file mode 100644 @@ -23,7 +27,7 @@ test_expect_success 'setup' ' ' test_expect_success 'gitdir selection on normal repos' ' - echo 0 >expect && + echo $(test_oid version) >expect && git config core.repositoryformatversion >actual && git -C test config core.repositoryformatversion >actual2 && test_cmp expect actual && From 7f46e7ead1d82538866265228f9d743f0e93e17a Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:59:14 +0000 Subject: [PATCH 089/447] Documentation/technical: document object-format for protocol v2 Document the object-format extension for protocol v2. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- Documentation/technical/protocol-v2.txt | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Documentation/technical/protocol-v2.txt b/Documentation/technical/protocol-v2.txt index 7e3766cafb39cc..107e421fb76955 100644 --- a/Documentation/technical/protocol-v2.txt +++ b/Documentation/technical/protocol-v2.txt @@ -453,3 +453,12 @@ included in a request. This is done by sending each option as a a request. The provided options must not contain a NUL or LF character. + + object-format +~~~~~~~~~~~~~~~ + +The server can advertise the `object-format` capability with a value `X` (in the +form `object-format=X`) to notify the client that the server is able to deal +with objects using hash algorithm X. If not specified, the server is assumed to +only handle SHA-1. If the client would like to use a hash algorithm other than +SHA-1, it should specify its object-format string. From 67e9a70741c577b6e49bd60556779c7ab32ae4f8 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:59:15 +0000 Subject: [PATCH 090/447] connect: pass full packet reader when parsing v2 refs When we're parsing refs, we need to know not only what the line we're parsing is, but also the hash algorithm we should use to parse it, which is stored in the reader object. Pass the packet reader object through to the protocol v2 ref parsing function. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- connect.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/connect.c b/connect.c index 915f1736a029c3..1d05bc56eda2e6 100644 --- a/connect.c +++ b/connect.c @@ -374,7 +374,7 @@ struct ref **get_remote_heads(struct packet_reader *reader, } /* Returns 1 when a valid ref has been added to `list`, 0 otherwise */ -static int process_ref_v2(const char *line, struct ref ***list) +static int process_ref_v2(struct packet_reader *reader, struct ref ***list) { int ret = 1; int i = 0; @@ -382,6 +382,7 @@ static int process_ref_v2(const char *line, struct ref ***list) struct ref *ref; struct string_list line_sections = STRING_LIST_INIT_DUP; const char *end; + const char *line = reader->line; /* * Ref lines have a number of fields which are space deliminated. The @@ -469,7 +470,7 @@ struct ref **get_remote_refs(int fd_out, struct packet_reader *reader, /* Process response from server */ while (packet_reader_read(reader) == PACKET_READ_NORMAL) { - if (!process_ref_v2(reader->line, &list)) + if (!process_ref_v2(reader, &list)) die(_("invalid ls-refs response: %s"), reader->line); } From ab67235bc4900d8203a1a6b6f33cf8afa845e43e Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:59:16 +0000 Subject: [PATCH 091/447] connect: parse v2 refs with correct hash algorithm When using protocol v2, we need to know what hash algorithm is used by the remote end. See if the server has sent us an object-format capability, and if so, use it to determine the hash algorithm in use and set that value in the packet reader. Parse the refs using this algorithm. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- connect.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/connect.c b/connect.c index 1d05bc56eda2e6..66650ff2d33492 100644 --- a/connect.c +++ b/connect.c @@ -283,7 +283,7 @@ static int process_ref(const struct packet_reader *reader, int len, die(_("protocol error: unexpected capabilities^{}")); } else if (check_ref(name, flags)) { struct ref *ref = alloc_ref(name); - memcpy(ref->old_oid.hash, old_oid.hash, reader->hash_algo->rawsz); + oidcpy(&ref->old_oid, &old_oid); **list = ref; *list = &ref->next; } @@ -395,7 +395,7 @@ static int process_ref_v2(struct packet_reader *reader, struct ref ***list) goto out; } - if (parse_oid_hex(line_sections.items[i++].string, &old_oid, &end) || + if (parse_oid_hex_algop(line_sections.items[i++].string, &old_oid, &end, reader->hash_algo) || *end) { ret = 0; goto out; @@ -403,7 +403,7 @@ static int process_ref_v2(struct packet_reader *reader, struct ref ***list) ref = alloc_ref(line_sections.items[i++].string); - oidcpy(&ref->old_oid, &old_oid); + memcpy(ref->old_oid.hash, old_oid.hash, reader->hash_algo->rawsz); **list = ref; *list = &ref->next; @@ -416,7 +416,8 @@ static int process_ref_v2(struct packet_reader *reader, struct ref ***list) struct object_id peeled_oid; char *peeled_name; struct ref *peeled; - if (parse_oid_hex(arg, &peeled_oid, &end) || *end) { + if (parse_oid_hex_algop(arg, &peeled_oid, &end, + reader->hash_algo) || *end) { ret = 0; goto out; } @@ -424,7 +425,8 @@ static int process_ref_v2(struct packet_reader *reader, struct ref ***list) peeled_name = xstrfmt("%s^{}", ref->name); peeled = alloc_ref(peeled_name); - oidcpy(&peeled->old_oid, &peeled_oid); + memcpy(peeled->old_oid.hash, peeled_oid.hash, + reader->hash_algo->rawsz); **list = peeled; *list = &peeled->next; @@ -443,6 +445,7 @@ struct ref **get_remote_refs(int fd_out, struct packet_reader *reader, const struct string_list *server_options) { int i; + const char *hash_name; 
*list = NULL; if (server_supports_v2("ls-refs", 1)) @@ -451,6 +454,14 @@ struct ref **get_remote_refs(int fd_out, struct packet_reader *reader, if (server_supports_v2("agent", 0)) packet_write_fmt(fd_out, "agent=%s", git_user_agent_sanitized()); + if (server_feature_v2("object-format", &hash_name)) { + int hash_algo = hash_algo_by_name(hash_name); + if (hash_algo == GIT_HASH_UNKNOWN) + die(_("unknown object format '%s' specified by server"), hash_name); + reader->hash_algo = &hash_algos[hash_algo]; + packet_write_fmt(fd_out, "object-format=%s", reader->hash_algo->name); + } + if (server_options && server_options->nr && server_supports_v2("server-option", 1)) for (i = 0; i < server_options->nr; i++) From 9de0dd361c9ea2ca6eca14a7dd43fe11d170a253 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:59:17 +0000 Subject: [PATCH 092/447] serve: advertise object-format capability for protocol v2 In order to communicate the protocol supported by the server side, add support for advertising the object-format capability. We check that the client side sends us an identical algorithm if it sends us its own object-format capability, and assume it speaks SHA-1 if not. In the test, when we're using an algorithm other than SHA-1, we need to specify the algorithm in use so we don't get a failure with an "unknown format" message. Add a test that we handle a mismatched algorithm. Remove the test_oid_init call since it's no longer necessary. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- connect.c | 2 ++ serve.c | 27 +++++++++++++++++++++++++++ t/t5701-git-serve.sh | 25 +++++++++++++++++++++++++ 3 files changed, 54 insertions(+) diff --git a/connect.c b/connect.c index 66650ff2d33492..2ada5b516186eb 100644 --- a/connect.c +++ b/connect.c @@ -460,6 +460,8 @@ struct ref **get_remote_refs(int fd_out, struct packet_reader *reader, die(_("unknown object format '%s' specified by server"), hash_name); reader->hash_algo = &hash_algos[hash_algo]; packet_write_fmt(fd_out, "object-format=%s", reader->hash_algo->name); + } else { + reader->hash_algo = &hash_algos[GIT_HASH_SHA1]; } if (server_options && server_options->nr && diff --git a/serve.c b/serve.c index 317256c1a493c4..7ab7807feffea4 100644 --- a/serve.c +++ b/serve.c @@ -22,6 +22,14 @@ static int agent_advertise(struct repository *r, return 1; } +static int object_format_advertise(struct repository *r, + struct strbuf *value) +{ + if (value) + strbuf_addstr(value, r->hash_algo->name); + return 1; +} + struct protocol_capability { /* * The name of the capability. 
The server uses this name when @@ -57,6 +65,7 @@ static struct protocol_capability capabilities[] = { { "ls-refs", always_advertise, ls_refs }, { "fetch", upload_pack_advertise, upload_pack_v2 }, { "server-option", always_advertise, NULL }, + { "object-format", object_format_advertise, NULL }, }; static void advertise_capabilities(void) @@ -153,6 +162,22 @@ int has_capability(const struct argv_array *keys, const char *capability, return 0; } +static void check_algorithm(struct repository *r, struct argv_array *keys) +{ + int client = GIT_HASH_SHA1, server = hash_algo_by_ptr(r->hash_algo); + const char *algo_name; + + if (has_capability(keys, "object-format", &algo_name)) { + client = hash_algo_by_name(algo_name); + if (client == GIT_HASH_UNKNOWN) + die("unknown object format '%s'", algo_name); + } + + if (client != server) + die("mismatched object format: server %s; client %s\n", + r->hash_algo->name, hash_algos[client].name); +} + enum request_state { PROCESS_REQUEST_KEYS, PROCESS_REQUEST_DONE, @@ -223,6 +248,8 @@ static int process_request(void) if (!command) die("no command requested"); + check_algorithm(the_repository, &keys); + command->command(the_repository, &keys, &reader); argv_array_clear(&keys); diff --git a/t/t5701-git-serve.sh b/t/t5701-git-serve.sh index ffb9613885904e..a1f5fdc9fdcf52 100755 --- a/t/t5701-git-serve.sh +++ b/t/t5701-git-serve.sh @@ -5,12 +5,17 @@ test_description='test protocol v2 server commands' . 
./test-lib.sh test_expect_success 'test capability advertisement' ' + test_oid_cache <<-EOF && + wrong_algo sha1:sha256 + wrong_algo sha256:sha1 + EOF cat >expect <<-EOF && version 2 agent=git/$(git version | cut -d" " -f3) ls-refs fetch=shallow server-option + object-format=$(test_oid algo) 0000 EOF @@ -45,6 +50,7 @@ test_expect_success 'request invalid capability' ' test_expect_success 'request with no command' ' test-tool pkt-line pack >in <<-EOF && agent=git/test + object-format=$(test_oid algo) 0000 EOF test_must_fail test-tool serve-v2 --stateless-rpc 2>err in <<-EOF && command=foo + object-format=$(test_oid algo) agent=git/test 0000 EOF @@ -61,6 +68,17 @@ test_expect_success 'request invalid command' ' test_i18ngrep "invalid command" err ' +test_expect_success 'wrong object-format' ' + test-tool pkt-line pack >in <<-EOF && + command=fetch + agent=git/test + object-format=$(test_oid wrong_algo) + 0000 + EOF + test_must_fail test-tool serve-v2 --stateless-rpc 2>err in <<-EOF && command=ls-refs + object-format=$(test_oid algo) 0000 EOF @@ -96,6 +115,7 @@ test_expect_success 'basics of ls-refs' ' test_expect_success 'basic ref-prefixes' ' test-tool pkt-line pack >in <<-EOF && command=ls-refs + object-format=$(test_oid algo) 0001 ref-prefix refs/heads/master ref-prefix refs/tags/one @@ -116,6 +136,7 @@ test_expect_success 'basic ref-prefixes' ' test_expect_success 'refs/heads prefix' ' test-tool pkt-line pack >in <<-EOF && command=ls-refs + object-format=$(test_oid algo) 0001 ref-prefix refs/heads/ 0000 @@ -136,6 +157,7 @@ test_expect_success 'refs/heads prefix' ' test_expect_success 'peel parameter' ' test-tool pkt-line pack >in <<-EOF && command=ls-refs + object-format=$(test_oid algo) 0001 peel ref-prefix refs/tags/ @@ -157,6 +179,7 @@ test_expect_success 'peel parameter' ' test_expect_success 'symrefs parameter' ' test-tool pkt-line pack >in <<-EOF && command=ls-refs + object-format=$(test_oid algo) 0001 symrefs ref-prefix refs/heads/ @@ -178,6 +201,7 @@ 
test_expect_success 'symrefs parameter' ' test_expect_success 'sending server-options' ' test-tool pkt-line pack >in <<-EOF && command=ls-refs + object-format=$(test_oid algo) server-option=hello server-option=world 0001 @@ -200,6 +224,7 @@ test_expect_success 'unexpected lines are not allowed in fetch request' ' test-tool pkt-line pack >in <<-EOF && command=fetch + object-format=$(test_oid algo) 0001 this-is-not-a-command 0000 From f0af95f42455b03e3dd83f27f24cb6203eef99ae Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 25 May 2020 19:59:18 +0000 Subject: [PATCH 093/447] t5500: make hash independent This test has hard-coded pkt-lines with object IDs. The pkt-line lengths necessarily differ between hash algorithms, so generate these lines with the packetize helper so they're always the right size. In addition, we will require an object-format capability for SHA-256, so pass that capability on to the upload-pack process. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- t/t5500-fetch-pack.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/t/t5500-fetch-pack.sh b/t/t5500-fetch-pack.sh index 52dd1a688cd052..8fee99ecfb2fc9 100755 --- a/t/t5500-fetch-pack.sh +++ b/t/t5500-fetch-pack.sh @@ -871,9 +871,10 @@ test_expect_success 'shallow since with commit graph and already-seen commit' ' GIT_PROTOCOL=version=2 git upload-pack . <<-EOF >/dev/null 0012command=fetch + $(echo "object-format=$(test_oid algo)" | packetize) 00010013deepen-since 1 - 0032want $(git rev-parse other) - 0032have $(git rev-parse master) + $(echo "want $(git rev-parse other)" | packetize) + $(echo "have $(git rev-parse master)" | packetize) 0000 EOF ) From d96dab868e63cd5a55b50e1d8a23b640e17413bb Mon Sep 17 00:00:00 2001 From: "brian m. 
carlson" Date: Mon, 25 May 2020 19:59:19 +0000 Subject: [PATCH 094/447] builtin/ls-remote: initialize repository based on fetch ls-remote may or may not operate within a repository, and as such will not have been initialized with the repository's hash algorithm. Even if it were, the remote side could be using a different algorithm and we would still want to display those refs properly. Find the hash algorithm used by the remote side by querying the transport object and set our hash algorithm accordingly. Without this change, if the remote side is using SHA-256, we truncate the refs to 40 hex characters, since that's the length of the default hash algorithm (SHA-1). Note that technically this is not a correct setting of the repository hash algorithm since, if we are in a repository, it might be one of a different hash algorithm from the remote side. However, our current code paths don't handle multiple algorithms and won't for some time, so this is the best we can do. We rely on the fact that ls-remote never modifies the current repository, which is a reasonable assumption to make. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- builtin/ls-remote.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/builtin/ls-remote.c b/builtin/ls-remote.c index 6ef519514bd1a4..3a4dd12903206a 100644 --- a/builtin/ls-remote.c +++ b/builtin/ls-remote.c @@ -118,6 +118,10 @@ int cmd_ls_remote(int argc, const char **argv, const char *prefix) transport->server_options = &server_options; ref = transport_get_remote_refs(transport, &ref_prefixes); + if (ref) { + int hash_algo = hash_algo_by_ptr(transport_get_hash_algo(transport)); + repo_set_hash_algo(the_repository, hash_algo); + } if (transport_disconnect(transport)) { UNLEAK(sorting); return 1; From 2c8bd8471a6abc68064dafc743362547fc730f77 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 27 May 2020 21:09:06 +0000 Subject: [PATCH 095/447] checkout -p: handle new files correctly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The original patch selection code was written for `git add -p`, and the fundamental unit on which it works is a hunk. We hacked around that to handle deletions back in 24ab81ae4d (add-interactive: handle deletion of empty files, 2009-10-27). But `git add -p` would never see a new file, since we only consider the set of tracked files in the index. However, since the same machinery was used for `git checkout -p` & friends, we can see new files. Handle this case specifically, adding a new prompt for it that is modeled after the `deleted file` case. This also fixes the problem where added _empty_ files could not be staged via `git checkout -p`. 
Reported-by: Merlin Büge Helped-by: Jeff King Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- add-patch.c | 30 +++++++++++++++++++++++------- git-add--interactive.perl | 21 +++++++++++++++++++-- t/t3701-add-interactive.sh | 19 +++++++++++++++++++ 3 files changed, 61 insertions(+), 9 deletions(-) diff --git a/add-patch.c b/add-patch.c index d8dafa8168dc83..eaace1b824f6f5 100644 --- a/add-patch.c +++ b/add-patch.c @@ -9,7 +9,7 @@ #include "compat/terminal.h" enum prompt_mode_type { - PROMPT_MODE_CHANGE = 0, PROMPT_DELETION, PROMPT_HUNK, + PROMPT_MODE_CHANGE = 0, PROMPT_DELETION, PROMPT_ADDITION, PROMPT_HUNK, PROMPT_MODE_MAX, /* must be last */ }; @@ -32,6 +32,7 @@ static struct patch_mode patch_mode_add = { .prompt_mode = { N_("Stage mode change [y,n,q,a,d%s,?]? "), N_("Stage deletion [y,n,q,a,d%s,?]? "), + N_("Stage addition [y,n,q,a,d%s,?]? "), N_("Stage this hunk [y,n,q,a,d%s,?]? ") }, .edit_hunk_hint = N_("If the patch applies cleanly, the edited hunk " @@ -53,6 +54,7 @@ static struct patch_mode patch_mode_stash = { .prompt_mode = { N_("Stash mode change [y,n,q,a,d%s,?]? "), N_("Stash deletion [y,n,q,a,d%s,?]? "), + N_("Stash addition [y,n,q,a,d%s,?]? "), N_("Stash this hunk [y,n,q,a,d%s,?]? "), }, .edit_hunk_hint = N_("If the patch applies cleanly, the edited hunk " @@ -76,6 +78,7 @@ static struct patch_mode patch_mode_reset_head = { .prompt_mode = { N_("Unstage mode change [y,n,q,a,d%s,?]? "), N_("Unstage deletion [y,n,q,a,d%s,?]? "), + N_("Unstage addition [y,n,q,a,d%s,?]? "), N_("Unstage this hunk [y,n,q,a,d%s,?]? "), }, .edit_hunk_hint = N_("If the patch applies cleanly, the edited hunk " @@ -98,6 +101,7 @@ static struct patch_mode patch_mode_reset_nothead = { .prompt_mode = { N_("Apply mode change to index [y,n,q,a,d%s,?]? "), N_("Apply deletion to index [y,n,q,a,d%s,?]? "), + N_("Apply addition to index [y,n,q,a,d%s,?]? "), N_("Apply this hunk to index [y,n,q,a,d%s,?]? 
"), }, .edit_hunk_hint = N_("If the patch applies cleanly, the edited hunk " @@ -120,6 +124,7 @@ static struct patch_mode patch_mode_checkout_index = { .prompt_mode = { N_("Discard mode change from worktree [y,n,q,a,d%s,?]? "), N_("Discard deletion from worktree [y,n,q,a,d%s,?]? "), + N_("Discard addition from worktree [y,n,q,a,d%s,?]? "), N_("Discard this hunk from worktree [y,n,q,a,d%s,?]? "), }, .edit_hunk_hint = N_("If the patch applies cleanly, the edited hunk " @@ -142,6 +147,7 @@ static struct patch_mode patch_mode_checkout_head = { .prompt_mode = { N_("Discard mode change from index and worktree [y,n,q,a,d%s,?]? "), N_("Discard deletion from index and worktree [y,n,q,a,d%s,?]? "), + N_("Discard addition from index and worktree [y,n,q,a,d%s,?]? "), N_("Discard this hunk from index and worktree [y,n,q,a,d%s,?]? "), }, .edit_hunk_hint = N_("If the patch applies cleanly, the edited hunk " @@ -163,6 +169,7 @@ static struct patch_mode patch_mode_checkout_nothead = { .prompt_mode = { N_("Apply mode change to index and worktree [y,n,q,a,d%s,?]? "), N_("Apply deletion to index and worktree [y,n,q,a,d%s,?]? "), + N_("Apply addition to index and worktree [y,n,q,a,d%s,?]? "), N_("Apply this hunk to index and worktree [y,n,q,a,d%s,?]? "), }, .edit_hunk_hint = N_("If the patch applies cleanly, the edited hunk " @@ -185,6 +192,7 @@ static struct patch_mode patch_mode_worktree_head = { .prompt_mode = { N_("Discard mode change from index and worktree [y,n,q,a,d%s,?]? "), N_("Discard deletion from index and worktree [y,n,q,a,d%s,?]? "), + N_("Discard addition from index and worktree [y,n,q,a,d%s,?]? "), N_("Discard this hunk from index and worktree [y,n,q,a,d%s,?]? "), }, .edit_hunk_hint = N_("If the patch applies cleanly, the edited hunk " @@ -206,6 +214,7 @@ static struct patch_mode patch_mode_worktree_nothead = { .prompt_mode = { N_("Apply mode change to index and worktree [y,n,q,a,d%s,?]? "), N_("Apply deletion to index and worktree [y,n,q,a,d%s,?]? 
"), + N_("Apply addition to index and worktree [y,n,q,a,d%s,?]? "), N_("Apply this hunk to index and worktree [y,n,q,a,d%s,?]? "), }, .edit_hunk_hint = N_("If the patch applies cleanly, the edited hunk " @@ -247,7 +256,7 @@ struct add_p_state { struct hunk head; struct hunk *hunk; size_t hunk_nr, hunk_alloc; - unsigned deleted:1, mode_change:1,binary:1; + unsigned deleted:1, added:1, mode_change:1,binary:1; } *file_diff; size_t file_diff_nr; @@ -441,7 +450,7 @@ static int parse_diff(struct add_p_state *s, const struct pathspec *ps) pend = p + plain->len; while (p != pend) { char *eol = memchr(p, '\n', pend - p); - const char *deleted = NULL, *mode_change = NULL; + const char *deleted = NULL, *added = NULL, *mode_change = NULL; if (!eol) eol = pend; @@ -460,11 +469,12 @@ static int parse_diff(struct add_p_state *s, const struct pathspec *ps) } else if (p == plain->buf) BUG("diff starts with unexpected line:\n" "%.*s\n", (int)(eol - p), p); - else if (file_diff->deleted) + else if (file_diff->deleted || file_diff->added) ; /* keep the rest of the file in a single "hunk" */ else if (starts_with(p, "@@ ") || (hunk == &file_diff->head && - skip_prefix(p, "deleted file", &deleted))) { + (skip_prefix(p, "deleted file", &deleted) || + skip_prefix(p, "new file", &added)))) { if (marker == '-' || marker == '+') /* * Should not happen; previous hunk did not end @@ -484,6 +494,8 @@ static int parse_diff(struct add_p_state *s, const struct pathspec *ps) if (deleted) file_diff->deleted = 1; + else if (added) + file_diff->added = 1; else if (parse_hunk_header(s, hunk) < 0) return -1; @@ -536,8 +548,10 @@ static int parse_diff(struct add_p_state *s, const struct pathspec *ps) starts_with(p, "Binary files ")) file_diff->binary = 1; - if (file_diff->deleted && file_diff->mode_change) - BUG("diff contains delete *and* a mode change?!?\n%.*s", + if (!!file_diff->deleted + !!file_diff->added + + !!file_diff->mode_change > 1) + BUG("diff can only contain delete *or* add *or* a " + "mode 
change?!?\n%.*s", (int)(eol - (plain->buf + file_diff->head.start)), plain->buf + file_diff->head.start); @@ -1397,6 +1411,8 @@ static int patch_update_file(struct add_p_state *s, if (file_diff->deleted) prompt_mode_type = PROMPT_DELETION; + else if (file_diff->added) + prompt_mode_type = PROMPT_ADDITION; else if (file_diff->mode_change && !hunk_index) prompt_mode_type = PROMPT_MODE_CHANGE; else diff --git a/git-add--interactive.perl b/git-add--interactive.perl index 10fd30ae16a3bd..f36c0078ac9a71 100755 --- a/git-add--interactive.perl +++ b/git-add--interactive.perl @@ -754,16 +754,18 @@ sub parse_diff_header { my $head = { TEXT => [], DISPLAY => [], TYPE => 'header' }; my $mode = { TEXT => [], DISPLAY => [], TYPE => 'mode' }; my $deletion = { TEXT => [], DISPLAY => [], TYPE => 'deletion' }; + my $addition = { TEXT => [], DISPLAY => [], TYPE => 'addition' }; for (my $i = 0; $i < @{$src->{TEXT}}; $i++) { my $dest = $src->{TEXT}->[$i] =~ /^(old|new) mode (\d+)$/ ? $mode : $src->{TEXT}->[$i] =~ /^deleted file/ ? $deletion : + $src->{TEXT}->[$i] =~ /^new file/ ? $addition : $head; push @{$dest->{TEXT}}, $src->{TEXT}->[$i]; push @{$dest->{DISPLAY}}, $src->{DISPLAY}->[$i]; } - return ($head, $mode, $deletion); + return ($head, $mode, $deletion, $addition); } sub hunk_splittable { @@ -1427,46 +1429,55 @@ sub display_hunks { stage => { mode => N__("Stage mode change [y,n,q,a,d%s,?]? "), deletion => N__("Stage deletion [y,n,q,a,d%s,?]? "), + addition => N__("Stage addition [y,n,q,a,d%s,?]? "), hunk => N__("Stage this hunk [y,n,q,a,d%s,?]? "), }, stash => { mode => N__("Stash mode change [y,n,q,a,d%s,?]? "), deletion => N__("Stash deletion [y,n,q,a,d%s,?]? "), + addition => N__("Stash addition [y,n,q,a,d%s,?]? "), hunk => N__("Stash this hunk [y,n,q,a,d%s,?]? "), }, reset_head => { mode => N__("Unstage mode change [y,n,q,a,d%s,?]? "), deletion => N__("Unstage deletion [y,n,q,a,d%s,?]? "), + addition => N__("Unstage addition [y,n,q,a,d%s,?]? 
"), hunk => N__("Unstage this hunk [y,n,q,a,d%s,?]? "), }, reset_nothead => { mode => N__("Apply mode change to index [y,n,q,a,d%s,?]? "), deletion => N__("Apply deletion to index [y,n,q,a,d%s,?]? "), + addition => N__("Apply addition to index [y,n,q,a,d%s,?]? "), hunk => N__("Apply this hunk to index [y,n,q,a,d%s,?]? "), }, checkout_index => { mode => N__("Discard mode change from worktree [y,n,q,a,d%s,?]? "), deletion => N__("Discard deletion from worktree [y,n,q,a,d%s,?]? "), + addition => N__("Discard addition from worktree [y,n,q,a,d%s,?]? "), hunk => N__("Discard this hunk from worktree [y,n,q,a,d%s,?]? "), }, checkout_head => { mode => N__("Discard mode change from index and worktree [y,n,q,a,d%s,?]? "), deletion => N__("Discard deletion from index and worktree [y,n,q,a,d%s,?]? "), + addition => N__("Discard addition from index and worktree [y,n,q,a,d%s,?]? "), hunk => N__("Discard this hunk from index and worktree [y,n,q,a,d%s,?]? "), }, checkout_nothead => { mode => N__("Apply mode change to index and worktree [y,n,q,a,d%s,?]? "), deletion => N__("Apply deletion to index and worktree [y,n,q,a,d%s,?]? "), + addition => N__("Apply addition to index and worktree [y,n,q,a,d%s,?]? "), hunk => N__("Apply this hunk to index and worktree [y,n,q,a,d%s,?]? "), }, worktree_head => { mode => N__("Discard mode change from worktree [y,n,q,a,d%s,?]? "), deletion => N__("Discard deletion from worktree [y,n,q,a,d%s,?]? "), + addition => N__("Discard addition from worktree [y,n,q,a,d%s,?]? "), hunk => N__("Discard this hunk from worktree [y,n,q,a,d%s,?]? "), }, worktree_nothead => { mode => N__("Apply mode change to worktree [y,n,q,a,d%s,?]? "), deletion => N__("Apply deletion to worktree [y,n,q,a,d%s,?]? "), + addition => N__("Apply addition to worktree [y,n,q,a,d%s,?]? "), hunk => N__("Apply this hunk to worktree [y,n,q,a,d%s,?]? 
"), }, ); @@ -1476,7 +1487,7 @@ sub patch_update_file { my ($ix, $num); my $path = shift; my ($head, @hunk) = parse_diff($path); - ($head, my $mode, my $deletion) = parse_diff_header($head); + ($head, my $mode, my $deletion, my $addition) = parse_diff_header($head); for (@{$head->{DISPLAY}}) { print; } @@ -1490,6 +1501,12 @@ sub patch_update_file { push @{$deletion->{DISPLAY}}, @{$hunk->{DISPLAY}}; } @hunk = ($deletion); + } elsif (@{$addition->{TEXT}}) { + foreach my $hunk (@hunk) { + push @{$addition->{TEXT}}, @{$hunk->{TEXT}}; + push @{$addition->{DISPLAY}}, @{$hunk->{DISPLAY}}; + } + @hunk = ($addition); } $num = scalar @hunk; diff --git a/t/t3701-add-interactive.sh b/t/t3701-add-interactive.sh index ac43f835a5b077..7bc0c3fe6efb5c 100755 --- a/t/t3701-add-interactive.sh +++ b/t/t3701-add-interactive.sh @@ -403,6 +403,25 @@ test_expect_success 'deleting an empty file' ' diff_cmp expected diff ' +test_expect_success 'adding an empty file' ' + git init added && + ( + cd added && + test_commit initial && + >empty && + git add empty && + test_tick && + git commit -m empty && + git tag added-file && + git reset --hard HEAD^ && + test_path_is_missing empty && + + echo y | git checkout -p added-file -- >actual && + test_path_is_file empty && + test_i18ngrep "Apply addition to index and worktree" actual + ) +' + test_expect_success 'split hunk setup' ' git reset --hard && test_write_lines 10 20 30 40 50 60 >test && From a44a0a9fc4353ff4ccd4b2308db2844d7f4185f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Trevisan=20=28Trevi=C3=B1o=29?= Date: Tue, 26 May 2020 19:13:17 +0000 Subject: [PATCH 096/447] completion: use native ZSH array pattern matching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When clearing the builtin operations on re-sourcing in the ZSH case we can use the native ${parameters} associative array keys values to get the currently `__gitcomp_builtin_*` operations using pattern matching instead of using sed. 
As also stated in commit 94408dc7, which introduced this code, the use of sed has some overhead implications, while ZSH can do this check using just its native syntax. Signed-off-by: Marco Trevisan (Treviño) Signed-off-by: Junio C Hamano --- contrib/completion/git-completion.bash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash index 70ad04e1b2a8c6..ad6934a3864fdd 100644 --- a/contrib/completion/git-completion.bash +++ b/contrib/completion/git-completion.bash @@ -373,7 +373,7 @@ __gitcomp () # Clear the variables caching builtins' options when (re-)sourcing # the completion script. if [[ -n ${ZSH_VERSION-} ]]; then - unset $(set |sed -ne 's/^\(__gitcomp_builtin_[a-zA-Z0-9_][a-zA-Z0-9_]*\)=.*/\1/p') 2>/dev/null + unset ${(M)${(k)parameters[@]}:#__gitcomp_builtin_*} 2>/dev/null else unset $(compgen -v __gitcomp_builtin_) fi From fab466f91dca22f8ed43148bcde6aac856c5c671 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 28 May 2020 11:10:33 -0700 Subject: [PATCH 097/447] completion: add test showing subpar git switch completion When provided with no options, git switch only allows switching between branches. The one exception to this is the "Do What I Mean" logic that allows a unique remote branch name to be interpreted as a request to create a branch of the same name that is tracking that remote branch. Unfortunately, the logic for the completion of git switch results in completing not just branch names, but also pseudorefs like HEAD, tags, and fully specified <remote>/<branch> references.
For example, we currently complete the following: $git switch <TAB> HEAD branch-in-other master master-in-other matching-branch matching-tag other/branch-in-other other/master-in-other Indeed, if one were to attempt to use git switch with some of these provided options, git will reject the request: $git switch HEAD fatal: a branch is expected, got 'HEAD' $git switch matching-tag fatal: a branch is expected, got tag 'matching-tag' $git switch other/branch-in-other fatal: a branch is expected, got remote branch 'other/branch-in-other' Ideally, git switch without options ought to complete only words which will be accepted. Without options, this means to list local branch names and the unique remote branch names without their remote name pre-pended. $git switch <TAB> branch-in-other master master-in-other matching-branch Add a test case that highlights this subpar completion. Also add a similar test for git checkout completion that shows that due to the complex nature of git checkout, it must complete all references.
Signed-off-by: Jacob Keller Signed-off-by: Junio C Hamano --- t/t9902-completion.sh | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/t/t9902-completion.sh b/t/t9902-completion.sh index 5505e5aa249e43..1a022630d069af 100755 --- a/t/t9902-completion.sh +++ b/t/t9902-completion.sh @@ -1240,6 +1240,29 @@ test_expect_success '__git_complete_fetch_refspecs - fully qualified & prefix' ' test_cmp expected out ' +#TODO: git switch completion includes unexpected references +test_expect_failure 'git switch - with no options, complete local branches and unique remote branch names for DWIM logic' ' + test_completion "git switch " <<-\EOF + branch-in-other Z + master Z + master-in-other Z + matching-branch Z + EOF +' + +test_expect_success 'git checkout - completes refs and unique remote branches for DWIM' ' + test_completion "git checkout " <<-\EOF + HEAD Z + branch-in-other Z + master Z + master-in-other Z + matching-branch Z + matching-tag Z + other/branch-in-other Z + other/master-in-other Z + EOF +' + test_expect_success 'teardown after ref completion' ' git branch -d matching-branch && git tag -d matching-tag && From ab58e90f8b27f420659345ee462f6a69e26e379b Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 28 May 2020 11:10:34 -0700 Subject: [PATCH 098/447] completion: add tests showing subpar DWIM logic for switch/checkout When provided with a single argument that is the name of a remote branch that does not yet exist locally, both git switch and git checkout can interpret this as a request to create a local branch that tracks that remote branch. We call this behavior "Do What I Mean", or DWIM for short. To aid in using this DWIM, it makes sense for completion to list these unique remote branch names when completing possible arguments for git switch and git checkout. Indeed, both _git_checkout and _git_switch implement support for completing such DWIM branch names. 
In other words, in addition to the usual completions provided for git switch, this "DWIM" logic means completion will include the names of branches on remotes that are unique and thus there can be no ambiguity of which remote to track when creating the local branch. However, the DWIM logic is not always active. Many options, such as --no-guess, --no-track, and --track disable this DWIM logic, as they cause git switch and git checkout to behave in different modes. Additionally, some completion users do not wish to have tab completion include these remote names by default, and thus introduced GIT_COMPLETION_CHECKOUT_NO_GUESS as an optional way to configure the completion support to disable this feature of completion support. For this reason, _git_checkout and _git_switch have many rules about when to enable or disable completing of these remote refs. The two commands follow similar but not identical rules. Set aside the question of command modes that do not accept this DWIM logic (--track, -c, --orphan, --detach) for now. Thinking just about the main mode of git checkout and git switch, the following guidelines will help explain the basic rules we ought to support when deciding whether to list the remote branches for DWIM in completion. 1. if --guess is enabled, we should list DWIM remote branch names, even if something else would disable it 2. if --no-guess, --no-track or GIT_COMPLETION_CHECKOUT_NO_GUESS=1, then we should disable listing DWIM remote branch names. 3. Since the '--guess' option is a boolean option, a later --guess should override --no-guess, and a later --no-guess should override --guess. Putting all of these together, add some tests that highlight the expected behavior of this DWIM logic. 
Signed-off-by: Jacob Keller Signed-off-by: Junio C Hamano --- t/t9902-completion.sh | 105 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) diff --git a/t/t9902-completion.sh b/t/t9902-completion.sh index 1a022630d069af..d858a914caf065 100755 --- a/t/t9902-completion.sh +++ b/t/t9902-completion.sh @@ -1263,6 +1263,111 @@ test_expect_success 'git checkout - completes refs and unique remote branches fo EOF ' +test_expect_success 'git switch - with --no-guess, complete only local branches' ' + test_completion "git switch --no-guess " <<-\EOF + master Z + matching-branch Z + EOF +' + +test_expect_success 'git switch - with GIT_COMPLETION_CHECKOUT_NO_GUESS=1, complete only local branches' ' + GIT_COMPLETION_CHECKOUT_NO_GUESS=1 test_completion "git switch " <<-\EOF + master Z + matching-branch Z + EOF +' + +#TODO: --guess/--no-guess ordering is not taken into account +#TODO: git switch completion includes unexpected references +test_expect_failure 'git switch - --guess overrides GIT_COMPLETION_CHECKOUT_NO_GUESS=1, complete local branches and unique remote names for DWIM logic' ' + GIT_COMPLETION_CHECKOUT_NO_GUESS=1 test_completion "git switch --guess " <<-\EOF + branch-in-other Z + master Z + master-in-other Z + matching-branch Z + EOF +' + +#TODO: --guess/--no-guess ordering is not taken into account +#TODO: git switch completion includes unexpected references +test_expect_failure 'git switch - a later --guess overrides previous --no-guess, complete local and remote unique branches for DWIM' ' + test_completion "git switch --no-guess --guess " <<-\EOF + branch-in-other Z + master Z + master-in-other Z + matching-branch Z + EOF +' + +#TODO: --guess/--no-guess ordering is not taken into account +test_expect_failure 'git switch - a later --no-guess overrides previous --guess, complete only local branches' ' + test_completion "git switch --guess --no-guess " <<-\EOF + master Z + matching-branch Z + EOF +' + +test_expect_success 'git checkout - 
with GIT_COMPLETION_NO_GUESS=1 only completes refs' ' + GIT_COMPLETION_CHECKOUT_NO_GUESS=1 test_completion "git checkout " <<-\EOF + HEAD Z + master Z + matching-branch Z + matching-tag Z + other/branch-in-other Z + other/master-in-other Z + EOF +' + +#TODO: git checkout does not override variable when --guess is provided +test_expect_failure 'git checkout - --guess overrides GIT_COMPLETION_NO_GUESS=1, complete refs and unique remote branches for DWIM' ' + GIT_COMPLETION_CHECKOUT_NO_GUESS=1 test_completion "git checkout --guess " <<-\EOF + HEAD Z + branch-in-other Z + master Z + master-in-other Z + matching-branch Z + matching-tag Z + other/branch-in-other Z + other/master-in-other Z + EOF +' + +test_expect_success 'git checkout - with --no-guess, only completes refs' ' + test_completion "git checkout --no-guess " <<-\EOF + HEAD Z + master Z + matching-branch Z + matching-tag Z + other/branch-in-other Z + other/master-in-other Z + EOF +' + +#TODO: --guess/--no-guess ordering is not taken into account +test_expect_failure 'git checkout - a later --guess overrides previous --no-guess, complete refs and unique remote branches for DWIM' ' + test_completion "git checkout --no-guess --guess " <<-\EOF + HEAD Z + branch-in-other Z + master Z + master-in-other Z + matching-branch Z + matching-tag Z + other/branch-in-other Z + other/master-in-other Z + EOF +' + +test_expect_success 'git checkout - a later --no-guess overrides previous --guess, complete only refs' ' + test_completion "git checkout --guess --no-guess " <<-\EOF + HEAD Z + master Z + matching-branch Z + matching-tag Z + other/branch-in-other Z + other/master-in-other Z + EOF +' + test_expect_success 'teardown after ref completion' ' git branch -d matching-branch && git tag -d matching-tag && From e69fb0a16abb6412fa6c297cfe7ff43ba762a9b3 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 28 May 2020 11:10:35 -0700 Subject: [PATCH 099/447] completion: add tests showing subpar checkout --detach logic When
completing words for git switch, the completion function correctly disables the DWIM remote branch names when in the '--detach' mode. These DWIM remote branch names will not work when the --detach option is specified, so it does not make sense to complete them. git checkout, however, does not disable the completion of DWIM remote branch names in this case. Add test cases for both git switch and git checkout showing the expected behavior. Signed-off-by: Jacob Keller Signed-off-by: Junio C Hamano --- t/t9902-completion.sh | 46 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/t/t9902-completion.sh b/t/t9902-completion.sh index d858a914caf065..e8350b3e963613 100755 --- a/t/t9902-completion.sh +++ b/t/t9902-completion.sh @@ -1368,6 +1368,52 @@ test_expect_success 'git checkout - a later --no-guess overrides previous --gues EOF ' +test_expect_success 'git switch - with --detach, complete all references' ' + test_completion "git switch --detach " <<-\EOF + HEAD Z + master Z + matching-branch Z + matching-tag Z + other/branch-in-other Z + other/master-in-other Z + EOF +' + +#TODO: checkout --detach incorrectly includes DWIM remote branch names +test_expect_failure 'git checkout - with --detach, complete only references' ' + test_completion "git checkout --detach " <<-\EOF + HEAD Z + master Z + matching-branch Z + matching-tag Z + other/branch-in-other Z + other/master-in-other Z + EOF +' + +test_expect_success 'git switch - with -d, complete all references' ' + test_completion "git switch -d " <<-\EOF + HEAD Z + master Z + matching-branch Z + matching-tag Z + other/branch-in-other Z + other/master-in-other Z + EOF +' + +#TODO: checkout -d incorrectly includes DWIM remote branch names +test_expect_failure 'git checkout - with -d, complete only references' ' + test_completion "git checkout -d " <<-\EOF + HEAD Z + master Z + matching-branch Z + matching-tag Z + other/branch-in-other Z + other/master-in-other Z + EOF +' + 
test_expect_success 'teardown after ref completion' ' git branch -d matching-branch && git tag -d matching-tag && From c55b99c3d3359debb8601b6f687c94deab59ad2d Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 28 May 2020 11:10:36 -0700 Subject: [PATCH 100/447] completion: add tests showing subpar switch/checkout --track logic When the --track option is provided to git switch or git checkout, and no branch is specified by -c or -b, git will interpret the tracking branch to determine the local branch name to use. This "Do What I Mean" logic is similar but distinct from the default DWIM logic of interpreting a unique remote branch name as a request to create and track that branch. For example, `git switch --track origin/master` is interpreted as a request to create a local branch named master that is tracking origin/master. The current completion for git checkout in this regard is only somewhat poor: $ git checkout --track <TAB> HEAD master matching-branch matching-tag other/branch-in-other other/master-in-other At least it still includes remote references. The clutter from including all references isn't too bad. However, git switch completion is terrible: $ git switch --track <TAB> master matching-branch It only shows local branches, not even allowing any form of completion of the remote references! Add tests which highlight the expected behavior of completing --track on its own. Note that when -c/-C or -b/-B are provided we do expect completing more references, but this will be discussed in a future change that addresses these options specifically.
Signed-off-by: Jacob Keller Signed-off-by: Junio C Hamano --- t/t9902-completion.sh | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/t/t9902-completion.sh b/t/t9902-completion.sh index e8350b3e963613..411b19637f54d9 100755 --- a/t/t9902-completion.sh +++ b/t/t9902-completion.sh @@ -1414,6 +1414,40 @@ test_expect_failure 'git checkout - with -d, complete only references' ' EOF ' +#TODO: --track should only complete fully specified remote branches +test_expect_failure 'git switch - with --track, complete only remote branches' ' + test_completion "git switch --track " <<-\EOF + other/branch-in-other Z + other/master-in-other Z + EOF +' + +#TODO: --track should only complete fully specified remote branches +test_expect_failure 'git checkout - with --track, complete only remote branches' ' + test_completion "git checkout --track " <<-\EOF + other/branch-in-other Z + other/master-in-other Z + EOF +' + +test_expect_success 'git switch - with --no-track, complete only local branch names' ' + test_completion "git switch --no-track " <<-\EOF + master Z + matching-branch Z + EOF +' + +test_expect_success 'git checkout - with --no-track, complete only local references' ' + test_completion "git checkout --no-track " <<-\EOF + HEAD Z + master Z + matching-branch Z + matching-tag Z + other/branch-in-other Z + other/master-in-other Z + EOF +' + test_expect_success 'teardown after ref completion' ' git branch -d matching-branch && git tag -d matching-tag && From b07d77a2baae626370be8ae35efd3ebaf9befbc2 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 28 May 2020 11:10:37 -0700 Subject: [PATCH 101/447] completion: add tests showing subpar -c/-C startpoint completion When using the branch creation argument for git switch or git checkout, -c/-C or -b/-B, the commands operate in a different mode: `git switch -c <branch> <start-point>` means to create a branch named <branch> at the commit referred to by <start-point>.
When completing the start-point, we ought to always complete all valid references. Add tests for the completion of the start-point to -c/-C and -b/-B. Signed-off-by: Jacob Keller Signed-off-by: Junio C Hamano --- t/t9902-completion.sh | 140 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) diff --git a/t/t9902-completion.sh b/t/t9902-completion.sh index 411b19637f54d9..4b506eb81f1eb4 100755 --- a/t/t9902-completion.sh +++ b/t/t9902-completion.sh @@ -1448,6 +1448,146 @@ test_expect_success 'git checkout - with --no-track, complete only local referen EOF ' +#TODO: completing the start point of -c/-C should not include DWIM references +test_expect_failure 'git switch - with -c, complete all references' ' + test_completion "git switch -c new-branch " <<-\EOF + HEAD Z + master Z + matching-branch Z + matching-tag Z + other/branch-in-other Z + other/master-in-other Z + EOF +' + +#TODO: completing the start point of -c/-C should not include DWIM references +test_expect_failure 'git switch - with -C, complete all references' ' + test_completion "git switch -C new-branch " <<-\EOF + HEAD Z + master Z + matching-branch Z + matching-tag Z + other/branch-in-other Z + other/master-in-other Z + EOF +' + +#TODO: completing the start point of -c/-C should include all references, not just local branches +test_expect_failure 'git switch - with -c and --track, complete all references' ' + test_completion "git switch -c new-branch --track " <<-EOF + HEAD Z + master Z + matching-branch Z + matching-tag Z + other/branch-in-other Z + other/master-in-other Z + EOF +' + +#TODO: completing the start point of -c/-C should include all references, not just local branches +test_expect_failure 'git switch - with -C and --track, complete all references' ' + test_completion "git switch -C new-branch --track " <<-EOF + HEAD Z + master Z + matching-branch Z + matching-tag Z + other/branch-in-other Z + other/master-in-other Z + EOF +' + +#TODO: completing the start point of 
-c/-C should include all references, not just local branches +test_expect_failure 'git switch - with -c and --no-track, complete all references' ' + test_completion "git switch -c new-branch --no-track " <<-\EOF + HEAD Z + master Z + matching-branch Z + matching-tag Z + other/branch-in-other Z + other/master-in-other Z + EOF +' + +#TODO: completing the start point of -c/-C should include all references, not just local branches +test_expect_failure 'git switch - with -C and --no-track, complete all references' ' + test_completion "git switch -C new-branch --no-track " <<-\EOF + HEAD Z + master Z + matching-branch Z + matching-tag Z + other/branch-in-other Z + other/master-in-other Z + EOF +' + +#TODO: completing the start point of -b/-B should not include DWIM references +test_expect_failure 'git checkout - with -b, complete all references' ' + test_completion "git checkout -b new-branch " <<-\EOF + HEAD Z + master Z + matching-branch Z + matching-tag Z + other/branch-in-other Z + other/master-in-other Z + EOF +' + +#TODO: completing the start point of -b/-B should not include DWIM references +test_expect_failure 'git checkout - with -B, complete all references' ' + test_completion "git checkout -B new-branch " <<-\EOF + HEAD Z + master Z + matching-branch Z + matching-tag Z + other/branch-in-other Z + other/master-in-other Z + EOF +' + +test_expect_success 'git checkout - with -b and --track, complete all references' ' + test_completion "git checkout -b new-branch --track " <<-EOF + HEAD Z + master Z + matching-branch Z + matching-tag Z + other/branch-in-other Z + other/master-in-other Z + EOF +' + +test_expect_success 'git checkout - with -B and --track, complete all references' ' + test_completion "git checkout -B new-branch --track " <<-EOF + HEAD Z + master Z + matching-branch Z + matching-tag Z + other/branch-in-other Z + other/master-in-other Z + EOF +' + +test_expect_success 'git checkout - with -b and --no-track, complete all references' ' + test_completion
"git checkout -b new-branch --no-track " <<-\EOF + HEAD Z + master Z + matching-branch Z + matching-tag Z + other/branch-in-other Z + other/master-in-other Z + EOF +' + +test_expect_success 'git checkout - with -B and --no-track, complete all references' ' + test_completion "git checkout -B new-branch --no-track " <<-\EOF + HEAD Z + master Z + matching-branch Z + matching-tag Z + other/branch-in-other Z + other/master-in-other Z + EOF +' + test_expect_success 'teardown after ref completion' ' git branch -d matching-branch && git tag -d matching-tag && From 7f59d604292774a2c5c79a644419a9aa6c720365 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 28 May 2020 11:10:38 -0700 Subject: [PATCH 102/447] completion: add tests showing subpar -c/-C argument completion When using the branch creation argument for git switch or git checkout (-c/-C or -b/-B), the commands switch to a different mode: `git switch -c <branch> <start-point>` means to create a branch named <branch> at the commit referred to by <start-point>. When completing git switch or git checkout, it makes sense to complete the branch name differently from the start point. When completing a branch, one might consider that we do not have anything worth completing. After all, a new branch must have an entirely new name. Consider, however, that if a user names branches using some similar scheme, they might wish to name a new branch by modifying the name of an existing branch. To avoid overloading completion for the <branch> argument, it seems reasonable to complete only the local branch names and the valid "Do What I Mean" remote branch names. Add tests for the completion of the <branch> argument to -c/-C and -b/-B, highlighting this preferred completion behavior.
Signed-off-by: Jacob Keller Signed-off-by: Junio C Hamano --- t/t9902-completion.sh | 100 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) diff --git a/t/t9902-completion.sh b/t/t9902-completion.sh index 4b506eb81f1eb4..dbe6e4dcb8ec51 100755 --- a/t/t9902-completion.sh +++ b/t/t9902-completion.sh @@ -1588,6 +1588,106 @@ test_expect_success 'git checkout - with -B and --no-track, complete all referen EOF ' +#TODO: -c/-C argument completion should not include all references +test_expect_failure 'git switch - for -c, complete local branches and unique remote branches' ' + test_completion "git switch -c " <<-\EOF + branch-in-other Z + master Z + master-in-other Z + matching-branch Z + EOF +' + +#TODO: -c/-C argument completion should not include all references +test_expect_failure 'git switch - for -C, complete local branches and unique remote branches' ' + test_completion "git switch -C " <<-\EOF + branch-in-other Z + master Z + master-in-other Z + matching-branch Z + EOF +' + +test_expect_success 'git switch - for -c with --no-guess, complete local branches only' ' + test_completion "git switch --no-guess -c " <<-\EOF + master Z + matching-branch Z + EOF +' + +test_expect_success 'git switch - for -C with --no-guess, complete local branches only' ' + test_completion "git switch --no-guess -C " <<-\EOF + master Z + matching-branch Z + EOF +' + +test_expect_success 'git switch - for -c with --no-track, complete local branches only' ' + test_completion "git switch --no-track -c " <<-\EOF + master Z + matching-branch Z + EOF +' + +test_expect_success 'git switch - for -C with --no-track, complete local branches only' ' + test_completion "git switch --no-track -C " <<-\EOF + master Z + matching-branch Z + EOF +' + +#TODO: -b/-B argument completion should not include all references +test_expect_failure 'git checkout - for -b, complete local branches and unique remote branches' ' + test_completion "git checkout -b " <<-\EOF + branch-in-other Z + 
master Z + master-in-other Z + matching-branch Z + EOF +' + +#TODO: -b/-B argument completion should not include all references +test_expect_failure 'git checkout - for -B, complete local branches and unique remote branches' ' + test_completion "git checkout -B " <<-\EOF + branch-in-other Z + master Z + master-in-other Z + matching-branch Z + EOF +' + +#TODO: -b/-B argument completion should not include all references +test_expect_failure 'git checkout - for -b with --no-guess, complete local branches only' ' + test_completion "git checkout --no-guess -b " <<-\EOF + master Z + matching-branch Z + EOF +' + +#TODO: -b/-B argument completion should not include all references +test_expect_failure 'git checkout - for -B with --no-guess, complete local branches only' ' + test_completion "git checkout --no-guess -B " <<-\EOF + master Z + matching-branch Z + EOF +' + +#TODO: -b/-B argument completion should not include all references +test_expect_failure 'git checkout - for -b with --no-track, complete local branches only' ' + test_completion "git checkout --no-track -b " <<-\EOF + master Z + matching-branch Z + EOF +' + +#TODO: -b/-B argument completion should not include all references +test_expect_failure 'git checkout - for -B with --no-track, complete local branches only' ' + test_completion "git checkout --no-track -B " <<-\EOF + master Z + matching-branch Z + EOF +' + test_expect_success 'teardown after ref completion' ' git branch -d matching-branch && git tag -d matching-tag && From c81ca56bca2d4b9b2606211106ca4ae0fdded834 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 28 May 2020 11:10:39 -0700 Subject: [PATCH 103/447] completion: add tests showing subpar switch/checkout --orphan logic Similar to -c/-C, --orphan takes an argument which is the branch name to use. We ought to complete this branch name using similar rules as to how we complete new branch names for -c/-C and -b/-B. 
Namely, limit the total number of options provided by completing to the local branches. Additionally, git switch --orphan does not take any start point and will always create using the empty-tree. Thus, after the branch name is completed, git switch --orphan should not complete any references. Add test cases showing the expected behavior of --orphan, for both the argument and starting point. Signed-off-by: Jacob Keller Signed-off-by: Junio C Hamano --- t/t9902-completion.sh | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/t/t9902-completion.sh b/t/t9902-completion.sh index dbe6e4dcb8ec51..8375c4df94d582 100755 --- a/t/t9902-completion.sh +++ b/t/t9902-completion.sh @@ -1688,6 +1688,45 @@ test_expect_failure 'git checkout - for -B with --no-track, complete local branc EOF ' +#TODO: --orphan argument completion should not include all references +test_expect_failure 'git switch - with --orphan completes local branch names and unique remote branch names' ' + test_completion "git switch --orphan " <<-\EOF + branch-in-other Z + master Z + master-in-other Z + matching-branch Z + EOF +' + +#TODO: switch --orphan does not take a start-point and thus has nothing to complete +test_expect_failure 'git switch - --orphan with branch already provided completes nothing else' ' + test_completion "git switch --orphan master " <<-\EOF + + EOF +' + +#TODO: --orphan argument completion should not include all references +test_expect_failure 'git checkout - with --orphan completes local branch names and unique remote branch names' ' + test_completion "git checkout --orphan " <<-\EOF + branch-in-other Z + master Z + master-in-other Z + matching-branch Z + EOF +' + +#TODO: checkout --orphan start-point completion should not include DWIM remote unique branch names +test_expect_failure 'git checkout - --orphan with branch already provided completes local refs for a start-point' ' + test_completion "git checkout --orphan master " <<-\EOF + HEAD Z +
master Z + matching-branch Z + matching-tag Z + other/branch-in-other Z + other/master-in-other Z + EOF +' + test_expect_success 'teardown after ref completion' ' git branch -d matching-branch && git tag -d matching-tag && From 0408c6b412a6b6f5fa7d1c33424ef00ad8c93533 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 28 May 2020 11:10:40 -0700 Subject: [PATCH 104/447] completion: replace overloaded track term for __git_complete_refs The __git_complete_refs function uses the "--track" option to specify when to enable listing of unique remote branches which are used by the DWIM logic of git checkout and git switch. Using the term '--track' here is confusing because the git commands themselves have '--track' as an argument. Additionally, the completion logic for _git_switch also checks for --track. Keeping the meaning of track_opt and --track for __git_complete_refs straight from the --track git switch and git checkout option is difficult when reading this code. Use the option '--dwim' instead, indicating this is about enabling or disabling logic related to DWIM mode. Also rename the local variable track_opt to dwim_opt to further reduce the confusion when reading the completion code for _git_switch. Because it is plausible for users to have developed their own completions which rely on __git_complete_refs, keep --track as a synonym for --dwim, even though we no longer use it in any of the core git completion logic. Add a comment explaining why it remains as an alternative spelling for --dwim. Signed-off-by: Jacob Keller Signed-off-by: Junio C Hamano --- contrib/completion/git-completion.bash | 28 ++++++++++++++------------ 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash index c21786f2fd0026..6b44d369984783 100644 --- a/contrib/completion/git-completion.bash +++ b/contrib/completion/git-completion.bash @@ -749,7 +749,7 @@ __git_refs () # Usage: __git_complete_refs [