diff --git a/backend/plugins/gitextractor/parser/repo_gogit.go b/backend/plugins/gitextractor/parser/repo_gogit.go index baaa4e42b17..21cf09cd9e9 100644 --- a/backend/plugins/gitextractor/parser/repo_gogit.go +++ b/backend/plugins/gitextractor/parser/repo_gogit.go @@ -23,6 +23,7 @@ import ( "encoding/hex" "fmt" "regexp" + "strings" "github.com/apache/incubator-devlake/core/dal" "github.com/apache/incubator-devlake/core/errors" @@ -220,9 +221,6 @@ func (r *GogitRepoCollector) CollectBranches(subtaskCtx plugin.SubTaskContext) e func(r *plumbing.Reference) bool { return r.Name().IsBranch() || r.Name().IsRemote() }, refIter) - if err != nil { - return err - } headRef, err := r.repo.Head() if err != nil { return err @@ -336,7 +334,26 @@ func (r *GogitRepoCollector) CollectCommits(subtaskCtx plugin.SubTaskContext) (e if err != nil { return err } else { + excluded := map[string]struct{}{} + for _, ext := range taskOpts.ExcludeFileExtensions { + e := strings.ToLower(strings.TrimSpace(ext)) + if e == "" { + continue + } + excluded[e] = struct{}{} + } for _, stat := range stats { + nameLower := strings.ToLower(stat.Name) + skip := false + for ext := range excluded { + if strings.HasSuffix(nameLower, ext) { + skip = true + break + } + } + if skip { + continue + } codeCommit.Additions += stat.Addition // In some repos, deletion may be zero, which is different from git log --stat. // It seems go-git doesn't get the correct changes. @@ -363,7 +380,7 @@ func (r *GogitRepoCollector) CollectCommits(subtaskCtx plugin.SubTaskContext) (e return err } if !*taskOpts.SkipCommitFiles { - if err := r.storeDiffCommitFilesComparedToParent(subtaskCtx, componentMap, commit); err != nil { + if err := r.storeDiffCommitFilesComparedToParent(subtaskCtx, componentMap, commit, taskOpts.ExcludeFileExtensions); err != nil { return err } } @@ -423,7 +440,7 @@ func (r *GogitRepoCollector) getCurrentAndParentTree(ctx context.Context, commit return commitTree, firstParentTree, nil } -func (r *GogitRepoCollector) storeDiffCommitFilesComparedToParent(subtaskCtx plugin.SubTaskContext, componentMap map[string]*regexp.Regexp, commit *object.Commit) (err error) { +func (r *GogitRepoCollector) storeDiffCommitFilesComparedToParent(subtaskCtx plugin.SubTaskContext, componentMap map[string]*regexp.Regexp, commit *object.Commit, excludeExts []string) (err error) { commitTree, firstParentTree, err := r.getCurrentAndParentTree(subtaskCtx.GetContext(), commit) if err != nil { return err @@ -433,12 +450,34 @@ func (r *GogitRepoCollector) storeDiffCommitFilesComparedToParent(subtaskCtx plu if err != nil { return err } + // normalize exclusions + excluded := map[string]struct{}{} + for _, ext := range excludeExts { + e := strings.ToLower(strings.TrimSpace(ext)) + if e == "" { + continue + } + excluded[e] = struct{}{} + } for _, p := range patch.Stats() { commitFile := &code.CommitFile{ CommitSha: commit.Hash.String(), } fileName := p.Name commitFile.FilePath = fileName + if len(excluded) > 0 { + lower := strings.ToLower(fileName) + skip := false + for ext := range excluded { + if strings.HasSuffix(lower, ext) { + skip = true + break + } + } + if skip { + continue + } + } commitFile.Id = genCommitFileId(commitFile.CommitSha, fileName) commitFile.Deletions = p.Deletion commitFile.Additions = p.Addition diff --git a/backend/plugins/gitextractor/parser/repo_libgit2.go b/backend/plugins/gitextractor/parser/repo_libgit2.go index 78451feb017..69d82e253d2 100644 --- a/backend/plugins/gitextractor/parser/repo_libgit2.go +++ b/backend/plugins/gitextractor/parser/repo_libgit2.go @@ -25,6 +25,7 @@ import ( "regexp" "sort" "strconv" + "strings" "github.com/apache/incubator-devlake/core/dal" "github.com/apache/incubator-devlake/core/errors" @@ -317,12 +318,13 @@ func (r *Libgit2RepoCollector) CollectCommits(subtaskCtx plugin.SubTaskContext) if !*taskOpts.SkipCommitStat { var stats *git.DiffStats - if stats, err = r.getDiffComparedToParent(taskOpts, c.Sha, commit, parent, opts, componentMap); err != nil { + var addIncluded, delIncluded int + if stats, addIncluded, delIncluded, err = r.getDiffComparedToParent(taskOpts, c.Sha, commit, parent, opts, componentMap); err != nil { return err } r.logger.Debug("state: %#+v\n", stats.Deletions()) - c.Additions += stats.Insertions() - c.Deletions += stats.Deletions() + c.Additions += addIncluded + c.Deletions += delIncluded } err = r.store.Commits(c) @@ -358,39 +360,83 @@ func (r *Libgit2RepoCollector) storeParentCommits(commitSha string, commit *git. return r.store.CommitParents(commitParents) } -func (r *Libgit2RepoCollector) getDiffComparedToParent(taskOpts *GitExtractorOptions, commitSha string, commit *git.Commit, parent *git.Commit, opts *git.DiffOptions, componentMap map[string]*regexp.Regexp) (*git.DiffStats, errors.Error) { +func (r *Libgit2RepoCollector) getDiffComparedToParent(taskOpts *GitExtractorOptions, commitSha string, commit *git.Commit, parent *git.Commit, opts *git.DiffOptions, componentMap map[string]*regexp.Regexp) (*git.DiffStats, int, int, errors.Error) { var err error var parentTree, tree *git.Tree if parent != nil { parentTree, err = parent.Tree() } if err != nil { - return nil, errors.Convert(err) + return nil, 0, 0, errors.Convert(err) } tree, err = commit.Tree() if err != nil { - return nil, errors.Convert(err) + return nil, 0, 0, errors.Convert(err) } var diff *git.Diff diff, err = r.repo.DiffTreeToTree(parentTree, tree, opts) if err != nil { - return nil, errors.Convert(err) + return nil, 0, 0, errors.Convert(err) + } + // build excluded extension set + excluded := map[string]struct{}{} + for _, ext := range taskOpts.ExcludeFileExtensions { + e := strings.ToLower(strings.TrimSpace(ext)) + if e == "" { + continue + } + excluded[e] = struct{}{} } if !*taskOpts.SkipCommitFiles { - err = r.storeCommitFilesFromDiff(commitSha, diff, componentMap) + err = r.storeCommitFilesFromDiff(commitSha, diff, componentMap, excluded) if err != nil { - return nil, errors.Convert(err) + return nil, 0, 0, errors.Convert(err) } } var stats *git.DiffStats stats, err = diff.Stats() if err != nil { - return nil, errors.Convert(err) - } - return stats, nil + return nil, 0, 0, errors.Convert(err) + } + // calculate included totals with exclusions + addIncluded := 0 + delIncluded := 0 + if len(excluded) == 0 { + addIncluded = stats.Insertions() + delIncluded = stats.Deletions() + return stats, addIncluded, delIncluded, nil + } + _ = diff.ForEach(func(file git.DiffDelta, progress float64) (git.DiffForEachHunkCallback, error) { + // choose path to check based on delta status; for deletions use old path + pathForCheck := file.NewFile.Path + if file.Status == git.DeltaDeleted || pathForCheck == "" { + pathForCheck = file.OldFile.Path + } + lower := strings.ToLower(pathForCheck) + for ext := range excluded { + if strings.HasSuffix(lower, ext) { + // skip all lines for excluded files + return func(hunk git.DiffHunk) (git.DiffForEachLineCallback, error) { + return func(line git.DiffLine) error { return nil }, nil + }, nil + } + } + return func(hunk git.DiffHunk) (git.DiffForEachLineCallback, error) { + return func(line git.DiffLine) error { + if line.Origin == git.DiffLineAddition { + addIncluded += line.NumLines + } + if line.Origin == git.DiffLineDeletion { + delIncluded += line.NumLines + } + return nil + }, nil + }, nil + }, git.DiffDetailLines) + return stats, addIncluded, delIncluded, nil } -func (r *Libgit2RepoCollector) storeCommitFilesFromDiff(commitSha string, diff *git.Diff, componentMap map[string]*regexp.Regexp) errors.Error { +func (r *Libgit2RepoCollector) storeCommitFilesFromDiff(commitSha string, diff *git.Diff, componentMap map[string]*regexp.Regexp, excluded map[string]struct{}) errors.Error { var commitFile *code.CommitFile var commitFileComponent *code.CommitFileComponent var err error @@ -404,15 +450,36 @@ func (r *Libgit2RepoCollector) storeCommitFilesFromDiff(commitSha string, diff * } } + // skip files by extension if configured + if len(excluded) > 0 { + pathForCheck := file.NewFile.Path + if file.Status == git.DeltaDeleted || pathForCheck == "" { + pathForCheck = file.OldFile.Path + } + lower := strings.ToLower(pathForCheck) + for ext := range excluded { + if strings.HasSuffix(lower, ext) { + // skip this file entirely + return func(hunk git.DiffHunk) (git.DiffForEachLineCallback, error) { + return func(line git.DiffLine) error { return nil }, nil + }, nil + } + } + } + commitFile = new(code.CommitFile) commitFile.CommitSha = commitSha + // prefer new path; for deletions fall back to old path commitFile.FilePath = file.NewFile.Path + if commitFile.FilePath == "" { + commitFile.FilePath = file.OldFile.Path + } // With some long path,the varchar(255) was not enough both ID and file_path // So we use the hash to compress the path in ID and add length of file_path. // Use commitSha and the sha256 of FilePath to create id shaFilePath := sha256.New() - shaFilePath.Write([]byte(file.NewFile.Path)) + shaFilePath.Write([]byte(commitFile.FilePath)) commitFile.Id = commitSha + ":" + hex.EncodeToString(shaFilePath.Sum(nil)) commitFileComponent = new(code.CommitFileComponent) diff --git a/backend/plugins/gitextractor/parser/taskdata.go b/backend/plugins/gitextractor/parser/taskdata.go index 8dccf5ffe9f..bdfdbd2ae1b 100644 --- a/backend/plugins/gitextractor/parser/taskdata.go +++ b/backend/plugins/gitextractor/parser/taskdata.go @@ -47,4 +47,6 @@ type GitExtractorOptions struct { NoShallowClone bool `json:"noShallowClone" mapstructure:"noShallowClone"` ConnectionId uint64 `json:"connectionId" mapstructure:"connectionId,omitempty"` PluginName string `json:"pluginName" mapstructure:"pluginName,omitempty"` + // Configured by upstream plugin (e.g., GitLab) to exclude file extensions from commit stats + ExcludeFileExtensions []string `json:"excludeFileExtensions" mapstructure:"excludeFileExtensions"` } diff --git a/backend/plugins/gitlab/api/blueprint_v200.go b/backend/plugins/gitlab/api/blueprint_v200.go index b891f72b3ef..dbe14905df1 100644 --- a/backend/plugins/gitlab/api/blueprint_v200.go +++ b/backend/plugins/gitlab/api/blueprint_v200.go @@ -132,17 +132,22 @@ func makePipelinePlanV200( return nil, err } cloneUrl.User = url.UserPassword("git", connection.Token) + gitextOpts := map[string]interface{}{ + "url": cloneUrl.String(), + "name": gitlabProject.Name, + "fullName": gitlabProject.PathWithNamespace, + "repoId": didgen.NewDomainIdGenerator(&models.GitlabProject{}).Generate(connection.ID, gitlabProject.GitlabId), + "proxy": connection.Proxy, + "connectionId": gitlabProject.ConnectionId, + "pluginName": "gitlab", + } + if len(scopeConfig.PrSizeExcludedFileExtensions) > 0 { + // pass excluded file extensions to gitextractor to support PR Size exclusion + gitextOpts["excludeFileExtensions"] = scopeConfig.PrSizeExcludedFileExtensions + } stage = append(stage, &coreModels.PipelineTask{ - Plugin: "gitextractor", - Options: map[string]interface{}{ - "url": cloneUrl.String(), - "name": gitlabProject.Name, - "fullName": gitlabProject.PathWithNamespace, - "repoId": didgen.NewDomainIdGenerator(&models.GitlabProject{}).Generate(connection.ID, gitlabProject.GitlabId), - "proxy": connection.Proxy, - "connectionId": gitlabProject.ConnectionId, - "pluginName": "gitlab", - }, + Plugin: "gitextractor", + Options: gitextOpts, }) } diff --git a/backend/plugins/gitlab/models/migrationscripts/20250921_add_pr_size_excluded_extensions.go b/backend/plugins/gitlab/models/migrationscripts/20250921_add_pr_size_excluded_extensions.go new file mode 100644 index 00000000000..a0971e38a4d --- /dev/null +++ b/backend/plugins/gitlab/models/migrationscripts/20250921_add_pr_size_excluded_extensions.go @@ -0,0 +1,50 @@ +/* +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to You under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package migrationscripts + +import ( + "github.com/apache/incubator-devlake/core/context" + "github.com/apache/incubator-devlake/core/errors" + "github.com/apache/incubator-devlake/core/plugin" + "github.com/apache/incubator-devlake/helpers/migrationhelper" +) + +var _ plugin.MigrationScript = (*addPrSizeExcludedFileExtensions)(nil) + +type gitlabScopeConfig20250921 struct { + PrSizeExcludedFileExtensions []string `gorm:"type:json" json:"prSizeExcludedFileExtensions" mapstructure:"prSizeExcludedFileExtensions"` +} + +func (gitlabScopeConfig20250921) TableName() string { + return "_tool_gitlab_scope_configs" +} + +type addPrSizeExcludedFileExtensions struct{} + +func (script *addPrSizeExcludedFileExtensions) Up(basicRes context.BasicRes) errors.Error { + return migrationhelper.AutoMigrateTables( + basicRes, + &gitlabScopeConfig20250921{}, + ) +} + +func (*addPrSizeExcludedFileExtensions) Version() uint64 { return 20250921100000 } + +func (*addPrSizeExcludedFileExtensions) Name() string { + return "add pr_size_excluded_file_extensions to _tool_gitlab_scope_configs" +} diff --git a/backend/plugins/gitlab/models/migrationscripts/register.go b/backend/plugins/gitlab/models/migrationscripts/register.go index 1d89b250512..30a76f63ed9 100644 --- a/backend/plugins/gitlab/models/migrationscripts/register.go +++ b/backend/plugins/gitlab/models/migrationscripts/register.go @@ -52,5 +52,6 @@ func All() []plugin.MigrationScript { new(addGitlabAssigneeAndReviewerPrimaryKey), new(changeIssueComponentType), new(addIsChildToPipelines240906), + new(addPrSizeExcludedFileExtensions), } } diff --git a/backend/plugins/gitlab/models/scope_config.go b/backend/plugins/gitlab/models/scope_config.go index 78cfd7f2d1d..525720c3282 100644 --- a/backend/plugins/gitlab/models/scope_config.go +++ b/backend/plugins/gitlab/models/scope_config.go @@ -37,6 +37,8 @@ type GitlabScopeConfig struct { ProductionPattern string `mapstructure:"productionPattern,omitempty" json:"productionPattern" gorm:"type:varchar(255)"` EnvNamePattern string `mapstructure:"envNamePattern,omitempty" json:"envNamePattern" gorm:"type:varchar(255)"` Refdiff datatypes.JSONMap `mapstructure:"refdiff,omitempty" json:"refdiff" swaggertype:"object" format:"json"` + // A list of file extensions to exclude when calculating PR Size (affects commit additions/deletions used by dashboards) + PrSizeExcludedFileExtensions []string `mapstructure:"prSizeExcludedFileExtensions" json:"prSizeExcludedFileExtensions" gorm:"type:json;serializer:json"` } func (t GitlabScopeConfig) TableName() string { diff --git a/config-ui/src/plugins/register/gitlab/config.tsx b/config-ui/src/plugins/register/gitlab/config.tsx index 9caf5260af6..b8924d7f750 100644 --- a/config-ui/src/plugins/register/gitlab/config.tsx +++ b/config-ui/src/plugins/register/gitlab/config.tsx @@ -78,6 +78,7 @@ export const GitLabConfig: IPluginConfig = { envNamePattern: '(?i)prod(.*)', deploymentPattern: '', productionPattern: '', + prSizeExcludedFileExtensions: [], }, }, }; diff --git a/config-ui/src/plugins/register/gitlab/transformation.tsx b/config-ui/src/plugins/register/gitlab/transformation.tsx index 3ad9b97c6f5..2405e661454 100644 --- a/config-ui/src/plugins/register/gitlab/transformation.tsx +++ b/config-ui/src/plugins/register/gitlab/transformation.tsx @@ -178,4 +178,34 @@ const renderCollapseItems = ({ > ), }, + { + key: 'CODEREVIEW', + label: 'Code Review', + style: panelStyle, + children: ( + <> +