diff --git a/rules/docker/missing_user_instruction.py b/rules/docker/missing_user_instruction.py index f9d95d70..e291d938 100644 --- a/rules/docker/missing_user_instruction.py +++ b/rules/docker/missing_user_instruction.py @@ -107,3 +107,21 @@ def missing_user_instruction(): used for privilege escalation or lateral movement. """ return missing(instruction="USER") + +# Output JSON IR for Go executor +if __name__ == "__main__": + import json + import sys + sys.path.insert(0, '/Users/shiva/src/shivasurya/code-pathfinder/python-dsl') + + from rules import container_decorators, container_ir + + # Get registered rules and convert to JSON IR + json_ir = container_ir.compile_all_rules() + + # Output complete structure with both dockerfile and compose arrays + output = { + "dockerfile": json_ir.get("dockerfile", []), + "compose": json_ir.get("compose", []) + } + print(json.dumps(output)) diff --git a/sast-engine/cmd/scan.go b/sast-engine/cmd/scan.go index f1892aed..094e00c6 100644 --- a/sast-engine/cmd/scan.go +++ b/sast-engine/cmd/scan.go @@ -4,12 +4,15 @@ import ( "fmt" "os" "path/filepath" + "strings" "github.com/shivasurya/code-pathfinder/sast-engine/dsl" + "github.com/shivasurya/code-pathfinder/sast-engine/executor" "github.com/shivasurya/code-pathfinder/sast-engine/graph" "github.com/shivasurya/code-pathfinder/sast-engine/graph/callgraph/builder" "github.com/shivasurya/code-pathfinder/sast-engine/graph/callgraph/core" "github.com/shivasurya/code-pathfinder/sast-engine/graph/callgraph/registry" + "github.com/shivasurya/code-pathfinder/sast-engine/graph/docker" "github.com/shivasurya/code-pathfinder/sast-engine/output" "github.com/spf13/cobra" ) @@ -67,6 +70,9 @@ Examples: } projectPath = absProjectPath + // Create rule loader (used for both container and code analysis rules) + loader := dsl.NewRuleLoader(rulesPath) + // Step 1: Build code graph (AST) logger.Progress("Building code graph from %s...", projectPath) codeGraph := graph.Initialize(projectPath) @@ -75,6 
+81,28 @@ Examples: } logger.Statistic("Code graph built: %d nodes", len(codeGraph.Nodes)) + // Step 1.5: Execute container rules if Docker/Compose files are present + var containerDetections []*dsl.EnrichedDetection + dockerFiles, composeFiles := extractContainerFiles(codeGraph) + if len(dockerFiles) > 0 || len(composeFiles) > 0 { + logger.Progress("Found %d Dockerfile(s) and %d docker-compose file(s)", len(dockerFiles), len(composeFiles)) + + // Load container rules from the same rules path (runtime generation) + logger.Progress("Loading container rules...") + containerRulesJSON, err := loader.LoadContainerRules() + if err != nil { + logger.Warning("No container rules found: %v", err) + } else { + logger.Progress("Executing container rules...") + containerDetections = executeContainerRules(containerRulesJSON, dockerFiles, composeFiles, projectPath, logger) + if len(containerDetections) > 0 { + logger.Statistic("Container scan found %d issue(s)", len(containerDetections)) + } else { + logger.Progress("No container issues detected") + } + } + } + // Step 2: Build module registry logger.Progress("Building module registry...") moduleRegistry, err := registry.BuildModuleRegistry(projectPath) @@ -95,7 +123,6 @@ Examples: // Step 4: Load Python DSL rules logger.Progress("Loading rules from %s...", rulesPath) - loader := dsl.NewRuleLoader(rulesPath) rules, err := loader.LoadRules() if err != nil { return fmt.Errorf("failed to load rules: %w", err) @@ -129,8 +156,16 @@ Examples: } } + // Merge container detections with code analysis detections + allEnriched = append(allEnriched, containerDetections...) 
+ // Step 6: Format and display results - summary := output.BuildSummary(allEnriched, len(rules)) + // Count unique rule IDs from all detections (includes both code and container rules) + uniqueRules := make(map[string]bool) + for _, det := range allEnriched { + uniqueRules[det.Rule.ID] = true + } + summary := output.BuildSummary(allEnriched, len(uniqueRules)) formatter := output.NewTextFormatter(&output.OutputOptions{ Verbosity: verbosity, }, logger) @@ -157,6 +192,187 @@ func countTotalCallSites(cg *core.CallGraph) int { return total } +// extractContainerFiles extracts unique Docker and docker-compose file paths from CodeGraph. +func extractContainerFiles(codeGraph *graph.CodeGraph) (dockerFiles []string, composeFiles []string) { + dockerFileSet := make(map[string]bool) + composeFileSet := make(map[string]bool) + + for _, node := range codeGraph.Nodes { + if node.Type == "dockerfile_instruction" { + dockerFileSet[node.File] = true + } else if node.Type == "compose_service" { + composeFileSet[node.File] = true + } + } + + for file := range dockerFileSet { + dockerFiles = append(dockerFiles, file) + } + for file := range composeFileSet { + composeFiles = append(composeFiles, file) + } + + return dockerFiles, composeFiles +} + +// executeContainerRules executes container security rules and returns enriched detections. 
+func executeContainerRules( + rulesJSON []byte, + dockerFiles []string, + composeFiles []string, + projectPath string, + logger *output.Logger, +) []*dsl.EnrichedDetection { + // Create executor and load rules + exec := &executor.ContainerRuleExecutor{} + if err := exec.LoadRules(rulesJSON); err != nil { + logger.Warning("Failed to parse container rules: %v", err) + return nil + } + + var allMatches []executor.RuleMatch + + // Execute rules on Dockerfiles + for _, dockerFilePath := range dockerFiles { + parser := docker.NewDockerfileParser() + dockerGraph, err := parser.ParseFile(dockerFilePath) + if err != nil { + logger.Warning("Failed to parse Dockerfile %s: %v", dockerFilePath, err) + continue + } + + matches := exec.ExecuteDockerfile(dockerGraph) + allMatches = append(allMatches, matches...) + } + + // Execute rules on docker-compose files + for _, composeFilePath := range composeFiles { + composeGraph, err := graph.ParseDockerCompose(composeFilePath) + if err != nil { + logger.Warning("Failed to parse docker-compose %s: %v", composeFilePath, err) + continue + } + + matches := exec.ExecuteCompose(composeGraph) + allMatches = append(allMatches, matches...) 
+ } + + // Convert RuleMatch to EnrichedDetection + enriched := make([]*dsl.EnrichedDetection, 0, len(allMatches)) + for _, match := range allMatches { + // Make file path relative to project root + relPath, err := filepath.Rel(projectPath, match.FilePath) + if err != nil { + relPath = match.FilePath + } + + // Build description with service name if present (compose rules) + description := match.Message + if match.ServiceName != "" { + description = fmt.Sprintf("[Service: %s] %s", match.ServiceName, match.Message) + } + + // Parse CWE into slice format + cweList := []string{} + if match.CWE != "" { + cweList = []string{match.CWE} + } + + // Generate code snippet + snippet := generateCodeSnippet(match.FilePath, match.LineNumber, 3) + + detection := &dsl.EnrichedDetection{ + Detection: dsl.DataflowDetection{ + FunctionFQN: match.FilePath, // Use file path as function identifier for container rules + SinkLine: match.LineNumber, + Confidence: 1.0, // Container rules are deterministic + Scope: "file", + }, + Location: dsl.LocationInfo{ + FilePath: match.FilePath, + RelPath: relPath, + Line: match.LineNumber, + }, + Snippet: snippet, + Rule: dsl.RuleMetadata{ + ID: match.RuleID, + Name: match.RuleName, + Severity: strings.ToLower(match.Severity), // Normalize to lowercase for formatter + Description: description, + CWE: cweList, + }, + DetectionType: dsl.DetectionTypePattern, + } + + enriched = append(enriched, detection) + } + + return enriched +} + +// generateCodeSnippet creates a code snippet with context lines around the target line. 
+func generateCodeSnippet(filePath string, lineNumber int, contextLines int) dsl.CodeSnippet { + // Read file contents + content, err := os.ReadFile(filePath) + if err != nil { + return dsl.CodeSnippet{} + } + + lines := splitLines(string(content)) + if lineNumber < 1 || lineNumber > len(lines) { + return dsl.CodeSnippet{} + } + + // Calculate start and end lines (1-indexed) + startLine := lineNumber - contextLines + if startLine < 1 { + startLine = 1 + } + endLine := lineNumber + contextLines + if endLine > len(lines) { + endLine = len(lines) + } + + // Build snippet lines + var snippetLines []dsl.SnippetLine + for i := startLine; i <= endLine; i++ { + snippetLines = append(snippetLines, dsl.SnippetLine{ + Number: i, + Content: lines[i-1], // lines is 0-indexed + IsHighlight: i == lineNumber, + }) + } + + return dsl.CodeSnippet{ + Lines: snippetLines, + StartLine: startLine, + HighlightLine: lineNumber, + } +} + +// splitLines splits content into lines preserving empty lines. +func splitLines(content string) []string { + if content == "" { + return []string{} + } + // Split by newline but preserve empty lines + lines := []string{} + currentLine := "" + for _, ch := range content { + if ch == '\n' { + lines = append(lines, currentLine) + currentLine = "" + } else if ch != '\r' { // Skip carriage returns + currentLine += string(ch) + } + } + // Add last line if not empty or if content doesn't end with newline + if currentLine != "" || len(content) > 0 && content[len(content)-1] != '\n' { + lines = append(lines, currentLine) + } + return lines +} + // printDetections outputs detections in simple format (used by query command). 
func printDetections(rule dsl.RuleIR, detections []dsl.DataflowDetection) { fmt.Printf("\n[%s] %s (%s)\n", rule.Rule.Severity, rule.Rule.ID, rule.Rule.Name) diff --git a/sast-engine/dsl/loader.go b/sast-engine/dsl/loader.go index c314e5da..7ab83601 100644 --- a/sast-engine/dsl/loader.go +++ b/sast-engine/dsl/loader.go @@ -114,9 +114,18 @@ func (l *RuleLoader) loadRulesFromFile(filePath string) ([]RuleIR, error) { return nil, fmt.Errorf("failed to execute Python rules from %s: %w", filePath, err) } - // Parse JSON IR + // Parse JSON IR - try array format first (code analysis rules) var rules []RuleIR if err := json.Unmarshal(output, &rules); err != nil { + // If array parsing fails, check if it's a container rule (object format) + var containerTest struct { + Dockerfile []interface{} `json:"dockerfile"` + Compose []interface{} `json:"compose"` + } + if containerErr := json.Unmarshal(output, &containerTest); containerErr == nil { + // This is a container rule file, skip it (handled by LoadContainerRules) + return []RuleIR{}, nil + } return nil, fmt.Errorf("failed to parse rule JSON IR from %s: %w", filePath, err) } @@ -141,8 +150,8 @@ func (l *RuleLoader) loadRulesFromDirectory(dirPath string) ([]RuleIR, error) { // Load rules from this file rules, err := l.loadRulesFromFile(path) if err != nil { - // Log error but continue processing other files - fmt.Fprintf(os.Stderr, "Warning: failed to load rules from %s: %v\n", path, err) + // Silently skip files that fail to load (may be container rules) + //nolint:nilerr // Intentionally skip files that aren't code analysis rules return nil } @@ -154,11 +163,163 @@ func (l *RuleLoader) loadRulesFromDirectory(dirPath string) ([]RuleIR, error) { return nil, fmt.Errorf("failed to walk directory %s: %w", dirPath, err) } - if len(allRules) == 0 { - return nil, fmt.Errorf("no rules found in directory: %s", dirPath) + // It's OK to have zero code analysis rules (directory might only contain container rules) + return allRules, nil +} 
+ +// LoadContainerRules loads container rules (Dockerfile/Compose) from Python DSL files. +// Returns JSON IR in format: {"dockerfile": [...], "compose": [...]}. +func (l *RuleLoader) LoadContainerRules() ([]byte, error) { + // Check if path is file or directory + info, err := os.Stat(l.RulesPath) + if err != nil { + return nil, fmt.Errorf("failed to access rules path: %w", err) } - return allRules, nil + var containerRulesJSON struct { + Dockerfile []map[string]interface{} `json:"dockerfile"` + Compose []map[string]interface{} `json:"compose"` + } + + // If single file, load directly + if !info.IsDir() { + jsonIR, err := l.loadContainerRulesFromFile(l.RulesPath) + if err != nil { + return nil, err + } + // Parse and merge + var fileRules struct { + Dockerfile []map[string]interface{} `json:"dockerfile"` + Compose []map[string]interface{} `json:"compose"` + } + if err := json.Unmarshal(jsonIR, &fileRules); err != nil { + return nil, fmt.Errorf("failed to parse container rules JSON: %w", err) + } + containerRulesJSON.Dockerfile = append(containerRulesJSON.Dockerfile, fileRules.Dockerfile...) + containerRulesJSON.Compose = append(containerRulesJSON.Compose, fileRules.Compose...) 
+ } else { + // If directory, find all .py files and load them + err := filepath.Walk(l.RulesPath, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + // Skip non-Python files + if info.IsDir() || filepath.Ext(path) != ".py" { + return nil + } + + // Load container rules from this file + jsonIR, err := l.loadContainerRulesFromFile(path) + if err != nil { + // Skip files that don't contain container rules (they might be code analysis rules) + //nolint:nilerr // Intentionally skip files that aren't container rules + return nil + } + + // Parse and merge + var fileRules struct { + Dockerfile []map[string]interface{} `json:"dockerfile"` + Compose []map[string]interface{} `json:"compose"` + } + if err := json.Unmarshal(jsonIR, &fileRules); err != nil { + // Skip files with invalid JSON (might not be container rules) + //nolint:nilerr // Intentionally skip files with wrong format + return nil + } + + containerRulesJSON.Dockerfile = append(containerRulesJSON.Dockerfile, fileRules.Dockerfile...) + containerRulesJSON.Compose = append(containerRulesJSON.Compose, fileRules.Compose...) + return nil + }) + + if err != nil { + return nil, fmt.Errorf("failed to walk directory %s: %w", l.RulesPath, err) + } + } + + // Return combined JSON + return json.Marshal(containerRulesJSON) +} + +// loadContainerRulesFromFile loads container rules from a single Python file or directory. +// Creates a temporary Python script to import and compile all rules, then executes it. 
+func (l *RuleLoader) loadContainerRulesFromFile(rulesPath string) ([]byte, error) { + // Create context with timeout + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + // Create a temporary Python script that compiles rules from the given path + compileScript := fmt.Sprintf(` +import json +import importlib.util +from pathlib import Path + +from rules import container_decorators, container_ir + +# Import rule file(s) +rule_path = Path('%s') + +if rule_path.is_file(): + # Single file - import it + spec = importlib.util.spec_from_file_location("user_rule", rule_path) + if spec and spec.loader: + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) +elif rule_path.is_dir(): + # Directory - import all .py files + for rule_file in rule_path.glob("*.py"): + if rule_file.name == "__init__.py": + continue + try: + spec = importlib.util.spec_from_file_location(rule_file.stem, rule_file) + if spec and spec.loader: + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + except Exception: + pass # Skip files that fail to import + +# Compile and output +json_ir = container_ir.compile_all_rules() +print(json.dumps(json_ir)) +`, rulesPath) + + // Execute Python script + var cmd *exec.Cmd + if isSandboxEnabled() { + // For sandbox mode, write script to temp file and execute with nsjail + tmpFile, err := os.CreateTemp("/tmp", "container_rules_*.py") + if err != nil { + return nil, fmt.Errorf("failed to create temp script file: %w", err) + } + defer os.Remove(tmpFile.Name()) + + if _, err := tmpFile.WriteString(compileScript); err != nil { + return nil, fmt.Errorf("failed to write temp script: %w", err) + } + tmpFile.Close() + + cmd = buildNsjailCommand(ctx, tmpFile.Name()) + } else { + // Direct Python execution (development mode) + cmd = exec.CommandContext(ctx, "python3", "-c", compileScript) + } + + output, err := cmd.Output() + if err != nil { + if ctx.Err() == 
context.DeadlineExceeded { + return nil, fmt.Errorf("Python rule execution timed out after 30s") + } + return nil, fmt.Errorf("failed to compile container rules: %w", err) + } + + // Validate it's valid JSON + var test interface{} + if err := json.Unmarshal(output, &test); err != nil { + return nil, fmt.Errorf("invalid JSON output from container rules: %w", err) + } + + return output, nil } // ExecuteRule executes a single rule against callgraph. @@ -187,6 +348,10 @@ func (l *RuleLoader) ExecuteRule(rule *RuleIR, cg *core.CallGraph) ([]DataflowDe case "logic_and", "logic_or", "logic_not": return l.executeLogic(matcherType, matcherMap, cg) + // Container matchers - skip silently (handled by ContainerRuleExecutor) + case "missing_instruction", "instruction", "service_has", "service_missing", "any_of", "all_of", "none_of": + return []DataflowDetection{}, nil + default: return nil, fmt.Errorf("unknown matcher type: %s", matcherType) } diff --git a/sast-engine/executor/container_executor.go b/sast-engine/executor/container_executor.go index 32f7cc99..01c29d9d 100644 --- a/sast-engine/executor/container_executor.go +++ b/sast-engine/executor/container_executor.go @@ -398,6 +398,9 @@ func (e *ContainerRuleExecutor) evaluateAllOf( return nil } + // Track first match to get line number + var firstMatch *RuleMatch + // All conditions must match for _, cond := range conditions { condMap, ok := cond.(map[string]interface{}) @@ -414,22 +417,20 @@ func (e *ContainerRuleExecutor) evaluateAllOf( Matcher: condMap, } - if e.evaluateDockerfileRule(tempRule, dockerfile) == nil { + match := e.evaluateDockerfileRule(tempRule, dockerfile) + if match == nil { // One condition didn't match, so all_of fails return nil } - } - // All conditions matched - return &RuleMatch{ - RuleID: rule.ID, - RuleName: rule.Name, - Severity: rule.Severity, - CWE: rule.CWE, - Message: rule.Message, - FilePath: dockerfile.FilePath, - LineNumber: 1, + // Capture first match to get line number and file path + 
if firstMatch == nil { + firstMatch = match + } } + + // All conditions matched, return first match with proper line number + return firstMatch } func (e *ContainerRuleExecutor) evaluateAnyOf( diff --git a/sast-engine/graph/initialize.go b/sast-engine/graph/initialize.go index d2fd6c65..3cee2254 100644 --- a/sast-engine/graph/initialize.go +++ b/sast-engine/graph/initialize.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "path/filepath" + "strings" "sync" "time" @@ -40,8 +41,38 @@ func Initialize(directory string) *CodeGraph { for file := range fileChan { fileName := filepath.Base(file) fileExt := filepath.Ext(file) + fileBase := strings.ToLower(fileName) + localGraph := NewCodeGraph() + + // Check if it's a Dockerfile or docker-compose file + isDockerfile := strings.HasPrefix(fileBase, "dockerfile") + isDockerCompose := strings.Contains(fileBase, "docker-compose") && (fileExt == ".yml" || fileExt == ".yaml") + + if isDockerfile { + // Handle Dockerfile parsing + statusChan <- fmt.Sprintf("\033[32mWorker %d ....... Parsing Dockerfile %s\033[0m", workerID, fileName) + if err := parseDockerfile(file, localGraph); err != nil { + Log("Error parsing Dockerfile:", err) + continue + } + statusChan <- fmt.Sprintf("\033[32mWorker %d ....... Done processing Dockerfile %s\033[0m", workerID, fileName) + resultChan <- localGraph + progressChan <- 1 + continue + } else if isDockerCompose { + // Handle docker-compose.yml parsing + statusChan <- fmt.Sprintf("\033[32mWorker %d ....... Parsing docker-compose %s\033[0m", workerID, fileName) + if err := parseDockerCompose(file, localGraph); err != nil { + Log("Error parsing docker-compose:", err) + continue + } + statusChan <- fmt.Sprintf("\033[32mWorker %d ....... 
Done processing docker-compose %s\033[0m", workerID, fileName) + resultChan <- localGraph + progressChan <- 1 + continue + } - // Set the language based on file extension + // Handle tree-sitter based parsing for Java and Python switch fileExt { case ".java": parser.SetLanguage(java.GetLanguage()) @@ -58,7 +89,7 @@ func Initialize(directory string) *CodeGraph { Log("File not found:", err) continue } - + tree, err := parser.ParseCtx(context.TODO(), nil, sourceCode) if err != nil { Log("Error parsing file:", err) @@ -68,7 +99,6 @@ func Initialize(directory string) *CodeGraph { defer tree.Close() rootNode := tree.RootNode() - localGraph := NewCodeGraph() statusChan <- fmt.Sprintf("\033[32mWorker %d ....... Building graph and traversing code %s\033[0m", workerID, fileName) buildGraphFromAST(rootNode, sourceCode, localGraph, nil, file) statusChan <- fmt.Sprintf("\033[32mWorker %d ....... Done processing file %s\033[0m", workerID, fileName) diff --git a/sast-engine/graph/parser_docker.go b/sast-engine/graph/parser_docker.go new file mode 100644 index 00000000..26714181 --- /dev/null +++ b/sast-engine/graph/parser_docker.go @@ -0,0 +1,341 @@ +package graph + +import ( + "fmt" + "strconv" + + "github.com/shivasurya/code-pathfinder/sast-engine/graph/docker" +) + +// parseDockerfile parses a Dockerfile and adds nodes to the CodeGraph. +// Each Dockerfile instruction becomes a node with unique ID including line and column. 
+func parseDockerfile(filePath string, graph *CodeGraph) error { + // Create Docker parser + parser := docker.NewDockerfileParser() + + // Parse Dockerfile + dockerGraph, err := parser.ParseFile(filePath) + if err != nil { + return fmt.Errorf("failed to parse Dockerfile: %w", err) + } + + // Convert each instruction to a CodeGraph node + for _, instruction := range dockerGraph.Instructions { + node := convertDockerInstructionToNode(instruction, filePath) + graph.AddNode(node) + } + + return nil +} + +// convertDockerInstructionToNode converts a DockerfileNode to a CodeGraph Node. +func convertDockerInstructionToNode(dockerNode *docker.DockerfileNode, filePath string) *Node { + // Generate unique ID with line number and column (column is always 1 for Dockerfile instructions) + // Format: "dockerfile::::" + lineNumber := dockerNode.LineNumber + columnNumber := 1 // Dockerfile instructions start at column 1 + nodeID := GenerateSha256(fmt.Sprintf("dockerfile:%s:%s:%d:%d", + filePath, dockerNode.InstructionType, lineNumber, columnNumber)) + + // Create CodeGraph node + node := &Node{ + ID: nodeID, + Type: "dockerfile_instruction", + Name: dockerNode.InstructionType, + LineNumber: uint32(lineNumber), + File: filePath, + MethodArgumentsValue: []string{dockerNode.RawInstruction}, + SourceLocation: &SourceLocation{ + File: filePath, + StartByte: 0, // Will be set if we need lazy loading + EndByte: 0, + }, + } + + // Store instruction-specific details in MethodArgumentsValue + // This allows DSL rules to query instruction arguments + node.MethodArgumentsValue = append(node.MethodArgumentsValue, + extractDockerInstructionArgs(dockerNode)...) + + return node +} + +// extractDockerInstructionArgs extracts arguments from a Docker instruction. 
+func extractDockerInstructionArgs(dockerNode *docker.DockerfileNode) []string { + args := []string{} + + switch dockerNode.InstructionType { + case "FROM": + if dockerNode.BaseImage != "" { + args = append(args, dockerNode.BaseImage) + if dockerNode.ImageTag != "" { + args = append(args, dockerNode.ImageTag) + } + if dockerNode.StageAlias != "" { + args = append(args, "AS", dockerNode.StageAlias) + } + } + case "USER": + if dockerNode.UserName != "" { + args = append(args, dockerNode.UserName) + } + if dockerNode.GroupName != "" { + args = append(args, dockerNode.GroupName) + } + case "EXPOSE": + for _, port := range dockerNode.Ports { + args = append(args, strconv.Itoa(port)) + } + case "ENV": + for key, value := range dockerNode.EnvVars { + args = append(args, key+"="+value) + } + case "ARG": + if dockerNode.ArgName != "" { + args = append(args, dockerNode.ArgName) + } + case "LABEL": + for key, value := range dockerNode.Labels { + args = append(args, key+"="+value) + } + case "RUN", "CMD", "ENTRYPOINT": + if len(dockerNode.CommandArray) > 0 { + args = append(args, dockerNode.CommandArray...) + } else { + args = append(args, dockerNode.Arguments...) + } + case "COPY", "ADD": + if len(dockerNode.SourcePaths) > 0 { + args = append(args, dockerNode.SourcePaths...) + } + if dockerNode.DestPath != "" { + args = append(args, dockerNode.DestPath) + } + case "WORKDIR": + if dockerNode.WorkDir != "" { + args = append(args, dockerNode.WorkDir) + } + case "VOLUME": + args = append(args, dockerNode.Volumes...) + case "HEALTHCHECK": + if dockerNode.HealthcheckCmd != "" { + args = append(args, dockerNode.HealthcheckCmd) + } + case "SHELL": + args = append(args, dockerNode.Shell...) 
+ case "ONBUILD": + if dockerNode.OnBuildInstruction != "" { + args = append(args, dockerNode.OnBuildInstruction) + } + case "STOPSIGNAL": + if dockerNode.StopSignal != "" { + args = append(args, dockerNode.StopSignal) + } + } + + return args +} + +// parseDockerCompose parses a docker-compose.yml file and adds nodes to the CodeGraph. +// Each service becomes a node with unique ID including line number. +func parseDockerCompose(filePath string, graph *CodeGraph) error { + // Parse docker-compose file + composeGraph, err := ParseDockerCompose(filePath) + if err != nil { + return fmt.Errorf("failed to parse docker-compose: %w", err) + } + + // Convert each service to a CodeGraph node + for serviceName, serviceNode := range composeGraph.Services { + node := convertComposeServiceToNode(serviceName, serviceNode, filePath) + graph.AddNode(node) + } + + return nil +} + +// convertComposeServiceToNode converts a docker-compose service to a CodeGraph Node. +func convertComposeServiceToNode(serviceName string, serviceNode *YAMLNode, filePath string) *Node { + // Generate unique ID with line number (YAML doesn't provide column, default to 1) + // For YAML, we don't have exact line numbers from the parser, so we use a hash + // Format: "compose::" + nodeID := GenerateSha256(fmt.Sprintf("compose:%s:%s", filePath, serviceName)) + + // Create CodeGraph node + node := &Node{ + ID: nodeID, + Type: "compose_service", + Name: serviceName, + LineNumber: 1, // YAML parser doesn't provide line numbers, would need enhancement + File: filePath, + SourceLocation: &SourceLocation{ + File: filePath, + StartByte: 0, + EndByte: 0, + }, + } + + // Extract service properties and store in MethodArgumentsValue + // This allows DSL rules to query service configuration + node.MethodArgumentsValue = extractComposeServiceProperties(serviceNode) + + return node +} + +// extractComposeServiceProperties extracts properties from a docker-compose service. 
+func extractComposeServiceProperties(serviceNode *YAMLNode) []string { + props := []string{} + + // Extract common security-relevant properties + if imageNode := serviceNode.GetChild("image"); imageNode != nil { + props = append(props, "image="+imageNode.StringValue()) + } + + if privileged := serviceNode.GetChild("privileged"); privileged != nil && privileged.BoolValue() { + props = append(props, "privileged=true") + } + + if networkMode := serviceNode.GetChild("network_mode"); networkMode != nil { + props = append(props, "network_mode="+networkMode.StringValue()) + } + + if readOnly := serviceNode.GetChild("read_only"); readOnly != nil && readOnly.BoolValue() { + props = append(props, "read_only=true") + } + + // Extract volumes (check for Docker socket exposure) + if volumesNode := serviceNode.GetChild("volumes"); volumesNode != nil { + for _, vol := range volumesNode.ListValues() { + if volStr, ok := vol.(string); ok { + props = append(props, "volume="+volStr) + } + } + } + + // Extract ports + if portsNode := serviceNode.GetChild("ports"); portsNode != nil { + for _, port := range portsNode.ListValues() { + if portStr, ok := port.(string); ok { + props = append(props, "port="+portStr) + } + } + } + + // Extract capabilities + if capAdd := serviceNode.GetChild("cap_add"); capAdd != nil { + for _, cap := range capAdd.ListValues() { + if capStr, ok := cap.(string); ok { + props = append(props, "cap_add="+capStr) + } + } + } + + if capDrop := serviceNode.GetChild("cap_drop"); capDrop != nil { + for _, cap := range capDrop.ListValues() { + if capStr, ok := cap.(string); ok { + props = append(props, "cap_drop="+capStr) + } + } + } + + // Extract security_opt + if secOpt := serviceNode.GetChild("security_opt"); secOpt != nil { + for _, opt := range secOpt.ListValues() { + if optStr, ok := opt.(string); ok { + props = append(props, "security_opt="+optStr) + } + } + } + + // Extract environment variables + if envNode := serviceNode.GetChild("environment"); envNode != 
nil { + // Handle map format + if envNode.Children != nil { + for key := range envNode.Children { + props = append(props, "env="+key) + } + } + // Handle array format + for _, env := range envNode.ListValues() { + if envStr, ok := env.(string); ok { + props = append(props, "env="+envStr) + } + } + } + + return props +} + +// Helper functions to query Docker/Compose nodes (for DSL executor) + +// IsDockerNode checks if a node represents a Dockerfile instruction. +func IsDockerNode(node *Node) bool { + return node.Type == "dockerfile_instruction" +} + +// IsComposeNode checks if a node represents a docker-compose service. +func IsComposeNode(node *Node) bool { + return node.Type == "compose_service" +} + +// GetDockerInstructionType returns the instruction type for Docker nodes (e.g., "RUN", "FROM"). +func GetDockerInstructionType(node *Node) string { + if !IsDockerNode(node) { + return "" + } + return node.Name +} + +// HasDockerInstructionArg checks if a Docker node has a specific argument. +func HasDockerInstructionArg(node *Node, arg string) bool { + if !IsDockerNode(node) { + return false + } + for _, value := range node.MethodArgumentsValue { + if value == arg { + return true + } + } + return false +} + +// GetComposeServiceProperty gets a property value from a compose service node. +func GetComposeServiceProperty(node *Node, property string) string { + if !IsComposeNode(node) { + return "" + } + prefix := property + "=" + for _, value := range node.MethodArgumentsValue { + if len(value) > len(prefix) && value[:len(prefix)] == prefix { + return value[len(prefix):] + } + } + return "" +} + +// HasComposeServiceProperty checks if a compose service has a specific property. 
+func HasComposeServiceProperty(node *Node, property string, expectedValue ...string) bool {
+	if !IsComposeNode(node) {
+		return false
+	}
+
+	if len(expectedValue) == 0 {
+		// No expected value given: report whether any entry starts with "property=" (an empty value after '=' still counts)
+		prefix := property + "="
+		for _, value := range node.MethodArgumentsValue {
+			if len(value) >= len(prefix) && value[:len(prefix)] == prefix {
+				return true
+			}
+		}
+		return false
+	}
+
+	// Only expectedValue[0] is consulted; an entry must equal "property=<value>" exactly
+	expected := property + "=" + expectedValue[0]
+	for _, value := range node.MethodArgumentsValue {
+		if value == expected {
+			return true
+		}
+	}
+	return false
+}
diff --git a/sast-engine/graph/parser_docker_test.go b/sast-engine/graph/parser_docker_test.go
new file mode 100644
index 00000000..6dcc7c27
--- /dev/null
+++ b/sast-engine/graph/parser_docker_test.go
@@ -0,0 +1,416 @@
+package graph
+
+import (
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"github.com/shivasurya/code-pathfinder/sast-engine/graph/docker"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestParseDockerfile(t *testing.T) {
+	// Create a temporary Dockerfile
+	tmpDir := t.TempDir()
+	dockerfilePath := filepath.Join(tmpDir, "Dockerfile")
+
+	dockerfileContent := `FROM ubuntu:22.04
+RUN apt-get update && apt-get install -y curl
+USER appuser
+EXPOSE 8080
+WORKDIR /app
+COPY . /app
+ENV DEBUG=true
+CMD ["./start.sh"]
+`
+
+	err := os.WriteFile(dockerfilePath, []byte(dockerfileContent), 0644)
+	require.NoError(t, err)
+
+	// Parse Dockerfile
+	graph := NewCodeGraph()
+	err = parseDockerfile(dockerfilePath, graph)
+	require.NoError(t, err)
+
+	// Verify nodes created: one per instruction (FROM, RUN, USER, EXPOSE, WORKDIR, COPY, ENV, CMD)
+	assert.Len(t, graph.Nodes, 8, "Should create 8 nodes for 8 instructions")
+
+	// Verify every node carries the Dockerfile node type, an ID, a name and the source path
+	for _, node := range graph.Nodes {
+		assert.Equal(t, "dockerfile_instruction", node.Type)
+		assert.NotEmpty(t, node.ID)
+		assert.NotEmpty(t, node.Name) // Instruction type (FROM, RUN, etc.)
+		assert.Equal(t, dockerfilePath, node.File)
+	}
+}
+
+func TestParseDockerfileWithMultiStage(t *testing.T) {
+	tmpDir := t.TempDir()
+	dockerfilePath := filepath.Join(tmpDir, "Dockerfile")
+
+	dockerfileContent := `FROM golang:1.21 AS builder
+WORKDIR /build
+COPY . .
+RUN go build -o app
+
+FROM alpine:latest
+COPY --from=builder /build/app /app
+USER nobody
+CMD ["/app"]
+`
+
+	err := os.WriteFile(dockerfilePath, []byte(dockerfileContent), 0644)
+	require.NoError(t, err)
+
+	graph := NewCodeGraph()
+	err = parseDockerfile(dockerfilePath, graph)
+	require.NoError(t, err)
+
+	// Should create nodes for all instructions (the fixture has 9; the assertion only requires at least 8)
+	assert.GreaterOrEqual(t, len(graph.Nodes), 8)
+}
+
+func TestParseDockerfileWithError(t *testing.T) {
+	graph := NewCodeGraph()
+	err := parseDockerfile("/nonexistent/Dockerfile", graph)
+	assert.Error(t, err)
+}
+
+func TestParseDockerCompose(t *testing.T) {
+	tmpDir := t.TempDir()
+	composePath := filepath.Join(tmpDir, "docker-compose.yml")
+
+	composeContent := `version: '3.8'
+services:
+  web:
+    image: nginx:latest
+    privileged: true
+    network_mode: host
+    ports:
+      - "8080:80"
+    volumes:
+      - /var/run/docker.sock:/var/run/docker.sock
+    environment:
+      - DEBUG=true
+  db:
+    image: postgres:15
+    read_only: true
+`
+
+	err := os.WriteFile(composePath, []byte(composeContent), 0644)
+	require.NoError(t, err)
+
+	// Parse docker-compose.yml
+	graph := NewCodeGraph()
+	err = parseDockerCompose(composePath, graph)
+	require.NoError(t, err)
+
+	// Verify nodes created: one per service (web, db)
+	assert.Len(t, graph.Nodes, 2, "Should create 2 nodes for 2 services")
+
+	// Verify every node carries the compose node type, an ID, a name, the source path and properties
+	for _, node := range graph.Nodes {
+		assert.Equal(t, "compose_service", node.Type)
+		assert.NotEmpty(t, node.ID)
+		assert.NotEmpty(t, node.Name) // Service name
+		assert.Equal(t, composePath, node.File)
+		assert.NotEmpty(t, node.MethodArgumentsValue) // Properties
+	}
+}
+
+func TestParseDockerComposeWithError(t *testing.T) {
+	graph := NewCodeGraph()
+	err := parseDockerCompose("/nonexistent/docker-compose.yml", graph)
+	assert.Error(t, err)
+}
+
+func TestConvertDockerInstructionToNode(t *testing.T) {
+	tests := []struct {
+		name         string
+		dockerNode   *docker.DockerfileNode
+		expectedType string
+		expectedName string
+	}{
+		{
+			name: "FROM instruction",
+			dockerNode: &docker.DockerfileNode{
+				InstructionType: "FROM",
+				BaseImage:       "ubuntu",
+				ImageTag:        "22.04",
+				LineNumber:      1,
+			},
+			expectedType: "dockerfile_instruction",
+			expectedName: "FROM",
+		},
+		{
+			name: "USER instruction",
+			dockerNode: &docker.DockerfileNode{
+				InstructionType: "USER",
+				UserName:        "appuser",
+				LineNumber:      5,
+			},
+			expectedType: "dockerfile_instruction",
+			expectedName: "USER",
+		},
+		{
+			name: "EXPOSE instruction",
+			dockerNode: &docker.DockerfileNode{
+				InstructionType: "EXPOSE",
+				Ports:           []int{8080, 8443},
+				LineNumber:      10,
+			},
+			expectedType: "dockerfile_instruction",
+			expectedName: "EXPOSE",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			node := convertDockerInstructionToNode(tt.dockerNode, "/test/Dockerfile")
+			assert.Equal(t, tt.expectedType, node.Type)
+			assert.Equal(t, tt.expectedName, node.Name)
+			assert.NotEmpty(t, node.ID)
+			assert.Equal(t, uint32(tt.dockerNode.LineNumber), node.LineNumber)
+		})
+	}
+}
+
+func TestExtractDockerInstructionArgs(t *testing.T) {
+	tests := []struct {
+		name       string
+		dockerNode *docker.DockerfileNode
+		expected   []string
+	}{
+		{
+			name: "FROM with tag",
+			dockerNode: &docker.DockerfileNode{
+				InstructionType: "FROM",
+				BaseImage:       "ubuntu",
+				ImageTag:        "22.04",
+			},
+			expected: []string{"ubuntu", "22.04"},
+		},
+		{
+			name: "USER instruction",
+			dockerNode: &docker.DockerfileNode{
+				InstructionType: "USER",
+				UserName:        "appuser",
+			},
+			expected: []string{"appuser"},
+		},
+		{
+			name: "EXPOSE instruction",
+			dockerNode: &docker.DockerfileNode{
+				InstructionType: "EXPOSE",
+				Ports:           []int{8080, 8443},
+			},
+			expected: []string{"8080", "8443"},
+		},
+		{
+			name: "ENV instruction",
+			dockerNode: &docker.DockerfileNode{
+				InstructionType: "ENV",
+				EnvVars: map[string]string{
+					"DEBUG": "true",
+					"PORT":  "8080",
+				},
+			},
+			expected: []string{"DEBUG=true", "PORT=8080"},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			args := extractDockerInstructionArgs(tt.dockerNode)
+			assert.ElementsMatch(t, tt.expected, args)
+		})
+	}
+}
+
+func TestConvertComposeServiceToNode(t *testing.T) {
+	yamlGraph, err := ParseYAMLString(`
+version: '3.8'
+services:
+  web:
+    image: nginx:latest
+    privileged: true
+    ports:
+      - "8080:80"
+`, "docker-compose.yml")
+	require.NoError(t, err)
+
+	composeGraph := NewComposeGraph(yamlGraph, "docker-compose.yml")
+	serviceNode := composeGraph.Services["web"]
+	require.NotNil(t, serviceNode)
+
+	node := convertComposeServiceToNode("web", serviceNode, "docker-compose.yml")
+
+	assert.Equal(t, "compose_service", node.Type)
+	assert.Equal(t, "web", node.Name)
+	assert.NotEmpty(t, node.ID)
+	assert.Equal(t, "docker-compose.yml", node.File)
+	assert.NotEmpty(t, node.MethodArgumentsValue)
+}
+
+func TestExtractComposeServiceProperties(t *testing.T) {
+	yamlGraph, err := ParseYAMLString(`
+services:
+  web:
+    image: nginx:latest
+    privileged: true
+    network_mode: host
+    volumes:
+      - /var/run/docker.sock:/var/run/docker.sock
+    environment:
+      DEBUG: "true"
+`, "docker-compose.yml")
+	require.NoError(t, err)
+
+	composeGraph := NewComposeGraph(yamlGraph, "docker-compose.yml")
+	serviceNode := composeGraph.Services["web"]
+	require.NotNil(t, serviceNode)
+
+	props := extractComposeServiceProperties(serviceNode)
+
+	// Verify scalar properties are extracted as "key=value" entries
+	assert.Contains(t, props, "image=nginx:latest")
+	assert.Contains(t, props, "privileged=true")
+	assert.Contains(t, props, "network_mode=host")
+
+	// Verify some extracted property mentions the Docker socket path
+	hasDockerSocket := false
+	for _, prop := range props {
+		if contains(prop, "/var/run/docker.sock") {
+			hasDockerSocket = true
+			break
+		}
+	}
+	assert.True(t, hasDockerSocket, "Should extract Docker socket volume")
+}
+
+func TestIsDockerNode(t *testing.T) {
+	dockerNode := &Node{Type: "dockerfile_instruction"}
+	assert.True(t, IsDockerNode(dockerNode))
+
+	nonDockerNode := &Node{Type: "function_definition"}
+	assert.False(t, IsDockerNode(nonDockerNode))
+}
+
+func TestIsComposeNode(t *testing.T) {
+	composeNode := &Node{Type: "compose_service"}
+	assert.True(t, IsComposeNode(composeNode))
+
+	nonComposeNode := &Node{Type: "class_declaration"}
+	assert.False(t, IsComposeNode(nonComposeNode))
+}
+
+func TestGetDockerInstructionType(t *testing.T) {
+	node := &Node{
+		Type: "dockerfile_instruction",
+		Name: "RUN",
+	}
+	assert.Equal(t, "RUN", GetDockerInstructionType(node))
+
+	nonDockerNode := &Node{Type: "function_definition"}
+	assert.Empty(t, GetDockerInstructionType(nonDockerNode))
+}
+
+func TestHasDockerInstructionArg(t *testing.T) {
+	node := &Node{
+		Type:                 "dockerfile_instruction",
+		MethodArgumentsValue: []string{"ubuntu", "22.04", "apt-get"},
+	}
+
+	assert.True(t, HasDockerInstructionArg(node, "ubuntu"))
+	assert.True(t, HasDockerInstructionArg(node, "22.04"))
+	assert.False(t, HasDockerInstructionArg(node, "nonexistent"))
+}
+
+func TestGetComposeServiceProperty(t *testing.T) {
+	node := &Node{
+		Type: "compose_service",
+		MethodArgumentsValue: []string{
+			"image=nginx:latest",
+			"privileged=true",
+			"network_mode=host",
+		},
+	}
+
+	assert.Equal(t, "nginx:latest", GetComposeServiceProperty(node, "image"))
+	assert.Equal(t, "true", GetComposeServiceProperty(node, "privileged"))
+	assert.Equal(t, "host", GetComposeServiceProperty(node, "network_mode"))
+	assert.Empty(t, GetComposeServiceProperty(node, "nonexistent"))
+}
+
+func TestHasComposeServiceProperty(t *testing.T) {
+	node := &Node{
+		Type: "compose_service",
+		MethodArgumentsValue: []string{
+			"image=nginx:latest",
+			"privileged=true",
+			"port=8080:80",
+		},
+	}
+
+	// Check existence (no expected value passed)
+	assert.True(t, HasComposeServiceProperty(node, "image"))
+	assert.True(t, HasComposeServiceProperty(node, "privileged"))
+	assert.False(t, HasComposeServiceProperty(node, "nonexistent"))
+
+	// Check specific value (exact "key=value" match)
+	assert.True(t, HasComposeServiceProperty(node, "image", "nginx:latest"))
+	assert.True(t, HasComposeServiceProperty(node, "privileged", "true"))
+	assert.False(t, HasComposeServiceProperty(node, "image", "apache"))
+}
+
+func TestInitializeWithDockerFiles(t *testing.T) {
+	// Create test directory with Docker files
+	tmpDir := t.TempDir()
+
+	// Create Dockerfile
+	dockerfilePath := filepath.Join(tmpDir, "Dockerfile")
+	dockerfileContent := `FROM ubuntu:22.04
+USER appuser
+`
+	err := os.WriteFile(dockerfilePath, []byte(dockerfileContent), 0644)
+	require.NoError(t, err)
+
+	// Create docker-compose.yml
+	composePath := filepath.Join(tmpDir, "docker-compose.yml")
+	composeContent := `version: '3.8'
+services:
+  web:
+    image: nginx
+`
+	err = os.WriteFile(composePath, []byte(composeContent), 0644)
+	require.NoError(t, err)
+
+	// Initialize CodeGraph
+	graph := Initialize(tmpDir)
+
+	// Verify both files were parsed (2 Dockerfile instructions + 1 compose service)
+	assert.GreaterOrEqual(t, len(graph.Nodes), 3, "Should have nodes from both Dockerfile and docker-compose")
+
+	// Verify we have both Docker and Compose nodes
+	hasDockerNode := false
+	hasComposeNode := false
+	for _, node := range graph.Nodes {
+		if node.Type == "dockerfile_instruction" {
+			hasDockerNode = true
+		}
+		if node.Type == "compose_service" {
+			hasComposeNode = true
+		}
+	}
+
+	assert.True(t, hasDockerNode, "Should have Dockerfile nodes")
+	assert.True(t, hasComposeNode, "Should have Compose nodes")
+}
+
+// Helper function to check if string contains substring.
+func contains(s, substr string) bool {
+	// Delegate to strings.Contains so a match at any offset is found, not only at the start, end or centre of s.
+	return strings.Contains(s, substr)
+}
diff --git a/sast-engine/graph/utils.go b/sast-engine/graph/utils.go
index 7cd6b44e..30f77533 100644
--- a/sast-engine/graph/utils.go
+++ b/sast-engine/graph/utils.go
@@ -242,7 +242,7 @@ func extractMethodName(node *sitter.Node, sourceCode []byte, filepath string) (s
 	return methodName, methodID
 }
 
-// getFiles walks through a directory and returns all Java and Python source files.
+// getFiles walks through a directory and returns all source files (Java, Python, Dockerfile, docker-compose).
 func getFiles(directory string) ([]string, error) {
 	var files []string
 	err := filepath.Walk(directory, func(path string, info os.FileInfo, err error) error {
@@ -250,9 +250,19 @@ func getFiles(directory string) ([]string, error) {
 			return err
 		}
 		if !info.IsDir() {
-			// append only java and python files
+			// append java, python, dockerfile, and docker-compose files
 			ext := filepath.Ext(path)
-			if ext == ".java" || ext == ".py" {
+			base := filepath.Base(path)
+			baseLower := strings.ToLower(base)
+
+			switch {
+			case ext == ".java" || ext == ".py":
+				files = append(files, path)
+			case strings.HasPrefix(baseLower, "dockerfile"):
+				// Match Dockerfile, Dockerfile.dev, dockerfile, etc.
+				files = append(files, path)
+			case strings.Contains(baseLower, "docker-compose") && (ext == ".yml" || ext == ".yaml"):
+				// Match docker-compose.yml, docker-compose.yaml, etc.
files = append(files, path) } } diff --git a/sast-engine/output/text_formatter.go b/sast-engine/output/text_formatter.go index 1aee4a78..4acf6b06 100644 --- a/sast-engine/output/text_formatter.go +++ b/sast-engine/output/text_formatter.go @@ -95,8 +95,8 @@ func (f *TextFormatter) writeSeverityGroup(severity string, detections []*dsl.En fmt.Fprintln(f.writer, title) fmt.Fprintln(f.writer) - // Critical and high get detailed output - showDetailed := severity == "critical" || severity == "high" + // All severities get detailed output with code snippets except info + showDetailed := severity != "info" for _, det := range detections { if showDetailed {