SecurityLab-UCD
diff --git a/‎fuzz/ast_utils.py‎
Lines changed: 201 additions & 0 deletions b/‎fuzz/ast_utils.py‎
Lines changed: 201 additions & 0 deletions
diff --git a/‎fuzz/clean_fuzz_dir.py‎
Lines changed: 46 additions & 0 deletions b/‎fuzz/clean_fuzz_dir.py‎
Lines changed: 46 additions & 0 deletions
@@ -0,0 +1,201 @@
+# ast_utils.py
+import ast
+import astunparse
+import logging
+import os
+import re
+
+
+class TestFunctionTransformer(ast.NodeTransformer):
+    """AST transformer for test function conversion"""
+
+    def visit_FunctionDef(self, node):
+        # First, process main function (remove it)
+        if node.name == "main":
+            return None
+
+        # Process TestInput/TestOneInput functions
+        if node.name in ["TestInput", "TestOneInput"]:
+            # a. Record parameter name (assume only one parameter)
+            param_name = None
+            if node.args.args:
+                param_name = node.args.args[0].arg
+
+            # b. Rename function to test_
+            node.name = "test_"
+
+            # c. Remove parameters (set argument list to empty)
+            node.args = ast.arguments(
+                posonlyargs=[],
+                args=[],
+                vararg=None,
+                kwonlyargs=[],
+                kw_defaults=[],
+                kwarg=None,
+                defaults=[],
+            )
+
+            # d. Insert param_name = b"" at the beginning of the function body
+            if param_name:
+                self.add_param_assignment(node, param_name)
+
+        # Ensure traversing child nodes continues
+        self.generic_visit(node)
+        return node
+
+    def add_param_assignment(self, node, param_name):
+        """Add param_name = b"..." at the beginning of the function body with an inline comment"""
+        # Create a compound value containing assignment and comment
+        value_with_comment = ast.JoinedStr(
+            values=[
+                ast.FormattedValue(value=ast.Constant(value=b""), conversion=-1),
+                ast.Constant(value="  # This is a test template"),
+            ]
+        )
+
+        # Create an assignment node
+        assign_node = ast.Assign(
+            targets=[ast.Name(id=param_name, ctx=ast.Store())], value=value_with_comment
+        )
+
+        # If there is a docstring, insert after the docstring
+        if (
+            node.body
+            and isinstance(node.body[0], ast.Expr)
+            and isinstance(node.body[0].value, ast.Constant)
+            and isinstance(node.body[0].value.value, str)
+        ):
+            # Insert right after the docstring
+            node.body.insert(1, assign_node)
+        else:
+            # Insert at the beginning of the function
+            node.body.insert(0, assign_node)
+
+    def remove_print_param(self, node, param_name):
+        """Remove print statements for the specific parameter"""
+        new_body = []
+        for stmt in node.body:
+            # Skip print(param_name) calls
+            if (
+                isinstance(stmt, ast.Expr)
+                and isinstance(stmt.value, ast.Call)
+                and isinstance(stmt.value.func, ast.Name)
+                and stmt.value.func.id == "print"
+                and any(
+                    isinstance(arg, ast.Name) and arg.id == param_name
+                    for arg in stmt.value.args
+                )
+            ):
+                continue
+            new_body.append(stmt)
+        node.body = new_body
+
+    def visit_If(self, node):
+        """Remove if __name__ == '__main__' blocks"""
+        # Check if this is the main function guard
+        if (
+            isinstance(node.test, ast.Compare)
+            and isinstance(node.test.left, ast.Name)
+            and node.test.left.id == "__name__"
+            and isinstance(node.test.ops[0], ast.Eq)
+            and isinstance(node.test.comparators[0], ast.Constant)
+            and node.test.comparators[0].value == "__main__"
+        ):
+
+            # Remove the entire if block
+            return None
+
+        # Ensure traversing child nodes continues
+        self.generic_visit(node)
+        return node
+
+
+class TestGenTransformer(ast.NodeTransformer):
+    def __init__(self, idx, fuzz_input):
+        self.idx = idx
+        self.fuzz_input = fuzz_input
+        self.found_test_function = False
+
+    def visit_FunctionDef(self, node):
+        if node.name == "test_":
+            self.found_test_function = True
+
+            # 1. Modify function name
+            node.name = f"test_{self.idx}"
+
+            # 2. Find and replace assignment statements with the special comment
+            for i, stmt in enumerate(node.body):
+                # Check if it's an assignment statement
+                if isinstance(stmt, ast.Assign):
+                    # Check if the value is a compound value with a comment
+                    if (
+                        isinstance(stmt.value, ast.JoinedStr)
+                        and len(stmt.value.values) >= 2
+                        and isinstance(stmt.value.values[1], ast.Constant)
+                        and stmt.value.values[1].value == "  # This is a test template"
+                    ):
+                        # Replace with new fuzz input
+                        stmt.value = ast.Constant(value=self.fuzz_input)
+                        break
+        return node
+
+
+def generate_test_template(target_name: str, repo_path: str):
+    """
+    Generate Python test template using AST for more precise code transformations
+    """
+    src_file = os.path.join(repo_path, target_name)
+    logging.info(f"Generating test template for {src_file}")
+    if not src_file.endswith(".py"):
+        src_file += ".py"
+    if not os.path.exists(src_file):
+        logging.error(f"Source target file not found: {src_file}")
+        return None
+
+    with open(src_file, "r", encoding="utf-8") as f:
+        original_code = f.read()
+
+    # --- 1. Keep shebang but remove license comments ---
+    shebang = ""
+    if original_code.startswith("#!"):
+        shebang, original_code = original_code.split("\n", 1)
+        shebang += "\n"
+
+    license_pattern = re.compile(
+        r"^(?:\s*#.*\n)*\s*#.*limitations\s+under\s+the\s+license.*\n",
+        re.IGNORECASE | re.MULTILINE,
+    )
+    code_no_license = re.sub(license_pattern, "", original_code, count=1)
+
+    # --- 2. Parse code to AST ---
+    try:
+        tree = ast.parse(code_no_license)
+    except SyntaxError as e:
+        logging.error(f"Syntax error in {src_file}: {e}")
+        return None
+
+    # --- 3. AST transformation ---
+    transformer = TestFunctionTransformer()
+    new_tree = transformer.visit(tree)
+    ast.fix_missing_locations(new_tree)
+
+    # --- 4. Generate cleaned code ---
+    cleaned_code = astunparse.unparse(new_tree)
+
+    # --- 5. Output to tests-gen directory ---
+    template_dir = os.path.join(repo_path, "tests-gen")
+    os.makedirs(template_dir, exist_ok=True)
+
+    init_path = os.path.join(template_dir, "__init__.py")
+    if not os.path.exists(init_path):
+        with open(init_path, "w", encoding="utf-8") as f:
+            f.write("")
+
+    # Use the base part of target_name (remove extension) as the output file name
+    base_target_name = os.path.splitext(target_name)[0]
+    template_path = os.path.join(template_dir, f"{base_target_name}.py")
+    with open(template_path, "w", encoding="utf-8") as f:
+        f.write(shebang + cleaned_code.strip() + "\n")
+
+    logging.info(f"Generated cleaned template: {template_path}")
+    return template_path
@@ -0,0 +1,46 @@
+#!/usr/bin/env python3
+import os
+import shutil
+import fire
+
+def clean_project_dirs(root_dir="fuzz/oss-fuzz/projects"):
+    """
+    清理 oss-fuzz 项目目录下的 fuzz_inputs 和 tests-gen 文件夹
+
+    Args:
+        root_dir (str): 项目的根目录路径
+    """
+    removed_files = 0
+    removed_dirs = 0
+
+    for project in os.listdir(root_dir):
+        project_path = os.path.join(root_dir, project)
+        if not os.path.isdir(project_path):
+            continue
+
+        # 删除 fuzz_inputs 文件夹
+        fuzz_inputs_path = os.path.join(project_path, "fuzz_inputs")
+        if os.path.isdir(fuzz_inputs_path):
+            shutil.rmtree(fuzz_inputs_path)
+            print(f"🗑️ Removed dir: {fuzz_inputs_path}")
+            removed_dirs += 1
+
+        # 删除 tests-gen 文件夹
+        tests_gen_path = os.path.join(project_path, "tests-gen")
+        if os.path.isdir(tests_gen_path):
+            shutil.rmtree(tests_gen_path)
+            print(f"🗑️ Removed dir: {tests_gen_path}")
+            removed_dirs += 1
+
+        # 如果需要删除 .inputs.py 文件，取消注释以下代码
+        # for fname in os.listdir(project_path):
+        #     if fname.endswith(".inputs.py"):
+        #         file_path = os.path.join(project_path, fname)
+        #         os.remove(file_path)
+        #         print(f"🗑️ Removed file: {file_path}")
+        #         removed_files += 1
+
+    print(f"\n✅ Done. Removed {removed_files} files and {removed_dirs} directories.")
+
+# When run as a script, hand clean_project_dirs to fire.Fire.
+if __name__ == "__main__":
+    fire.Fire(clean_project_dirs)