Skip to content

Commit f80b2a9

Browse files
authored
Python branch2 (#23)
* Upload the python project Fuzz test script valid_projects.txt: Python project list script_fuzz_py_final.sh: Single project test script script_fuzz_py_batch_final.sh: Batch projects test script * feat: Add OSS-Fuzz submodule tracking main branch * chore: Switch oss-fuzz submodule to personal fork * Switch oss-fuzz submodule to personal fork * move the valid_project file * move the .py file * create build_oss_fuzz.py * create run_fuzz_target.py * split the pool.py into build_oss_fuzz and run_fuzz_target * delete the .sh files * translate to english * fuzz_runner_pool.py:74 * edit stdout * 添加空值检查 * modify stdout, delete pool.py * indentation level check * Remove build log write files * Remove build log write files * use logging mdule * use precise logging * use logging * use precise exception log info * correct type problems * correct some mistakes * correct some mistakes * correct * modify discover fuzz target * modify the oss-fuzz dir * Redirect the output to an empty device without retaining any output * add always yes * split the build script * split the build script * build scripts test successfully * build.py * collect targets first and then run * list, tuple, ptional * list,tuple,optional * translate * build_fuzz.py, run_fuzz_all_target.py * correct * original * record input * Fatal error in main program: cannot unpack non-iterable NoneType object * name 'target_functions' is not defined fuzz_util_instrumented.py does not seem to exist * 准备大改 * create modify file script add"print(data)" to each fuzz_.py * build_fuzzer script * modify tuple dict list * remove stdout stderr in build fuzz * test successfully * rename run fuzz ds to run fuzz print1 * add print(data ) to fuzz target and rename the file with "_print1" * oss -fuzz change * rename the print1.py * modify the exegesis * modify * modify log name * type error * list dict tuple * type error * construct errors module * run_command module * combine the run_command instrument to one file * remove the 
run_command * modify * mytype check * mytype * mytype * mytype * translate * remove run command * timeout - shell instrument * correct in out error and return Popen directly * ready to change from rust script * 修改build_image * y/n * correct repo_id and repo_name in main * test build_image 构建日志 * add build_fuzzer * fuzz and testgen * correct run_one_target * fuzz ok * transform * testgen need to ^ help: add `;` here * test successful * example output project * type error * English ver * delete privious scripts * python template * python template * correct the template * ver2 wrong template * ok * testgen file change into copy the original and then add input_data =b"" * only read b' ' inputs * remove transform * clean the inputs and testgen * set max_file * max input file * input b"" * modify the method of writing files into PIPE * use max total time; remove size monitor * 修改并行错误, 写入方法还是直接写入文件 延时控制为max total time * 补充日志输出 * 模板生成成功 * testgen完成 * 删除冗余, 修改代码 * 更换为未删除冗余版本 * template插入data=b"" 函数header改为test_() * translation * A complete script for building the processes of build_image, build_fuzzer, fuzz, transform, and testgen, suitable for Python projects. * delete some imports * use ASTfor transform and testgen * use AST * Set up command line arguments * use fire * use FIre * black formatter * deal the data after closing the file * when doing line-matching, check for # This is a test template in the line * when doing line-matching, check for # This is a test template in the line * delete UnicodeDecodeError * apply transformations on the original unmodified fuzz targets. * put all AST related class/module/function in another file and import from there. * put all AST related class/module/function in another file and import from there. * translation * use relative address * use relative address * remove the class outside of the function * add tuple's type * Properly handle indentation and process data after the file is closed. 
* correct the relative path * add black to requirements.txt
1 parent aaba59b commit f80b2a9

File tree

5 files changed

+495
-306
lines changed

5 files changed

+495
-306
lines changed

fuzz/ast_utils.py

Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
# ast_utils.py
2+
import ast
3+
import astunparse
4+
import logging
5+
import os
6+
import re
7+
8+
9+
class TestFunctionTransformer(ast.NodeTransformer):
    """Turn a fuzz-target module into a parameterless ``test_`` template.

    While walking the tree this transformer:

    * drops every function named ``main``;
    * drops ``if __name__ == "__main__":`` guards, whichever side of the
      ``==`` the ``__name__`` operand is written on;
    * renames ``TestInput`` / ``TestOneInput`` to ``test_``, empties its
      parameter list and, if it had a first positional parameter, assigns a
      placeholder value to that name at the top of the body.
    """

    def visit_FunctionDef(self, node):
        """Rewrite a single function definition.

        Args:
            node: The ``ast.FunctionDef`` being visited.

        Returns:
            ``None`` for a function named ``main`` (which removes it from the
            tree); otherwise the (possibly modified) node.
        """
        if node.name == "main":
            return None

        if node.name in ("TestInput", "TestOneInput"):
            # Only the first positional parameter is remembered; the fuzz
            # entry point is assumed to take a single argument.
            param_name = node.args.args[0].arg if node.args.args else None

            node.name = "test_"

            # Replace the signature with an empty one so the test takes no
            # arguments.
            node.args = ast.arguments(
                posonlyargs=[],
                args=[],
                vararg=None,
                kwonlyargs=[],
                kw_defaults=[],
                kwarg=None,
                defaults=[],
            )

            # The body still refers to the old parameter, so bind that name
            # to a placeholder value inside the function.
            if param_name:
                self.add_param_assignment(node, param_name)

        # Keep descending so nested definitions and ``if`` blocks are
        # processed as well.
        self.generic_visit(node)
        return node

    def add_param_assignment(self, node, param_name):
        """Insert a placeholder assignment for ``param_name`` into ``node``.

        The assigned value is not a plain ``b""`` literal. It is an
        ``ast.JoinedStr`` (f-string) made of a formatted ``b""`` constant
        followed by the literal text ``" # This is a test template"``. That
        trailing text acts as a marker: ``TestGenTransformer`` in this file
        looks for exactly this shape to find the assignment to replace.

        The statement goes right after the docstring when the function has
        one, otherwise at the very start of the body. The new node has no
        location info; ``generate_test_template`` in this file calls
        ``ast.fix_missing_locations`` after the transform.

        Args:
            node: Function node whose ``body`` list is modified in place.
            param_name: Name to bind to the placeholder value.
        """
        placeholder_value = ast.JoinedStr(
            values=[
                ast.FormattedValue(value=ast.Constant(value=b""), conversion=-1),
                ast.Constant(value=" # This is a test template"),
            ]
        )
        assign_node = ast.Assign(
            targets=[ast.Name(id=param_name, ctx=ast.Store())],
            value=placeholder_value,
        )

        has_docstring = bool(
            node.body
            and isinstance(node.body[0], ast.Expr)
            and isinstance(node.body[0].value, ast.Constant)
            and isinstance(node.body[0].value.value, str)
        )
        # Index 1 keeps the docstring as the first statement of the body.
        node.body.insert(1 if has_docstring else 0, assign_node)

    def remove_print_param(self, node, param_name):
        """Drop top-level ``print(...)`` statements that print ``param_name``.

        Only direct statements of ``node.body`` are examined (nested blocks
        are left alone). A statement is removed when it is a call to the bare
        name ``print`` and at least one positional argument is the bare name
        ``param_name``.

        Args:
            node: Node whose ``body`` list is replaced with the filtered list.
            param_name: Variable name whose print statements are removed.
        """

        def prints_param(stmt):
            if not (isinstance(stmt, ast.Expr) and isinstance(stmt.value, ast.Call)):
                return False
            call = stmt.value
            if not (isinstance(call.func, ast.Name) and call.func.id == "print"):
                return False
            return any(
                isinstance(arg, ast.Name) and arg.id == param_name
                for arg in call.args
            )

        node.body = [stmt for stmt in node.body if not prints_param(stmt)]

    @staticmethod
    def _is_main_guard(test):
        """Return True if ``test`` compares ``__name__`` with ``"__main__"``.

        Both ``__name__ == "__main__"`` and ``"__main__" == __name__`` are
        accepted. Only the first operator and first comparator are inspected.
        """
        if not isinstance(test, ast.Compare):
            return False
        if not isinstance(test.ops[0], ast.Eq):
            return False

        def is_dunder_name(expr):
            return isinstance(expr, ast.Name) and expr.id == "__name__"

        def is_main_literal(expr):
            return isinstance(expr, ast.Constant) and expr.value == "__main__"

        left, right = test.left, test.comparators[0]
        return (is_dunder_name(left) and is_main_literal(right)) or (
            is_main_literal(left) and is_dunder_name(right)
        )

    def visit_If(self, node):
        """Remove ``if __name__ == "__main__"`` blocks; keep every other ``if``.

        Returns:
            ``None`` for a main guard (the whole statement, including any
            ``else`` branch, is removed); otherwise the visited node.
        """
        if self._is_main_guard(node.test):
            return None

        # Keep descending so nested guards and functions are processed.
        self.generic_visit(node)
        return node
111+
112+
113+
class TestGenTransformer(ast.NodeTransformer):
    """Specialise a ``test_`` template function for one fuzz input.

    The function named ``test_`` is renamed to ``test_<idx>`` and the first
    top-level assignment in its body whose value is the f-string placeholder
    ending in ``" # This is a test template"`` gets its value replaced by a
    constant holding ``fuzz_input``.

    Attributes are plain instance fields:
      idx                  - suffix appended to the function name
      fuzz_input           - value stored into the placeholder assignment
      found_test_function  - set to True once a ``test_`` function was seen
    """

    def __init__(self, idx, fuzz_input):
        self.found_test_function = False
        self.fuzz_input = fuzz_input
        self.idx = idx

    @staticmethod
    def _is_template_placeholder(stmt):
        """Tell whether ``stmt`` is the marker assignment left in the template."""
        if not isinstance(stmt, ast.Assign):
            return False
        value = stmt.value
        if not isinstance(value, ast.JoinedStr):
            return False
        if len(value.values) < 2:
            return False
        marker = value.values[1]
        return (
            isinstance(marker, ast.Constant)
            and marker.value == " # This is a test template"
        )

    def visit_FunctionDef(self, node):
        """Rename ``test_`` and fill in its placeholder; pass others through.

        Child nodes are not visited, so nested functions are left untouched.
        """
        # Anything that is not the template function is returned unchanged.
        if node.name != "test_":
            return node

        self.found_test_function = True
        node.name = f"test_{self.idx}"

        # Only the first matching assignment is rewritten.
        placeholder = next(
            (stmt for stmt in node.body if self._is_template_placeholder(stmt)),
            None,
        )
        if placeholder is not None:
            placeholder.value = ast.Constant(value=self.fuzz_input)

        return node
141+
142+
143+
def generate_test_template(target_name: str, repo_path: str):
    """Generate a Python test template from a fuzz target using the AST.

    The target source is read, a leading shebang line is set aside, a comment
    block ending in "limitations under the license" is stripped, and the
    remaining code is rewritten by ``TestFunctionTransformer``. The result is
    written to ``<repo_path>/tests-gen/<target>.py``; an empty ``__init__.py``
    is created in ``tests-gen`` if it does not exist yet.

    Args:
        target_name: Fuzz target file name relative to ``repo_path``, with or
            without the ``.py`` extension.
        repo_path: Directory containing the target; ``tests-gen`` is created
            inside it.

    Returns:
        Path of the written template, or ``None`` when the source file does
        not exist or cannot be parsed (both cases are logged as errors).
    """
    src_file = os.path.join(repo_path, target_name)
    logging.info(f"Generating test template for {src_file}")
    if not src_file.endswith(".py"):
        src_file += ".py"
    if not os.path.exists(src_file):
        logging.error(f"Source target file not found: {src_file}")
        return None

    with open(src_file, "r", encoding="utf-8") as f:
        original_code = f.read()

    # --- 1. Keep shebang but remove license comments ---
    shebang = ""
    if original_code.startswith("#!"):
        # partition() never fails, even when the file is a lone shebang line
        # without a trailing newline (split("\n", 1) would yield one item and
        # break the two-name unpacking).
        first_line, _, original_code = original_code.partition("\n")
        shebang = first_line + "\n"

    license_pattern = re.compile(
        r"^(?:\s*#.*\n)*\s*#.*limitations\s+under\s+the\s+license.*\n",
        re.IGNORECASE | re.MULTILINE,
    )
    code_no_license = license_pattern.sub("", original_code, count=1)

    # --- 2. Parse code to AST ---
    try:
        tree = ast.parse(code_no_license)
    except SyntaxError as e:
        logging.error(f"Syntax error in {src_file}: {e}")
        return None

    # --- 3. AST transformation ---
    transformer = TestFunctionTransformer()
    new_tree = transformer.visit(tree)
    # Nodes inserted by the transformer carry no line/column info.
    ast.fix_missing_locations(new_tree)

    # --- 4. Generate cleaned code ---
    cleaned_code = astunparse.unparse(new_tree)

    # --- 5. Output to tests-gen directory ---
    template_dir = os.path.join(repo_path, "tests-gen")
    os.makedirs(template_dir, exist_ok=True)

    init_path = os.path.join(template_dir, "__init__.py")
    if not os.path.exists(init_path):
        with open(init_path, "w", encoding="utf-8") as f:
            f.write("")

    # Strip only a trailing ".py": that is the one extension recognised when
    # locating the source above, so "fuzz.v2" must map to "fuzz.v2.py" rather
    # than being truncated to "fuzz.py".
    if target_name.endswith(".py"):
        base_target_name = target_name[: -len(".py")]
    else:
        base_target_name = target_name
    template_path = os.path.join(template_dir, f"{base_target_name}.py")

    # target_name may include sub-directories; make sure they exist under
    # tests-gen before writing.
    os.makedirs(os.path.dirname(template_path), exist_ok=True)
    with open(template_path, "w", encoding="utf-8") as f:
        f.write(shebang + cleaned_code.strip() + "\n")

    logging.info(f"Generated cleaned template: {template_path}")
    return template_path

fuzz/clean_fuzz_dir.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#!/usr/bin/env python3
2+
import os
3+
import shutil
4+
import fire
5+
6+
def clean_project_dirs(root_dir="fuzz/oss-fuzz/projects"):
    """
    Remove the fuzz_inputs and tests-gen folders from every oss-fuzz project.

    Each immediate sub-directory of ``root_dir`` is treated as one project;
    plain files directly under ``root_dir`` are skipped. A line is printed for
    every removed folder, followed by a summary.

    Args:
        root_dir (str): Root directory that contains the project folders.
    """
    removed_dirs = 0
    # No files are deleted by the active code, so this stays 0; it is still
    # reported in the summary line.
    removed_files = 0

    for entry in os.listdir(root_dir):
        project_path = os.path.join(root_dir, entry)
        if os.path.isdir(project_path):
            # Generated folders, removed in this order for each project.
            for generated_name in ("fuzz_inputs", "tests-gen"):
                generated_path = os.path.join(project_path, generated_name)
                if not os.path.isdir(generated_path):
                    continue
                shutil.rmtree(generated_path)
                print(f"🗑️ Removed dir: {generated_path}")
                removed_dirs += 1

            # To also delete .inputs.py files, uncomment the following code:
            # for fname in os.listdir(project_path):
            #     if fname.endswith(".inputs.py"):
            #         file_path = os.path.join(project_path, fname)
            #         os.remove(file_path)
            #         print(f"🗑️ Removed file: {file_path}")
            #         removed_files += 1

    print(f"\n✅ Done. Removed {removed_files} files and {removed_dirs} directories.")


# Command-line entry point: python-fire exposes the function's parameters as
# CLI arguments.
if __name__ == "__main__":
    fire.Fire(clean_project_dirs)

0 commit comments

Comments
 (0)