Skip to content

Commit 7cf2bca

Browse files
committed
feat: implement lazy file loading to prevent 'too many open files' errors
- Add FileDescriptorManager singleton to track and limit open file descriptors
- Implement LazyFileLoader class that opens files only when needed for reading
- Add configurable max_open_files parameter to fullscans.post() and diffscans.create_from_repo()
- Auto-close files when fully read and use LRU eviction when limit reached
- Add comprehensive documentation with v3.0 migration notes
- Maintain backward compatibility with use_lazy_loading=False default
- Support cross-platform operation (Unix/Linux/macOS/Windows)
- Include retry logic with garbage collection for edge cases

This prevents file descriptor exhaustion when uploading large numbers of manifest files (e.g., 1956 files) on systems with low ulimit values.
1 parent 9a76695 commit 7cf2bca

File tree

7 files changed

+351
-71
lines changed

7 files changed

+351
-71
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,5 @@ dist
1212
*.build
1313
*.dist
1414
*.egg-info
15-
*.cpython-312.pyc
15+
*.cpython-312.pyc
16+
example-socket-export.py

socketdev/dependencies/__init__.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from urllib.parse import urlencode
33
import logging
44
from socketdev.tools import load_files
5+
from ..utils import Utils
56

67
log = logging.getLogger("socketdev")
78

@@ -12,9 +13,13 @@ class Dependencies:
1213
def __init__(self, api):
1314
self.api = api
1415

15-
def post(self, files: list, params: dict) -> dict:
16-
loaded_files = []
17-
loaded_files = load_files(files, loaded_files)
16+
def post(self, files: list, params: dict, use_lazy_loading: bool = False, workspace: str = None) -> dict:
17+
if use_lazy_loading:
18+
loaded_files = Utils.load_files_for_sending_lazy(files, workspace)
19+
else:
20+
loaded_files = []
21+
loaded_files = load_files(files, loaded_files)
22+
1823
path = "dependencies/upload?" + urlencode(params)
1924
response = self.api.do_request(path=path, files=loaded_files, method="POST")
2025
if response.status_code == 200:

socketdev/diffscans/__init__.py

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import json
22
import logging
33
from typing import Any, Dict, Optional, Union
4+
from ..utils import Utils
45

56
log = logging.getLogger("socketdev")
67

@@ -29,13 +30,44 @@ def get(self, org_slug: str, diff_scan_id: str) -> dict:
2930
log.error(f"Error fetching diff scan: {response.status_code}, message: {response.text}")
3031
return {}
3132

32-
def create_from_repo(self, org_slug: str, repo_slug: str, files: list, params: Optional[Dict[str, Any]] = None) -> dict:
33-
"""Create a diff scan from repo HEAD, uploading files as multipart form data."""
33+
def create_from_repo(self, org_slug: str, repo_slug: str, files: list, params: Optional[Dict[str, Any]] = None, use_lazy_loading: bool = False, workspace: str = None, max_open_files: int = 100) -> dict:
34+
"""
35+
Create a diff scan from repo HEAD, uploading files as multipart form data.
36+
37+
Args:
38+
org_slug: Organization slug
39+
repo_slug: Repository slug
40+
files: List of file paths to upload for scanning
41+
params: Optional query parameters for the request
42+
use_lazy_loading: Whether to use lazy file loading to prevent "too many open files"
43+
errors when uploading large numbers of files (default: False)
44+
NOTE: In version 3.0, this will default to True for better performance
45+
workspace: Base directory path to make file paths relative to
46+
max_open_files: Maximum number of files to keep open simultaneously when using
47+
lazy loading. Useful for systems with low ulimit values (default: 100)
48+
49+
Returns:
50+
dict: API response containing diff scan results
51+
52+
Note:
53+
When use_lazy_loading=True, files are opened only when needed during upload,
54+
preventing file descriptor exhaustion. The max_open_files parameter controls how many
55+
files can be open simultaneously - set this lower on systems with restrictive ulimits.
56+
57+
For large file uploads (>100 files), it's recommended to set use_lazy_loading=True.
58+
"""
3459
import urllib.parse
3560
path = f"orgs/{org_slug}/diff-scans/from-repo/{repo_slug}"
3661
if params:
3762
path += "?" + urllib.parse.urlencode(params)
38-
response = self.api.do_request(path=path, method="POST", files=files)
63+
64+
# Use lazy loading if requested
65+
if use_lazy_loading:
66+
prepared_files = Utils.load_files_for_sending_lazy(files, workspace, max_open_files)
67+
else:
68+
prepared_files = files
69+
70+
response = self.api.do_request(path=path, method="POST", files=prepared_files)
3971
if response.status_code in (200, 201):
4072
return response.json()
4173
log.error(f"Error creating diff scan from repo: {response.status_code}, message: {response.text}")

socketdev/fullscans/__init__.py

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -720,15 +720,45 @@ def get(self, org_slug: str, params: dict, use_types: bool = False) -> Union[dic
720720
)
721721
return {}
722722

723-
def post(self, files: list, params: FullScanParams, use_types: bool = False) -> Union[dict, CreateFullScanResponse]:
723+
def post(self, files: list, params: FullScanParams, use_types: bool = False, use_lazy_loading: bool = False, workspace: str = None, max_open_files: int = 100) -> Union[dict, CreateFullScanResponse]:
724+
"""
725+
Create a new full scan by uploading manifest files.
726+
727+
Args:
728+
files: List of file paths to upload for scanning
729+
params: FullScanParams object containing scan configuration
730+
use_types: Whether to return typed response objects (default: False)
731+
use_lazy_loading: Whether to use lazy file loading to prevent "too many open files"
732+
errors when uploading large numbers of files (default: False)
733+
NOTE: In version 3.0, this will default to True for better performance
734+
workspace: Base directory path to make file paths relative to
735+
max_open_files: Maximum number of files to keep open simultaneously when using
736+
lazy loading. Useful for systems with low ulimit values (default: 100)
737+
738+
Returns:
739+
dict or CreateFullScanResponse: API response containing scan results
740+
741+
Note:
742+
When use_lazy_loading=True, files are opened only when needed during upload,
743+
preventing file descriptor exhaustion. The max_open_files parameter controls how many
744+
files can be open simultaneously - set this lower on systems with restrictive ulimits.
745+
746+
For large file uploads (>100 files), it's recommended to set use_lazy_loading=True.
747+
"""
724748
Utils.validate_integration_type(params.integration_type if params.integration_type else "api")
725749
org_slug = str(params.org_slug)
726750
params_dict = params.to_dict()
727751
params_dict.pop("org_slug")
728752
params_arg = urllib.parse.urlencode(params_dict)
729753
path = "orgs/" + org_slug + "/full-scans?" + str(params_arg)
730754

731-
response = self.api.do_request(path=path, method="POST", files=files)
755+
# Use lazy loading if requested
756+
if use_lazy_loading:
757+
prepared_files = Utils.load_files_for_sending_lazy(files, workspace, max_open_files)
758+
else:
759+
prepared_files = files
760+
761+
response = self.api.do_request(path=path, method="POST", files=prepared_files)
732762

733763
if response.status_code == 201:
734764
result = response.json()

0 commit comments

Comments
 (0)