diff --git a/docs/content/en/integrations/parsers/file/noseyparker.md b/docs/content/en/integrations/parsers/file/noseyparker.md new file mode 100644 index 00000000000..fc08cbf03b5 --- /dev/null +++ b/docs/content/en/integrations/parsers/file/noseyparker.md @@ -0,0 +1,32 @@ +--- +title: "Nosey Parker" +toc_hide: true +--- +Input Type: +- +This parser takes JSON Lines Output from Nosey Parker: https://github.com/praetorian-inc/noseyparker + +Supports version 0.16.0: +https://github.com/praetorian-inc/noseyparker/releases/tag/v0.16.0 + +Things to note about the Nosey Parker Parser: +- +- All findings are marked with a severity of 'High' +- The deduplication algorithm marks a unique finding by the secret, filepath, and line number all together +- The Nosey Parker tool allows for both full history scans of a repo and targeted branch scans + - The Parser does NOT differentiate between the 2 scan types (may be future functionality) + + - **For full history scans:** + - The scan will pick up secrets committed in the past that have since been removed + - If a secret is removed from source code, it will still show up in the next scan + - When importing findings via the Dojo API, make sure to use the parameter `do_not_reactivate` which will keep existing findings closed, without reactivating them + - **For targeted branch scans:** + - Keep in mind there may be active secrets that are either in the git history or not in the current branch + +JSON Lines Format: +- +The parser only accepts .jsonl reports. Each line of the JSON Lines file from Nosey Parker corresponds to a unique secret found with metadata for every match. + + +### Sample Scan Data +Sample scan data for testing purposes can be found [here](https://github.com/DefectDojo/django-DefectDojo/tree/master/unittests/scans/noseyparker). 
class NoseyParkerParser(object):
    """
    Import secrets reported by Nosey Parker from a JSON Lines (.jsonl) report.

    Each report line describes one secret together with the list of matches
    where it was found. One finding is produced per distinct combination of
    file path, secret and line number; repeated combinations only increase
    the occurrence counter of the finding already created.
    """

    def get_scan_types(self):
        """Return the scan type names handled by this parser."""
        return ["Nosey Parker Scan"]

    def get_label_for_scan_types(self, scan_type):
        """Return the display label for the scan type."""
        return "Nosey Parker Scan"

    def get_description_for_scan_types(self, scan_type):
        """Return the help text describing the accepted report format."""
        return "Nosey Parker report file can be imported in JSON Lines format (option --jsonl). " \
               "Supports v0.16.0 of https://github.com/praetorian-inc/noseyparker"

    def get_findings(self, file, test):
        """
        Build findings from a Nosey Parker JSON Lines report.

        Every finding is created with severity 'High' and CWE 798.

        :param file: open report whose ``name`` ends with ``.jsonl`` and which
            yields one JSON document per line (str or bytes), or ``None``
        :param test: test object the created findings are attached to
        :return: list of findings; empty when ``file`` is ``None`` or the
            report holds no secrets
        :raises ValueError: if the file extension is not ``.jsonl``, a line is
            not valid JSON, or a record lacks the fields this parser reads
        """
        if file is None:
            return []

        if not file.name.lower().endswith(".jsonl"):
            raise ValueError("JSON lines format not recognized (.jsonl file extension). Make sure to use Nosey Parker v0.16.0")

        invalid_data_msg = "Invalid Nosey Parker data, make sure to use Nosey Parker v0.16.0"
        dupes = {}

        for raw_line in file:
            # Blank lines (e.g. a trailing newline) carry no record
            if not raw_line.strip():
                continue

            record = json.loads(raw_line)

            # An empty JSON object means "nothing found"
            if not record:
                continue

            try:
                # rule_name is the secret type (e.g. AWS S3 Bucket)
                rule_name = record['rule_name']
                secret = record['match_content']
                matches = record['matches']
            except (KeyError, TypeError) as err:
                raise ValueError(invalid_data_msg) from err

            for match in matches:
                try:
                    # Commit details are read from the last provenance entry,
                    # since the number of provenance entries varies per match
                    commit = match['provenance'][-1]['commit_provenance']
                    metadata = commit['commit_metadata']
                    commit_id = metadata['commit_id']
                    committer_name = metadata['committer_name']
                    committer_email = metadata['committer_email']
                    filepath = commit['blob_path']
                    line_num = match['location']['source_span']['start']['line']
                    snippet_before = match['snippet']['before']
                    snippet_after = match['snippet']['after']
                except (KeyError, IndexError, TypeError) as err:
                    raise ValueError(invalid_data_msg) from err

                # Internal de-duplication on filepath/secret/line number
                key = hashlib.md5(f"{filepath}|{secret}|{line_num}".encode("utf-8")).hexdigest()

                existing = dupes.get(key)
                if existing is not None:
                    existing.nb_occurences += 1
                    continue

                # Only the first 3 characters of the secret are disclosed
                description = (
                    f"Secret found of type: {rule_name} \n"
                    f"SECRET starts with: '{secret[:3]}' \n"
                    f"Committer Name: {committer_name} \n"
                    f"Committer Email: {committer_email} \n"
                    f"Commit ID: {commit_id} \n"
                    f"Location: {filepath} line #{line_num} \n "
                    f"Line #{line_num} \n "
                    f"Code Snippet Containing Secret: {snippet_before}***SECRET***{snippet_after} \n"
                )

                dupes[key] = Finding(
                    test=test,
                    cwe=798,
                    title=f"Secret(s) Found in Repository with Commit ID {commit_id}",
                    description=description,
                    severity='High',
                    mitigation="Reset the account/token and remove from source code. Store secrets/tokens/passwords in secret managers or secure vaults.",
                    date=datetime.today().strftime("%Y-%m-%d"),
                    verified=False,
                    active=True,
                    is_mitigated=False,
                    file_path=filepath,
                    line=line_num,
                    static_finding=True,
                    nb_occurences=1,
                    dynamic_finding=False,
                )

        return list(dupes.values())
+0000","author_name":"Princess Leia","author_email":"leia@test.com","author_timestamp":"1685495256 +0000","message":"framework\n"},"blob_path":"app/schema/config.py"}}],"blob_metadata":{"id":"0ee84b84c29924b210e3576fe9d1e8632948bedc","num_bytes":664,"mime_essence":"text/plain","charset":null},"blob_id":"0ee84b84c29924b210e3576fe9d1e8632948bedc","location":{"offset_span":{"start":617,"end":660},"source_span":{"start":{"line":16,"column":17},"end":{"line":16,"column":59}}},"capture_group_index":1,"match_content":"32ui1ffdasfhu239b4df2ac6609a9919","snippet":{"before":"E = \"https://testwebsite.com\"\n ","matching":"API_KEY = \"32ui1ffdasfhu239b4df2ac6609a9919","after":"\"\n\n\n"},"rule_name":"Generic API Key"}]} +{"type":"finding","rule_name":"Generic Username and Password (unquoted)","match_content":"secret","num_matches":1,"matches":[{"provenance":[{"kind":"file","path":"./app/schema/config.py"},{"kind":"git_repo","repo_path":"./.git","commit_provenance":{"commit_kind":"first_seen","commit_metadata":{"commit_id":"0ee84b84c29924b210e3576fe9d1e8632948bedc","committer_name":"Princess Leia","committer_email":"leia@test.com","committer_timestamp":"1685495256 +0000","author_name":"Princess Leia","author_email":"leia@test.com","author_timestamp":"1685495256 +0000","message":"framework\n"},"blob_path":"app/schema/config.py"}}],"blob_metadata":{"id":"0ee84b84c29924b210e3576fe9d1e8632948bedc","num_bytes":664,"mime_essence":"text/plain","charset":null},"blob_id":"0ee84b84c29924b210e3576fe9d1e8632948bedc","location":{"offset_span":{"start":617,"end":660},"source_span":{"start":{"line":16,"column":17},"end":{"line":16,"column":59}}},"capture_group_index":1,"match_content":"secret","snippet":{"before":"E = \"https://testwebsite.com\"\n ","matching":"secret","after":"testing\"\n\n\n"},"rule_name":"Generic Username and Password (unquoted)"}]} +{"type":"finding","rule_name":"Generic Username and Password 
(unquoted)","match_content":"secret","num_matches":1,"matches":[{"provenance":[{"kind":"file","path":"./app/schema/config.py"},{"kind":"git_repo","repo_path":"./.git","commit_provenance":{"commit_kind":"first_seen","commit_metadata":{"commit_id":"0ee84b84c29924b210e3576fe9d1e8632948bedc","committer_name":"Princess Leia","committer_email":"leia@test.com","committer_timestamp":"1685495256 +0000","author_name":"Princess Leia","author_email":"leia@test.com","author_timestamp":"1685495256 +0000","message":"framework\n"},"blob_path":"app/schema/config.py"}}],"blob_metadata":{"id":"0ee84b84c29924b210e3576fe9d1e8632948bedc","num_bytes":664,"mime_essence":"text/plain","charset":null},"blob_id":"0ee84b84c29924b210e3576fe9d1e8632948bedc","location":{"offset_span":{"start":617,"end":660},"source_span":{"start":{"line":16,"column":17},"end":{"line":16,"column":59}}},"capture_group_index":1,"match_content":"secret","snippet":{"before":"E = \"https://testwebsite.com\"\n ","matching":"secret","after":"testing\"\n\n\n"},"rule_name":"Generic Username and Password (unquoted)"}]} +{"type":"finding","rule_name":"Generic Password (double quoted)","match_content":"Password","num_matches":12,"status":null,"comment":null,"matches":[{"provenance":[{"kind":"file","path":"./references/Microsoft.json"},{"kind":"git_repo","repo_path":"./.git","commit_provenance":{"commit_kind":"first_seen","commit_metadata":{"commit_id":"776f9a49398cb90f9a95f4f321bcc2009d84","committer_name":"Yoda","committer_email":"yoda@test.com","committer_timestamp":"1748581495 +0000","author_name":"Yoda","author_email":"yoda@test.com","author_timestamp":"1748581495 
+0000","message":"testing\n"},"blob_path":"./references/Microsoft.json"}}],"blob_metadata":{"id":"7769b26e8694073f3270674bb2dedda8309749e4","num_bytes":14909,"mime_essence":"application/json","charset":null},"blob_id":"7769b26e8694073f3270674bb2dedda8309749e4","location":{"offset_span":{"start":7896,"end":7917},"source_span":{"start":{"line":161,"column":30},"end":{"line":161,"column":50}}},"capture_group_index":1,"match_content":"Password","snippet":{"before":" \"name\": \"vmCredentials\",\n \"type\": \"Compute.CredentialsCombocrosoft\",\n \"label\": {\n \"authenticationType\": \"Authentication type\",\n \"","matching":"password\": \"Password\"","after":",\n \"confirmPassword\": \"Confirm password\",\n \"sshPublicKey\": \"SSH public key\"\n },\n \"toolTip\": {\n \"authenticationType\": \"\",\n "}}, {"provenance":[{"kind":"file","path":"./references/Microsoft.json"},{"kind":"git_repo","repo_path":"./.git","commit_provenance":{"commit_kind":"first_seen","commit_metadata":{"commit_id":"776f9a49398cb90f9a95f4f321bcc2009d84","committer_name":"Yoda","committer_email":"yoda@test.com","committer_timestamp":"1748581495 +0000","author_name":"Yoda","author_email":"yoda@test.com","author_timestamp":"1748581495 +0000","message":"testing\n"},"blob_path":"./references/Microsoft.json"}}],"blob_metadata":{"id":"7769b26e8694073f3270674bb2dedda8309749e4","num_bytes":14909,"mime_essence":"application/json","charset":null},"blob_id":"7769b26e8694073f3270674bb2dedda8309749e4","location":{"offset_span":{"start":7896,"end":7917},"source_span":{"start":{"line":161,"column":30},"end":{"line":161,"column":50}}},"capture_group_index":1,"match_content":"Password","snippet":{"before":" \"name\": \"vmCredentials\",\n \"type\": \"Compute.CredentialsCombocrosoft\",\n \"label\": {\n \"authenticationType\": \"Authentication type\",\n \"","matching":"password\": \"Password\"","after":",\n \"confirmPassword\": \"Confirm password\",\n \"sshPublicKey\": \"SSH public key\"\n },\n \"toolTip\": {\n 
\"authenticationType\": \"\",\n "}}]} +{"type": "finding", "rule_name": "Generic Password (double quoted)", "match_content": "32ui1ffdasfhu239b4df2ac6609a9919", "num_matches": 2, "status": null, "comment": null, "matches": [ { "provenance": [ { "kind": "file", "path": "app/schema/config.py" }, { "kind": "git_repo", "repo_path": "./.git", "commit_provenance": { "commit_kind": "first_seen", "commit_metadata": { "commit_id": "0ef84b84c29924b210e3576f69d1e8632948bedc", "committer_name": "Princess Leia", "committer_email": "leia@test.com", "committer_timestamp": "1685495256 +0000", "author_name": "Princess Leia", "author_email": "leia@test.com", "author_timestamp": "1685495256 +0000", "message": "first commit\n" }, "blob_path": "app/schema/config.py" } } ], "blob_metadata": { "id": "0ee84b84c29924b210e3576fe9d1e8632948bedc", "num_bytes": 664, "mime_essence": "text/plain", "charset": null }, "blob_id": "0ee84b84c29924b210e3576fe9d1e8632948bedc", "location": { "offset_span": { "start": 617, "end": 660 }, "source_span": { "start": { "line": 16, "column": 17 }, "end": { "line": 16, "column": 59 } } }, "capture_group_index": 1, "match_content": "32ui1ffdasfhu239b4df2ac6609a9919", "snippet": { "before": "E = \"https://testwebsite.com\"\n ", "matching": "API_KEY = \"32ui1ffdasfhu239b4df2ac6609a9919", "after": "\"\n\n\n" }, "rule_name": "Generic API Key" } ] } \ No newline at end of file diff --git a/unittests/scans/noseyparker/noseyparker_one_vul.jsonl b/unittests/scans/noseyparker/noseyparker_one_vul.jsonl new file mode 100644 index 00000000000..4c514e0f22b --- /dev/null +++ b/unittests/scans/noseyparker/noseyparker_one_vul.jsonl @@ -0,0 +1 @@ +{"type":"finding","rule_name":"Generic API 
class TestNoseyParkerParser(TestCase):
    """Unit tests for NoseyParkerParser using the sample reports in unittests/scans/noseyparker."""

    def test_noseyparker_parser__no_vulns(self):
        # Report containing a single empty JSON object
        with open("unittests/scans/noseyparker/noseyparker_zero_vul.jsonl") as testfile:
            findings = NoseyParkerParser().get_findings(testfile, Test())
        self.assertEqual(0, len(findings))

    def test_noseyparker_parser_one_vuln(self):
        with open("unittests/scans/noseyparker/noseyparker_one_vul.jsonl") as testfile:
            findings = NoseyParkerParser().get_findings(testfile, Test())
        # Check the count first so a wrong count is reported instead of an IndexError
        self.assertEqual(1, len(findings))
        finding = findings[0]
        self.assertEqual("app/schema/config.py", finding.file_path)
        self.assertEqual("High", finding.severity)
        self.assertEqual(798, finding.cwe)

    def test_noseyparker_parser_many_vulns(self):
        # Testfile contains 5 lines (Middle 2 are duplicates and line #4 has 2 of the same exact matches)
        with open("unittests/scans/noseyparker/noseyparker_many_vul.jsonl") as testfile:
            findings = NoseyParkerParser().get_findings(testfile, Test())
        for finding in findings:
            self.assertEqual("High", finding.severity)
            self.assertEqual(798, finding.cwe)
        self.assertEqual(3, len(findings))

    def test_noseyparker_parser_error(self):
        # The fixture has a .json extension, so the parser rejects it before reading any content
        with open("unittests/scans/noseyparker/empty_with_error.json") as testfile:
            with self.assertRaises(ValueError) as context:
                NoseyParkerParser().get_findings(testfile, Test())
        # Assert after the assertRaises block: statements placed after the raising call inside it never run
        self.assertIn("JSON lines format not recognized", str(context.exception))