From 2b7ae0131e582d339e62947d83e7281723a32064 Mon Sep 17 00:00:00 2001 From: Samiat Date: Tue, 28 Apr 2026 11:09:45 +0100 Subject: [PATCH] fix(sonarqube): mdDesc fallback --- dojo/tools/api_sonarqube/importer.py | 86 +++++++++++++++++-- .../api_sonarqube/rule_md_desc_only.json | 32 +++++++ .../tools/test_api_sonarqube_importer.py | 84 ++++++++++++++++++ 3 files changed, 196 insertions(+), 6 deletions(-) create mode 100644 unittests/scans/api_sonarqube/rule_md_desc_only.json diff --git a/dojo/tools/api_sonarqube/importer.py b/dojo/tools/api_sonarqube/importer.py index b8d068b3cc7..d0ae635cfa0 100644 --- a/dojo/tools/api_sonarqube/importer.py +++ b/dojo/tools/api_sonarqube/importer.py @@ -2,7 +2,9 @@ import re import textwrap +import bleach import html2text +import markdown from django.conf import settings from django.core.exceptions import ValidationError from lxml import etree @@ -16,6 +18,44 @@ class SonarQubeApiImporter: + ALLOWED_RULE_DESCRIPTION_TAGS = [ + "a", + "b", + "blockquote", + "br", + "code", + "em", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + "i", + "li", + "ol", + "p", + "pre", + "strong", + "table", + "tbody", + "td", + "th", + "thead", + "tr", + "ul", + ] + ALLOWED_RULE_DESCRIPTION_ATTRIBUTES = { + "a": [ + "href", + "title", + ], + } + ALLOWED_RULE_DESCRIPTION_PROTOCOLS = [ + "http", + "https", + "mailto", + ] """ This class imports from SonarQube (SQ) all open/confirmed SQ issues related to the project related to the test as @@ -153,13 +193,13 @@ def import_issues(self, test): except KeyError: sonarqube_permalink = "No permalink \n" - # custom (user defined) SQ rules may not have 'htmlDesc' - if "htmlDesc" in rule: + rule_details = self.get_rule_details(rule) + if rule_details: description = self.clean_rule_description_html( - rule["htmlDesc"], + rule_details, ) - cwe = self.clean_cwe(rule["htmlDesc"]) - references = sonarqube_permalink + self.get_references(rule["htmlDesc"]) + cwe = self.clean_cwe(rule_details) + references = sonarqube_permalink + self.get_references(rule_details) else: description = "" cwe = None @@ -338,8 +378,10 @@ def import_hotspots(self, test): @staticmethod def clean_rule_description_html(raw_html): + if not raw_html: + return "" search = re.search( - r"^(.*?)(?:(

See

)|(References))", + r"^(.*?)(?:(

See

)|(

References

)|(References))", raw_html, re.DOTALL, ) @@ -356,6 +398,36 @@ def clean_cwe(raw_html): return int(search.group(1)) return None + @staticmethod + def get_rule_details(rule): + if html_desc := rule.get("htmlDesc"): + return SonarQubeApiImporter.sanitize_rule_details(html_desc) + if not (md_desc := rule.get("mdDesc")): + return "" + # SonarQube 2025.x can return markdown-only rule descriptions, including + # inline HTML that should still be treated as markdown content. + return SonarQubeApiImporter.sanitize_rule_details( + markdown.markdown(md_desc, extensions=["extra"]), + ) + + @staticmethod + def sanitize_rule_details(description): + if not description: + return "" + sanitized_description = re.sub( + r"<(script|style)\b[^>]*>.*?", + "", + description, + flags=re.DOTALL | re.IGNORECASE, + ) + return bleach.clean( + sanitized_description, + tags=SonarQubeApiImporter.ALLOWED_RULE_DESCRIPTION_TAGS, + attributes=SonarQubeApiImporter.ALLOWED_RULE_DESCRIPTION_ATTRIBUTES, + protocols=SonarQubeApiImporter.ALLOWED_RULE_DESCRIPTION_PROTOCOLS, + strip=True, + ) + @staticmethod def convert_sonar_severity(sonar_severity): sev = sonar_severity.lower() @@ -382,6 +454,8 @@ def convert_scanner_confidence(sonar_scanner_confidence): @staticmethod def get_references(vuln_details): + if not vuln_details: + return "" parser = etree.HTMLParser() details = etree.fromstring(vuln_details, parser) diff --git a/unittests/scans/api_sonarqube/rule_md_desc_only.json b/unittests/scans/api_sonarqube/rule_md_desc_only.json new file mode 100644 index 00000000000..246e2a42e9d --- /dev/null +++ b/unittests/scans/api_sonarqube/rule_md_desc_only.json @@ -0,0 +1,32 @@ +{ + "key": "typescript:S1854", + "repo": "typescript", + "name": "Dead stores should be removed", + "createdAt": "2018-01-17T10:11:21-0500", + "mdDesc": "A dead store happens when a local variable is assigned a value that is not read by any subsequent instruction. Calculating or retrieving a value only to then overwrite it or throw it away, could indicate a serious error in the code. Even if it's not an error, it is at best a waste of resources. Therefore all calculated values should be used.\n\n## Noncompliant Code Example\n\ni = a + b; // Noncompliant; calculation result not used before value is overwritten\ni = compute();\n\n## Compliant Solution\n\ni = a + b;\ni += compute();\n\n## Exceptions\n\nThis rule ignores initializations to -1, 0, 1, `null`, `true`, `false`, `\"\"`, `[]` and `{}`.\n\n## See\n\n- [MITRE, CWE-563](http://cwe.mitre.org/data/definitions/563.html) - Assignment to Variable without Use ('Unused Variable')\n- [CERT, MSC13-C.](https://www.securecoding.cert.org/confluence/x/QYA5) - Detect and remove unused values\n- [CERT, MSC56-J.](https://www.securecoding.cert.org/confluence/x/uQCSBg) - Detect and remove superfluous code and values\n", + "severity": "MAJOR", + "status": "READY", + "isTemplate": false, + "tags": [], + "sysTags": [ + "cert", + "cwe", + "unused" + ], + "lang": "ts", + "langName": "TypeScript", + "params": [], + "defaultDebtRemFnType": "CONSTANT_ISSUE", + "defaultDebtRemFnOffset": "15min", + "debtOverloaded": false, + "debtRemFnType": "CONSTANT_ISSUE", + "debtRemFnOffset": "15min", + "defaultRemFnType": "CONSTANT_ISSUE", + "defaultRemFnBaseEffort": "15min", + "remFnType": "CONSTANT_ISSUE", + "remFnBaseEffort": "15min", + "remFnOverloaded": false, + "scope": "MAIN", + "isExternal": false, + "type": "CODE_SMELL" +} diff --git a/unittests/tools/test_api_sonarqube_importer.py b/unittests/tools/test_api_sonarqube_importer.py index 041387e218f..8dd3b9eafa0 100644 --- a/unittests/tools/test_api_sonarqube_importer.py +++ b/unittests/tools/test_api_sonarqube_importer.py @@ -28,6 +28,11 @@ def dummy_rule_wo_html_desc(self, *args, **kwargs): return json.load(json_file) +def dummy_rule_md_desc_only(self, *args, **kwargs): + with (get_unit_tests_scans_path("api_sonarqube") / "rule_md_desc_only.json").open(encoding="utf-8") as json_file: + return json.load(json_file) + + def dummy_no_hotspot(self, *args, **kwargs): with (get_unit_tests_scans_path("api_sonarqube") / "hotspots" / "no_vuln.json").open(encoding="utf-8") as json_file: return json.load(json_file) @@ -293,6 +298,85 @@ def test_parser(self): self.assertEqual("internal.dummy.project:src/main/javascript/TranslateDirective.ts", finding.file_path) +class TestSonarqubeImporterMarkdownRuleDescription(DojoTestCase): + fixtures = [ + "unit_sonarqube_toolType.json", + "unit_sonarqube_toolConfig1.json", + "unit_sonarqube_toolConfig2.json", + "unit_sonarqube_product.json", + "unit_sonarqube_sqcNoKey.json", + "unit_sonarqube_sqcWithKey.json", + ] + + def setUp(self): + product = Product.objects.get(name="product") + engagement = Engagement(product=product) + self.test = Test( + engagement=engagement, + api_scan_configuration=Product_API_Scan_Configuration.objects.all().last(), + ) + + @mock.patch("dojo.tools.api_sonarqube.api_client.SonarQubeAPI.get_project", dummy_product) + @mock.patch("dojo.tools.api_sonarqube.api_client.SonarQubeAPI.get_rule", dummy_rule_md_desc_only) + @mock.patch("dojo.tools.api_sonarqube.api_client.SonarQubeAPI.find_issues", dummy_issues) + @mock.patch("dojo.tools.api_sonarqube.api_client.SonarQubeAPI.get_hotspot_rule", dummy_hotspot_rule) + @mock.patch("dojo.tools.api_sonarqube.api_client.SonarQubeAPI.find_hotspots", empty_list) + def test_parser(self): + parser = SonarQubeApiImporter() + findings = parser.get_findings(None, self.test) + self.assertEqual(2, len(findings)) + finding = findings[0] + self.assertEqual(563, finding.cwe) + self.assertIn( + "A dead store happens when a local variable is assigned a value", + finding.description, + ) + self.assertIn( + "[MITRE, CWE-563](http://cwe.mitre.org/data/definitions/563.html)", + finding.references, + ) + + +class TestSonarqubeImporterRuleDetailsSanitization(DojoTestCase): + def test_get_rule_details_sanitizes_markdown_html(self): + rule_details = SonarQubeApiImporter.get_rule_details( + { + "mdDesc": ( + "# Heading\n\n" + "\n\n" + "[safe](https://example.com)\n\n" + 'unsafe' + ), + }, + ) + + self.assertIn("

Heading

", rule_details) + self.assertIn('safe', rule_details) + self.assertIn("unsafe", rule_details) + self.assertNotIn("References" + '

OWASP

' + "" + '

unsafe

' + ), + }, + ) + + self.assertIn("

References

", rule_details) + self.assertIn('OWASP', rule_details) + self.assertIn("unsafe", rule_details) + self.assertNotIn("