diff --git a/src/backend/InvenTree/InvenTree/helpers.py b/src/backend/InvenTree/InvenTree/helpers.py
index 61b16d8145..2cdaf82913 100644
--- a/src/backend/InvenTree/InvenTree/helpers.py
+++ b/src/backend/InvenTree/InvenTree/helpers.py
@@ -21,6 +21,7 @@ from django.core.files.storage import Storage, default_storage
from django.http import StreamingHttpResponse
from django.utils.translation import gettext_lazy as _
+import bleach
import pytz
import regex
from bleach import clean
@@ -816,7 +817,6 @@ def clean_markdown(value: str):
This function will remove javascript and other potentially harmful content from the markdown string.
"""
import markdown
- from markdownify.templatetags.markdownify import markdownify
try:
markdownify_settings = settings.MARKDOWNIFY['default']
@@ -835,8 +835,34 @@ def clean_markdown(value: str):
output_format='html',
)
- # Clean the HTML content (for comparison). Ideally, this should be the same as the original content
- clean_html = markdownify(value)
+ # Bleach settings
+ whitelist_tags = markdownify_settings.get(
+ 'WHITELIST_TAGS', bleach.sanitizer.ALLOWED_TAGS
+ )
+ whitelist_attrs = markdownify_settings.get(
+ 'WHITELIST_ATTRS', bleach.sanitizer.ALLOWED_ATTRIBUTES
+ )
+ whitelist_styles = markdownify_settings.get(
+ 'WHITELIST_STYLES', bleach.css_sanitizer.ALLOWED_CSS_PROPERTIES
+ )
+ whitelist_protocols = markdownify_settings.get(
+ 'WHITELIST_PROTOCOLS', bleach.sanitizer.ALLOWED_PROTOCOLS
+ )
+ strip = markdownify_settings.get('STRIP', True)
+
+ css_sanitizer = bleach.css_sanitizer.CSSSanitizer(
+ allowed_css_properties=whitelist_styles
+ )
+ cleaner = bleach.Cleaner(
+ tags=whitelist_tags,
+ attributes=whitelist_attrs,
+ css_sanitizer=css_sanitizer,
+ protocols=whitelist_protocols,
+ strip=strip,
+ )
+
+ # Clean the HTML content (for comparison). This must be the same as the original content
+ clean_html = cleaner.clean(html)
if html != clean_html:
raise ValidationError(_('Data contains prohibited markdown content'))
diff --git a/src/backend/InvenTree/company/test_api.py b/src/backend/InvenTree/company/test_api.py
index ec06ddc40f..e6013f0f8f 100644
--- a/src/backend/InvenTree/company/test_api.py
+++ b/src/backend/InvenTree/company/test_api.py
@@ -159,6 +159,7 @@ class CompanyTest(InvenTreeAPITestCase):
def test_company_notes(self):
"""Test the markdown 'notes' field for the Company model."""
pk = Company.objects.first().pk
+ url = reverse('api-company-detail', kwargs={'pk': pk})
# Attempt to inject malicious markdown into the "notes" field
xss = [
@@ -168,16 +169,23 @@ class CompanyTest(InvenTreeAPITestCase):
]
for note in xss:
- response = self.patch(
- reverse('api-company-detail', kwargs={'pk': pk}),
- {'notes': note},
- expected_code=400,
- )
+ response = self.patch(url, {'notes': note}, expected_code=400)
self.assertIn(
'Data contains prohibited markdown content', str(response.data)
)
+ # Tests with disallowed tags
+ invalid_tags = [
+ '',
+ '',
+ ]
+
+ for note in invalid_tags:
+ response = self.patch(url, {'notes': note}, expected_code=400)
+
+ self.assertIn('Remove HTML tags from this value', str(response.data))
+
# The following markdown is safe, and should be accepted
good = [
'This is a **bold** statement',
@@ -186,14 +194,11 @@ class CompanyTest(InvenTreeAPITestCase):
'This is an ',
'This is a `code` block',
'This text has ~~strikethrough~~ formatting',
+ 'This text has a raw link - https://www.google.com - and should still pass the test',
]
for note in good:
- response = self.patch(
- reverse('api-company-detail', kwargs={'pk': pk}),
- {'notes': note},
- expected_code=200,
- )
+ response = self.patch(url, {'notes': note}, expected_code=200)
self.assertEqual(response.data['notes'], note)