diff --git a/src/backend/InvenTree/InvenTree/helpers.py b/src/backend/InvenTree/InvenTree/helpers.py
index aa890cf6ad..69b13e96ea 100644
--- a/src/backend/InvenTree/InvenTree/helpers.py
+++ b/src/backend/InvenTree/InvenTree/helpers.py
@@ -21,6 +21,8 @@ from django.http import StreamingHttpResponse
from django.utils import timezone
from django.utils.translation import gettext_lazy as _
+import bleach
+import bleach.css_sanitizer
import pytz
import regex
from bleach import clean
@@ -829,7 +830,6 @@ def clean_markdown(value: str):
This function will remove javascript and other potentially harmful content from the markdown string.
"""
import markdown
- from markdownify.templatetags.markdownify import markdownify
try:
markdownify_settings = settings.MARKDOWNIFY['default']
@@ -848,8 +848,34 @@ def clean_markdown(value: str):
output_format='html',
)
- # Clean the HTML content (for comparison). Ideally, this should be the same as the original content
- clean_html = markdownify(value)
+ # Bleach settings
+ whitelist_tags = markdownify_settings.get(
+ 'WHITELIST_TAGS', bleach.sanitizer.ALLOWED_TAGS
+ )
+ whitelist_attrs = markdownify_settings.get(
+ 'WHITELIST_ATTRS', bleach.sanitizer.ALLOWED_ATTRIBUTES
+ )
+ whitelist_styles = markdownify_settings.get(
+ 'WHITELIST_STYLES', bleach.css_sanitizer.ALLOWED_CSS_PROPERTIES
+ )
+ whitelist_protocols = markdownify_settings.get(
+ 'WHITELIST_PROTOCOLS', bleach.sanitizer.ALLOWED_PROTOCOLS
+ )
+ strip = markdownify_settings.get('STRIP', True)
+
+ css_sanitizer = bleach.css_sanitizer.CSSSanitizer(
+ allowed_css_properties=whitelist_styles
+ )
+ cleaner = bleach.Cleaner(
+ tags=whitelist_tags,
+ attributes=whitelist_attrs,
+ css_sanitizer=css_sanitizer,
+ protocols=whitelist_protocols,
+ strip=strip,
+ )
+
+ # Sanitize the rendered HTML (for comparison). The result must be identical to the unsanitized HTML
+ clean_html = cleaner.clean(html)
if html != clean_html:
raise ValidationError(_('Data contains prohibited markdown content'))
diff --git a/src/backend/InvenTree/company/test_api.py b/src/backend/InvenTree/company/test_api.py
index c9d4037dc7..63d9a4d0ff 100644
--- a/src/backend/InvenTree/company/test_api.py
+++ b/src/backend/InvenTree/company/test_api.py
@@ -157,6 +157,7 @@ class CompanyTest(InvenTreeAPITestCase):
def test_company_notes(self):
"""Test the markdown 'notes' field for the Company model."""
pk = Company.objects.first().pk
+ url = reverse('api-company-detail', kwargs={'pk': pk})
# Attempt to inject malicious markdown into the "notes" field
xss = [
@@ -166,16 +167,23 @@ class CompanyTest(InvenTreeAPITestCase):
]
for note in xss:
- response = self.patch(
- reverse('api-company-detail', kwargs={'pk': pk}),
- {'notes': note},
- expected_code=400,
- )
+ response = self.patch(url, {'notes': note}, expected_code=400)
self.assertIn(
'Data contains prohibited markdown content', str(response.data)
)
+ # Tests with disallowed tags
+ invalid_tags = [
+ '',
+ '',
+ ]
+
+ for note in invalid_tags:
+ response = self.patch(url, {'notes': note}, expected_code=400)
+
+ self.assertIn('Remove HTML tags from this value', str(response.data))
+
# The following markdown is safe, and should be accepted
good = [
'This is a **bold** statement',
@@ -184,14 +192,11 @@ class CompanyTest(InvenTreeAPITestCase):
'This is an ',
'This is a `code` block',
'This text has ~~strikethrough~~ formatting',
+ 'This text has a raw link - https://www.google.com - and should still pass the test',
]
for note in good:
- response = self.patch(
- reverse('api-company-detail', kwargs={'pk': pk}),
- {'notes': note},
- expected_code=200,
- )
+ response = self.patch(url, {'notes': note}, expected_code=200)
self.assertEqual(response.data['notes'], note)