diff --git a/src/backend/InvenTree/InvenTree/helpers.py b/src/backend/InvenTree/InvenTree/helpers.py index 4558594436..61b16d8145 100644 --- a/src/backend/InvenTree/InvenTree/helpers.py +++ b/src/backend/InvenTree/InvenTree/helpers.py @@ -810,6 +810,40 @@ def remove_non_printable_characters( return cleaned +def clean_markdown(value: str): + """Clean a markdown string. + + This function will remove javascript and other potentially harmful content from the markdown string. + """ + import markdown + from markdownify.templatetags.markdownify import markdownify + + try: + markdownify_settings = settings.MARKDOWNIFY['default'] + except (AttributeError, KeyError): + markdownify_settings = {} + + extensions = markdownify_settings.get('MARKDOWN_EXTENSIONS', []) + extension_configs = markdownify_settings.get('MARKDOWN_EXTENSION_CONFIGS', {}) + + # Generate raw HTML from provided markdown (without sanitizing) + # Note: The 'html' output_format is required to generate self closing tags, e.g. instead of + html = markdown.markdown( + value or '', + extensions=extensions, + extension_configs=extension_configs, + output_format='html', + ) + + # Clean the HTML content (for comparison). Ideally, this should be the same as the original content + clean_html = markdownify(value) + + if html != clean_html: + raise ValidationError(_('Data contains prohibited markdown content')) + + return value + + def hash_barcode(barcode_data): """Calculate a 'unique' hash for a barcode string. diff --git a/src/backend/InvenTree/InvenTree/mixins.py b/src/backend/InvenTree/InvenTree/mixins.py index fd63726d96..d02a96dfac 100644 --- a/src/backend/InvenTree/InvenTree/mixins.py +++ b/src/backend/InvenTree/InvenTree/mixins.py @@ -6,7 +6,11 @@ from rest_framework import generics, mixins, status from rest_framework.response import Response from InvenTree.fields import InvenTreeNotesField -from InvenTree.helpers import remove_non_printable_characters, strip_html_tags +from InvenTree.helpers import ( + clean_markdown, + remove_non_printable_characters, + strip_html_tags, +) class CleanMixin: @@ -57,6 +61,7 @@ class CleanMixin: # By default, newline characters are removed remove_newline = True + is_markdown = False try: if hasattr(self, 'serializer_class'): @@ -64,11 +69,12 @@ class CleanMixin: field = model._meta.get_field(field) # The following field types allow newline characters - allow_newline = [InvenTreeNotesField] + allow_newline = [(InvenTreeNotesField, True)] for field_type in allow_newline: - if issubclass(type(field), field_type): + if issubclass(type(field), field_type[0]): remove_newline = False + is_markdown = field_type[1] break except AttributeError: @@ -80,6 +86,9 @@ class CleanMixin: cleaned, remove_newline=remove_newline ) + if is_markdown: + cleaned = clean_markdown(cleaned) + return cleaned def clean_data(self, data: dict) -> dict: diff --git a/src/backend/InvenTree/InvenTree/settings.py b/src/backend/InvenTree/InvenTree/settings.py index 6cccc3bd49..7d38b7cff9 100644 --- a/src/backend/InvenTree/InvenTree/settings.py +++ b/src/backend/InvenTree/InvenTree/settings.py @@ -1231,23 +1231,29 @@ MARKDOWNIFY = { 'abbr', 'b', 'blockquote', + 'code', 'em', 'h1', 'h2', 'h3', + 'h4', + 'h5', + 'hr', 'i', 'img', 'li', 'ol', 'p', + 'pre', + 's', 'strong', - 'ul', 'table', 'thead', 'tbody', 'th', 'tr', 'td', + 'ul', ], } } diff --git a/src/backend/InvenTree/company/test_api.py b/src/backend/InvenTree/company/test_api.py index 78064a3bc5..ec06ddc40f 100644 --- a/src/backend/InvenTree/company/test_api.py +++ b/src/backend/InvenTree/company/test_api.py @@ -156,6 +156,47 @@ class CompanyTest(InvenTreeAPITestCase): len(self.get(url, data={'active': False}, expected_code=200).data), 1 ) + def test_company_notes(self): + """Test the markdown 'notes' field for the Company model.""" + pk = Company.objects.first().pk + + # Attempt to inject malicious markdown into the "notes" field + xss = [ + '[Click me](javascript:alert(123))', + '![x](javascript:alert(123))', + '![Uh oh...]("onerror="alert(\'XSS\'))', + ] + + for note in xss: + response = self.patch( + reverse('api-company-detail', kwargs={'pk': pk}), + {'notes': note}, + expected_code=400, + ) + + self.assertIn( + 'Data contains prohibited markdown content', str(response.data) + ) + + # The following markdown is safe, and should be accepted + good = [ + 'This is a **bold** statement', + 'This is a *italic* statement', + 'This is a [link](https://www.google.com)', + 'This is an ![image](https://www.google.com/test.jpg)', + 'This is a `code` block', + 'This text has ~~strikethrough~~ formatting', + ] + + for note in good: + response = self.patch( + reverse('api-company-detail', kwargs={'pk': pk}), + {'notes': note}, + expected_code=200, + ) + + self.assertEqual(response.data['notes'], note) + class ContactTest(InvenTreeAPITestCase): """Tests for the Contact models."""