2
0
mirror of https://github.com/inventree/InvenTree.git synced 2025-04-28 11:36:44 +00:00

Markdown xss backport (#8244)

* Update helpers.py

* Update mixins.py

* format

* format

* Allow horizontal rule in markdown

* More instructive error msg

* Specify output_format to markdown.markdown

Ref: https://python-markdown.github.io/reference/markdown/serializers/

* Cleanup

* Adjust allowable markdown tags

* Add unit test for malicious markdown XSS

* Allow <pre> tag

---------

Co-authored-by: Matthias Mair <code@mjmair.com>
This commit is contained in:
Oliver 2024-10-07 20:03:39 +11:00 committed by GitHub
parent 1c6d25ce33
commit 6e37f0cd8b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 94 additions and 4 deletions

View File

@ -810,6 +810,40 @@ def remove_non_printable_characters(
return cleaned return cleaned
def clean_markdown(value: str):
    """Validate that a markdown string does not contain prohibited content.

    The markdown is rendered to HTML twice: once directly via the 'markdown'
    library (no sanitizing), and once via the 'markdownify' template filter
    (which cleans the generated HTML). If the two results differ, the input
    is rejected.

    Note that this function does not modify the provided string:
    it either returns it unchanged, or raises an error.

    Arguments:
        value: The markdown string to check. An empty (or None) value is
            treated as an empty string for rendering purposes.

    Returns:
        The original value, unchanged.

    Raises:
        ValidationError: If the raw HTML differs from the cleaned HTML.
    """
    import markdown
    from markdownify.templatetags.markdownify import markdownify

    # Fall back to an empty configuration if MARKDOWNIFY is not (fully) configured
    try:
        markdownify_settings = settings.MARKDOWNIFY['default']
    except (AttributeError, KeyError):
        markdownify_settings = {}

    extensions = markdownify_settings.get('MARKDOWN_EXTENSIONS', [])
    extension_configs = markdownify_settings.get('MARKDOWN_EXTENSION_CONFIGS', {})

    # Normalize the input once, so that both renderers receive exactly the same text
    # (previously only the raw renderer was guarded against an empty / None value)
    text = value or ''

    # Generate raw HTML from provided markdown (without sanitizing)
    # Note: The 'html' output_format emits HTML-style void tags, e.g. <tag> instead of <tag />
    # (presumably required so that the output is comparable to the cleaned HTML below)
    html = markdown.markdown(
        text,
        extensions=extensions,
        extension_configs=extension_configs,
        output_format='html',
    )

    # Clean the HTML content (for comparison). Ideally, this should be the same as the raw HTML
    clean_html = markdownify(text)

    if html != clean_html:
        raise ValidationError(_('Data contains prohibited markdown content'))

    return value
def hash_barcode(barcode_data): def hash_barcode(barcode_data):
"""Calculate a 'unique' hash for a barcode string. """Calculate a 'unique' hash for a barcode string.

View File

@ -6,7 +6,11 @@ from rest_framework import generics, mixins, status
from rest_framework.response import Response from rest_framework.response import Response
from InvenTree.fields import InvenTreeNotesField from InvenTree.fields import InvenTreeNotesField
from InvenTree.helpers import remove_non_printable_characters, strip_html_tags from InvenTree.helpers import (
clean_markdown,
remove_non_printable_characters,
strip_html_tags,
)
class CleanMixin: class CleanMixin:
@ -57,6 +61,7 @@ class CleanMixin:
# By default, newline characters are removed # By default, newline characters are removed
remove_newline = True remove_newline = True
is_markdown = False
try: try:
if hasattr(self, 'serializer_class'): if hasattr(self, 'serializer_class'):
@ -64,11 +69,12 @@ class CleanMixin:
field = model._meta.get_field(field) field = model._meta.get_field(field)
# The following field types allow newline characters # The following field types allow newline characters
allow_newline = [InvenTreeNotesField] allow_newline = [(InvenTreeNotesField, True)]
for field_type in allow_newline: for field_type in allow_newline:
if issubclass(type(field), field_type): if issubclass(type(field), field_type[0]):
remove_newline = False remove_newline = False
is_markdown = field_type[1]
break break
except AttributeError: except AttributeError:
@ -80,6 +86,9 @@ class CleanMixin:
cleaned, remove_newline=remove_newline cleaned, remove_newline=remove_newline
) )
if is_markdown:
cleaned = clean_markdown(cleaned)
return cleaned return cleaned
def clean_data(self, data: dict) -> dict: def clean_data(self, data: dict) -> dict:

View File

@ -1231,23 +1231,29 @@ MARKDOWNIFY = {
'abbr', 'abbr',
'b', 'b',
'blockquote', 'blockquote',
'code',
'em', 'em',
'h1', 'h1',
'h2', 'h2',
'h3', 'h3',
'h4',
'h5',
'hr',
'i', 'i',
'img', 'img',
'li', 'li',
'ol', 'ol',
'p', 'p',
'pre',
's',
'strong', 'strong',
'ul',
'table', 'table',
'thead', 'thead',
'tbody', 'tbody',
'th', 'th',
'tr', 'tr',
'td', 'td',
'ul',
], ],
} }
} }

View File

@ -156,6 +156,47 @@ class CompanyTest(InvenTreeAPITestCase):
len(self.get(url, data={'active': False}, expected_code=200).data), 1 len(self.get(url, data={'active': False}, expected_code=200).data), 1
) )
def test_company_notes(self):
    """Check XSS handling for the markdown 'notes' field of the Company model.

    Malicious markdown must be rejected with a 400 response,
    while benign markdown must be accepted and returned unchanged.
    """
    company_id = Company.objects.first().pk
    url = reverse('api-company-detail', kwargs={'pk': company_id})

    # Payloads which attempt to inject javascript via links or images
    malicious_notes = (
        '[Click me](javascript:alert(123))',
        '![x](javascript:alert(123))',
        '![Uh oh...]("onerror="alert(\'XSS\'))',
    )

    for payload in malicious_notes:
        result = self.patch(url, {'notes': payload}, expected_code=400)

        self.assertIn(
            'Data contains prohibited markdown content', str(result.data)
        )

    # Harmless markdown, which the API is expected to accept
    benign_notes = (
        'This is a **bold** statement',
        'This is a *italic* statement',
        'This is a [link](https://www.google.com)',
        'This is an ![image](https://www.google.com/test.jpg)',
        'This is a `code` block',
        'This text has ~~strikethrough~~ formatting',
    )

    for text in benign_notes:
        result = self.patch(url, {'notes': text}, expected_code=200)

        # The stored note must match the submitted markdown exactly
        self.assertEqual(result.data['notes'], text)
class ContactTest(InvenTreeAPITestCase): class ContactTest(InvenTreeAPITestCase):
"""Tests for the Contact models.""" """Tests for the Contact models."""