2
0
mirror of https://github.com/inventree/InvenTree.git synced 2025-05-01 13:06:45 +00:00

Markdown link fix (#8328)

* Improve cleaning of markdown content

* Update unit test with new check
This commit is contained in:
Oliver 2024-10-22 13:06:43 +11:00 committed by GitHub
parent ddea9fa4b9
commit cb0248d159
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 44 additions and 13 deletions

View File

@@ -21,6 +21,7 @@ from django.http import StreamingHttpResponse
from django.utils import timezone from django.utils import timezone
from django.utils.translation import gettext_lazy as _ from django.utils.translation import gettext_lazy as _
import bleach
import pytz import pytz
import regex import regex
from bleach import clean from bleach import clean
@@ -829,7 +830,6 @@ def clean_markdown(value: str):
This function will remove javascript and other potentially harmful content from the markdown string. This function will remove javascript and other potentially harmful content from the markdown string.
""" """
import markdown import markdown
from markdownify.templatetags.markdownify import markdownify
try: try:
markdownify_settings = settings.MARKDOWNIFY['default'] markdownify_settings = settings.MARKDOWNIFY['default']
@@ -848,8 +848,34 @@ def clean_markdown(value: str):
output_format='html', output_format='html',
) )
# Clean the HTML content (for comparison). Ideally, this should be the same as the original content # Bleach settings
clean_html = markdownify(value) whitelist_tags = markdownify_settings.get(
'WHITELIST_TAGS', bleach.sanitizer.ALLOWED_TAGS
)
whitelist_attrs = markdownify_settings.get(
'WHITELIST_ATTRS', bleach.sanitizer.ALLOWED_ATTRIBUTES
)
whitelist_styles = markdownify_settings.get(
'WHITELIST_STYLES', bleach.css_sanitizer.ALLOWED_CSS_PROPERTIES
)
whitelist_protocols = markdownify_settings.get(
'WHITELIST_PROTOCOLS', bleach.sanitizer.ALLOWED_PROTOCOLS
)
strip = markdownify_settings.get('STRIP', True)
css_sanitizer = bleach.css_sanitizer.CSSSanitizer(
allowed_css_properties=whitelist_styles
)
cleaner = bleach.Cleaner(
tags=whitelist_tags,
attributes=whitelist_attrs,
css_sanitizer=css_sanitizer,
protocols=whitelist_protocols,
strip=strip,
)
# Clean the HTML content (for comparison). This must be the same as the original content
clean_html = cleaner.clean(html)
if html != clean_html: if html != clean_html:
raise ValidationError(_('Data contains prohibited markdown content')) raise ValidationError(_('Data contains prohibited markdown content'))

View File

@@ -157,6 +157,7 @@ class CompanyTest(InvenTreeAPITestCase):
def test_company_notes(self): def test_company_notes(self):
"""Test the markdown 'notes' field for the Company model.""" """Test the markdown 'notes' field for the Company model."""
pk = Company.objects.first().pk pk = Company.objects.first().pk
url = reverse('api-company-detail', kwargs={'pk': pk})
# Attempt to inject malicious markdown into the "notes" field # Attempt to inject malicious markdown into the "notes" field
xss = [ xss = [
@@ -166,16 +167,23 @@ class CompanyTest(InvenTreeAPITestCase):
] ]
for note in xss: for note in xss:
response = self.patch( response = self.patch(url, {'notes': note}, expected_code=400)
reverse('api-company-detail', kwargs={'pk': pk}),
{'notes': note},
expected_code=400,
)
self.assertIn( self.assertIn(
'Data contains prohibited markdown content', str(response.data) 'Data contains prohibited markdown content', str(response.data)
) )
# Tests with disallowed tags
invalid_tags = [
'<iframe src="javascript:alert(123)"></iframe>',
'<canvas>A disallowed tag!</canvas>',
]
for note in invalid_tags:
response = self.patch(url, {'notes': note}, expected_code=400)
self.assertIn('Remove HTML tags from this value', str(response.data))
# The following markdown is safe, and should be accepted # The following markdown is safe, and should be accepted
good = [ good = [
'This is a **bold** statement', 'This is a **bold** statement',
@@ -184,14 +192,11 @@ class CompanyTest(InvenTreeAPITestCase):
'This is an ![image](https://www.google.com/test.jpg)', 'This is an ![image](https://www.google.com/test.jpg)',
'This is a `code` block', 'This is a `code` block',
'This text has ~~strikethrough~~ formatting', 'This text has ~~strikethrough~~ formatting',
'This text has a raw link - https://www.google.com - and should still pass the test',
] ]
for note in good: for note in good:
response = self.patch( response = self.patch(url, {'notes': note}, expected_code=200)
reverse('api-company-detail', kwargs={'pk': pk}),
{'notes': note},
expected_code=200,
)
self.assertEqual(response.data['notes'], note) self.assertEqual(response.data['notes'], note)