2
0
mirror of https://github.com/inventree/InvenTree.git synced 2025-04-28 11:36:44 +00:00

Markdown link fix (#8328) (#8329)

* Improve cleaning of markdown content

* Update unit test with new check

(cherry picked from commit cb0248d15944544d66b5b047b905275d76954a00)

Co-authored-by: Oliver <oliver.henry.walters@gmail.com>
This commit is contained in:
github-actions[bot] 2024-10-22 13:17:04 +11:00 committed by GitHub
parent d485c6796b
commit fab846e3cc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 44 additions and 13 deletions

View File

@ -21,6 +21,7 @@ from django.core.files.storage import Storage, default_storage
from django.http import StreamingHttpResponse from django.http import StreamingHttpResponse
from django.utils.translation import gettext_lazy as _ from django.utils.translation import gettext_lazy as _
import bleach
import pytz import pytz
import regex import regex
from bleach import clean from bleach import clean
@ -816,7 +817,6 @@ def clean_markdown(value: str):
This function will remove javascript and other potentially harmful content from the markdown string. This function will remove javascript and other potentially harmful content from the markdown string.
""" """
import markdown import markdown
from markdownify.templatetags.markdownify import markdownify
try: try:
markdownify_settings = settings.MARKDOWNIFY['default'] markdownify_settings = settings.MARKDOWNIFY['default']
@ -835,8 +835,34 @@ def clean_markdown(value: str):
output_format='html', output_format='html',
) )
# Clean the HTML content (for comparison). Ideally, this should be the same as the original content # Bleach settings
clean_html = markdownify(value) whitelist_tags = markdownify_settings.get(
'WHITELIST_TAGS', bleach.sanitizer.ALLOWED_TAGS
)
whitelist_attrs = markdownify_settings.get(
'WHITELIST_ATTRS', bleach.sanitizer.ALLOWED_ATTRIBUTES
)
whitelist_styles = markdownify_settings.get(
'WHITELIST_STYLES', bleach.css_sanitizer.ALLOWED_CSS_PROPERTIES
)
whitelist_protocols = markdownify_settings.get(
'WHITELIST_PROTOCOLS', bleach.sanitizer.ALLOWED_PROTOCOLS
)
strip = markdownify_settings.get('STRIP', True)
css_sanitizer = bleach.css_sanitizer.CSSSanitizer(
allowed_css_properties=whitelist_styles
)
cleaner = bleach.Cleaner(
tags=whitelist_tags,
attributes=whitelist_attrs,
css_sanitizer=css_sanitizer,
protocols=whitelist_protocols,
strip=strip,
)
# Clean the HTML content (for comparison). This must be the same as the original content
clean_html = cleaner.clean(html)
if html != clean_html: if html != clean_html:
raise ValidationError(_('Data contains prohibited markdown content')) raise ValidationError(_('Data contains prohibited markdown content'))

View File

@ -159,6 +159,7 @@ class CompanyTest(InvenTreeAPITestCase):
def test_company_notes(self): def test_company_notes(self):
"""Test the markdown 'notes' field for the Company model.""" """Test the markdown 'notes' field for the Company model."""
pk = Company.objects.first().pk pk = Company.objects.first().pk
url = reverse('api-company-detail', kwargs={'pk': pk})
# Attempt to inject malicious markdown into the "notes" field # Attempt to inject malicious markdown into the "notes" field
xss = [ xss = [
@ -168,16 +169,23 @@ class CompanyTest(InvenTreeAPITestCase):
] ]
for note in xss: for note in xss:
response = self.patch( response = self.patch(url, {'notes': note}, expected_code=400)
reverse('api-company-detail', kwargs={'pk': pk}),
{'notes': note},
expected_code=400,
)
self.assertIn( self.assertIn(
'Data contains prohibited markdown content', str(response.data) 'Data contains prohibited markdown content', str(response.data)
) )
# Tests with disallowed tags
invalid_tags = [
'<iframe src="javascript:alert(123)"></iframe>',
'<canvas>A disallowed tag!</canvas>',
]
for note in invalid_tags:
response = self.patch(url, {'notes': note}, expected_code=400)
self.assertIn('Remove HTML tags from this value', str(response.data))
# The following markdown is safe, and should be accepted # The following markdown is safe, and should be accepted
good = [ good = [
'This is a **bold** statement', 'This is a **bold** statement',
@ -186,14 +194,11 @@ class CompanyTest(InvenTreeAPITestCase):
'This is an ![image](https://www.google.com/test.jpg)', 'This is an ![image](https://www.google.com/test.jpg)',
'This is a `code` block', 'This is a `code` block',
'This text has ~~strikethrough~~ formatting', 'This text has ~~strikethrough~~ formatting',
'This text has a raw link - https://www.google.com - and should still pass the test',
] ]
for note in good: for note in good:
response = self.patch( response = self.patch(url, {'notes': note}, expected_code=200)
reverse('api-company-detail', kwargs={'pk': pk}),
{'notes': note},
expected_code=200,
)
self.assertEqual(response.data['notes'], note) self.assertEqual(response.data['notes'], note)