2
0
mirror of https://github.com/inventree/InvenTree.git synced 2025-04-28 11:36:44 +00:00

Markdown link fix (#8328) (#8329)

* Improve cleaning of markdown content

* Update unit test with new check

(cherry picked from commit cb0248d15944544d66b5b047b905275d76954a00)

Co-authored-by: Oliver <oliver.henry.walters@gmail.com>
This commit is contained in:
github-actions[bot] 2024-10-22 13:17:04 +11:00 committed by GitHub
parent d485c6796b
commit fab846e3cc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 44 additions and 13 deletions

View File

@ -21,6 +21,7 @@ from django.core.files.storage import Storage, default_storage
from django.http import StreamingHttpResponse
from django.utils.translation import gettext_lazy as _
import bleach
import pytz
import regex
from bleach import clean
@ -816,7 +817,6 @@ def clean_markdown(value: str):
This function will remove javascript and other potentially harmful content from the markdown string.
"""
import markdown
from markdownify.templatetags.markdownify import markdownify
try:
markdownify_settings = settings.MARKDOWNIFY['default']
@ -835,8 +835,34 @@ def clean_markdown(value: str):
output_format='html',
)
# Clean the HTML content (for comparison). Ideally, this should be the same as the original content
clean_html = markdownify(value)
# Bleach settings
whitelist_tags = markdownify_settings.get(
'WHITELIST_TAGS', bleach.sanitizer.ALLOWED_TAGS
)
whitelist_attrs = markdownify_settings.get(
'WHITELIST_ATTRS', bleach.sanitizer.ALLOWED_ATTRIBUTES
)
whitelist_styles = markdownify_settings.get(
'WHITELIST_STYLES', bleach.css_sanitizer.ALLOWED_CSS_PROPERTIES
)
whitelist_protocols = markdownify_settings.get(
'WHITELIST_PROTOCOLS', bleach.sanitizer.ALLOWED_PROTOCOLS
)
strip = markdownify_settings.get('STRIP', True)
css_sanitizer = bleach.css_sanitizer.CSSSanitizer(
allowed_css_properties=whitelist_styles
)
cleaner = bleach.Cleaner(
tags=whitelist_tags,
attributes=whitelist_attrs,
css_sanitizer=css_sanitizer,
protocols=whitelist_protocols,
strip=strip,
)
# Clean the HTML content (for comparison). This must be the same as the original content
clean_html = cleaner.clean(html)
if html != clean_html:
raise ValidationError(_('Data contains prohibited markdown content'))

View File

@ -159,6 +159,7 @@ class CompanyTest(InvenTreeAPITestCase):
def test_company_notes(self):
"""Test the markdown 'notes' field for the Company model."""
pk = Company.objects.first().pk
url = reverse('api-company-detail', kwargs={'pk': pk})
# Attempt to inject malicious markdown into the "notes" field
xss = [
@ -168,16 +169,23 @@ class CompanyTest(InvenTreeAPITestCase):
]
for note in xss:
response = self.patch(
reverse('api-company-detail', kwargs={'pk': pk}),
{'notes': note},
expected_code=400,
)
response = self.patch(url, {'notes': note}, expected_code=400)
self.assertIn(
'Data contains prohibited markdown content', str(response.data)
)
# Tests with disallowed tags
invalid_tags = [
'<iframe src="javascript:alert(123)"></iframe>',
'<canvas>A disallowed tag!</canvas>',
]
for note in invalid_tags:
response = self.patch(url, {'notes': note}, expected_code=400)
self.assertIn('Remove HTML tags from this value', str(response.data))
# The following markdown is safe, and should be accepted
good = [
'This is a **bold** statement',
@ -186,14 +194,11 @@ class CompanyTest(InvenTreeAPITestCase):
'This is an ![image](https://www.google.com/test.jpg)',
'This is a `code` block',
'This text has ~~strikethrough~~ formatting',
'This text has a raw link - https://www.google.com - and should still pass the test',
]
for note in good:
response = self.patch(
reverse('api-company-detail', kwargs={'pk': pk}),
{'notes': note},
expected_code=200,
)
response = self.patch(url, {'notes': note}, expected_code=200)
self.assertEqual(response.data['notes'], note)