mirror of
https://github.com/inventree/InvenTree.git
synced 2025-04-28 11:36:44 +00:00
* Improve cleaning of markdown content * Update unit test with new check (cherry picked from commit cb0248d15944544d66b5b047b905275d76954a00) Co-authored-by: Oliver <oliver.henry.walters@gmail.com>
This commit is contained in:
parent
d485c6796b
commit
fab846e3cc
@ -21,6 +21,7 @@ from django.core.files.storage import Storage, default_storage
|
|||||||
from django.http import StreamingHttpResponse
|
from django.http import StreamingHttpResponse
|
||||||
from django.utils.translation import gettext_lazy as _
|
from django.utils.translation import gettext_lazy as _
|
||||||
|
|
||||||
|
import bleach
|
||||||
import pytz
|
import pytz
|
||||||
import regex
|
import regex
|
||||||
from bleach import clean
|
from bleach import clean
|
||||||
@ -816,7 +817,6 @@ def clean_markdown(value: str):
|
|||||||
This function will remove javascript and other potentially harmful content from the markdown string.
|
This function will remove javascript and other potentially harmful content from the markdown string.
|
||||||
"""
|
"""
|
||||||
import markdown
|
import markdown
|
||||||
from markdownify.templatetags.markdownify import markdownify
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
markdownify_settings = settings.MARKDOWNIFY['default']
|
markdownify_settings = settings.MARKDOWNIFY['default']
|
||||||
@ -835,8 +835,34 @@ def clean_markdown(value: str):
|
|||||||
output_format='html',
|
output_format='html',
|
||||||
)
|
)
|
||||||
|
|
||||||
# Clean the HTML content (for comparison). Ideally, this should be the same as the original content
|
# Bleach settings
|
||||||
clean_html = markdownify(value)
|
whitelist_tags = markdownify_settings.get(
|
||||||
|
'WHITELIST_TAGS', bleach.sanitizer.ALLOWED_TAGS
|
||||||
|
)
|
||||||
|
whitelist_attrs = markdownify_settings.get(
|
||||||
|
'WHITELIST_ATTRS', bleach.sanitizer.ALLOWED_ATTRIBUTES
|
||||||
|
)
|
||||||
|
whitelist_styles = markdownify_settings.get(
|
||||||
|
'WHITELIST_STYLES', bleach.css_sanitizer.ALLOWED_CSS_PROPERTIES
|
||||||
|
)
|
||||||
|
whitelist_protocols = markdownify_settings.get(
|
||||||
|
'WHITELIST_PROTOCOLS', bleach.sanitizer.ALLOWED_PROTOCOLS
|
||||||
|
)
|
||||||
|
strip = markdownify_settings.get('STRIP', True)
|
||||||
|
|
||||||
|
css_sanitizer = bleach.css_sanitizer.CSSSanitizer(
|
||||||
|
allowed_css_properties=whitelist_styles
|
||||||
|
)
|
||||||
|
cleaner = bleach.Cleaner(
|
||||||
|
tags=whitelist_tags,
|
||||||
|
attributes=whitelist_attrs,
|
||||||
|
css_sanitizer=css_sanitizer,
|
||||||
|
protocols=whitelist_protocols,
|
||||||
|
strip=strip,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Clean the HTML content (for comparison). This must be the same as the original content
|
||||||
|
clean_html = cleaner.clean(html)
|
||||||
|
|
||||||
if html != clean_html:
|
if html != clean_html:
|
||||||
raise ValidationError(_('Data contains prohibited markdown content'))
|
raise ValidationError(_('Data contains prohibited markdown content'))
|
||||||
|
@ -159,6 +159,7 @@ class CompanyTest(InvenTreeAPITestCase):
|
|||||||
def test_company_notes(self):
|
def test_company_notes(self):
|
||||||
"""Test the markdown 'notes' field for the Company model."""
|
"""Test the markdown 'notes' field for the Company model."""
|
||||||
pk = Company.objects.first().pk
|
pk = Company.objects.first().pk
|
||||||
|
url = reverse('api-company-detail', kwargs={'pk': pk})
|
||||||
|
|
||||||
# Attempt to inject malicious markdown into the "notes" field
|
# Attempt to inject malicious markdown into the "notes" field
|
||||||
xss = [
|
xss = [
|
||||||
@ -168,16 +169,23 @@ class CompanyTest(InvenTreeAPITestCase):
|
|||||||
]
|
]
|
||||||
|
|
||||||
for note in xss:
|
for note in xss:
|
||||||
response = self.patch(
|
response = self.patch(url, {'notes': note}, expected_code=400)
|
||||||
reverse('api-company-detail', kwargs={'pk': pk}),
|
|
||||||
{'notes': note},
|
|
||||||
expected_code=400,
|
|
||||||
)
|
|
||||||
|
|
||||||
self.assertIn(
|
self.assertIn(
|
||||||
'Data contains prohibited markdown content', str(response.data)
|
'Data contains prohibited markdown content', str(response.data)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Tests with disallowed tags
|
||||||
|
invalid_tags = [
|
||||||
|
'<iframe src="javascript:alert(123)"></iframe>',
|
||||||
|
'<canvas>A disallowed tag!</canvas>',
|
||||||
|
]
|
||||||
|
|
||||||
|
for note in invalid_tags:
|
||||||
|
response = self.patch(url, {'notes': note}, expected_code=400)
|
||||||
|
|
||||||
|
self.assertIn('Remove HTML tags from this value', str(response.data))
|
||||||
|
|
||||||
# The following markdown is safe, and should be accepted
|
# The following markdown is safe, and should be accepted
|
||||||
good = [
|
good = [
|
||||||
'This is a **bold** statement',
|
'This is a **bold** statement',
|
||||||
@ -186,14 +194,11 @@ class CompanyTest(InvenTreeAPITestCase):
|
|||||||
'This is an ',
|
'This is an ',
|
||||||
'This is a `code` block',
|
'This is a `code` block',
|
||||||
'This text has ~~strikethrough~~ formatting',
|
'This text has ~~strikethrough~~ formatting',
|
||||||
|
'This text has a raw link - https://www.google.com - and should still pass the test',
|
||||||
]
|
]
|
||||||
|
|
||||||
for note in good:
|
for note in good:
|
||||||
response = self.patch(
|
response = self.patch(url, {'notes': note}, expected_code=200)
|
||||||
reverse('api-company-detail', kwargs={'pk': pk}),
|
|
||||||
{'notes': note},
|
|
||||||
expected_code=200,
|
|
||||||
)
|
|
||||||
|
|
||||||
self.assertEqual(response.data['notes'], note)
|
self.assertEqual(response.data['notes'], note)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user