mirror of
				https://github.com/inventree/InvenTree.git
				synced 2025-10-31 13:15:43 +00:00 
			
		
		
		
	* Improve cleaning of markdown content
* Update unit test with new check
(cherry picked from commit cb0248d159)
Co-authored-by: Oliver <oliver.henry.walters@gmail.com>
			
			
This commit is contained in:
		
				
					committed by
					
						 GitHub
						GitHub
					
				
			
			
				
	
			
			
			
						parent
						
							d485c6796b
						
					
				
				
					commit
					fab846e3cc
				
			| @@ -21,6 +21,7 @@ from django.core.files.storage import Storage, default_storage | |||||||
| from django.http import StreamingHttpResponse | from django.http import StreamingHttpResponse | ||||||
| from django.utils.translation import gettext_lazy as _ | from django.utils.translation import gettext_lazy as _ | ||||||
|  |  | ||||||
|  | import bleach | ||||||
| import pytz | import pytz | ||||||
| import regex | import regex | ||||||
| from bleach import clean | from bleach import clean | ||||||
| @@ -816,7 +817,6 @@ def clean_markdown(value: str): | |||||||
|     This function will remove javascript and other potentially harmful content from the markdown string. |     This function will remove javascript and other potentially harmful content from the markdown string. | ||||||
|     """ |     """ | ||||||
|     import markdown |     import markdown | ||||||
|     from markdownify.templatetags.markdownify import markdownify |  | ||||||
|  |  | ||||||
|     try: |     try: | ||||||
|         markdownify_settings = settings.MARKDOWNIFY['default'] |         markdownify_settings = settings.MARKDOWNIFY['default'] | ||||||
| @@ -835,8 +835,34 @@ def clean_markdown(value: str): | |||||||
|         output_format='html', |         output_format='html', | ||||||
|     ) |     ) | ||||||
|  |  | ||||||
|     # Clean the HTML content (for comparison). Ideally, this should be the same as the original content |     # Bleach settings | ||||||
|     clean_html = markdownify(value) |     whitelist_tags = markdownify_settings.get( | ||||||
|  |         'WHITELIST_TAGS', bleach.sanitizer.ALLOWED_TAGS | ||||||
|  |     ) | ||||||
|  |     whitelist_attrs = markdownify_settings.get( | ||||||
|  |         'WHITELIST_ATTRS', bleach.sanitizer.ALLOWED_ATTRIBUTES | ||||||
|  |     ) | ||||||
|  |     whitelist_styles = markdownify_settings.get( | ||||||
|  |         'WHITELIST_STYLES', bleach.css_sanitizer.ALLOWED_CSS_PROPERTIES | ||||||
|  |     ) | ||||||
|  |     whitelist_protocols = markdownify_settings.get( | ||||||
|  |         'WHITELIST_PROTOCOLS', bleach.sanitizer.ALLOWED_PROTOCOLS | ||||||
|  |     ) | ||||||
|  |     strip = markdownify_settings.get('STRIP', True) | ||||||
|  |  | ||||||
|  |     css_sanitizer = bleach.css_sanitizer.CSSSanitizer( | ||||||
|  |         allowed_css_properties=whitelist_styles | ||||||
|  |     ) | ||||||
|  |     cleaner = bleach.Cleaner( | ||||||
|  |         tags=whitelist_tags, | ||||||
|  |         attributes=whitelist_attrs, | ||||||
|  |         css_sanitizer=css_sanitizer, | ||||||
|  |         protocols=whitelist_protocols, | ||||||
|  |         strip=strip, | ||||||
|  |     ) | ||||||
|  |  | ||||||
|  |     # Clean the HTML content (for comparison). This must be the same as the original content | ||||||
|  |     clean_html = cleaner.clean(html) | ||||||
|  |  | ||||||
|     if html != clean_html: |     if html != clean_html: | ||||||
|         raise ValidationError(_('Data contains prohibited markdown content')) |         raise ValidationError(_('Data contains prohibited markdown content')) | ||||||
|   | |||||||
| @@ -159,6 +159,7 @@ class CompanyTest(InvenTreeAPITestCase): | |||||||
|     def test_company_notes(self): |     def test_company_notes(self): | ||||||
|         """Test the markdown 'notes' field for the Company model.""" |         """Test the markdown 'notes' field for the Company model.""" | ||||||
|         pk = Company.objects.first().pk |         pk = Company.objects.first().pk | ||||||
|  |         url = reverse('api-company-detail', kwargs={'pk': pk}) | ||||||
|  |  | ||||||
|         # Attempt to inject malicious markdown into the "notes" field |         # Attempt to inject malicious markdown into the "notes" field | ||||||
|         xss = [ |         xss = [ | ||||||
| @@ -168,16 +169,23 @@ class CompanyTest(InvenTreeAPITestCase): | |||||||
|         ] |         ] | ||||||
|  |  | ||||||
|         for note in xss: |         for note in xss: | ||||||
|             response = self.patch( |             response = self.patch(url, {'notes': note}, expected_code=400) | ||||||
|                 reverse('api-company-detail', kwargs={'pk': pk}), |  | ||||||
|                 {'notes': note}, |  | ||||||
|                 expected_code=400, |  | ||||||
|             ) |  | ||||||
|  |  | ||||||
|             self.assertIn( |             self.assertIn( | ||||||
|                 'Data contains prohibited markdown content', str(response.data) |                 'Data contains prohibited markdown content', str(response.data) | ||||||
|             ) |             ) | ||||||
|  |  | ||||||
|  |         # Tests with disallowed tags | ||||||
|  |         invalid_tags = [ | ||||||
|  |             '<iframe src="javascript:alert(123)"></iframe>', | ||||||
|  |             '<canvas>A disallowed tag!</canvas>', | ||||||
|  |         ] | ||||||
|  |  | ||||||
|  |         for note in invalid_tags: | ||||||
|  |             response = self.patch(url, {'notes': note}, expected_code=400) | ||||||
|  |  | ||||||
|  |             self.assertIn('Remove HTML tags from this value', str(response.data)) | ||||||
|  |  | ||||||
|         # The following markdown is safe, and should be accepted |         # The following markdown is safe, and should be accepted | ||||||
|         good = [ |         good = [ | ||||||
|             'This is a **bold** statement', |             'This is a **bold** statement', | ||||||
| @@ -186,14 +194,11 @@ class CompanyTest(InvenTreeAPITestCase): | |||||||
|             'This is an ', |             'This is an ', | ||||||
|             'This is a `code` block', |             'This is a `code` block', | ||||||
|             'This text has ~~strikethrough~~ formatting', |             'This text has ~~strikethrough~~ formatting', | ||||||
|  |             'This text has a raw link - https://www.google.com - and should still pass the test', | ||||||
|         ] |         ] | ||||||
|  |  | ||||||
|         for note in good: |         for note in good: | ||||||
|             response = self.patch( |             response = self.patch(url, {'notes': note}, expected_code=200) | ||||||
|                 reverse('api-company-detail', kwargs={'pk': pk}), |  | ||||||
|                 {'notes': note}, |  | ||||||
|                 expected_code=200, |  | ||||||
|             ) |  | ||||||
|  |  | ||||||
|             self.assertEqual(response.data['notes'], note) |             self.assertEqual(response.data['notes'], note) | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user