2
0
mirror of https://github.com/inventree/InvenTree.git synced 2025-04-28 11:36:44 +00:00

Remove regex dependency (#8547)

* Remove regex from requirements file

* Fix order of operations

- Remove hidden chars before removing HTML tags

* Remove requirement for regex package

* Additional unit tests

* Remove debug msg

* Fix for newline removal

* Fix variable shadowing

* Defer import of PIL.Image

This commit is contained in:
Oliver 2024-11-26 20:29:51 +11:00 committed by GitHub
parent 9d2ca5e617
commit 368f3b7bd4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 31 additions and 122 deletions

View File

@ -23,10 +23,8 @@ from django.utils.translation import gettext_lazy as _
import bleach import bleach
import pytz import pytz
import regex
from bleach import clean from bleach import clean
from djmoney.money import Money from djmoney.money import Money
from PIL import Image
from common.currency import currency_code_default from common.currency import currency_code_default
@ -140,6 +138,8 @@ def getStaticUrl(filename):
def TestIfImage(img): def TestIfImage(img):
"""Test if an image file is indeed an image.""" """Test if an image file is indeed an image."""
from PIL import Image
try: try:
Image.open(img).verify() Image.open(img).verify()
return True return True
@ -781,6 +781,8 @@ def strip_html_tags(value: str, raise_error=True, field_name=None):
If raise_error is True, a ValidationError will be thrown if HTML tags are detected If raise_error is True, a ValidationError will be thrown if HTML tags are detected
""" """
value = str(value).strip()
cleaned = clean(value, strip=True, tags=[], attributes=[]) cleaned = clean(value, strip=True, tags=[], attributes=[])
# Add escaped characters back in # Add escaped characters back in
@ -792,39 +794,32 @@ def strip_html_tags(value: str, raise_error=True, field_name=None):
# If the length changed, it means that HTML tags were removed! # If the length changed, it means that HTML tags were removed!
if len(cleaned) != len(value) and raise_error: if len(cleaned) != len(value) and raise_error:
field = field_name or 'non_field_errors' field = field_name or 'non_field_errors'
raise ValidationError({field: [_('Remove HTML tags from this value')]}) raise ValidationError({field: [_('Remove HTML tags from this value')]})
return cleaned return cleaned
def remove_non_printable_characters( def remove_non_printable_characters(value: str, remove_newline=True) -> str:
value: str, remove_newline=True, remove_ascii=True, remove_unicode=True
):
"""Remove non-printable / control characters from the provided string.""" """Remove non-printable / control characters from the provided string."""
cleaned = value cleaned = value
if remove_ascii: # Remove ASCII control characters
# Remove ASCII control characters # Note that we do not sub out 0x0A (\n) here, it is done separately below
# Note that we do not sub out 0x0A (\n) here, it is done separately below regex = re.compile(r'[\u0000-\u0009\u000B-\u001F\u007F-\u009F]')
cleaned = regex.sub('[\x00-\x09]+', '', cleaned) cleaned = regex.sub('', cleaned)
cleaned = regex.sub('[\x0b-\x1f\x7f]+', '', cleaned)
# Remove Unicode control characters
regex = re.compile(r'[\u200E\u200F\u202A-\u202E]')
cleaned = regex.sub('', cleaned)
if remove_newline: if remove_newline:
cleaned = regex.sub('[\x0a]+', '', cleaned) regex = re.compile(r'[\x0A]')
cleaned = regex.sub('', cleaned)
if remove_unicode:
# Remove Unicode control characters
if remove_newline:
cleaned = regex.sub(r'[^\P{C}]+', '', cleaned)
else:
# Use 'negative-lookahead' to exclude newline character
cleaned = regex.sub('(?![\x0a])[^\\P{C}]+', '', cleaned)
return cleaned return cleaned
def clean_markdown(value: str): def clean_markdown(value: str) -> str:
"""Clean a markdown string. """Clean a markdown string.
This function will remove javascript and other potentially harmful content from the markdown string. This function will remove javascript and other potentially harmful content from the markdown string.
@ -883,7 +878,7 @@ def clean_markdown(value: str):
return value return value
def hash_barcode(barcode_data): def hash_barcode(barcode_data: str) -> str:
"""Calculate a 'unique' hash for a barcode string. """Calculate a 'unique' hash for a barcode string.
This hash is used for comparison / lookup. This hash is used for comparison / lookup.

View File

@ -57,7 +57,7 @@ class CleanMixin:
Ref: https://github.com/mozilla/bleach/issues/192 Ref: https://github.com/mozilla/bleach/issues/192
""" """
cleaned = strip_html_tags(data, field_name=field) cleaned = data
# By default, newline characters are removed # By default, newline characters are removed
remove_newline = True remove_newline = True
@ -66,13 +66,13 @@ class CleanMixin:
try: try:
if hasattr(self, 'serializer_class'): if hasattr(self, 'serializer_class'):
model = self.serializer_class.Meta.model model = self.serializer_class.Meta.model
field = model._meta.get_field(field) field_base = model._meta.get_field(field)
# The following field types allow newline characters # The following field types allow newline characters
allow_newline = [(InvenTreeNotesField, True)] allow_newline = [(InvenTreeNotesField, True)]
for field_type in allow_newline: for field_type in allow_newline:
if issubclass(type(field), field_type[0]): if issubclass(type(field_base), field_type[0]):
remove_newline = False remove_newline = False
is_markdown = field_type[1] is_markdown = field_type[1]
break break
@ -86,6 +86,8 @@ class CleanMixin:
cleaned, remove_newline=remove_newline cleaned, remove_newline=remove_newline
) )
cleaned = strip_html_tags(cleaned, field_name=field)
if is_markdown: if is_markdown:
cleaned = clean_markdown(cleaned) cleaned = clean_markdown(cleaned)

View File

@ -281,6 +281,15 @@ class PartCategoryAPITest(InvenTreeAPITestCase):
'A\t part\t category\t', 'A\t part\t category\t',
'A pa\rrt cat\r\r\regory', 'A pa\rrt cat\r\r\regory',
'A part\u200e catego\u200fry\u202e', 'A part\u200e catego\u200fry\u202e',
'A\u0000 part\u0000 category',
'A part\u0007 category',
'A\u001f part category',
'A part\u007f category',
'\u0001A part category',
'A part\u0085 category',
'A part category\u200e',
'A part cat\u200fegory',
'A\u0006 part\u007f categ\nory\r',
] ]
for val in values: for val in values:

View File

@ -48,7 +48,6 @@ python-dotenv # Environment variable management
pyyaml>=6.0.1 # YAML parsing pyyaml>=6.0.1 # YAML parsing
qrcode[pil] # QR code generator qrcode[pil] # QR code generator
rapidfuzz # Fuzzy string matching rapidfuzz # Fuzzy string matching
regex # Advanced regular expressions
sentry-sdk # Error reporting (optional) sentry-sdk # Error reporting (optional)
setuptools # Standard dependency setuptools # Standard dependency
tablib[xls,xlsx,yaml] # Support for XLS and XLSX formats tablib[xls,xlsx,yaml] # Support for XLS and XLSX formats

View File

@ -1460,102 +1460,6 @@ referencing==0.35.1 \
# via # via
# jsonschema # jsonschema
# jsonschema-specifications # jsonschema-specifications
regex==2024.11.6 \
--hash=sha256:02a02d2bb04fec86ad61f3ea7f49c015a0681bf76abb9857f945d26159d2968c \
--hash=sha256:02e28184be537f0e75c1f9b2f8847dc51e08e6e171c6bde130b2687e0c33cf60 \
--hash=sha256:040df6fe1a5504eb0f04f048e6d09cd7c7110fef851d7c567a6b6e09942feb7d \
--hash=sha256:068376da5a7e4da51968ce4c122a7cd31afaaec4fccc7856c92f63876e57b51d \
--hash=sha256:06eb1be98df10e81ebaded73fcd51989dcf534e3c753466e4b60c4697a003b67 \
--hash=sha256:072623554418a9911446278f16ecb398fb3b540147a7828c06e2011fa531e773 \
--hash=sha256:086a27a0b4ca227941700e0b31425e7a28ef1ae8e5e05a33826e17e47fbfdba0 \
--hash=sha256:08986dce1339bc932923e7d1232ce9881499a0e02925f7402fb7c982515419ef \
--hash=sha256:0a86e7eeca091c09e021db8eb72d54751e527fa47b8d5787caf96d9831bd02ad \
--hash=sha256:0c32f75920cf99fe6b6c539c399a4a128452eaf1af27f39bce8909c9a3fd8cbe \
--hash=sha256:0d7f453dca13f40a02b79636a339c5b62b670141e63efd511d3f8f73fba162b3 \
--hash=sha256:1062b39a0a2b75a9c694f7a08e7183a80c63c0d62b301418ffd9c35f55aaa114 \
--hash=sha256:13291b39131e2d002a7940fb176e120bec5145f3aeb7621be6534e46251912c4 \
--hash=sha256:149f5008d286636e48cd0b1dd65018548944e495b0265b45e1bffecce1ef7f39 \
--hash=sha256:164d8b7b3b4bcb2068b97428060b2a53be050085ef94eca7f240e7947f1b080e \
--hash=sha256:167ed4852351d8a750da48712c3930b031f6efdaa0f22fa1933716bfcd6bf4a3 \
--hash=sha256:1c4de13f06a0d54fa0d5ab1b7138bfa0d883220965a29616e3ea61b35d5f5fc7 \
--hash=sha256:202eb32e89f60fc147a41e55cb086db2a3f8cb82f9a9a88440dcfc5d37faae8d \
--hash=sha256:220902c3c5cc6af55d4fe19ead504de80eb91f786dc102fbd74894b1551f095e \
--hash=sha256:2b3361af3198667e99927da8b84c1b010752fa4b1115ee30beaa332cabc3ef1a \
--hash=sha256:2c89a8cc122b25ce6945f0423dc1352cb9593c68abd19223eebbd4e56612c5b7 \
--hash=sha256:2d548dafee61f06ebdb584080621f3e0c23fff312f0de1afc776e2a2ba99a74f \
--hash=sha256:2e34b51b650b23ed3354b5a07aab37034d9f923db2a40519139af34f485f77d0 \
--hash=sha256:32f9a4c643baad4efa81d549c2aadefaeba12249b2adc5af541759237eee1c54 \
--hash=sha256:3a51ccc315653ba012774efca4f23d1d2a8a8f278a6072e29c7147eee7da446b \
--hash=sha256:3cde6e9f2580eb1665965ce9bf17ff4952f34f5b126beb509fee8f4e994f143c \
--hash=sha256:40291b1b89ca6ad8d3f2b82782cc33807f1406cf68c8d440861da6304d8ffbbd \
--hash=sha256:41758407fc32d5c3c5de163888068cfee69cb4c2be844e7ac517a52770f9af57 \
--hash=sha256:4181b814e56078e9b00427ca358ec44333765f5ca1b45597ec7446d3a1ef6e34 \
--hash=sha256:4f51f88c126370dcec4908576c5a627220da6c09d0bff31cfa89f2523843316d \
--hash=sha256:50153825ee016b91549962f970d6a4442fa106832e14c918acd1c8e479916c4f \
--hash=sha256:5056b185ca113c88e18223183aa1a50e66507769c9640a6ff75859619d73957b \
--hash=sha256:5071b2093e793357c9d8b2929dfc13ac5f0a6c650559503bb81189d0a3814519 \
--hash=sha256:525eab0b789891ac3be914d36893bdf972d483fe66551f79d3e27146191a37d4 \
--hash=sha256:52fb28f528778f184f870b7cf8f225f5eef0a8f6e3778529bdd40c7b3920796a \
--hash=sha256:5478c6962ad548b54a591778e93cd7c456a7a29f8eca9c49e4f9a806dcc5d638 \
--hash=sha256:5670bce7b200273eee1840ef307bfa07cda90b38ae56e9a6ebcc9f50da9c469b \
--hash=sha256:5704e174f8ccab2026bd2f1ab6c510345ae8eac818b613d7d73e785f1310f839 \
--hash=sha256:59dfe1ed21aea057a65c6b586afd2a945de04fc7db3de0a6e3ed5397ad491b07 \
--hash=sha256:5e7e351589da0850c125f1600a4c4ba3c722efefe16b297de54300f08d734fbf \
--hash=sha256:63b13cfd72e9601125027202cad74995ab26921d8cd935c25f09c630436348ff \
--hash=sha256:658f90550f38270639e83ce492f27d2c8d2cd63805c65a13a14d36ca126753f0 \
--hash=sha256:684d7a212682996d21ca12ef3c17353c021fe9de6049e19ac8481ec35574a70f \
--hash=sha256:69ab78f848845569401469da20df3e081e6b5a11cb086de3eed1d48f5ed57c95 \
--hash=sha256:6f44ec28b1f858c98d3036ad5d7d0bfc568bdd7a74f9c24e25f41ef1ebfd81a4 \
--hash=sha256:70b7fa6606c2881c1db9479b0eaa11ed5dfa11c8d60a474ff0e095099f39d98e \
--hash=sha256:764e71f22ab3b305e7f4c21f1a97e1526a25ebdd22513e251cf376760213da13 \
--hash=sha256:7ab159b063c52a0333c884e4679f8d7a85112ee3078fe3d9004b2dd875585519 \
--hash=sha256:805e6b60c54bf766b251e94526ebad60b7de0c70f70a4e6210ee2891acb70bf2 \
--hash=sha256:8447d2d39b5abe381419319f942de20b7ecd60ce86f16a23b0698f22e1b70008 \
--hash=sha256:86fddba590aad9208e2fa8b43b4c098bb0ec74f15718bb6a704e3c63e2cef3e9 \
--hash=sha256:89d75e7293d2b3e674db7d4d9b1bee7f8f3d1609428e293771d1a962617150cc \
--hash=sha256:93c0b12d3d3bc25af4ebbf38f9ee780a487e8bf6954c115b9f015822d3bb8e48 \
--hash=sha256:94d87b689cdd831934fa3ce16cc15cd65748e6d689f5d2b8f4f4df2065c9fa20 \
--hash=sha256:9714398225f299aa85267fd222f7142fcb5c769e73d7733344efc46f2ef5cf89 \
--hash=sha256:982e6d21414e78e1f51cf595d7f321dcd14de1f2881c5dc6a6e23bbbbd68435e \
--hash=sha256:997d6a487ff00807ba810e0f8332c18b4eb8d29463cfb7c820dc4b6e7562d0cf \
--hash=sha256:a03e02f48cd1abbd9f3b7e3586d97c8f7a9721c436f51a5245b3b9483044480b \
--hash=sha256:a36fdf2af13c2b14738f6e973aba563623cb77d753bbbd8d414d18bfaa3105dd \
--hash=sha256:a6ba92c0bcdf96cbf43a12c717eae4bc98325ca3730f6b130ffa2e3c3c723d84 \
--hash=sha256:a7c2155f790e2fb448faed6dd241386719802296ec588a8b9051c1f5c481bc29 \
--hash=sha256:a93c194e2df18f7d264092dc8539b8ffb86b45b899ab976aa15d48214138e81b \
--hash=sha256:abfa5080c374a76a251ba60683242bc17eeb2c9818d0d30117b4486be10c59d3 \
--hash=sha256:ac10f2c4184420d881a3475fb2c6f4d95d53a8d50209a2500723d831036f7c45 \
--hash=sha256:ad182d02e40de7459b73155deb8996bbd8e96852267879396fb274e8700190e3 \
--hash=sha256:b2837718570f95dd41675328e111345f9b7095d821bac435aac173ac80b19983 \
--hash=sha256:b489578720afb782f6ccf2840920f3a32e31ba28a4b162e13900c3e6bd3f930e \
--hash=sha256:b583904576650166b3d920d2bcce13971f6f9e9a396c673187f49811b2769dc7 \
--hash=sha256:b85c2530be953a890eaffde05485238f07029600e8f098cdf1848d414a8b45e4 \
--hash=sha256:b97c1e0bd37c5cd7902e65f410779d39eeda155800b65fc4d04cc432efa9bc6e \
--hash=sha256:ba9b72e5643641b7d41fa1f6d5abda2c9a263ae835b917348fc3c928182ad467 \
--hash=sha256:bb26437975da7dc36b7efad18aa9dd4ea569d2357ae6b783bf1118dabd9ea577 \
--hash=sha256:bb8f74f2f10dbf13a0be8de623ba4f9491faf58c24064f32b65679b021ed0001 \
--hash=sha256:bde01f35767c4a7899b7eb6e823b125a64de314a8ee9791367c9a34d56af18d0 \
--hash=sha256:bec9931dfb61ddd8ef2ebc05646293812cb6b16b60cf7c9511a832b6f1854b55 \
--hash=sha256:c36f9b6f5f8649bb251a5f3f66564438977b7ef8386a52460ae77e6070d309d9 \
--hash=sha256:cdf58d0e516ee426a48f7b2c03a332a4114420716d55769ff7108c37a09951bf \
--hash=sha256:d1cee317bfc014c2419a76bcc87f071405e3966da434e03e13beb45f8aced1a6 \
--hash=sha256:d22326fcdef5e08c154280b71163ced384b428343ae16a5ab2b3354aed12436e \
--hash=sha256:d3660c82f209655a06b587d55e723f0b813d3a7db2e32e5e7dc64ac2a9e86fde \
--hash=sha256:da8f5fc57d1933de22a9e23eec290a0d8a5927a5370d24bda9a6abe50683fe62 \
--hash=sha256:df951c5f4a1b1910f1a99ff42c473ff60f8225baa1cdd3539fe2819d9543e9df \
--hash=sha256:e5364a4502efca094731680e80009632ad6624084aff9a23ce8c8c6820de3e51 \
--hash=sha256:ea1bfda2f7162605f6e8178223576856b3d791109f15ea99a9f95c16a7636fb5 \
--hash=sha256:f02f93b92358ee3f78660e43b4b0091229260c5d5c408d17d60bf26b6c900e86 \
--hash=sha256:f056bf21105c2515c32372bbc057f43eb02aae2fda61052e2f7622c801f0b4e2 \
--hash=sha256:f1ac758ef6aebfc8943560194e9fd0fa18bcb34d89fd8bd2af18183afd8da3a2 \
--hash=sha256:f2a19f302cd1ce5dd01a9099aaa19cae6173306d1302a43b627f62e21cf18ac0 \
--hash=sha256:f654882311409afb1d780b940234208a252322c24a93b442ca714d119e68086c \
--hash=sha256:f65557897fc977a44ab205ea871b690adaef6b9da6afda4790a2484b04293a5f \
--hash=sha256:f9d1e379028e0fc2ae3654bac3cbbef81bf3fd571272a42d56c24007979bafb6 \
--hash=sha256:fdabbfc59f2c6edba2a6622c647b716e34e8e3867e0ab975412c5c2f79b82da2 \
--hash=sha256:fdd6028445d2460f33136c55eeb1f601ab06d74cb3347132e1c24250187500d9 \
--hash=sha256:ff590880083d60acc0433f9c3f713c51f7ac6ebb9adf889c79a261ecf541aa91
# via -r src/backend/requirements.in
requests==2.32.3 \ requests==2.32.3 \
--hash=sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760 \ --hash=sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760 \
--hash=sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6 --hash=sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6