2
0
mirror of https://github.com/inventree/InvenTree.git synced 2025-04-28 11:36:44 +00:00

Remove regex dependency (#8547)

* Remove regex from requirements file

* Fix order of operations

- Remove hidden chars before removing HTML tags

* Remove requirement for regex package

* Additional unit tests

* Remove debug msg

* Fix for newline removal

* Fix variable shadowing

* Defer import of PIL.Image
This commit is contained in:
Oliver 2024-11-26 20:29:51 +11:00 committed by GitHub
parent 9d2ca5e617
commit 368f3b7bd4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 31 additions and 122 deletions

View File

@ -23,10 +23,8 @@ from django.utils.translation import gettext_lazy as _
import bleach
import pytz
import regex
from bleach import clean
from djmoney.money import Money
from PIL import Image
from common.currency import currency_code_default
@ -140,6 +138,8 @@ def getStaticUrl(filename):
def TestIfImage(img):
"""Test if an image file is indeed an image."""
from PIL import Image
try:
Image.open(img).verify()
return True
@ -781,6 +781,8 @@ def strip_html_tags(value: str, raise_error=True, field_name=None):
If raise_error is True, a ValidationError will be thrown if HTML tags are detected
"""
value = str(value).strip()
cleaned = clean(value, strip=True, tags=[], attributes=[])
# Add escaped characters back in
@ -792,39 +794,32 @@ def strip_html_tags(value: str, raise_error=True, field_name=None):
# If the length changed, it means that HTML tags were removed!
if len(cleaned) != len(value) and raise_error:
field = field_name or 'non_field_errors'
raise ValidationError({field: [_('Remove HTML tags from this value')]})
return cleaned
def remove_non_printable_characters(
value: str, remove_newline=True, remove_ascii=True, remove_unicode=True
):
def remove_non_printable_characters(value: str, remove_newline=True) -> str:
"""Remove non-printable / control characters from the provided string."""
cleaned = value
if remove_ascii:
# Remove ASCII control characters
# Note that we do not sub out 0x0A (\n) here, it is done separately below
cleaned = regex.sub('[\x00-\x09]+', '', cleaned)
cleaned = regex.sub('[\x0b-\x1f\x7f]+', '', cleaned)
regex = re.compile(r'[\u0000-\u0009\u000B-\u001F\u007F-\u009F]')
cleaned = regex.sub('', cleaned)
if remove_newline:
cleaned = regex.sub('[\x0a]+', '', cleaned)
if remove_unicode:
# Remove Unicode control characters
regex = re.compile(r'[\u200E\u200F\u202A-\u202E]')
cleaned = regex.sub('', cleaned)
if remove_newline:
cleaned = regex.sub(r'[^\P{C}]+', '', cleaned)
else:
# Use 'negative-lookahead' to exclude newline character
cleaned = regex.sub('(?![\x0a])[^\\P{C}]+', '', cleaned)
regex = re.compile(r'[\x0A]')
cleaned = regex.sub('', cleaned)
return cleaned
def clean_markdown(value: str):
def clean_markdown(value: str) -> str:
"""Clean a markdown string.
This function will remove javascript and other potentially harmful content from the markdown string.
@ -883,7 +878,7 @@ def clean_markdown(value: str):
return value
def hash_barcode(barcode_data):
def hash_barcode(barcode_data: str) -> str:
"""Calculate a 'unique' hash for a barcode string.
This hash is used for comparison / lookup.

View File

@ -57,7 +57,7 @@ class CleanMixin:
Ref: https://github.com/mozilla/bleach/issues/192
"""
cleaned = strip_html_tags(data, field_name=field)
cleaned = data
# By default, newline characters are removed
remove_newline = True
@ -66,13 +66,13 @@ class CleanMixin:
try:
if hasattr(self, 'serializer_class'):
model = self.serializer_class.Meta.model
field = model._meta.get_field(field)
field_base = model._meta.get_field(field)
# The following field types allow newline characters
allow_newline = [(InvenTreeNotesField, True)]
for field_type in allow_newline:
if issubclass(type(field), field_type[0]):
if issubclass(type(field_base), field_type[0]):
remove_newline = False
is_markdown = field_type[1]
break
@ -86,6 +86,8 @@ class CleanMixin:
cleaned, remove_newline=remove_newline
)
cleaned = strip_html_tags(cleaned, field_name=field)
if is_markdown:
cleaned = clean_markdown(cleaned)

View File

@ -281,6 +281,15 @@ class PartCategoryAPITest(InvenTreeAPITestCase):
'A\t part\t category\t',
'A pa\rrt cat\r\r\regory',
'A part\u200e catego\u200fry\u202e',
'A\u0000 part\u0000 category',
'A part\u0007 category',
'A\u001f part category',
'A part\u007f category',
'\u0001A part category',
'A part\u0085 category',
'A part category\u200e',
'A part cat\u200fegory',
'A\u0006 part\u007f categ\nory\r',
]
for val in values:

View File

@ -48,7 +48,6 @@ python-dotenv # Environment variable management
pyyaml>=6.0.1 # YAML parsing
qrcode[pil] # QR code generator
rapidfuzz # Fuzzy string matching
regex # Advanced regular expressions
sentry-sdk # Error reporting (optional)
setuptools # Standard dependency
tablib[xls,xlsx,yaml] # Support for XLS and XLSX formats

View File

@ -1460,102 +1460,6 @@ referencing==0.35.1 \
# via
# jsonschema
# jsonschema-specifications
regex==2024.11.6 \
--hash=sha256:02a02d2bb04fec86ad61f3ea7f49c015a0681bf76abb9857f945d26159d2968c \
--hash=sha256:02e28184be537f0e75c1f9b2f8847dc51e08e6e171c6bde130b2687e0c33cf60 \
--hash=sha256:040df6fe1a5504eb0f04f048e6d09cd7c7110fef851d7c567a6b6e09942feb7d \
--hash=sha256:068376da5a7e4da51968ce4c122a7cd31afaaec4fccc7856c92f63876e57b51d \
--hash=sha256:06eb1be98df10e81ebaded73fcd51989dcf534e3c753466e4b60c4697a003b67 \
--hash=sha256:072623554418a9911446278f16ecb398fb3b540147a7828c06e2011fa531e773 \
--hash=sha256:086a27a0b4ca227941700e0b31425e7a28ef1ae8e5e05a33826e17e47fbfdba0 \
--hash=sha256:08986dce1339bc932923e7d1232ce9881499a0e02925f7402fb7c982515419ef \
--hash=sha256:0a86e7eeca091c09e021db8eb72d54751e527fa47b8d5787caf96d9831bd02ad \
--hash=sha256:0c32f75920cf99fe6b6c539c399a4a128452eaf1af27f39bce8909c9a3fd8cbe \
--hash=sha256:0d7f453dca13f40a02b79636a339c5b62b670141e63efd511d3f8f73fba162b3 \
--hash=sha256:1062b39a0a2b75a9c694f7a08e7183a80c63c0d62b301418ffd9c35f55aaa114 \
--hash=sha256:13291b39131e2d002a7940fb176e120bec5145f3aeb7621be6534e46251912c4 \
--hash=sha256:149f5008d286636e48cd0b1dd65018548944e495b0265b45e1bffecce1ef7f39 \
--hash=sha256:164d8b7b3b4bcb2068b97428060b2a53be050085ef94eca7f240e7947f1b080e \
--hash=sha256:167ed4852351d8a750da48712c3930b031f6efdaa0f22fa1933716bfcd6bf4a3 \
--hash=sha256:1c4de13f06a0d54fa0d5ab1b7138bfa0d883220965a29616e3ea61b35d5f5fc7 \
--hash=sha256:202eb32e89f60fc147a41e55cb086db2a3f8cb82f9a9a88440dcfc5d37faae8d \
--hash=sha256:220902c3c5cc6af55d4fe19ead504de80eb91f786dc102fbd74894b1551f095e \
--hash=sha256:2b3361af3198667e99927da8b84c1b010752fa4b1115ee30beaa332cabc3ef1a \
--hash=sha256:2c89a8cc122b25ce6945f0423dc1352cb9593c68abd19223eebbd4e56612c5b7 \
--hash=sha256:2d548dafee61f06ebdb584080621f3e0c23fff312f0de1afc776e2a2ba99a74f \
--hash=sha256:2e34b51b650b23ed3354b5a07aab37034d9f923db2a40519139af34f485f77d0 \
--hash=sha256:32f9a4c643baad4efa81d549c2aadefaeba12249b2adc5af541759237eee1c54 \
--hash=sha256:3a51ccc315653ba012774efca4f23d1d2a8a8f278a6072e29c7147eee7da446b \
--hash=sha256:3cde6e9f2580eb1665965ce9bf17ff4952f34f5b126beb509fee8f4e994f143c \
--hash=sha256:40291b1b89ca6ad8d3f2b82782cc33807f1406cf68c8d440861da6304d8ffbbd \
--hash=sha256:41758407fc32d5c3c5de163888068cfee69cb4c2be844e7ac517a52770f9af57 \
--hash=sha256:4181b814e56078e9b00427ca358ec44333765f5ca1b45597ec7446d3a1ef6e34 \
--hash=sha256:4f51f88c126370dcec4908576c5a627220da6c09d0bff31cfa89f2523843316d \
--hash=sha256:50153825ee016b91549962f970d6a4442fa106832e14c918acd1c8e479916c4f \
--hash=sha256:5056b185ca113c88e18223183aa1a50e66507769c9640a6ff75859619d73957b \
--hash=sha256:5071b2093e793357c9d8b2929dfc13ac5f0a6c650559503bb81189d0a3814519 \
--hash=sha256:525eab0b789891ac3be914d36893bdf972d483fe66551f79d3e27146191a37d4 \
--hash=sha256:52fb28f528778f184f870b7cf8f225f5eef0a8f6e3778529bdd40c7b3920796a \
--hash=sha256:5478c6962ad548b54a591778e93cd7c456a7a29f8eca9c49e4f9a806dcc5d638 \
--hash=sha256:5670bce7b200273eee1840ef307bfa07cda90b38ae56e9a6ebcc9f50da9c469b \
--hash=sha256:5704e174f8ccab2026bd2f1ab6c510345ae8eac818b613d7d73e785f1310f839 \
--hash=sha256:59dfe1ed21aea057a65c6b586afd2a945de04fc7db3de0a6e3ed5397ad491b07 \
--hash=sha256:5e7e351589da0850c125f1600a4c4ba3c722efefe16b297de54300f08d734fbf \
--hash=sha256:63b13cfd72e9601125027202cad74995ab26921d8cd935c25f09c630436348ff \
--hash=sha256:658f90550f38270639e83ce492f27d2c8d2cd63805c65a13a14d36ca126753f0 \
--hash=sha256:684d7a212682996d21ca12ef3c17353c021fe9de6049e19ac8481ec35574a70f \
--hash=sha256:69ab78f848845569401469da20df3e081e6b5a11cb086de3eed1d48f5ed57c95 \
--hash=sha256:6f44ec28b1f858c98d3036ad5d7d0bfc568bdd7a74f9c24e25f41ef1ebfd81a4 \
--hash=sha256:70b7fa6606c2881c1db9479b0eaa11ed5dfa11c8d60a474ff0e095099f39d98e \
--hash=sha256:764e71f22ab3b305e7f4c21f1a97e1526a25ebdd22513e251cf376760213da13 \
--hash=sha256:7ab159b063c52a0333c884e4679f8d7a85112ee3078fe3d9004b2dd875585519 \
--hash=sha256:805e6b60c54bf766b251e94526ebad60b7de0c70f70a4e6210ee2891acb70bf2 \
--hash=sha256:8447d2d39b5abe381419319f942de20b7ecd60ce86f16a23b0698f22e1b70008 \
--hash=sha256:86fddba590aad9208e2fa8b43b4c098bb0ec74f15718bb6a704e3c63e2cef3e9 \
--hash=sha256:89d75e7293d2b3e674db7d4d9b1bee7f8f3d1609428e293771d1a962617150cc \
--hash=sha256:93c0b12d3d3bc25af4ebbf38f9ee780a487e8bf6954c115b9f015822d3bb8e48 \
--hash=sha256:94d87b689cdd831934fa3ce16cc15cd65748e6d689f5d2b8f4f4df2065c9fa20 \
--hash=sha256:9714398225f299aa85267fd222f7142fcb5c769e73d7733344efc46f2ef5cf89 \
--hash=sha256:982e6d21414e78e1f51cf595d7f321dcd14de1f2881c5dc6a6e23bbbbd68435e \
--hash=sha256:997d6a487ff00807ba810e0f8332c18b4eb8d29463cfb7c820dc4b6e7562d0cf \
--hash=sha256:a03e02f48cd1abbd9f3b7e3586d97c8f7a9721c436f51a5245b3b9483044480b \
--hash=sha256:a36fdf2af13c2b14738f6e973aba563623cb77d753bbbd8d414d18bfaa3105dd \
--hash=sha256:a6ba92c0bcdf96cbf43a12c717eae4bc98325ca3730f6b130ffa2e3c3c723d84 \
--hash=sha256:a7c2155f790e2fb448faed6dd241386719802296ec588a8b9051c1f5c481bc29 \
--hash=sha256:a93c194e2df18f7d264092dc8539b8ffb86b45b899ab976aa15d48214138e81b \
--hash=sha256:abfa5080c374a76a251ba60683242bc17eeb2c9818d0d30117b4486be10c59d3 \
--hash=sha256:ac10f2c4184420d881a3475fb2c6f4d95d53a8d50209a2500723d831036f7c45 \
--hash=sha256:ad182d02e40de7459b73155deb8996bbd8e96852267879396fb274e8700190e3 \
--hash=sha256:b2837718570f95dd41675328e111345f9b7095d821bac435aac173ac80b19983 \
--hash=sha256:b489578720afb782f6ccf2840920f3a32e31ba28a4b162e13900c3e6bd3f930e \
--hash=sha256:b583904576650166b3d920d2bcce13971f6f9e9a396c673187f49811b2769dc7 \
--hash=sha256:b85c2530be953a890eaffde05485238f07029600e8f098cdf1848d414a8b45e4 \
--hash=sha256:b97c1e0bd37c5cd7902e65f410779d39eeda155800b65fc4d04cc432efa9bc6e \
--hash=sha256:ba9b72e5643641b7d41fa1f6d5abda2c9a263ae835b917348fc3c928182ad467 \
--hash=sha256:bb26437975da7dc36b7efad18aa9dd4ea569d2357ae6b783bf1118dabd9ea577 \
--hash=sha256:bb8f74f2f10dbf13a0be8de623ba4f9491faf58c24064f32b65679b021ed0001 \
--hash=sha256:bde01f35767c4a7899b7eb6e823b125a64de314a8ee9791367c9a34d56af18d0 \
--hash=sha256:bec9931dfb61ddd8ef2ebc05646293812cb6b16b60cf7c9511a832b6f1854b55 \
--hash=sha256:c36f9b6f5f8649bb251a5f3f66564438977b7ef8386a52460ae77e6070d309d9 \
--hash=sha256:cdf58d0e516ee426a48f7b2c03a332a4114420716d55769ff7108c37a09951bf \
--hash=sha256:d1cee317bfc014c2419a76bcc87f071405e3966da434e03e13beb45f8aced1a6 \
--hash=sha256:d22326fcdef5e08c154280b71163ced384b428343ae16a5ab2b3354aed12436e \
--hash=sha256:d3660c82f209655a06b587d55e723f0b813d3a7db2e32e5e7dc64ac2a9e86fde \
--hash=sha256:da8f5fc57d1933de22a9e23eec290a0d8a5927a5370d24bda9a6abe50683fe62 \
--hash=sha256:df951c5f4a1b1910f1a99ff42c473ff60f8225baa1cdd3539fe2819d9543e9df \
--hash=sha256:e5364a4502efca094731680e80009632ad6624084aff9a23ce8c8c6820de3e51 \
--hash=sha256:ea1bfda2f7162605f6e8178223576856b3d791109f15ea99a9f95c16a7636fb5 \
--hash=sha256:f02f93b92358ee3f78660e43b4b0091229260c5d5c408d17d60bf26b6c900e86 \
--hash=sha256:f056bf21105c2515c32372bbc057f43eb02aae2fda61052e2f7622c801f0b4e2 \
--hash=sha256:f1ac758ef6aebfc8943560194e9fd0fa18bcb34d89fd8bd2af18183afd8da3a2 \
--hash=sha256:f2a19f302cd1ce5dd01a9099aaa19cae6173306d1302a43b627f62e21cf18ac0 \
--hash=sha256:f654882311409afb1d780b940234208a252322c24a93b442ca714d119e68086c \
--hash=sha256:f65557897fc977a44ab205ea871b690adaef6b9da6afda4790a2484b04293a5f \
--hash=sha256:f9d1e379028e0fc2ae3654bac3cbbef81bf3fd571272a42d56c24007979bafb6 \
--hash=sha256:fdabbfc59f2c6edba2a6622c647b716e34e8e3867e0ab975412c5c2f79b82da2 \
--hash=sha256:fdd6028445d2460f33136c55eeb1f601ab06d74cb3347132e1c24250187500d9 \
--hash=sha256:ff590880083d60acc0433f9c3f713c51f7ac6ebb9adf889c79a261ecf541aa91
# via -r src/backend/requirements.in
requests==2.32.3 \
--hash=sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760 \
--hash=sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6