"""Files management tools."""
|
|
|
|
import os
|
|
|
|
from django.core.exceptions import ValidationError
|
|
from django.utils.translation import gettext_lazy as _
|
|
|
|
import tablib
|
|
from rapidfuzz import fuzz
|
|
|
|
|
|
class FileManager:
|
|
"""Class for managing an uploaded file."""
|
|
|
|
name = ''
|
|
|
|
# Fields which are absolutely necessary for valid upload
|
|
REQUIRED_HEADERS = []
|
|
|
|
# Fields which are used for item matching (only one of them is needed)
|
|
ITEM_MATCH_HEADERS = []
|
|
|
|
# Fields which would be helpful but are not required
|
|
OPTIONAL_HEADERS = []
|
|
|
|
OPTIONAL_MATCH_HEADERS = []
|
|
|
|
EDITABLE_HEADERS = []
|
|
|
|
HEADERS = []
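    # HEADERS is rebuilt at runtime by update_headers(), which concatenates the
    # required, item-match, optional-match and optional lists above; subclasses
    # normally override those grouped lists rather than HEADERS itself.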

    def __init__(self, file, name=None):
        """Initialize the FileManager class with a user-uploaded file object."""
        # Set name
        if name:
            self.name = name

        # Process initial file
        self.process(file)

        # Update headers
        self.update_headers()

    @classmethod
    def validate(cls, file):
        """Validate file extension and data."""
        cleaned_data = None

        ext = os.path.splitext(file.name)[-1].lower().replace('.', '')

        try:
            if ext in ['csv', 'tsv']:
                # These file formats need string decoding
                raw_data = file.read().decode('utf-8')
                # Reset stream position to beginning of file
                file.seek(0)
            elif ext in ['xls', 'xlsx', 'json', 'yaml']:
                raw_data = file.read()
                # Reset stream position to beginning of file
                file.seek(0)
            else:
                fmt = ext.upper()
                raise ValidationError(_(f'Unsupported file format: {fmt}'))
        except UnicodeDecodeError:
            raise ValidationError(_('Error reading file (invalid encoding)'))

        try:
            cleaned_data = tablib.Dataset().load(raw_data, format=ext)
        except tablib.UnsupportedFormat:
            raise ValidationError(_('Error reading file (invalid format)'))
        except tablib.core.InvalidDimensions:
            raise ValidationError(_('Error reading file (incorrect dimension)'))
        except KeyError:
            raise ValidationError(_('Error reading file (data could be corrupted)'))

        return cleaned_data

    def process(self, file):
        """Process the uploaded file and store the parsed dataset."""
        self.data = self.__class__.validate(file)

    def update_headers(self):
        """Rebuild the HEADERS list from the grouped header lists."""
        self.HEADERS = (
            self.REQUIRED_HEADERS
            + self.ITEM_MATCH_HEADERS
            + self.OPTIONAL_MATCH_HEADERS
            + self.OPTIONAL_HEADERS
        )

    def setup(self):
        """Set up headers; subclasses should override this to define their specific headers."""
        if not self.name:
            return

        # Update headers
        self.update_headers()

    def guess_header(self, header, threshold=80):
        """Try to match a header (from the file) to a list of known headers.

        Args:
            header (Any): Header name to look for
            threshold (int, optional): Match threshold for fuzzy search. Defaults to 80.

        Returns:
            Any: The matched known header, or None if no match is found
        """
        # Replace null values with empty string
        if header is None:
            header = ''

        # Try for an exact match
        for h in self.HEADERS:
            if h == header:
                return h

        # Try for a case-insensitive match
        for h in self.HEADERS:
            if h.lower() == header.lower():
                return h

        # Try for a case-insensitive match with space replacement
        for h in self.HEADERS:
            if h.lower() == header.lower().replace(' ', '_'):
                return h

        # Finally, look for a close match using fuzzy matching
        matches = []

        for h in self.HEADERS:
            ratio = fuzz.partial_ratio(header, h)
            if ratio > threshold:
                matches.append({'header': h, 'match': ratio})

        if len(matches) > 0:
            matches = sorted(matches, key=lambda item: item['match'], reverse=True)
            return matches[0]['header']

        return None

    def columns(self):
        """Return the column headers from the file, with a guessed match for each."""
        headers = []

        for header in self.data.headers:
            # Guess header
            guess = self.guess_header(header, threshold=95)
            # Check if already present
            guess_exists = False
            for _idx, data in enumerate(headers):
                if guess == data['guess']:
                    guess_exists = True
                    break

            if not guess_exists:
                headers.append({'name': header, 'guess': guess})
            else:
                headers.append({'name': header, 'guess': None})

        return headers

    def col_count(self):
        """Return the number of columns in the file."""
        if self.data is None:
            return 0

        return len(self.data.headers)

    def row_count(self):
        """Return the number of rows in the file."""
        if self.data is None:
            return 0

        return len(self.data)

    def rows(self):
        """Return a list of all rows."""
        rows = []

        for i in range(self.row_count()):
            data = list(self.get_row_data(i))

            # Is the row completely empty? Skip!
            empty = True

            for idx, item in enumerate(data):
                if len(str(item).strip()) > 0:
                    empty = False

                try:
                    # Excel import casts number-looking-items into floats, which is annoying
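                    # e.g. a cell holding 100 may arrive as 100.0; when the value
                    # is an integral float whose string form differs, cast it back
                    # to int so it renders as '100' rather than '100.0'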
                    if item == int(item) and str(item) != str(int(item)):
                        data[idx] = int(item)
                except ValueError:
                    pass
                except TypeError:
                    data[idx] = ''

            # Skip empty rows
            if empty:
                continue

            row = {'data': data, 'index': i}

            rows.append(row)

        return rows

    def get_row_data(self, index):
        """Retrieve row data at a particular index."""
        if self.data is None or index >= len(self.data):
            return None

        return self.data[index]

    def get_row_dict(self, index):
        """Retrieve a dict object representing the data row at a particular offset."""
        if self.data is None or index >= len(self.data):
            return None

        return self.data.dict[index]
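

# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original module): a minimal
# FileManager subclass plus an in-memory stand-in for an uploaded file,
# showing how the class parses tabular data and guesses column headers.
# The subclass, its header names and the _UploadStub helper are assumptions
# made for this example only.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    import io

    class _UploadStub:
        """Minimal file-like object mimicking an uploaded file (hypothetical)."""

        def __init__(self, name, content):
            self.name = name
            self._buffer = io.BytesIO(content)

        def read(self):
            return self._buffer.read()

        def seek(self, pos):
            return self._buffer.seek(pos)

    class ExampleFileManager(FileManager):
        """Example subclass declaring the headers it expects (hypothetical)."""

        REQUIRED_HEADERS = ['part_name', 'quantity']
        OPTIONAL_HEADERS = ['notes']

    upload = _UploadStub('parts.csv', b'Part Name,Quantity,Notes\nM3 Bolt,100,\n')
    manager = ExampleFileManager(upload, name='example')

    print(manager.columns())  # each column name with its guessed known header
    print(manager.rows())  # parsed, non-empty data rows with their indices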