mirror of
https://github.com/inventree/InvenTree.git
synced 2025-04-29 20:16:44 +00:00
185 lines
4.8 KiB
Python
185 lines
4.8 KiB
Python
"""
|
|
File management tools.
|
|
"""
|
|
|
|
from rapidfuzz import fuzz
|
|
import tablib
|
|
import os
|
|
|
|
from django.utils.translation import gettext_lazy as _
|
|
from django.core.exceptions import ValidationError
|
|
|
|
# from company.models import ManufacturerPart, SupplierPart
|
|
|
|
|
|
class FileManager:
    """ Class for managing an uploaded file

    Loads a user-uploaded tabular file (.csv, .tsv, .xls or .xlsx) into a
    tablib Dataset (stored on ``self.data``) and provides helpers to inspect
    its columns and rows, and to match the column headers found in the file
    against the known header names listed in ``HEADERS``.
    """

    # Label for the file being handled; interpolated into error messages
    name = ''

    # Fields which are absolutely necessary for valid upload
    REQUIRED_HEADERS = [
        'Quantity'
    ]

    # Fields which are used for part matching (only one of them is needed)
    PART_MATCH_HEADERS = [
        'Part_Name',
        'Part_IPN',
        'Part_ID',
    ]

    # Fields which would be helpful but are not required
    OPTIONAL_HEADERS = [
    ]

    # NOTE(review): not referenced anywhere in this class and not part of
    # HEADERS - presumably consumed by callers/subclasses; confirm usage.
    EDITABLE_HEADERS = [
    ]

    # Every header name that guess_header() tries to match against.
    # This is evaluated once, when the class body executes, so a subclass
    # which overrides one of the lists above must also redefine HEADERS.
    HEADERS = REQUIRED_HEADERS + PART_MATCH_HEADERS + OPTIONAL_HEADERS

    def __init__(self, file, name=None):
        """ Initialize the FileManager class with a user-uploaded file object

        Args:
            file - Uploaded file object (must provide .name and .read())
            name - Optional label, overrides the class-level 'name'

        Raises:
            ValidationError - if the file cannot be processed (see process())
        """

        # Set name
        if name:
            self.name = name

        # Process initial file
        self.process(file)

    def process(self, file):
        """ Process file

        Reads the uploaded file and loads it into a tablib Dataset, which is
        stored on self.data (self.data stays None if loading fails).

        Args:
            file - Uploaded file object (must provide .name and .read())

        Raises:
            ValidationError - if the file extension is not supported, the
                file is not valid UTF-8 text (csv / tsv only), or tablib
                cannot load the data
        """

        self.data = None

        ext = os.path.splitext(file.name)[-1].lower()

        # NOTE: the translatable messages below are plain literals which are
        # formatted *after* translation. Wrapping an f-string in _() would
        # interpolate first, so the lookup could never match a catalogue entry.

        if ext in ['.csv', '.tsv']:
            # These file formats need string decoding.
            # 'utf-8-sig' decodes plain UTF-8 identically, but also drops a
            # leading byte-order-mark which would otherwise end up glued to
            # the first column header.
            try:
                raw_data = file.read().decode('utf-8-sig')
            except UnicodeDecodeError as exc:
                raise ValidationError(
                    _('Error reading {name} file (invalid encoding)').format(name=self.name)
                ) from exc
        elif ext in ['.xls', '.xlsx']:
            raw_data = file.read()
        else:
            raise ValidationError(_('Unsupported file format: {ext}').format(ext=ext))

        try:
            self.data = tablib.Dataset().load(raw_data)
        except tablib.UnsupportedFormat as exc:
            raise ValidationError(
                _('Error reading {name} file (invalid format)').format(name=self.name)
            ) from exc
        except tablib.core.InvalidDimensions as exc:
            raise ValidationError(
                _('Error reading {name} file (incorrect dimension)').format(name=self.name)
            ) from exc

    def guess_header(self, header, threshold=80):
        """ Try to match a header (from the file) to a list of known headers

        Matching is attempted in order: exact match, case-insensitive match,
        case-insensitive match with spaces replaced by underscores, and
        finally a fuzzy match.

        Args:
            header - Header name to look for
            threshold - Match threshold for fuzzy search

        Returns:
            The matching entry from HEADERS, or None if nothing matched
        """

        # A missing header cell cannot match anything
        if header is None:
            return None

        # Spreadsheet headers are not guaranteed to be strings
        header = str(header)

        # Try for an exact match
        for h in self.HEADERS:
            if h == header:
                return h

        lowered = header.lower()

        # Try for a case-insensitive match,
        # then a case-insensitive match with space replacement
        for candidate in (lowered, lowered.replace(' ', '_')):
            for h in self.HEADERS:
                if h.lower() == candidate:
                    return h

        # Finally, look for a close match using fuzzy matching.
        # Only a strictly better ratio replaces the current best, so the
        # earliest entry in HEADERS wins when ratios are tied.
        best_header = None
        best_ratio = threshold

        for h in self.HEADERS:
            ratio = fuzz.partial_ratio(header, h)

            if ratio > best_ratio:
                best_header = h
                best_ratio = ratio

        return best_header

    def columns(self):
        """ Return a list of column headers found in the file

        Each entry is a dict with the keys 'name' (the header as it appears
        in the file) and 'guess' (the result of guess_header() for it).
        Returns an empty list if no data or no headers are available.
        """

        if self.data is None:
            return []

        return [
            {
                'name': header,
                'guess': self.guess_header(header)
            }
            for header in (self.data.headers or [])
        ]

    def col_count(self):
        """ Return the number of columns in the file. """

        if self.data is None:
            return 0

        # headers may be None when the dataset has no header row
        return len(self.data.headers or [])

    def row_count(self):
        """ Return the number of rows in the file. """

        if self.data is None:
            return 0

        return len(self.data)

    @staticmethod
    def _clean_cell(item):
        """ Return 'item' as an int if it is a whole number in disguise (e.g. 5.0) """

        try:
            as_int = int(item)
        except (ValueError, TypeError, OverflowError):
            # Not convertible to an integer (text, None, inf, ...): keep as-is
            return item

        # Excel import casts number-looking-items into floats, which is annoying
        if item == as_int and str(item) != str(as_int):
            return as_int

        return item

    def rows(self):
        """ Return a list of all rows

        Completely empty rows are skipped. Each returned entry is a dict with
        the keys 'data' (list of cell values) and 'index' (row offset in the
        file data, counted before empty rows are skipped).
        """

        rows = []

        for i in range(self.row_count()):

            data = list(self.get_row_data(i))

            # Is the row completely empty? Skip!
            empty = True

            for idx, item in enumerate(data):
                # A missing cell (None) does not count as content
                if item is not None and str(item).strip():
                    empty = False

                data[idx] = self._clean_cell(item)

            # Skip empty rows
            if empty:
                continue

            rows.append({
                'data': data,
                'index': i
            })

        return rows

    def get_row_data(self, index):
        """ Retrieve row data at a particular index

        Returns None if no data is loaded or the index is past the last row.
        """

        if self.data is None or index >= len(self.data):
            return None

        return self.data[index]

    def get_row_dict(self, index):
        """ Retrieve a dict object representing the data row at a particular offset

        Returns None if no data is loaded or the index is past the last row.
        """

        if self.data is None or index >= len(self.data):
            return None

        return self.data.dict[index]
|