Mirror of https://github.com/inventree/InvenTree.git, synced 2025-10-30 20:55:42 +00:00

Adds functionality to map file columns to model fields
		| @@ -343,6 +343,9 @@ class DataFileUploadSerializer(serializers.Serializer): | ||||
|     - Extracts data rows | ||||
|     """ | ||||
|  | ||||
|     # Implementing class should register a target model (database model) to be used for import | ||||
|     TARGET_MODEL = None | ||||
|  | ||||
|     class Meta: | ||||
|         fields = [ | ||||
|             'bom_file', | ||||
| @@ -400,18 +403,81 @@ class DataFileUploadSerializer(serializers.Serializer): | ||||
|         except Exception as e: | ||||
|             raise serializers.ValidationError(str(e)) | ||||
|  | ||||
|         if len(self.dataset.headers) == 0: | ||||
|             raise serializers.ValidationError(_("No columns found in file")) | ||||
|  | ||||
|         if len(self.dataset) == 0: | ||||
|             raise serializers.ValidationError(_("No data rows found in file")) | ||||
|  | ||||
|         return data_file | ||||
|  | ||||
|     def match_column(self, column_name, field_names): | ||||
|         """ | ||||
|         Attempt to match a column name (from the file) to a field (defined in the model) | ||||
|  | ||||
|         Order of matching is: | ||||
|         - Direct match | ||||
|         - Case insensitive match | ||||
|         - Fuzzy match | ||||
|         """ | ||||
|  | ||||
|         column_name = column_name.strip() | ||||
|  | ||||
|         column_name_lower = column_name.lower() | ||||
|  | ||||
|         if column_name in field_names: | ||||
|             return column_name | ||||
|  | ||||
|         for field_name in field_names: | ||||
|             if field_name.lower() == column_name_lower: | ||||
|                 return field_name | ||||
|  | ||||
|         # TODO: Fuzzy pattern matching | ||||
|  | ||||
|         # No matches found | ||||
|         return None | ||||
|  | ||||
|  | ||||
|     def extract_data(self): | ||||
|         """ | ||||
|         Returns dataset extracted from the file | ||||
|         """ | ||||
|  | ||||
|         # Provide a dict of available import fields for the model | ||||
|         model_fields = {} | ||||
|  | ||||
|         # Keep track of columns we have already extracted | ||||
|         matched_columns = set() | ||||
|  | ||||
|         if self.TARGET_MODEL: | ||||
|             try: | ||||
|                 model_fields = self.TARGET_MODEL.get_import_fields() | ||||
|             except AttributeError: | ||||
|                 # Target model does not provide import fields | ||||
|                 pass | ||||
|  | ||||
|         # Extract a list of valid model field names | ||||
|         model_field_names = list(model_fields.keys()) | ||||
|  | ||||
|         # Provide a dict of available columns from the dataset | ||||
|         file_columns = {} | ||||
|  | ||||
|         for header in self.dataset.headers: | ||||
|             column = {} | ||||
|  | ||||
|             # Attempt to "match" file columns to model fields | ||||
|             match = self.match_column(header, model_field_names) | ||||
|  | ||||
|             if match is not None and match not in matched_columns: | ||||
|                 matched_columns.add(match) | ||||
|                 column['value'] = match | ||||
|             else: | ||||
|                 column['value'] = None | ||||
|  | ||||
|             file_columns[header] = column | ||||
|  | ||||
|         return { | ||||
|             'headers': self.dataset.headers, | ||||
|             'file_fields': file_columns, | ||||
|             'model_fields': model_fields, | ||||
|             'rows': [row.values() for row in self.dataset.dict], | ||||
|             'filename': self.filename, | ||||
|         } | ||||
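
The fuzzy-match step in match_column() is left as a TODO above. A minimal sketch using Python's standard-library difflib (this helper is an illustration, not part of the commit) could look like:

    import difflib

    def fuzzy_match(column_name, field_names, cutoff=0.8):
        """Return the field name closest to column_name, or None below the cutoff."""
        # Compare case-insensitively, but return the original field name
        lookup = {field.lower(): field for field in field_names}
        matches = difflib.get_close_matches(
            column_name.strip().lower(), list(lookup.keys()), n=1, cutoff=cutoff
        )
        return lookup[matches[0]] if matches else None

The cutoff guards against mapping a column onto an unrelated field; 0.8 is an arbitrary starting point that would need tuning against real-world files.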
| @@ -425,25 +491,20 @@ class DataFileExtractSerializer(serializers.Serializer): | ||||
|     - User provides an array of raw data rows  | ||||
|     """ | ||||
|  | ||||
|     # Provide a dict of expected columns for this importer | ||||
|     EXPECTED_COLUMNS = {} | ||||
|  | ||||
|     # Provide a list of required columns for this importer | ||||
|     REQUIRED_COLUMNS = [] | ||||
|     # Implementing class should register a target model (database model) to be used for import | ||||
|     TARGET_MODEL = None | ||||
|  | ||||
|     class Meta: | ||||
|         fields = [ | ||||
|             'raw_headers', | ||||
|             'mapped_headers', | ||||
|             'columns', | ||||
|             'rows', | ||||
|         ] | ||||
|  | ||||
|     raw_headers = serializers.ListField( | ||||
|         child=serializers.CharField(), | ||||
|     ) | ||||
|  | ||||
|     mapped_headers = serializers.ListField( | ||||
|         child=serializers.CharField(), | ||||
|     # Maps each file column to a model field name (blank entries are unmapped) | ||||
|     columns = serializers.ListField( | ||||
|         child=serializers.CharField( | ||||
|             allow_blank=True, | ||||
|         ), | ||||
|     ) | ||||
|  | ||||
|     rows = serializers.ListField( | ||||
| @@ -458,23 +519,16 @@ class DataFileExtractSerializer(serializers.Serializer): | ||||
|  | ||||
|         data = super().validate(data) | ||||
|  | ||||
|         self.raw_headers = data.get('raw_headers', []) | ||||
|         self.mapped_headers = data.get('mapped_headers', []) | ||||
|         self.columns = data.get('columns', []) | ||||
|         self.rows = data.get('rows', []) | ||||
|  | ||||
|         if len(self.rows) == 0: | ||||
|             raise serializers.ValidationError(_("No data rows provided")) | ||||
|  | ||||
|         if len(self.raw_headers) == 0: | ||||
|             raise serializers.ValidationError(_("File headers not supplied")) | ||||
|         if len(self.columns) == 0: | ||||
|             raise serializers.ValidationError(_("No data columns supplied")) | ||||
|  | ||||
|         if len(self.mapped_headers) == 0: | ||||
|             raise serializers.ValidationError(_("Mapped headers not supplied")) | ||||
|  | ||||
|         if len(self.raw_headers) != len(self.mapped_headers): | ||||
|             raise serializers.ValidationError(_("Supplied header list has incorrect length")) | ||||
|  | ||||
|         self.validate_headers() | ||||
|         self.validate_extracted_columns() | ||||
|  | ||||
|         return self.extract_data(data) | ||||
|  | ||||
| @@ -486,18 +540,38 @@ class DataFileExtractSerializer(serializers.Serializer): | ||||
|  | ||||
|         return data | ||||
|  | ||||
|     def validate_headers(self): | ||||
|     def validate_extracted_columns(self): | ||||
|         """ | ||||
|         Perform custom validation of the extracted column mapping. | ||||
|         """ | ||||
|  | ||||
|         print("validate_headers()") | ||||
|          | ||||
|         for col in self.REQUIRED_COLUMNS: | ||||
|             print("checking col:", col) | ||||
|             if col not in self.mapped_headers: | ||||
|                 raise serializers.ValidationError(_("Missing required column") + f": {col}") | ||||
|         # Ensure model_fields is defined even if no TARGET_MODEL is registered | ||||
|         model_fields = {} | ||||
|  | ||||
|         if self.TARGET_MODEL: | ||||
|             try: | ||||
|                 model_fields = self.TARGET_MODEL.get_import_fields() | ||||
|             except AttributeError: | ||||
|                 model_fields = {} | ||||
|  | ||||
|         cols_seen = set() | ||||
|  | ||||
|         for name, field in model_fields.items(): | ||||
|  | ||||
|             required = field.get('required', False) | ||||
|  | ||||
|             # Check for missing required columns | ||||
|             if required: | ||||
|                 if name not in self.columns: | ||||
|                     raise serializers.ValidationError(_("Missing required column") + f": '{name}'") | ||||
|          | ||||
|         for col in self.columns: | ||||
|  | ||||
|             if not col: | ||||
|                 continue | ||||
|  | ||||
|             # Check for duplicated columns | ||||
|             if col in cols_seen: | ||||
|                 raise serializers.ValidationError(_("Duplicate column") + f": '{col}'") | ||||
|  | ||||
|             cols_seen.add(col) | ||||
|  | ||||
|     def save(self): | ||||
|         """ | ||||
|   | ||||
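
Both serializers call TARGET_MODEL.get_import_fields(), which is not defined in this diff. Judging from how the result is consumed (a dict mapping field names to metadata carrying a 'required' flag), a model-side sketch might look like the following (hypothetical, assuming a standard Django model):

    @classmethod
    def get_import_fields(cls):
        """Map importable field names to metadata used by the file importer."""
        fields = {}

        for field in cls._meta.fields:
            fields[field.name] = {
                'label': str(field.verbose_name),
                # Treat non-blank, non-null fields as required for import
                'required': not field.blank and not field.null,
            }

        return fields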
| @@ -1539,7 +1539,18 @@ class BomExtract(generics.CreateAPIView): | ||||
|     """ | ||||
|  | ||||
|     queryset = Part.objects.none() | ||||
|     serializer_class = part_serializers.BomExtractSerializer | ||||
|     serializer_class = part_serializers.BomFileExtractSerializer | ||||
|  | ||||
|  | ||||
| class BomUpload(generics.CreateAPIView): | ||||
|     """ | ||||
|     API endpoint for uploading a complete Bill of Materials. | ||||
|  | ||||
|     It is assumed that the BOM has been extracted from a file using the BomExtract endpoint. | ||||
|     """ | ||||
|  | ||||
|     queryset = Part.objects.all() | ||||
|     serializer_class = part_serializers.BomFileUploadSerializer | ||||
|  | ||||
|     def create(self, request, *args, **kwargs): | ||||
|         """ | ||||
| @@ -1556,16 +1567,6 @@ class BomExtract(generics.CreateAPIView): | ||||
|         return Response(data, status=status.HTTP_201_CREATED, headers=headers) | ||||
|  | ||||
|  | ||||
| class BomUpload(generics.CreateAPIView): | ||||
|     """ | ||||
|     API endpoint for uploading a complete Bill of Materials. | ||||
|  | ||||
|     It is assumed that the BOM has been extracted from a file using the BomExtract endpoint. | ||||
|     """ | ||||
|  | ||||
|     queryset = Part.objects.all() | ||||
|     serializer_class = part_serializers.BomUploadSerializer | ||||
|  | ||||
|  | ||||
| class BomDetail(generics.RetrieveUpdateDestroyAPIView): | ||||
|     """ API endpoint for detail view of a single BomItem object """ | ||||
| @@ -1719,9 +1720,9 @@ bom_api_urls = [ | ||||
|         url(r'^.*$', BomDetail.as_view(), name='api-bom-item-detail'), | ||||
|     ])), | ||||
|  | ||||
|     url(r'^upload/', BomUpload.as_view(), name='api-bom-upload'), | ||||
|     url(r'^extract/', BomExtract.as_view(), name='api-bom-extract'), | ||||
|  | ||||
|     url(r'^upload/', BomUpload.as_view(), name='api-bom-upload'), | ||||
|  | ||||
|     # Catch-all | ||||
|     url(r'^.*$', BomList.as_view(), name='api-bom-list'), | ||||
|   | ||||
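
Putting the reordered endpoints together, the two-step flow implied by the new serializers looks roughly like the sketch below (host, credentials, file name and column names are placeholders; note the docstrings above still describe the pre-refactor order, where extraction happened first):

    import requests

    BASE = "http://localhost:8000/api/bom"  # placeholder host
    AUTH = ("user", "pass")                 # placeholder credentials

    # Step 1: upload the raw file; per DataFileUploadSerializer, the response
    # echoes the headers, rows and auto-matched column mapping
    with open("bom.csv", "rb") as f:
        r1 = requests.post(f"{BASE}/upload/", files={"bom_file": f}, auth=AUTH)

    data = r1.json()
    # data contains 'headers', 'file_fields', 'model_fields', 'rows', 'filename'

    # Step 2: post the confirmed mapping back, one model field name per file
    # column (blank = ignore that column), together with the raw rows
    payload = {
        "columns": ["part", "quantity", ""],
        "rows": data["rows"],
    }
    r2 = requests.post(f"{BASE}/extract/", json=payload, auth=AUTH)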
| @@ -17,7 +17,9 @@ from rest_framework import serializers | ||||
| from sql_util.utils import SubqueryCount, SubquerySum | ||||
| from djmoney.contrib.django_rest_framework import MoneyField | ||||
|  | ||||
| from InvenTree.serializers import (InvenTreeAttachmentSerializerField, | ||||
| from InvenTree.serializers import (DataFileUploadSerializer, | ||||
|                                    DataFileExtractSerializer, | ||||
|                                    InvenTreeAttachmentSerializerField, | ||||
|                                    InvenTreeDecimalField, | ||||
|                                    InvenTreeImageSerializerField, | ||||
|                                    InvenTreeModelSerializer, | ||||
| @@ -709,7 +711,7 @@ class PartCopyBOMSerializer(serializers.Serializer): | ||||
|         ) | ||||
|  | ||||
|  | ||||
| class BomExtractSerializer(serializers.Serializer): | ||||
| class BomFileUploadSerializer(DataFileUploadSerializer): | ||||
|     """ | ||||
|     Serializer for uploading a file and extracting data from it. | ||||
|  | ||||
| @@ -729,50 +731,7 @@ class BomExtractSerializer(serializers.Serializer): | ||||
|  | ||||
|     """ | ||||
|  | ||||
|     class Meta: | ||||
|         fields = [ | ||||
|             'bom_file', | ||||
|             'part', | ||||
|             'clear_existing', | ||||
|         ] | ||||
|  | ||||
|     # These columns must be present | ||||
|     REQUIRED_COLUMNS = [ | ||||
|         'quantity', | ||||
|     ] | ||||
|  | ||||
|     # We need at least one column to specify a "part" | ||||
|     PART_COLUMNS = [ | ||||
|         'part', | ||||
|         'part_id', | ||||
|         'part_name', | ||||
|         'part_ipn', | ||||
|     ] | ||||
|  | ||||
|     # These columns are "optional" | ||||
|     OPTIONAL_COLUMNS = [ | ||||
|         'allow_variants', | ||||
|         'inherited', | ||||
|         'optional', | ||||
|         'overage', | ||||
|         'note', | ||||
|         'reference', | ||||
|     ] | ||||
|  | ||||
|     def find_matching_column(self, col_name, columns): | ||||
|  | ||||
|         # Direct match | ||||
|         if col_name in columns: | ||||
|             return col_name | ||||
|  | ||||
|         col_name = col_name.lower().strip() | ||||
|  | ||||
|         for col in columns: | ||||
|             if col.lower().strip() == col_name: | ||||
|                 return col | ||||
|  | ||||
|         # No match | ||||
|         return None | ||||
|     TARGET_MODEL = BomItem | ||||
|  | ||||
|     def find_matching_data(self, row, col_name, columns): | ||||
|         """ | ||||
| @@ -783,58 +742,7 @@ class BomExtractSerializer(serializers.Serializer): | ||||
|  | ||||
|         return row.get(col_name, None) | ||||
|  | ||||
|     bom_file = serializers.FileField( | ||||
|         label=_("BOM File"), | ||||
|         help_text=_("Select Bill of Materials file"), | ||||
|         required=True, | ||||
|         allow_empty_file=False, | ||||
|     ) | ||||
|  | ||||
|     def validate_bom_file(self, bom_file): | ||||
|         """ | ||||
|         Perform validation checks on the uploaded BOM file | ||||
|         """ | ||||
|  | ||||
|         self.filename = bom_file.name | ||||
|  | ||||
|         name, ext = os.path.splitext(bom_file.name) | ||||
|  | ||||
|         # Remove the leading . from the extension | ||||
|         ext = ext[1:] | ||||
|  | ||||
|         accepted_file_types = [ | ||||
|             'xls', 'xlsx', | ||||
|             'csv', 'tsv', | ||||
|             'xml', | ||||
|         ] | ||||
|  | ||||
|         if ext not in accepted_file_types: | ||||
|             raise serializers.ValidationError(_("Unsupported file type")) | ||||
|  | ||||
|         # Impose a 50MB limit on uploaded BOM files | ||||
|         max_upload_file_size = 50 * 1024 * 1024 | ||||
|  | ||||
|         if bom_file.size > max_upload_file_size: | ||||
|             raise serializers.ValidationError(_("File is too large")) | ||||
|  | ||||
|         # Read file data into memory (bytes object) | ||||
|         try: | ||||
|             data = bom_file.read() | ||||
|         except Exception as e: | ||||
|             raise serializers.ValidationError(str(e)) | ||||
|  | ||||
|         if ext in ['csv', 'tsv', 'xml']: | ||||
|             try: | ||||
|                 data = data.decode() | ||||
|             except Exception as e: | ||||
|                 raise serializers.ValidationError(str(e)) | ||||
|  | ||||
|         # Convert to a tablib dataset (we expect headers) | ||||
|         try: | ||||
|             self.dataset = tablib.Dataset().load(data, ext, headers=True) | ||||
|         except Exception as e: | ||||
|             raise serializers.ValidationError(str(e)) | ||||
|  | ||||
|     """ | ||||
|         for header in self.REQUIRED_COLUMNS: | ||||
|  | ||||
|             match = self.find_matching_column(header, self.dataset.headers) | ||||
| @@ -861,11 +769,9 @@ class BomExtractSerializer(serializers.Serializer): | ||||
|             raise serializers.ValidationError(_("No data rows found")) | ||||
|  | ||||
|         return bom_file | ||||
|     """ | ||||
|  | ||||
|     def extract_data(self): | ||||
|         """ | ||||
|         Read individual rows out of the BOM file | ||||
|         """ | ||||
|     def dextract_data(self): | ||||
|  | ||||
|         rows = [] | ||||
|         errors = [] | ||||
| @@ -880,9 +786,9 @@ class BomExtractSerializer(serializers.Serializer): | ||||
|  | ||||
|             row_error = {} | ||||
|  | ||||
|             """ | ||||
|             If the "level" column is specified, and this is not a top-level BOM item, ignore the row! | ||||
|             """ | ||||
|              | ||||
|             # If the "level" column is specified, and this is not a top-level BOM item, ignore the row! | ||||
|              | ||||
|             if level_column is not None: | ||||
|                 level = row.get('level', None) | ||||
|  | ||||
| @@ -989,15 +895,19 @@ class BomExtractSerializer(serializers.Serializer): | ||||
|             'filename': self.filename, | ||||
|         } | ||||
|  | ||||
|     """ | ||||
|     part = serializers.PrimaryKeyRelatedField(queryset=Part.objects.filter(assembly=True), required=True) | ||||
|  | ||||
|     clear_existing = serializers.BooleanField( | ||||
|         label=_("Clear Existing BOM"), | ||||
|         help_text=_("Delete existing BOM data first"), | ||||
|     ) | ||||
|     """ | ||||
|  | ||||
|     def save(self): | ||||
|  | ||||
|         ... | ||||
|         """ | ||||
|         data = self.validated_data | ||||
|  | ||||
|         master_part = data['part'] | ||||
| @@ -1006,7 +916,15 @@ class BomExtractSerializer(serializers.Serializer): | ||||
|         if clear_existing: | ||||
|  | ||||
|             # Remove all existing BOM items | ||||
|             master_part.bom_items.all().delete() | ||||
|             master_part.bom_items.all().delete() | ||||
|         """ | ||||
|  | ||||
|  | ||||
| class BomFileExtractSerializer(DataFileExtractSerializer): | ||||
|     """ | ||||
|     Serializer for extracting BOM rows (BomItem entries) from an uploaded data file. | ||||
|     """ | ||||
|  | ||||
|     TARGET_MODEL = BomItem | ||||
|  | ||||
|  | ||||
| class BomUploadSerializer(serializers.Serializer): | ||||
|   | ||||
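
With the shared base classes in place, wiring a file importer for another model should reduce to registering its TARGET_MODEL, e.g. (hypothetical serializers, for illustration only):

    class SupplierPartFileUploadSerializer(DataFileUploadSerializer):
        TARGET_MODEL = SupplierPart

    class SupplierPartFileExtractSerializer(DataFileExtractSerializer):
        TARGET_MODEL = SupplierPart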