From 9d2ac521efeb234c26c3a0ee80b7075dbe072c84 Mon Sep 17 00:00:00 2001 From: Oliver Date: Tue, 23 Dec 2025 08:46:41 +1100 Subject: [PATCH] Data export fix (#11055) * Only look at query_params for top-level serializers - Nested serializers should *not* look at query params * Prevent all fields when exporting data * Add unit test for large dataset export * Fix code * Pass through via context rather than primary kwarg * Fix for file download * Ensure request is passed through to the serializer * ensure query params are passed through when exporting data * Fix code comment * Fix for unit test helper func * Increase max export time --- src/backend/InvenTree/InvenTree/mixins.py | 9 ++- .../InvenTree/InvenTree/serializers.py | 41 ++++++++++---- .../InvenTree/InvenTree/test_serializers.py | 2 +- src/backend/InvenTree/InvenTree/unit_test.py | 4 +- src/backend/InvenTree/data_exporter/mixins.py | 13 ++++- .../plugin/base/integration/DataExport.py | 6 +- src/backend/InvenTree/stock/test_api.py | 55 ++++++++++++++++++- 7 files changed, 110 insertions(+), 20 deletions(-) diff --git a/src/backend/InvenTree/InvenTree/mixins.py b/src/backend/InvenTree/InvenTree/mixins.py index 498b201872..5dd51c30ca 100644 --- a/src/backend/InvenTree/InvenTree/mixins.py +++ b/src/backend/InvenTree/InvenTree/mixins.py @@ -239,10 +239,17 @@ class OutputOptionsMixin: def get_serializer(self, *args, **kwargs): """Return serializer instance with output options applied.""" - if self.output_options and hasattr(self, 'request'): + request = getattr(self, 'request', None) + + if self.output_options and request: params = self.request.query_params kwargs.update(self.output_options.format_params(params)) + # Ensure the request is included in the serializer context + context = kwargs.get('context', {}) + context['request'] = request + kwargs['context'] = context + serializer = super().get_serializer(*args, **kwargs) # Check if the serializer actually can be filtered - makes not much sense to use this mixin without that prerequisite diff --git a/src/backend/InvenTree/InvenTree/serializers.py b/src/backend/InvenTree/InvenTree/serializers.py index 3fc8e04db5..e775b8fe53 100644 --- a/src/backend/InvenTree/InvenTree/serializers.py +++ b/src/backend/InvenTree/InvenTree/serializers.py @@ -163,10 +163,26 @@ class FilterableSerializerMixin: def gather_filters(self, kwargs) -> None: """Gather filterable fields through introspection.""" + context = kwargs.get('context', {}) + top_level_serializer = context.get('top_level_serializer', None) + request = context.get('request', None) or getattr(self, 'request', None) + + # Gather query parameters from the request context + query_params = dict(getattr(request, 'query_params', {})) if request else {} + + is_top_level = ( + top_level_serializer is None + or top_level_serializer == self.__class__.__name__ + ) + + # Update the context to ensure that the top_level_serializer flag is removed for nested serializers + if top_level_serializer is None: + context['top_level_serializer'] = self.__class__.__name__ + kwargs['context'] = context + # Fast exit if this has already been done or would not have any effect if getattr(self, '_was_filtered', False) or not hasattr(self, 'fields'): return - self._was_filtered = True # Actually gather the filterable fields # Also see `enable_filter` where` is_filterable and is_filterable_vals are set @@ -176,21 +192,22 @@ class FilterableSerializerMixin: if getattr(a, 'is_filterable', None) } - # Gather query parameters from the request context - query_params = {} - if context 
:= kwargs.get('context', {}): - query_params = dict(getattr(context.get('request', {}), 'query_params', {})) - # Remove filter args from kwargs to avoid issues with super().__init__ popped_kwargs = {} # store popped kwargs as a arg might be reused for multiple fields tgs_vals: dict[str, bool] = {} for k, v in self.filter_targets.items(): pop_ref = v['filter_name'] or k val = kwargs.pop(pop_ref, popped_kwargs.get(pop_ref)) - # Optionally also look in query parameters - if val is None and self.filter_on_query and v.get('filter_by_query', True): + # Note that we only do this for a top-level serializer, to avoid issues with nested serializers + if ( + is_top_level + and val is None + and self.filter_on_query + and v.get('filter_by_query', True) + ): val = query_params.pop(pop_ref, None) + if isinstance(val, list) and len(val) == 1: val = val[0] @@ -199,7 +216,9 @@ class FilterableSerializerMixin: tgs_vals[k] = ( str2bool(val) if isinstance(val, (str, int, float)) else val ) # Support for various filtering style for backwards compatibility + self.filter_target_values = tgs_vals + self._was_filtered = True # Ensure this mixin is not broadly applied as it is expensive on scale (total CI time increased by 21% when running all coverage tests) if len(self.filter_targets) == 0 and not self.no_filters: @@ -216,14 +235,12 @@ class FilterableSerializerMixin: ): return - # Skip filtering when exporting data - leave all fields intact - if getattr(self, '_exporting_data', False): - return + is_exporting = getattr(self, '_exporting_data', False) # Skip filtering for a write requests - all fields should be present for data creation if request := self.context.get('request', None): if method := getattr(request, 'method', None): - if str(method).lower() in ['post', 'put', 'patch']: + if str(method).lower() in ['post', 'put', 'patch'] and not is_exporting: return # Throw out fields which are not requested (either by default or explicitly) diff --git a/src/backend/InvenTree/InvenTree/test_serializers.py b/src/backend/InvenTree/InvenTree/test_serializers.py index 4cde34d806..b6c794e33f 100644 --- a/src/backend/InvenTree/InvenTree/test_serializers.py +++ b/src/backend/InvenTree/InvenTree/test_serializers.py @@ -157,7 +157,7 @@ class FilteredSerializers(InvenTreeAPITestCase): _ = BadSerializer() self.assertTrue(True) # Dummy assertion to ensure we reach here - def test_failiure_OutputOptionsMixin(self): + def test_failure_OutputOptionsMixin(self): """Test failure case for OutputOptionsMixin.""" class BadSerializer(InvenTree.serializers.InvenTreeModelSerializer): diff --git a/src/backend/InvenTree/InvenTree/unit_test.py b/src/backend/InvenTree/InvenTree/unit_test.py index 5360d9f71d..54cde6afe9 100644 --- a/src/backend/InvenTree/InvenTree/unit_test.py +++ b/src/backend/InvenTree/InvenTree/unit_test.py @@ -655,7 +655,9 @@ class InvenTreeAPITestCase( # Append URL params url += '?' 
+ '&'.join([f'{key}={value}' for key, value in params.items()]) - response = self.client.get(url, data=None, format='json') + response = self.get( + url, data=None, format='json', expected_code=expected_code, **kwargs + ) self.check_response(url, response, expected_code=expected_code) # Check that the response is of the correct type diff --git a/src/backend/InvenTree/data_exporter/mixins.py b/src/backend/InvenTree/data_exporter/mixins.py index 316dfdfbce..4b373dcf51 100644 --- a/src/backend/InvenTree/data_exporter/mixins.py +++ b/src/backend/InvenTree/data_exporter/mixins.py @@ -337,6 +337,12 @@ class DataExportViewMixin: # Update the output instance with the total number of items to export output.total = queryset.count() output.save() + request = context.get('request', None) + + if request: + query_params = getattr(request, 'query_params', {}) + context.update(**query_params) + context['request'] = request data = None serializer = serializer_class(context=context, exporting=True) @@ -363,7 +369,12 @@ class DataExportViewMixin: # The returned data *must* be a list of dict objects try: data = export_plugin.export_data( - queryset, serializer_class, headers, export_context, output + queryset, + serializer_class, + headers, + export_context, + output, + serializer_context=context, ) except Exception as e: diff --git a/src/backend/InvenTree/plugin/base/integration/DataExport.py b/src/backend/InvenTree/plugin/base/integration/DataExport.py index dc46aa449c..97d38ec493 100644 --- a/src/backend/InvenTree/plugin/base/integration/DataExport.py +++ b/src/backend/InvenTree/plugin/base/integration/DataExport.py @@ -90,6 +90,7 @@ class DataExportMixin: headers: OrderedDict, context: dict, output: DataOutput, + serializer_context: Optional[dict] = None, **kwargs, ) -> list: """Export data from the queryset. @@ -100,6 +101,7 @@ class DataExportMixin: Arguments: queryset: The queryset to export serializer_class: The serializer class to use for exporting the data + serializer_context: Optional context for the serializer headers: The headers for the export context: Any custom context for the export (provided by the plugin serializer) output: The DataOutput object for the export @@ -107,7 +109,9 @@ class DataExportMixin: Returns: The exported data (a list of dict objects) """ # The default implementation simply serializes the queryset - return serializer_class(queryset, many=True, exporting=True).data + return serializer_class( + queryset, many=True, exporting=True, context=serializer_context or {} + ).data def get_export_options_serializer(self, **kwargs) -> serializers.Serializer | None: """Return a serializer class with dynamic export options for this plugin. 
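With the new serializer_context argument above, a custom export plugin that serializes the queryset itself can forward the request-aware context, so query parameters (such as part_detail) take effect during export just as they do for a normal API response. The following sketch is illustrative only: the plugin name and metadata are invented, and it assumes DataExportMixin is importable from the module path shown in the diff above; it is not part of this patch.

# Minimal sketch of a custom exporter using the new serializer_context argument.
# The plugin name/metadata are hypothetical; only the export_data signature and
# the serializer call mirror the patch above.
from plugin import InvenTreePlugin
from plugin.base.integration.DataExport import DataExportMixin


class ExampleExporterPlugin(DataExportMixin, InvenTreePlugin):
    """Hypothetical exporter plugin which serializes the queryset itself."""

    NAME = 'ExampleExporter'
    SLUG = 'example-exporter'
    TITLE = 'Example data exporter'

    def export_data(
        self,
        queryset,
        serializer_class,
        headers,
        context,
        output,
        serializer_context=None,
        **kwargs,
    ) -> list:
        """Serialize the queryset, passing the serializer context through.

        Forwarding serializer_context means the request (and therefore any
        query parameters) reaches the serializer, so the exported columns
        match what the API itself would return.
        """
        return serializer_class(
            queryset,
            many=True,
            exporting=True,
            context=serializer_context or {},
        ).data
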
diff --git a/src/backend/InvenTree/stock/test_api.py b/src/backend/InvenTree/stock/test_api.py index 7b267facb2..e81c6e0fed 100644 --- a/src/backend/InvenTree/stock/test_api.py +++ b/src/backend/InvenTree/stock/test_api.py @@ -867,9 +867,13 @@ class StockItemListTest(StockAPITestCase): excluded_headers = ['metadata'] - filters = {} + filters = { + 'part_detail': True, + 'location_detail': True, + 'supplier_part_detail': True, + } - with self.export_data(self.list_url, filters) as data_file: + with self.export_data(self.list_url, params=filters) as data_file: self.process_csv( data_file, required_cols=required_headers, @@ -881,7 +885,7 @@ class StockItemListTest(StockAPITestCase): filters['location'] = 1 filters['cascade'] = True - with self.export_data(self.list_url, filters) as data_file: + with self.export_data(self.list_url, params=filters) as data_file: data = self.process_csv(data_file, required_rows=9) for row in data: @@ -909,6 +913,51 @@ class StockItemListTest(StockAPITestCase): with self.export_data(self.list_url, {'part': 25}) as data_file: self.process_csv(data_file, required_rows=items.count()) + def test_large_export(self): + """Test export of very large dataset. + + - Ensure that the time taken to export a large dataset is reasonable. + - Ensure that the number of DB queries is reasonable. + """ + # Create a large number of stock items + locations = list(StockLocation.objects.all()) + parts = list(Part.objects.filter(virtual=False)) + + idx = 0 + + N_LOCATIONS = len(locations) + N_PARTS = len(parts) + + stock_items = [] + + while idx < 2500: + part = parts[idx % N_PARTS] + location = locations[idx % N_LOCATIONS] + + item = StockItem( + part=part, + location=location, + quantity=10, + level=0, + tree_id=0, + lft=0, + rght=0, + ) + stock_items.append(item) + idx += 1 + + StockItem.objects.bulk_create(stock_items) + + self.assertGreaterEqual(StockItem.objects.count(), 2500) + + # Note: While the export is quick on pgsql, it is still quite slow on sqlite3 + with self.export_data( + self.list_url, max_query_count=50, max_query_time=7.5 + ) as data_file: + data = self.process_csv(data_file) + + self.assertGreaterEqual(len(data), 2500) + def test_filter_by_allocated(self): """Test that we can filter by "allocated" status.
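The serializer changes earlier in this patch hinge on recording which serializer is the top-level one in the shared context, so that only that serializer consumes query parameters while nested detail serializers (part_detail, location_detail, and so on) ignore them. The standalone sketch below shows that pattern in isolation; it is not InvenTree code, and the mixin name is invented.

# Standalone sketch of the "top level serializer" pattern used by
# gather_filters() above. The class name is hypothetical.
class TopLevelAwareMixin:
    """Serializer mixin which only consumes query parameters at the top level."""

    def __init__(self, *args, **kwargs):
        context = kwargs.get('context', {})
        top_level = context.get('top_level_serializer', None)
        request = context.get('request', None)

        # Top-level if no serializer has claimed the flag yet,
        # or if the flag already names this class
        is_top_level = top_level is None or top_level == self.__class__.__name__

        if top_level is None:
            # Claim the flag: nested serializers which receive this same
            # context will not be treated as top-level
            context['top_level_serializer'] = self.__class__.__name__
            kwargs['context'] = context

        # Only the top-level serializer reads query parameters from the request
        self.query_params = (
            dict(getattr(request, 'query_params', {}))
            if (is_top_level and request is not None)
            else {}
        )

        super().__init__(*args, **kwargs)

In the patch itself the same idea lives in FilterableSerializerMixin.gather_filters(), where the gathered query parameters then determine the filter target values for the top-level serializer only.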