mirror of
				https://github.com/django/django.git
				synced 2025-10-24 14:16:09 +00:00 
			
		
		
		
	Fixed #30190 -- Added JSONL serializer.
This commit is contained in:
		
							
								
								
									
										1
									
								
								AUTHORS
									
									
									
									
									
								
							
							
						
						
									
										1
									
								
								AUTHORS
									
									
									
									
									
								
							| @@ -52,6 +52,7 @@ answer newbie questions, and generally made Django that much better: | |||||||
|     Alex Robbins <alexander.j.robbins@gmail.com> |     Alex Robbins <alexander.j.robbins@gmail.com> | ||||||
|     Alexey Boriskin <alex@boriskin.me> |     Alexey Boriskin <alex@boriskin.me> | ||||||
|     Alexey Tsivunin <most-208@yandex.ru> |     Alexey Tsivunin <most-208@yandex.ru> | ||||||
|  |     Ali Vakilzade <ali@vakilzade.com> | ||||||
|     Aljosa Mohorovic <aljosa.mohorovic@gmail.com> |     Aljosa Mohorovic <aljosa.mohorovic@gmail.com> | ||||||
|     Amit Chakradeo <https://amit.chakradeo.net/> |     Amit Chakradeo <https://amit.chakradeo.net/> | ||||||
|     Amit Ramon <amit.ramon@gmail.com> |     Amit Ramon <amit.ramon@gmail.com> | ||||||
|   | |||||||
| @@ -28,6 +28,7 @@ BUILTIN_SERIALIZERS = { | |||||||
|     "python": "django.core.serializers.python", |     "python": "django.core.serializers.python", | ||||||
|     "json": "django.core.serializers.json", |     "json": "django.core.serializers.json", | ||||||
|     "yaml": "django.core.serializers.pyyaml", |     "yaml": "django.core.serializers.pyyaml", | ||||||
|  |     "jsonl": "django.core.serializers.jsonl", | ||||||
| } | } | ||||||
|  |  | ||||||
| _serializers = {} | _serializers = {} | ||||||
|   | |||||||
							
								
								
									
										57
									
								
								django/core/serializers/jsonl.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										57
									
								
								django/core/serializers/jsonl.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,57 @@ | |||||||
|  | """ | ||||||
|  | Serialize data to/from JSON Lines | ||||||
|  | """ | ||||||
|  |  | ||||||
|  | import json | ||||||
|  |  | ||||||
|  | from django.core.serializers.base import DeserializationError | ||||||
|  | from django.core.serializers.json import DjangoJSONEncoder | ||||||
|  | from django.core.serializers.python import ( | ||||||
|  |     Deserializer as PythonDeserializer, Serializer as PythonSerializer, | ||||||
|  | ) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class Serializer(PythonSerializer): | ||||||
|  |     """Convert a queryset to JSON Lines.""" | ||||||
|  |     internal_use_only = False | ||||||
|  |  | ||||||
|  |     def _init_options(self): | ||||||
|  |         self._current = None | ||||||
|  |         self.json_kwargs = self.options.copy() | ||||||
|  |         self.json_kwargs.pop('stream', None) | ||||||
|  |         self.json_kwargs.pop('fields', None) | ||||||
|  |         self.json_kwargs.pop('indent', None) | ||||||
|  |         self.json_kwargs['separators'] = (',', ': ') | ||||||
|  |         self.json_kwargs.setdefault('cls', DjangoJSONEncoder) | ||||||
|  |         self.json_kwargs.setdefault('ensure_ascii', False) | ||||||
|  |  | ||||||
|  |     def start_serialization(self): | ||||||
|  |         self._init_options() | ||||||
|  |  | ||||||
|  |     def end_object(self, obj): | ||||||
|  |         # self._current has the field data | ||||||
|  |         json.dump(self.get_dump_object(obj), self.stream, **self.json_kwargs) | ||||||
|  |         self.stream.write("\n") | ||||||
|  |         self._current = None | ||||||
|  |  | ||||||
|  |     def getvalue(self): | ||||||
|  |         # Grandparent super | ||||||
|  |         return super(PythonSerializer, self).getvalue() | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def Deserializer(stream_or_string, **options): | ||||||
|  |     """Deserialize a stream or string of JSON data.""" | ||||||
|  |     if isinstance(stream_or_string, bytes): | ||||||
|  |         stream_or_string = stream_or_string.decode() | ||||||
|  |     if isinstance(stream_or_string, (bytes, str)): | ||||||
|  |         stream_or_string = stream_or_string.split("\n") | ||||||
|  |  | ||||||
|  |     for line in stream_or_string: | ||||||
|  |         if not line.strip(): | ||||||
|  |             continue | ||||||
|  |         try: | ||||||
|  |             yield list(PythonDeserializer([json.loads(line), ], **options))[0] | ||||||
|  |         except (GeneratorExit, DeserializationError): | ||||||
|  |             raise | ||||||
|  |         except Exception as exc: | ||||||
|  |             raise DeserializationError() from exc | ||||||
| @@ -215,7 +215,10 @@ Security | |||||||
| Serialization | Serialization | ||||||
| ~~~~~~~~~~~~~ | ~~~~~~~~~~~~~ | ||||||
|  |  | ||||||
| * ... | * The new :ref:`JSONL <serialization-formats-jsonl>` serializer allows using | ||||||
|  |   the JSON Lines format with :djadmin:`dumpdata` and :djadmin:`loaddata`. This | ||||||
|  |   can be useful for populating large databases because data is loaded line by | ||||||
|  |   line into memory, rather than being loaded all at once. | ||||||
|  |  | ||||||
| Signals | Signals | ||||||
| ~~~~~~~ | ~~~~~~~ | ||||||
|   | |||||||
| @@ -160,11 +160,14 @@ Identifier  Information | |||||||
|  |  | ||||||
| ``json``    Serializes to and from JSON_. | ``json``    Serializes to and from JSON_. | ||||||
|  |  | ||||||
|  | ``jsonl``   Serializes to and from JSONL_. | ||||||
|  |  | ||||||
| ``yaml``    Serializes to YAML (YAML Ain't a Markup Language). This | ``yaml``    Serializes to YAML (YAML Ain't a Markup Language). This | ||||||
|             serializer is only available if PyYAML_ is installed. |             serializer is only available if PyYAML_ is installed. | ||||||
| ==========  ============================================================== | ==========  ============================================================== | ||||||
|  |  | ||||||
| .. _json: https://json.org/ | .. _json: https://json.org/ | ||||||
|  | .. _jsonl: https://jsonlines.org/ | ||||||
| .. _PyYAML: https://pyyaml.org/ | .. _PyYAML: https://pyyaml.org/ | ||||||
|  |  | ||||||
| XML | XML | ||||||
| @@ -307,6 +310,24 @@ The JSON serializer uses ``DjangoJSONEncoder`` for encoding. A subclass of | |||||||
|  |  | ||||||
| .. _ecma-262: https://www.ecma-international.org/ecma-262/5.1/#sec-15.9.1.15 | .. _ecma-262: https://www.ecma-international.org/ecma-262/5.1/#sec-15.9.1.15 | ||||||
|  |  | ||||||
|  | .. _serialization-formats-jsonl: | ||||||
|  |  | ||||||
|  | JSONL | ||||||
|  | ----- | ||||||
|  |  | ||||||
|  | .. versionadded:: 3.2 | ||||||
|  |  | ||||||
|  | *JSONL* stands for *JSON Lines*. With this format, objects are separated by new | ||||||
|  | lines, and each line contains a valid JSON object. JSONL serialized data look | ||||||
|  | like this:: | ||||||
|  |  | ||||||
|  |     { "pk": "4b678b301dfd8a4e0dad910de3ae245b", "model": "sessions.session", "fields": { ... }} | ||||||
|  |     { "pk": "88bea72c02274f3c9bf1cb2bb8cee4fc", "model": "sessions.session", "fields": { ... }} | ||||||
|  |     { "pk": "9cf0e26691b64147a67e2a9f06ad7a53", "model": "sessions.session", "fields": { ... }} | ||||||
|  |  | ||||||
|  | JSONL can be useful for populating large databases, since the data can be | ||||||
|  | processed line by line, rather than being loaded into memory all at once. | ||||||
|  |  | ||||||
| YAML | YAML | ||||||
| ---- | ---- | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										312
									
								
								tests/serializers/test_jsonl.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										312
									
								
								tests/serializers/test_jsonl.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,312 @@ | |||||||
|  | import decimal | ||||||
|  | import json | ||||||
|  | import re | ||||||
|  |  | ||||||
|  | from django.core import serializers | ||||||
|  | from django.core.serializers.base import DeserializationError | ||||||
|  | from django.db import models | ||||||
|  | from django.test import TestCase, TransactionTestCase | ||||||
|  | from django.test.utils import isolate_apps | ||||||
|  |  | ||||||
|  | from .models import Score | ||||||
|  | from .tests import SerializersTestBase, SerializersTransactionTestBase | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class JsonlSerializerTestCase(SerializersTestBase, TestCase): | ||||||
|  |     """Tests for the "jsonl" serializer; fixtures hold one JSON document per line.""" | ||||||
|  |     serializer_name = "jsonl" | ||||||
|  |     # Two category records: the first has an explicit null pk, the second has | ||||||
|  |     # no "pk" key at all. | ||||||
|  |     pkless_str = [ | ||||||
|  |         """{ | ||||||
|  |             "pk": null, | ||||||
|  |             "model": "serializers.category", | ||||||
|  |             "fields": {"name": "Reference"} | ||||||
|  |         }""", | ||||||
|  |         """{ | ||||||
|  |             "model": "serializers.category", | ||||||
|  |             "fields": {"name": "Non-fiction"} | ||||||
|  |         }""" | ||||||
|  |     ] | ||||||
|  |     # Collapse each record onto a single line, then join the records with | ||||||
|  |     # newlines. | ||||||
|  |     pkless_str = "\n".join([s.replace("\n", "") for s in pkless_str]) | ||||||
|  |  | ||||||
|  |     # Template with %-style named placeholders, collapsed onto a single | ||||||
|  |     # newline-terminated line. | ||||||
|  |     mapping_ordering_str = """{ | ||||||
|  | "model": "serializers.article", | ||||||
|  | "pk": %(article_pk)s, | ||||||
|  | "fields": { | ||||||
|  | "author": %(author_pk)s, | ||||||
|  | "headline": "Poker has no place on ESPN", | ||||||
|  | "pub_date": "2006-06-16T11:00:00", | ||||||
|  | "categories": [ | ||||||
|  | %(first_category_pk)s, | ||||||
|  | %(second_category_pk)s | ||||||
|  | ], | ||||||
|  | "meta_data": [] | ||||||
|  | } | ||||||
|  | }""".replace("\n", "") + "\n" | ||||||
|  |  | ||||||
|  |     @staticmethod | ||||||
|  |     def _validate_output(serial_str): | ||||||
|  |         """Return True if every non-empty line of serial_str parses as JSON.""" | ||||||
|  |         try: | ||||||
|  |             for line in serial_str.split("\n"): | ||||||
|  |                 if line: | ||||||
|  |                     json.loads(line) | ||||||
|  |         except Exception: | ||||||
|  |             return False | ||||||
|  |         else: | ||||||
|  |             return True | ||||||
|  |  | ||||||
|  |     @staticmethod | ||||||
|  |     def _get_pk_values(serial_str): | ||||||
|  |         """Return the "pk" value of each non-empty line of serial_str.""" | ||||||
|  |         serial_list = [json.loads(line) for line in serial_str.split("\n") if line] | ||||||
|  |         return [obj_dict['pk'] for obj_dict in serial_list] | ||||||
|  |  | ||||||
|  |     @staticmethod | ||||||
|  |     def _get_field_values(serial_str, field_name): | ||||||
|  |         """Return the field_name value of each object that has that field.""" | ||||||
|  |         serial_list = [json.loads(line) for line in serial_str.split("\n") if line] | ||||||
|  |         return [obj_dict['fields'][field_name] for obj_dict in serial_list if field_name in obj_dict['fields']] | ||||||
|  |  | ||||||
|  |     def test_no_indentation(self): | ||||||
|  |         # indent=2 is passed on purpose; no output line may end with a comma | ||||||
|  |         # (optionally followed by whitespace). | ||||||
|  |         s = serializers.jsonl.Serializer() | ||||||
|  |         json_data = s.serialize([Score(score=5.0), Score(score=6.0)], indent=2) | ||||||
|  |         for line in json_data.splitlines(): | ||||||
|  |             self.assertIsNone(re.search(r'.+,\s*$', line)) | ||||||
|  |  | ||||||
|  |     @isolate_apps('serializers') | ||||||
|  |     def test_custom_encoder(self): | ||||||
|  |         # An encoder class passed as ``cls`` is used for the output: the | ||||||
|  |         # Decimal is expected to appear as the string "1". | ||||||
|  |         class ScoreDecimal(models.Model): | ||||||
|  |             score = models.DecimalField() | ||||||
|  |  | ||||||
|  |         class CustomJSONEncoder(json.JSONEncoder): | ||||||
|  |             def default(self, o): | ||||||
|  |                 if isinstance(o, decimal.Decimal): | ||||||
|  |                     return str(o) | ||||||
|  |                 return super().default(o) | ||||||
|  |  | ||||||
|  |         s = serializers.jsonl.Serializer() | ||||||
|  |         json_data = s.serialize( | ||||||
|  |             [ScoreDecimal(score=decimal.Decimal(1.0))], cls=CustomJSONEncoder | ||||||
|  |         ) | ||||||
|  |         self.assertIn('"fields": {"score": "1"}', json_data) | ||||||
|  |  | ||||||
|  |     def test_json_deserializer_exception(self): | ||||||
|  |         # The input has an unbalanced "[" and is therefore not valid JSON. | ||||||
|  |         with self.assertRaises(DeserializationError): | ||||||
|  |             for obj in serializers.deserialize("jsonl", """[{"pk":1}"""): | ||||||
|  |                 pass | ||||||
|  |  | ||||||
|  |     def test_helpful_error_message_invalid_pk(self): | ||||||
|  |         """ | ||||||
|  |         If there is an invalid primary key, the error message should contain | ||||||
|  |         the model associated with it. | ||||||
|  |         """ | ||||||
|  |         test_string = """{ | ||||||
|  |             "pk": "badpk", | ||||||
|  |             "model": "serializers.player", | ||||||
|  |             "fields": { | ||||||
|  |                 "name": "Bob", | ||||||
|  |                 "rank": 1, | ||||||
|  |                 "team": "Team" | ||||||
|  |             } | ||||||
|  |         }""".replace("\n", "") | ||||||
|  |         with self.assertRaisesMessage(DeserializationError, "(serializers.player:pk=badpk)"): | ||||||
|  |             list(serializers.deserialize('jsonl', test_string)) | ||||||
|  |  | ||||||
|  |     def test_helpful_error_message_invalid_field(self): | ||||||
|  |         """ | ||||||
|  |         If there is an invalid field value, the error message should contain | ||||||
|  |         the model associated with it. | ||||||
|  |         """ | ||||||
|  |         test_string = """{ | ||||||
|  |             "pk": "1", | ||||||
|  |             "model": "serializers.player", | ||||||
|  |             "fields": { | ||||||
|  |                 "name": "Bob", | ||||||
|  |                 "rank": "invalidint", | ||||||
|  |                 "team": "Team" | ||||||
|  |             } | ||||||
|  |         }""".replace("\n", "") | ||||||
|  |         expected = "(serializers.player:pk=1) field_value was 'invalidint'" | ||||||
|  |         with self.assertRaisesMessage(DeserializationError, expected): | ||||||
|  |             list(serializers.deserialize('jsonl', test_string)) | ||||||
|  |  | ||||||
|  |     def test_helpful_error_message_for_foreign_keys(self): | ||||||
|  |         """ | ||||||
|  |         Invalid foreign keys with a natural key should throw a helpful error | ||||||
|  |         message, such as what the failing key is. | ||||||
|  |         """ | ||||||
|  |         test_string = """{ | ||||||
|  |             "pk": 1, | ||||||
|  |             "model": "serializers.category", | ||||||
|  |             "fields": { | ||||||
|  |                 "name": "Unknown foreign key", | ||||||
|  |                 "meta_data": [ | ||||||
|  |                     "doesnotexist", | ||||||
|  |                     "metadata" | ||||||
|  |                 ] | ||||||
|  |             } | ||||||
|  |         }""".replace("\n", "") | ||||||
|  |         key = ["doesnotexist", "metadata"] | ||||||
|  |         expected = "(serializers.category:pk=1) field_value was '%r'" % key | ||||||
|  |         with self.assertRaisesMessage(DeserializationError, expected): | ||||||
|  |             list(serializers.deserialize('jsonl', test_string)) | ||||||
|  |  | ||||||
|  |     def test_helpful_error_message_for_many2many_non_natural(self): | ||||||
|  |         """ | ||||||
|  |         Invalid many-to-many keys should throw a helpful error message. | ||||||
|  |         """ | ||||||
|  |         test_strings = [ | ||||||
|  |             """{ | ||||||
|  |                 "pk": 1, | ||||||
|  |                 "model": "serializers.article", | ||||||
|  |                 "fields": { | ||||||
|  |                     "author": 1, | ||||||
|  |                     "headline": "Unknown many to many", | ||||||
|  |                     "pub_date": "2014-09-15T10:35:00", | ||||||
|  |                     "categories": [1, "doesnotexist"] | ||||||
|  |                 } | ||||||
|  |             }""", | ||||||
|  |             """{ | ||||||
|  |                 "pk": 1, | ||||||
|  |                 "model": "serializers.author", | ||||||
|  |                 "fields": { | ||||||
|  |                     "name": "Agnes" | ||||||
|  |                 } | ||||||
|  |             }""", | ||||||
|  |             """{ | ||||||
|  |                 "pk": 1, | ||||||
|  |                 "model": "serializers.category", | ||||||
|  |                 "fields": { | ||||||
|  |                     "name": "Reference" | ||||||
|  |                 } | ||||||
|  |             }""" | ||||||
|  |         ] | ||||||
|  |         # One record per line. | ||||||
|  |         test_string = "\n".join([s.replace("\n", "") for s in test_strings]) | ||||||
|  |         expected = "(serializers.article:pk=1) field_value was 'doesnotexist'" | ||||||
|  |         with self.assertRaisesMessage(DeserializationError, expected): | ||||||
|  |             list(serializers.deserialize('jsonl', test_string)) | ||||||
|  |  | ||||||
|  |     def test_helpful_error_message_for_many2many_natural1(self): | ||||||
|  |         """ | ||||||
|  |         Invalid many-to-many keys should throw a helpful error message. | ||||||
|  |         This tests the code path where one of a list of natural keys is invalid. | ||||||
|  |         """ | ||||||
|  |         test_strings = [ | ||||||
|  |             """{ | ||||||
|  |                 "pk": 1, | ||||||
|  |                 "model": "serializers.categorymetadata", | ||||||
|  |                 "fields": { | ||||||
|  |                     "kind": "author", | ||||||
|  |                     "name": "meta1", | ||||||
|  |                     "value": "Agnes" | ||||||
|  |                 } | ||||||
|  |             }""", | ||||||
|  |             """{ | ||||||
|  |                 "pk": 1, | ||||||
|  |                 "model": "serializers.article", | ||||||
|  |                 "fields": { | ||||||
|  |                     "author": 1, | ||||||
|  |                     "headline": "Unknown many to many", | ||||||
|  |                     "pub_date": "2014-09-15T10:35:00", | ||||||
|  |                     "meta_data": [ | ||||||
|  |                         ["author", "meta1"], | ||||||
|  |                         ["doesnotexist", "meta1"], | ||||||
|  |                         ["author", "meta1"] | ||||||
|  |                     ] | ||||||
|  |                 } | ||||||
|  |             }""", | ||||||
|  |             """{ | ||||||
|  |                 "pk": 1, | ||||||
|  |                 "model": "serializers.author", | ||||||
|  |                 "fields": { | ||||||
|  |                     "name": "Agnes" | ||||||
|  |                 } | ||||||
|  |             }""" | ||||||
|  |         ] | ||||||
|  |         # One record per line. | ||||||
|  |         test_string = "\n".join([s.replace("\n", "") for s in test_strings]) | ||||||
|  |         key = ["doesnotexist", "meta1"] | ||||||
|  |         expected = "(serializers.article:pk=1) field_value was '%r'" % key | ||||||
|  |         with self.assertRaisesMessage(DeserializationError, expected): | ||||||
|  |             for obj in serializers.deserialize('jsonl', test_string): | ||||||
|  |                 obj.save() | ||||||
|  |  | ||||||
|  |     def test_helpful_error_message_for_many2many_natural2(self): | ||||||
|  |         """ | ||||||
|  |         Invalid many-to-many keys should throw a helpful error message. This | ||||||
|  |         tests the code path where a natural many-to-many key has only a single | ||||||
|  |         value. | ||||||
|  |         """ | ||||||
|  |         test_strings = [ | ||||||
|  |             """{ | ||||||
|  |                 "pk": 1, | ||||||
|  |                 "model": "serializers.article", | ||||||
|  |                 "fields": { | ||||||
|  |                     "author": 1, | ||||||
|  |                     "headline": "Unknown many to many", | ||||||
|  |                     "pub_date": "2014-09-15T10:35:00", | ||||||
|  |                     "meta_data": [1, "doesnotexist"] | ||||||
|  |                 } | ||||||
|  |             }""", | ||||||
|  |             """{ | ||||||
|  |                 "pk": 1, | ||||||
|  |                 "model": "serializers.categorymetadata", | ||||||
|  |                 "fields": { | ||||||
|  |                     "kind": "author", | ||||||
|  |                     "name": "meta1", | ||||||
|  |                     "value": "Agnes" | ||||||
|  |                 } | ||||||
|  |             }""", | ||||||
|  |             """{ | ||||||
|  |                 "pk": 1, | ||||||
|  |                 "model": "serializers.author", | ||||||
|  |                 "fields": { | ||||||
|  |                     "name": "Agnes" | ||||||
|  |                 } | ||||||
|  |             }""" | ||||||
|  |         ] | ||||||
|  |         # One record per line. | ||||||
|  |         test_string = "\n".join([s.replace("\n", "") for s in test_strings]) | ||||||
|  |         expected = "(serializers.article:pk=1) field_value was 'doesnotexist'" | ||||||
|  |         with self.assertRaisesMessage(DeserializationError, expected): | ||||||
|  |             for obj in serializers.deserialize('jsonl', test_string, ignore=False): | ||||||
|  |                 obj.save() | ||||||
|  |  | ||||||
|  |     def test_helpful_error_message_for_many2many_not_iterable(self): | ||||||
|  |         """ | ||||||
|  |         Not iterable many-to-many field value throws a helpful error message. | ||||||
|  |         """ | ||||||
|  |         test_string = """{ | ||||||
|  |             "pk": 1, | ||||||
|  |             "model": "serializers.m2mdata", | ||||||
|  |             "fields": {"data": null} | ||||||
|  |         }""".replace("\n", "") | ||||||
|  |  | ||||||
|  |         expected = "(serializers.m2mdata:pk=1) field_value was 'None'" | ||||||
|  |         with self.assertRaisesMessage(DeserializationError, expected): | ||||||
|  |             next(serializers.deserialize('jsonl', test_string, ignore=False)) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class JsonSerializerTransactionTestCase(SerializersTransactionTestBase, TransactionTestCase): | ||||||
|  |     serializer_name = "jsonl" | ||||||
|  |     fwd_ref_str = [ | ||||||
|  |         """{ | ||||||
|  |             "pk": 1, | ||||||
|  |             "model": "serializers.article", | ||||||
|  |             "fields": { | ||||||
|  |                 "headline": "Forward references pose no problem", | ||||||
|  |                 "pub_date": "2006-06-16T15:00:00", | ||||||
|  |                 "categories": [1], | ||||||
|  |                 "author": 1 | ||||||
|  |             } | ||||||
|  |         }""", | ||||||
|  |         """{ | ||||||
|  |             "pk": 1, | ||||||
|  |             "model": "serializers.category", | ||||||
|  |             "fields": { | ||||||
|  |                 "name": "Reference" | ||||||
|  |             } | ||||||
|  |         }""", | ||||||
|  |         """{ | ||||||
|  |             "pk": 1, | ||||||
|  |             "model": "serializers.author", | ||||||
|  |             "fields": { | ||||||
|  |                 "name": "Agnes" | ||||||
|  |             } | ||||||
|  |         }""" | ||||||
|  |     ] | ||||||
|  |     fwd_ref_str = "\n".join([s.replace("\n", "") for s in fwd_ref_str]) | ||||||
		Reference in New Issue
	
	Block a user