1
0
mirror of https://github.com/django/django.git synced 2024-12-22 09:05:43 +00:00

Fixed #18392 -- Changed default mysql encoding to "utf8mb4".

This commit is contained in:
Ben Cail 2024-10-17 16:32:36 -04:00 committed by Sarah Boyce
parent 2debd018db
commit 9609b48b91
6 changed files with 28 additions and 35 deletions

View File

@@ -215,7 +215,7 @@ class DatabaseWrapper(BaseDatabaseWrapper):
def get_connection_params(self): def get_connection_params(self):
kwargs = { kwargs = {
"conv": django_conversions, "conv": django_conversions,
"charset": "utf8", "charset": "utf8mb4",
} }
settings_dict = self.settings_dict settings_dict = self.settings_dict
if settings_dict["USER"]: if settings_dict["USER"]:

View File

@@ -71,21 +71,11 @@ class DatabaseFeatures(BaseDatabaseFeatures):
@cached_property @cached_property
def test_collations(self): def test_collations(self):
charset = "utf8"
if (
self.connection.mysql_is_mariadb
and self.connection.mysql_version >= (10, 6)
) or (
not self.connection.mysql_is_mariadb
and self.connection.mysql_version >= (8, 0, 30)
):
# utf8 is an alias for utf8mb3 in MariaDB 10.6+ and MySQL 8.0.30+.
charset = "utf8mb3"
return { return {
"ci": f"{charset}_general_ci", "ci": "utf8mb4_general_ci",
"non_default": f"{charset}_esperanto_ci", "non_default": "utf8mb4_esperanto_ci",
"swedish_ci": f"{charset}_swedish_ci", "swedish_ci": "utf8mb4_swedish_ci",
"virtual": f"{charset}_esperanto_ci", "virtual": "utf8mb4_esperanto_ci",
} }
test_now_utc_template = "UTC_TIMESTAMP(6)" test_now_utc_template = "UTC_TIMESTAMP(6)"
@@ -99,10 +89,6 @@ class DatabaseFeatures(BaseDatabaseFeatures):
"db_functions.comparison.test_least.LeastTests." "db_functions.comparison.test_least.LeastTests."
"test_coalesce_workaround", "test_coalesce_workaround",
}, },
"Running on MySQL requires utf8mb4 encoding (#18392).": {
"model_fields.test_textfield.TextFieldTests.test_emoji",
"model_fields.test_charfield.TestCharField.test_emoji",
},
"MySQL doesn't support functional indexes on a function that " "MySQL doesn't support functional indexes on a function that "
"returns JSON": { "returns JSON": {
"schema.tests.SchemaTests.test_func_index_json_key_transform", "schema.tests.SchemaTests.test_func_index_json_key_transform",

View File

@@ -30,10 +30,7 @@ Fields
The ``max_length`` should be sufficient for many use cases. If you need The ``max_length`` should be sufficient for many use cases. If you need
a longer length, please use a :ref:`custom user model a longer length, please use a :ref:`custom user model
<specifying-custom-user-model>`. If you use MySQL with the ``utf8mb4`` <specifying-custom-user-model>`.
encoding (recommended for proper Unicode support), specify at most
``max_length=191`` because MySQL can only create unique indexes with
191 characters in that case by default.
.. attribute:: first_name .. attribute:: first_name

View File

@@ -517,7 +517,7 @@ You can `create your database`_ using the command-line tools and this SQL:
.. code-block:: sql .. code-block:: sql
CREATE DATABASE <dbname> CHARACTER SET utf8; CREATE DATABASE <dbname> CHARACTER SET utf8mb4;
This ensures all tables and columns will use UTF-8 by default. This ensures all tables and columns will use UTF-8 by default.
@@ -542,21 +542,21 @@ Django doesn't provide an API to change them.
.. _documented thoroughly: https://dev.mysql.com/doc/refman/en/charset.html .. _documented thoroughly: https://dev.mysql.com/doc/refman/en/charset.html
By default, with a UTF-8 database, MySQL will use the By default, with a UTF-8 database, MySQL will use the
``utf8_general_ci`` collation. This results in all string equality ``utf8mb4_0900_ai_ci`` collation. This results in all string equality
comparisons being done in a *case-insensitive* manner. That is, ``"Fred"`` and comparisons being done in a *case-insensitive* manner. That is, ``"Fred"`` and
``"freD"`` are considered equal at the database level. If you have a unique ``"freD"`` are considered equal at the database level. If you have a unique
constraint on a field, it would be illegal to try to insert both ``"aa"`` and constraint on a field, it would be illegal to try to insert both ``"aa"`` and
``"AA"`` into the same column, since they compare as equal (and, hence, ``"AA"`` into the same column, since they compare as equal (and, hence,
non-unique) with the default collation. If you want case-sensitive comparisons non-unique) with the default collation. If you want case-sensitive comparisons
on a particular column or table, change the column or table to use the on a particular column or table, change the column or table to use the
``utf8_bin`` collation. ``utf8mb4_0900_as_cs`` collation.
Please note that according to `MySQL Unicode Character Sets`_, comparisons for Please note that according to `MySQL Unicode Character Sets`_, comparisons for
the ``utf8_general_ci`` collation are faster, but slightly less correct, than the ``utf8mb4_general_ci`` collation are faster, but slightly less correct,
comparisons for ``utf8_unicode_ci``. If this is acceptable for your application, than comparisons for ``utf8mb4_unicode_ci``. If this is acceptable for your
you should use ``utf8_general_ci`` because it is faster. If this is not acceptable application, you should use ``utf8mb4_general_ci`` because it is faster. If
(for example, if you require German dictionary order), use ``utf8_unicode_ci`` this is not acceptable (for example, if you require German dictionary order),
because it is more accurate. use ``utf8mb4_unicode_ci`` because it is more accurate.
.. _MySQL Unicode Character Sets: https://dev.mysql.com/doc/refman/en/charset-unicode-sets.html .. _MySQL Unicode Character Sets: https://dev.mysql.com/doc/refman/en/charset-unicode-sets.html
@@ -602,7 +602,7 @@ Here's a sample configuration which uses a MySQL option file::
database = NAME database = NAME
user = USER user = USER
password = PASSWORD password = PASSWORD
default-character-set = utf8 default-character-set = utf8mb4
Several other `MySQLdb connection options`_ may be useful, such as ``ssl``, Several other `MySQLdb connection options`_ may be useful, such as ``ssl``,
``init_command``, and ``sql_mode``. ``init_command``, and ``sql_mode``.

View File

@@ -174,7 +174,9 @@ CSRF
Database backends Database backends
~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~
* ... * MySQL connections now default to using the ``utf8mb4`` character set,
instead of ``utf8``, which is an alias for the deprecated character set
``utf8mb3``.
Decorators Decorators
~~~~~~~~~~ ~~~~~~~~~~
@@ -385,6 +387,14 @@ Dropped support for PostgreSQL 13
Upstream support for PostgreSQL 13 ends in November 2025. Django 5.2 supports Upstream support for PostgreSQL 13 ends in November 2025. Django 5.2 supports
PostgreSQL 14 and higher. PostgreSQL 14 and higher.
Changed MySQL connection character set default
----------------------------------------------
MySQL connections now default to using the ``utf8mb4`` character set, instead
of ``utf8``, which is an alias for the deprecated character set ``utf8mb3``.
``utf8mb3`` can be specified in the ``OPTIONS`` part of the ``DATABASES``
setting, if needed for legacy databases.
Miscellaneous Miscellaneous
------------- -------------

View File

@@ -112,7 +112,7 @@ class MySqlDbshellCommandTestCase(SimpleTestCase):
"--user=someuser", "--user=someuser",
"--host=somehost", "--host=somehost",
"--port=444", "--port=444",
"--default-character-set=utf8", "--default-character-set=utf8mb4",
"somedbname", "somedbname",
] ]
expected_env = {"MYSQL_PWD": "somepassword"} expected_env = {"MYSQL_PWD": "somepassword"}
@@ -124,7 +124,7 @@ class MySqlDbshellCommandTestCase(SimpleTestCase):
"PASSWORD": "somepassword", "PASSWORD": "somepassword",
"HOST": "somehost", "HOST": "somehost",
"PORT": 444, "PORT": 444,
"OPTIONS": {"charset": "utf8"}, "OPTIONS": {"charset": "utf8mb4"},
} }
), ),
(expected_args, expected_env), (expected_args, expected_env),