1
0
mirror of https://github.com/django/django.git synced 2024-12-22 09:05:43 +00:00

Fixed #18392 -- Changed default mysql encoding to "utf8mb4".

This commit is contained in:
Ben Cail 2024-10-17 16:32:36 -04:00 committed by Sarah Boyce
parent 2debd018db
commit 9609b48b91
6 changed files with 28 additions and 35 deletions

View File

@ -215,7 +215,7 @@ class DatabaseWrapper(BaseDatabaseWrapper):
def get_connection_params(self):
kwargs = {
"conv": django_conversions,
"charset": "utf8",
"charset": "utf8mb4",
}
settings_dict = self.settings_dict
if settings_dict["USER"]:

View File

@ -71,21 +71,11 @@ class DatabaseFeatures(BaseDatabaseFeatures):
@cached_property
def test_collations(self):
charset = "utf8"
if (
self.connection.mysql_is_mariadb
and self.connection.mysql_version >= (10, 6)
) or (
not self.connection.mysql_is_mariadb
and self.connection.mysql_version >= (8, 0, 30)
):
# utf8 is an alias for utf8mb3 in MariaDB 10.6+ and MySQL 8.0.30+.
charset = "utf8mb3"
return {
"ci": f"{charset}_general_ci",
"non_default": f"{charset}_esperanto_ci",
"swedish_ci": f"{charset}_swedish_ci",
"virtual": f"{charset}_esperanto_ci",
"ci": "utf8mb4_general_ci",
"non_default": "utf8mb4_esperanto_ci",
"swedish_ci": "utf8mb4_swedish_ci",
"virtual": "utf8mb4_esperanto_ci",
}
test_now_utc_template = "UTC_TIMESTAMP(6)"
@ -99,10 +89,6 @@ class DatabaseFeatures(BaseDatabaseFeatures):
"db_functions.comparison.test_least.LeastTests."
"test_coalesce_workaround",
},
"Running on MySQL requires utf8mb4 encoding (#18392).": {
"model_fields.test_textfield.TextFieldTests.test_emoji",
"model_fields.test_charfield.TestCharField.test_emoji",
},
"MySQL doesn't support functional indexes on a function that "
"returns JSON": {
"schema.tests.SchemaTests.test_func_index_json_key_transform",

View File

@ -30,10 +30,7 @@ Fields
The ``max_length`` should be sufficient for many use cases. If you need
a longer length, please use a :ref:`custom user model
<specifying-custom-user-model>`. If you use MySQL with the ``utf8mb4``
encoding (recommended for proper Unicode support), specify at most
``max_length=191`` because MySQL can only create unique indexes with
191 characters in that case by default.
<specifying-custom-user-model>`.
.. attribute:: first_name

View File

@ -517,7 +517,7 @@ You can `create your database`_ using the command-line tools and this SQL:
.. code-block:: sql
CREATE DATABASE <dbname> CHARACTER SET utf8;
CREATE DATABASE <dbname> CHARACTER SET utf8mb4;
This ensures all tables and columns will use UTF-8 by default.
@ -542,21 +542,21 @@ Django doesn't provide an API to change them.
.. _documented thoroughly: https://dev.mysql.com/doc/refman/en/charset.html
By default, with a UTF-8 database, MySQL will use the
``utf8_general_ci`` collation. This results in all string equality
``utf8mb4_0900_ai_ci`` collation (the default for ``utf8mb4`` on MySQL 8.0+). This results in all string equality
comparisons being done in a *case-insensitive* manner. That is, ``"Fred"`` and
``"freD"`` are considered equal at the database level. If you have a unique
constraint on a field, it would be illegal to try to insert both ``"aa"`` and
``"AA"`` into the same column, since they compare as equal (and, hence,
non-unique) with the default collation. If you want case-sensitive comparisons
on a particular column or table, change the column or table to use the
``utf8_bin`` collation.
``utf8mb4_0900_as_cs`` collation.
Please note that according to `MySQL Unicode Character Sets`_, comparisons for
the ``utf8_general_ci`` collation are faster, but slightly less correct, than
comparisons for ``utf8_unicode_ci``. If this is acceptable for your application,
you should use ``utf8_general_ci`` because it is faster. If this is not acceptable
(for example, if you require German dictionary order), use ``utf8_unicode_ci``
because it is more accurate.
the ``utf8mb4_general_ci`` collation are faster, but slightly less correct,
than comparisons for ``utf8mb4_unicode_ci``. If this is acceptable for your
application, you should use ``utf8mb4_general_ci`` because it is faster. If
this is not acceptable (for example, if you require German dictionary order),
use ``utf8mb4_unicode_ci`` because it is more accurate.
.. _MySQL Unicode Character Sets: https://dev.mysql.com/doc/refman/en/charset-unicode-sets.html
@ -602,7 +602,7 @@ Here's a sample configuration which uses a MySQL option file::
database = NAME
user = USER
password = PASSWORD
default-character-set = utf8
default-character-set = utf8mb4
Several other `MySQLdb connection options`_ may be useful, such as ``ssl``,
``init_command``, and ``sql_mode``.

View File

@ -174,7 +174,9 @@ CSRF
Database backends
~~~~~~~~~~~~~~~~~
* ...
* MySQL connections now default to using the ``utf8mb4`` character set,
instead of ``utf8``, which is an alias for the deprecated character set
``utf8mb3``.
Decorators
~~~~~~~~~~
@ -385,6 +387,14 @@ Dropped support for PostgreSQL 13
Upstream support for PostgreSQL 13 ends in November 2025. Django 5.2 supports
PostgreSQL 14 and higher.
Changed MySQL connection character set default
----------------------------------------------
MySQL connections now default to using the ``utf8mb4`` character set, instead
of ``utf8``, which is an alias for the deprecated character set ``utf8mb3``.
``utf8mb3`` can still be selected through the ``charset`` key in the
``OPTIONS`` part of the ``DATABASES`` setting (for example,
``"OPTIONS": {"charset": "utf8mb3"}``), if needed for legacy databases.
Miscellaneous
-------------

View File

@ -112,7 +112,7 @@ class MySqlDbshellCommandTestCase(SimpleTestCase):
"--user=someuser",
"--host=somehost",
"--port=444",
"--default-character-set=utf8",
"--default-character-set=utf8mb4",
"somedbname",
]
expected_env = {"MYSQL_PWD": "somepassword"}
@ -124,7 +124,7 @@ class MySqlDbshellCommandTestCase(SimpleTestCase):
"PASSWORD": "somepassword",
"HOST": "somehost",
"PORT": 444,
"OPTIONS": {"charset": "utf8"},
"OPTIONS": {"charset": "utf8mb4"},
}
),
(expected_args, expected_env),