Skip to content

Commit b493b7e

Browse files
committed
unicode: Converted the template output and database I/O interfaces to
understand unicode strings. All tests pass (except for one commented out with "XFAIL"), but this is untested against database servers whose encoding is neither UTF-8 nor ASCII. git-svn-id: http://code.djangoproject.com/svn/django/branches/unicode@4971 bcc190cf-cafb-0310-a4f2-bffc1f526a37
1 parent 232b7ac commit b493b7e

File tree

21 files changed

+308
-117
lines changed

21 files changed

+308
-117
lines changed

django/db/backends/mysql/base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ def cursor(self):
8181
kwargs = {
8282
'conv': django_conversions,
8383
'charset': 'utf8',
84-
'use_unicode': False,
84+
'use_unicode': True,
8585
}
8686
if settings.DATABASE_USER:
8787
kwargs['user'] = settings.DATABASE_USER

django/db/backends/mysql_old/base.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ def cursor(self):
8989
'db': settings.DATABASE_NAME,
9090
'passwd': settings.DATABASE_PASSWORD,
9191
'conv': django_conversions,
92+
'use_unicode': True,
9293
}
9394
if settings.DATABASE_HOST.startswith('/'):
9495
kwargs['unix_socket'] = settings.DATABASE_HOST
@@ -101,6 +102,7 @@ def cursor(self):
101102
cursor = self.connection.cursor()
102103
if self.connection.get_server_info() >= '4.1':
103104
cursor.execute("SET NAMES 'utf8'")
105+
cursor.execute("SET CHARACTER SET 'utf8'")
104106
else:
105107
cursor = self.connection.cursor()
106108
if settings.DEBUG:

django/db/backends/postgresql/base.py

Lines changed: 41 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@
44
Requires psycopg 1: http://initd.org/projects/psycopg1
55
"""
66

7+
from django.utils.encoding import smart_str, smart_unicode
78
from django.db.backends import util
9+
from django.db.backends.postgresql.encodings import ENCODING_MAP
810
try:
911
import psycopg as Database
1012
except ImportError, e:
@@ -20,30 +22,28 @@
2022
# Import copy of _thread_local.py from Python 2.4
2123
from django.utils._threading_local import local
2224

23-
def smart_basestring(s, charset):
24-
if isinstance(s, unicode):
25-
return s.encode(charset)
26-
return s
27-
2825
class UnicodeCursorWrapper(object):
2926
"""
3027
A thin wrapper around psycopg cursors that allows them to accept Unicode
3128
strings as params.
3229
3330
This is necessary because psycopg doesn't apply any DB quoting to
3431
parameters that are Unicode strings. If a param is Unicode, this will
35-
convert it to a bytestring using DEFAULT_CHARSET before passing it to
36-
psycopg.
32+
convert it to a bytestring using database client's encoding before passing
33+
it to psycopg.
34+
35+
All results retrieved from the database are converted into Unicode strings
36+
before being returned to the caller.
3737
"""
3838
def __init__(self, cursor, charset):
3939
self.cursor = cursor
4040
self.charset = charset
4141

4242
def execute(self, sql, params=()):
    """
    Run a single statement on the wrapped cursor.

    Both the statement and every parameter are passed through smart_str()
    with this wrapper's charset before being handed to the underlying
    cursor, whose return value is passed back unchanged.
    """
    encoded_sql = smart_str(sql, self.charset)
    # The third positional argument (True) is forwarded to smart_str() for
    # each parameter -- presumably its "strings only" switch; confirm
    # against django.utils.encoding.smart_str.
    encoded_params = [smart_str(value, self.charset, True) for value in params]
    return self.cursor.execute(encoded_sql, encoded_params)
4444

4545
def executemany(self, sql, param_list):
    """
    Run ``sql`` on the wrapped cursor once per parameter sequence.

    ``param_list`` is an iterable of parameter sequences. The statement and
    every individual parameter are converted with smart_str() using this
    wrapper's charset, with exactly the arguments execute() uses, so a
    value is encoded the same way whichever method it is sent through.
    Returns whatever the underlying cursor's executemany() returns.
    """
    charset = self.charset
    # execute() passes True as smart_str()'s third positional argument for
    # every parameter; do the same here so the two methods cannot diverge.
    # NOTE(review): that argument is presumably "strings_only" (leave
    # non-string values such as ints or None untouched) -- confirm against
    # django.utils.encoding.smart_str.
    new_param_list = [
        tuple([smart_str(p, charset, True) for p in params])
        for params in param_list
    ]
    # The statement itself is encoded too, as execute() does.
    return self.cursor.executemany(smart_str(sql, charset), new_param_list)
4848

4949
def __getattr__(self, attr):
@@ -53,6 +53,7 @@ def __getattr__(self, attr):
5353
return getattr(self.cursor, attr)
5454

5555
postgres_version = None
56+
client_encoding = None
5657

5758
class DatabaseWrapper(local):
5859
def __init__(self, **kwargs):
@@ -82,11 +83,21 @@ def cursor(self):
8283
cursor = self.connection.cursor()
8384
if set_tz:
8485
cursor.execute("SET TIME ZONE %s", [settings.TIME_ZONE])
85-
cursor = UnicodeCursorWrapper(cursor, settings.DEFAULT_CHARSET)
86+
if not settings.DATABASE_CHARSET:
87+
cursor.execute("SHOW client_encoding")
88+
encoding = ENCODING_MAP[cursor.fetchone()[0]]
89+
else:
90+
encoding = settings.DATABASE_CHARSET
91+
cursor = UnicodeCursorWrapper(cursor, encoding)
92+
global client_encoding
93+
if not client_encoding:
94+
# We assume the client encoding isn't going to change for random
95+
# reasons.
96+
client_encoding = encoding
8697
global postgres_version
8798
if not postgres_version:
8899
cursor.execute("SELECT version()")
89-
postgres_version = [int(val) for val in cursor.fetchone()[0].split()[1].split('.')]
100+
postgres_version = [int(val) for val in cursor.fetchone()[0].split()[1].split('.')]
90101
if settings.DEBUG:
91102
return util.CursorDebugWrapper(cursor, self)
92103
return cursor
@@ -148,7 +159,7 @@ def get_random_function_sql():
148159

149160
def get_deferrable_sql():
150161
return " DEFERRABLE INITIALLY DEFERRED"
151-
162+
152163
def get_fulltext_search_sql(field_name):
153164
raise NotImplementedError
154165

@@ -162,20 +173,21 @@ def get_sql_flush(style, tables, sequences):
162173
"""Return a list of SQL statements required to remove all data from
163174
all tables in the database (without actually removing the tables
164175
themselves) and put the database in an empty 'initial' state
165-
166-
"""
176+
177+
"""
167178
if tables:
168179
if postgres_version[0] >= 8 and postgres_version[1] >= 1:
169-
# Postgres 8.1+ can do 'TRUNCATE x, y, z...;'. In fact, it *has to* in order to be able to
170-
# truncate tables referenced by a foreign key in any other table. The result is a
171-
# single SQL TRUNCATE statement.
180+
# Postgres 8.1+ can do 'TRUNCATE x, y, z...;'. In fact, it *has to*
181+
# in order to be able to truncate tables referenced by a foreign
182+
# key in any other table. The result is a single SQL TRUNCATE
183+
# statement.
172184
sql = ['%s %s;' % \
173185
(style.SQL_KEYWORD('TRUNCATE'),
174186
style.SQL_FIELD(', '.join([quote_name(table) for table in tables]))
175187
)]
176188
else:
177-
# Older versions of Postgres can't do TRUNCATE in a single call, so they must use
178-
# a simple delete.
189+
# Older versions of Postgres can't do TRUNCATE in a single call, so
190+
# they must use a simple delete.
179191
sql = ['%s %s %s;' % \
180192
(style.SQL_KEYWORD('DELETE'),
181193
style.SQL_KEYWORD('FROM'),
@@ -237,7 +249,15 @@ def get_sql_sequence_reset(style, model_list):
237249
style.SQL_KEYWORD('FROM'),
238250
style.SQL_TABLE(f.m2m_db_table())))
239251
return output
240-
252+
253+
def typecast_string(s):
    """
    Typecast hook that turns string values read from the database into
    unicode strings.

    Falsy values (for example None or an empty string) are handed back
    untouched; anything else is converted by smart_unicode() using the
    module-level client_encoding.
    """
    if s:
        return smart_unicode(s, client_encoding)
    return s
260+
241261
# Register these custom typecasts, because Django expects dates/times to be
242262
# in Python's native (standard-library) datetime/time format, whereas psycopg
243263
# use mx.DateTime by default.
@@ -248,6 +268,7 @@ def get_sql_sequence_reset(style, model_list):
248268
Database.register_type(Database.new_type((1083,1266), "TIME", util.typecast_time))
249269
Database.register_type(Database.new_type((1114,1184), "TIMESTAMP", util.typecast_timestamp))
250270
Database.register_type(Database.new_type((16,), "BOOLEAN", util.typecast_boolean))
271+
Database.register_type(Database.new_type(Database.types[1043].values, 'STRING', typecast_string))
251272

252273
OPERATOR_MAPPING = {
253274
'exact': '= %s',
django/db/backends/postgresql/encodings.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
# Mapping between PostgreSQL encodings and Python codec names. This mapping
# doesn't exist in psycopg, so we have to maintain it by hand (using
# information from section 21.2.1 in the PostgreSQL manual).
#
# Keys are the encoding names PostgreSQL reports (e.g. from
# "SHOW client_encoding"); values must be names Python's codec registry can
# resolve. The table is defined exactly once: it used to appear twice in a
# row, with the second copy silently overriding the first.
ENCODING_MAP = {
    "BIG5": 'big5-tw',
    "EUC_CN": 'gb2312',
    "EUC_JP": 'euc_jp',
    "EUC_KR": 'euc_kr',
    "GB18030": 'gb18030',
    "GBK": 'gbk',
    "ISO_8859_5": 'iso8859_5',
    "ISO_8859_6": 'iso8859_6',
    "ISO_8859_7": 'iso8859_7',
    "ISO_8859_8": 'iso8859_8',
    "JOHAB": 'johab',
    # The Python codec is 'koi8_r'; the previous value 'koi18_r' does not
    # exist and raised LookupError as soon as it was used.
    "KOI8": 'koi8_r',
    "KOI8R": 'koi8_r',
    # Misspelled key from the original table, retained only so that any
    # existing lookup of it keeps working.
    "KOI18R": 'koi8_r',
    "LATIN1": 'latin_1',
    "LATIN2": 'iso8859_2',
    "LATIN3": 'iso8859_3',
    "LATIN4": 'iso8859_4',
    "LATIN5": 'iso8859_9',
    "LATIN6": 'iso8859_10',
    "LATIN7": 'iso8859_13',
    "LATIN8": 'iso8859_14',
    "LATIN9": 'iso8859_15',
    "SJIS": 'shift_jis',
    "SQL_ASCII": 'ascii',
    "UHC": 'cp949',
    "UTF8": 'utf-8',
    "WIN866": 'cp866',
    "WIN874": 'cp874',
    "WIN1250": 'cp1250',
    "WIN1251": 'cp1251',
    "WIN1252": 'cp1252',
    "WIN1256": 'cp1256',
    "WIN1258": 'cp1258',

    # Unsupported (no equivalents in codecs module):
    # EUC_TW
    # LATIN10
}

django/db/backends/postgresql_psycopg2/base.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from django.db.backends import util
88
try:
99
import psycopg2 as Database
10+
import psycopg2.extensions
1011
except ImportError, e:
1112
from django.core.exceptions import ImproperlyConfigured
1213
raise ImproperlyConfigured, "Error loading psycopg2 module: %s" % e
@@ -20,6 +21,8 @@
2021
# Import copy of _thread_local.py from Python 2.4
2122
from django.utils._threading_local import local
2223

24+
psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
25+
2326
postgres_version = None
2427

2528
class DatabaseWrapper(local):
@@ -47,6 +50,7 @@ def cursor(self):
4750
conn_string += " port=%s" % settings.DATABASE_PORT
4851
self.connection = Database.connect(conn_string, **self.options)
4952
self.connection.set_isolation_level(1) # make transactions transparent to all cursors
53+
self.connection.set_client_encoding('UTF8')
5054
cursor = self.connection.cursor()
5155
cursor.tzinfo_factory = None
5256
if set_tz:

django/db/backends/sqlite3/base.py

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,6 @@
2626
Database.register_converter("timestamp", util.typecast_timestamp)
2727
Database.register_converter("TIMESTAMP", util.typecast_timestamp)
2828

29-
def utf8rowFactory(cursor, row):
30-
def utf8(s):
31-
if type(s) == unicode:
32-
return s.encode("utf-8")
33-
else:
34-
return s
35-
return [utf8(r) for r in row]
36-
3729
try:
3830
# Only exists in Python 2.4+
3931
from threading import local
@@ -60,7 +52,6 @@ def cursor(self):
6052
self.connection.create_function("django_extract", 2, _sqlite_extract)
6153
self.connection.create_function("django_date_trunc", 2, _sqlite_date_trunc)
6254
cursor = self.connection.cursor(factory=SQLiteCursorWrapper)
63-
cursor.row_factory = utf8rowFactory
6455
if settings.DEBUG:
6556
return util.CursorDebugWrapper(cursor, self)
6657
else:
@@ -76,8 +67,9 @@ def _rollback(self):
7667

7768
def close(self):
7869
from django.conf import settings
79-
# If database is in memory, closing the connection destroys the database.
80-
# To prevent accidental data loss, ignore close requests on an in-memory db.
70+
# If database is in memory, closing the connection destroys the
71+
# database. To prevent accidental data loss, ignore close requests on
72+
# an in-memory db.
8173
if self.connection is not None and settings.DATABASE_NAME != ":memory:":
8274
self.connection.close()
8375
self.connection = None
@@ -153,10 +145,10 @@ def get_pk_default_value():
153145
return "NULL"
154146

155147
def get_sql_flush(style, tables, sequences):
156-
"""Return a list of SQL statements required to remove all data from
157-
all tables in the database (without actually removing the tables
158-
themselves) and put the database in an empty 'initial' state
159-
148+
"""
149+
Return a list of SQL statements required to remove all data from all tables
150+
in the database (without actually removing the tables themselves) and put
151+
the database in an empty 'initial' state.
160152
"""
161153
# NB: The generated SQL below is specific to SQLite
162154
# Note: The DELETE FROM... SQL generated below works for SQLite databases
@@ -174,7 +166,7 @@ def get_sql_sequence_reset(style, model_list):
174166
"Returns a list of the SQL statements to reset sequences for the given models."
175167
# No sequence reset required
176168
return []
177-
169+
178170
def _sqlite_date_trunc(lookup_type, dt):
179171
try:
180172
dt = util.typecast_timestamp(dt)
@@ -204,3 +196,4 @@ def _sqlite_date_trunc(lookup_type, dt):
204196
'istartswith': "LIKE %s ESCAPE '\\'",
205197
'iendswith': "LIKE %s ESCAPE '\\'",
206198
}
199+

0 commit comments

Comments
 (0)