Skip to content

Commit 05ad606

Browse files
authored
🦺 improve robustness and correctness of MySQL-to-SQLite migrations (#111)
1 parent 646888f commit 05ad606

8 files changed

+568
-33
lines changed

src/mysql_to_sqlite3/transporter.py

Lines changed: 274 additions & 31 deletions
Large diffs are not rendered by default.
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
import re
2+
from unittest.mock import MagicMock, patch
3+
4+
from mysql_to_sqlite3.transporter import MySQLtoSQLite
5+
6+
7+
def _make_base_instance():
    """Create a ``MySQLtoSQLite`` object for unit tests, bypassing its constructor.

    ``__init__`` is patched to a no-op while the object is instantiated. The
    private attributes listed below are then assigned by hand, with the MySQL
    dict cursor and the logger replaced by ``MagicMock`` objects.
    """
    with patch.object(MySQLtoSQLite, "__init__", return_value=None):
        transporter = MySQLtoSQLite()  # type: ignore[call-arg]

    stubbed_attributes = {
        "_mysql_cur_dict": MagicMock(),
        "_mysql_database": "db",
        "_sqlite_json1_extension_enabled": False,
        "_collation": "BINARY",
        "_prefix_indices": False,
        "_without_tables": False,
        "_without_foreign_keys": True,
        "_logger": MagicMock(),
        "_sqlite_strict": False,
        # Track index names for uniqueness
        "_seen_sqlite_index_names": set(),
        "_sqlite_index_name_counters": {},
    }
    for attribute, value in stubbed_attributes.items():
        setattr(transporter, attribute, value)
    return transporter
23+
24+
25+
def test_show_columns_backticks_are_escaped_in_mysql_query() -> None:
    """A backtick in the table name is doubled in the MySQL SHOW COLUMNS query.

    The generated SQLite statement is expected to keep the backtick verbatim
    inside a double-quoted identifier.
    """
    transporter = _make_base_instance()
    cursor = transporter._mysql_cur_dict

    # Record every statement handed to the mocked cursor's execute().
    issued_statements = []

    def record_statement(sql: str, *_, **__):
        issued_statements.append(sql)

    cursor.execute.side_effect = record_statement

    primary_key_column = {
        "Field": "id",
        "Type": "INT",
        "Null": "NO",
        "Default": None,
        "Key": "PRI",
        "Extra": "",
    }
    # First fetchall() answers SHOW COLUMNS, the second (empty) the STATISTICS query.
    cursor.fetchall.side_effect = [[primary_key_column], []]
    # Table-name collision check reports zero matches.
    cursor.fetchone.return_value = {"count": 0}

    ddl = transporter._build_create_table_sql("we`ird")
    assert ddl.startswith('CREATE TABLE IF NOT EXISTS "we`ird" (')

    # The very first statement must be SHOW COLUMNS with the backtick escaped.
    assert issued_statements
    first_statement = issued_statements[0]
    assert first_statement == "SHOW COLUMNS FROM `we``ird`"
59+
60+
61+
def test_identifiers_with_double_quotes_are_safely_quoted_in_create_and_index() -> None:
    """Double quotes inside table/column names are doubled in the emitted SQLite DDL."""
    transporter = _make_base_instance()
    # Ensure an index is emitted with a deterministic name prefix.
    transporter._prefix_indices = True

    cursor = transporter._mysql_cur_dict
    column_rows = [
        {
            "Field": 'na"me',
            "Type": "VARCHAR(10)",
            "Null": "YES",
            "Default": None,
            "Key": "",
            "Extra": "",
        },
    ]
    index_rows = [
        {
            "name": "idx",
            "primary": 0,
            "unique": 0,
            "auto_increment": 0,
            "columns": 'na"me',
            "types": "VARCHAR(10)",
        }
    ]
    # fetchall() yields the SHOW COLUMNS rows first, then the STATISTICS rows.
    cursor.fetchall.side_effect = [column_rows, index_rows]
    cursor.fetchone.return_value = {"count": 0}

    ddl = transporter._build_create_table_sql('ta"ble')

    # The column definition must carry the doubled quote; either type spelling is accepted.
    accepted_column_fragments = ('"na""me" VARCHAR(10)', '"na""me" TEXT')
    assert any(fragment in ddl for fragment in accepted_column_fragments)

    # Collapse whitespace runs so the index statement can be matched on one line.
    collapsed = re.compile(r"\s+").sub(" ", ddl)
    assert 'CREATE INDEX IF NOT EXISTS "ta""ble_idx" ON "ta""ble" ("na""me")' in collapsed
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
import pytest
2+
3+
from mysql_to_sqlite3.sqlite_utils import CollatingSequences
4+
from mysql_to_sqlite3.transporter import MySQLtoSQLite
5+
6+
7+
class TestCollationSqlglotAugmented:
    """Checks on when ``MySQLtoSQLite._data_type_collation_sequence`` emits a COLLATE clause."""

    @pytest.mark.parametrize("mysql_type", ["char varying(12)", "CHARACTER VARYING(12)"])
    def test_collation_applied_for_char_varying_synonyms(self, mysql_type: str) -> None:
        """Both spellings of CHAR VARYING receive the NOCASE clause."""
        clause = MySQLtoSQLite._data_type_collation_sequence(
            column_type=mysql_type,
            collation=CollatingSequences.NOCASE,
        )
        assert clause == f"COLLATE {CollatingSequences.NOCASE}"

    def test_collation_applied_for_national_character_varying(self) -> None:
        """NATIONAL CHARACTER VARYING receives the NOCASE clause."""
        clause = MySQLtoSQLite._data_type_collation_sequence(
            column_type="national character varying(15)",
            collation=CollatingSequences.NOCASE,
        )
        assert clause == f"COLLATE {CollatingSequences.NOCASE}"

    def test_no_collation_for_json(self) -> None:
        """JSON columns get no collation, regardless of case or synonym handling."""
        clause = MySQLtoSQLite._data_type_collation_sequence(
            column_type="json",
            collation=CollatingSequences.NOCASE,
        )
        assert clause == ""

    def test_no_collation_when_binary_collation(self) -> None:
        """BINARY collation disables the COLLATE clause entirely."""
        clause = MySQLtoSQLite._data_type_collation_sequence(
            column_type="VARCHAR(10)",
            collation=CollatingSequences.BINARY,
        )
        assert clause == ""

    @pytest.mark.parametrize("numeric_synonym", ["double precision", "FIXED(10,2)"])
    def test_no_collation_for_numeric_synonyms(self, numeric_synonym: str) -> None:
        """Numeric type synonyms get an empty collation clause."""
        clause = MySQLtoSQLite._data_type_collation_sequence(
            column_type=numeric_synonym,
            collation=CollatingSequences.NOCASE,
        )
        assert clause == ""
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
import pytest
2+
3+
from mysql_to_sqlite3.transporter import MySQLtoSQLite
4+
5+
6+
class TestDefaultsSqlglotEnhanced:
    """Checks on ``MySQLtoSQLite._translate_default_from_mysql_to_sqlite`` output."""

    @staticmethod
    def _generated_default(expr, **options):
        """Translate *expr* as a ``DEFAULT_GENERATED`` column default."""
        return MySQLtoSQLite._translate_default_from_mysql_to_sqlite(
            expr, column_extra="DEFAULT_GENERATED", **options
        )

    @pytest.mark.parametrize(
        "expr,expected",
        [
            ("CURRENT_TIME", "DEFAULT CURRENT_TIME"),
            ("CURRENT_DATE", "DEFAULT CURRENT_DATE"),
            ("CURRENT_TIMESTAMP", "DEFAULT CURRENT_TIMESTAMP"),
        ],
    )
    def test_current_tokens_passthrough(self, expr: str, expected: str) -> None:
        """CURRENT_* tokens come back unchanged behind ``DEFAULT``."""
        translated = self._generated_default(expr)
        assert translated == expected

    def test_null_literal_generated(self) -> None:
        """A generated NULL literal becomes ``DEFAULT NULL``."""
        translated = self._generated_default("NULL")
        assert translated == "DEFAULT NULL"

    @pytest.mark.parametrize(
        "expr,boolean_type,expected",
        [
            ("true", "BOOLEAN", {"DEFAULT(TRUE)", "DEFAULT '1'"}),
            ("false", "BOOLEAN", {"DEFAULT(FALSE)", "DEFAULT '0'"}),
            ("true", "INTEGER", {"DEFAULT '1'"}),
            ("false", "INTEGER", {"DEFAULT '0'"}),
        ],
    )
    def test_boolean_tokens_generated(self, expr: str, boolean_type: str, expected: set) -> None:
        """Boolean tokens translate to one of the accepted spellings for the column type."""
        translated = self._generated_default(expr, column_type=boolean_type)
        assert translated in expected

    def test_parenthesized_string_literal_generated(self) -> None:
        """A parenthesized string literal is accepted with or without its parentheses."""
        # Either DEFAULT 'abc' or DEFAULT ('abc') depending on normalization
        accepted = {"DEFAULT 'abc'", "DEFAULT ('abc')"}
        assert self._generated_default("('abc')") in accepted

    def test_parenthesized_numeric_literal_generated(self) -> None:
        """A parenthesized numeric literal is accepted with or without its parentheses."""
        accepted = {"DEFAULT 42", "DEFAULT (42)"}
        assert self._generated_default("(42)") in accepted

    def test_constant_arithmetic_expression_generated(self) -> None:
        """A constant arithmetic expression is emitted with spaced operators."""
        # sqlglot formats with spaces for sqlite dialect
        accepted = {"DEFAULT 1 + 2 * 3", "DEFAULT (1 + 2 * 3)"}
        assert self._generated_default("1+2*3") in accepted

    def test_hex_blob_literal_generated(self) -> None:
        """A hex blob literal is kept as-is (compared case-insensitively)."""
        # Should recognize as blob literal and keep as-is
        translated = self._generated_default("x'41'")
        assert translated.upper() == "DEFAULT X'41'"

    def test_plain_string_escaping_single_quote(self) -> None:
        """A single quote inside a plain string default is doubled."""
        translated = MySQLtoSQLite._translate_default_from_mysql_to_sqlite("O'Reilly")
        assert translated == "DEFAULT 'O''Reilly'"

tests/unit/test_indices_prefix_and_uniqueness.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def test_build_create_table_sql_prefix_indices_true_prefixes_index_names() -> No
4949
sql = inst._build_create_table_sql("users")
5050

5151
# With prefix_indices=True, the index name should be prefixed with table name
52-
assert 'CREATE INDEX IF NOT EXISTS "users_idx_name" ON "users" ("name");' in sql
52+
assert 'CREATE INDEX IF NOT EXISTS "users_idx_name" ON "users" ("name");' in sql
5353

5454

5555
def test_build_create_table_sql_collision_renamed_and_uniqueness_suffix() -> None:

tests/unit/test_types_and_defaults_extra.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,18 @@ def test_data_type_collation_sequence(self) -> None:
4040
def test_translate_default_common_keywords(self, default: str, expected: str) -> None:
4141
assert MySQLtoSQLite._translate_default_from_mysql_to_sqlite(default) == expected
4242

43+
def test_translate_default_current_timestamp_precision_transpiled(self) -> None:
    """``CURRENT_TIMESTAMP(6)`` is normalized to the bare SQLite token."""
    # MySQL allows fractional seconds: CURRENT_TIMESTAMP(6). Ensure it's normalized to SQLite token.
    mysql_default = "CURRENT_TIMESTAMP(6)"
    translated = MySQLtoSQLite._translate_default_from_mysql_to_sqlite(
        mysql_default,
        column_extra="DEFAULT_GENERATED",
    )
    assert translated == "DEFAULT CURRENT_TIMESTAMP"
49+
50+
def test_translate_default_generated_expr_fallback_quotes(self) -> None:
    """An unrecognized generated expression is emitted as a quoted string default."""
    # Unknown expressions should fall back to quoted string default for safety
    mysql_default = "uuid()"
    translated = MySQLtoSQLite._translate_default_from_mysql_to_sqlite(
        mysql_default,
        column_extra="DEFAULT_GENERATED",
    )
    assert translated == "DEFAULT 'uuid()'"
54+
4355
def test_translate_default_charset_introducer_str_hex_and_bin(self) -> None:
4456
# DEFAULT_GENERATED with charset introducer and hex (escaped as in MySQL)
4557
s = "_utf8mb4 X\\'41\\'" # hex for 'A'
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
import pytest
2+
3+
from mysql_to_sqlite3.transporter import MySQLtoSQLite
4+
5+
6+
class TestSqlglotAugmentedTypeTranslation:
    """Checks on ``MySQLtoSQLite._translate_type_from_mysql_to_sqlite`` for type synonyms."""

    @staticmethod
    def _to_sqlite(mysql_type, **options):
        """Run the MySQL-to-SQLite type translation for *mysql_type*."""
        return MySQLtoSQLite._translate_type_from_mysql_to_sqlite(mysql_type, **options)

    @pytest.mark.parametrize("mysql_type", ["double precision", "DOUBLE PRECISION", "DoUbLe PrEcIsIoN"])
    def test_double_precision_maps_to_numeric_type(self, mysql_type: str) -> None:
        """DOUBLE PRECISION, in any letter case, maps to DOUBLE or REAL."""
        # Prior mapper would resolve this to TEXT; sqlglot fallback should improve it
        accepted = {"DOUBLE", "REAL"}
        assert self._to_sqlite(mysql_type) in accepted

    def test_fixed_maps_to_decimal(self) -> None:
        """FIXED(p,s) maps to DECIMAL without a length."""
        # Normalize to DECIMAL (without length) to match existing style
        assert self._to_sqlite("fixed(10,2)") == "DECIMAL"

    def test_character_varying_keeps_length_as_varchar(self) -> None:
        """CHARACTER VARYING(n) maps to VARCHAR(n)."""
        assert self._to_sqlite("character varying(20)") == "VARCHAR(20)"

    def test_char_varying_keeps_length_as_varchar(self) -> None:
        """CHAR VARYING(n) maps to VARCHAR(n)."""
        assert self._to_sqlite("char varying(12)") == "VARCHAR(12)"

    def test_national_character_varying_maps_to_nvarchar(self) -> None:
        """NATIONAL CHARACTER VARYING(n) maps to NVARCHAR(n)."""
        assert self._to_sqlite("national character varying(15)") == "NVARCHAR(15)"

    def test_national_character_maps_to_nchar(self) -> None:
        """NATIONAL CHARACTER(n) maps to NCHAR(n)."""
        assert self._to_sqlite("national character(5)") == "NCHAR(5)"

    @pytest.mark.parametrize(
        "mysql_type,expected",
        [
            ("int unsigned", "INTEGER"),
            ("mediumint unsigned", "MEDIUMINT"),
            ("smallint unsigned", "SMALLINT"),
            ("tinyint unsigned", "TINYINT"),
            ("bigint unsigned", "BIGINT"),
        ],
    )
    def test_unsigned_variants_strip_unsigned(self, mysql_type: str, expected: str) -> None:
        """The UNSIGNED qualifier is dropped from integer types."""
        translated = self._to_sqlite(mysql_type)
        assert translated == expected

    def test_timestamp_maps_to_datetime(self) -> None:
        """TIMESTAMP maps to DATETIME."""
        assert self._to_sqlite("timestamp") == "DATETIME"

    def test_varbinary_and_blobs_map_to_blob(self) -> None:
        """VARBINARY(n) and MEDIUMBLOB both map to BLOB."""
        varbinary_result = self._to_sqlite("varbinary(16)")
        assert varbinary_result == "BLOB"
        mediumblob_result = self._to_sqlite("mediumblob")
        assert mediumblob_result == "BLOB"

    def test_char_maps_to_character_with_length(self) -> None:
        """CHAR(n) maps to CHARACTER(n)."""
        assert self._to_sqlite("char(3)") == "CHARACTER(3)"

    def test_json_mapping_respects_json1(self) -> None:
        """JSON maps to TEXT unless the JSON1 flag is enabled, then to JSON."""
        without_json1 = self._to_sqlite("json", sqlite_json1_extension_enabled=False)
        assert without_json1 == "TEXT"
        with_json1 = self._to_sqlite("json", sqlite_json1_extension_enabled=True)
        assert with_json1 == "JSON"

    def test_fallback_to_text_on_unknown_type(self) -> None:
        """An unrecognized type name falls back to TEXT."""
        assert self._to_sqlite("geography") == "TEXT"

    def test_enum_remains_text(self) -> None:
        """ENUM(...) maps to TEXT."""
        assert self._to_sqlite("enum('a','b')") == "TEXT"

tox.ini

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,4 +112,4 @@ import-order-style = pycharm
112112
application-import-names = flake8
113113

114114
[pylint]
115-
disable = C0209,C0301,C0411,R,W0107,W0622,C0103
115+
disable = C0209,C0301,C0411,R,W0107,W0622,C0103,C0302

0 commit comments

Comments
 (0)