I'm trying to prove time complexity of merge sort algorithm in Isabelle HOL:
theory Merge_Sort_Time
imports Complex_Main "HOL-ex.BigO"
begin
fun merge :: "'a::linorder list ⇒ 'a list ⇒ 'a list" where
"merge xs [] = xs"
| "merge [] ys = ys"
| "merge (x#xs) (y#ys) = (if x ≤ y then x # merge xs (y#ys)
else y # merge (x#xs) ys)"
fun msort :: "'a::linorder list ⇒ 'a list" where
"msort [] = []"
| "msort [x] = [x]"
| "msort xs = (let n = length xs div 2 in
merge (msort (take n xs)) (msort (drop n xs)))"
fun msort_time :: "nat ⇒ real" where
"msort_time 0 = 0"
| "msort_time (Suc 0) = 1"
| "msort_time n = msort_time (n div 2) + msort_time (n - n div 2) + n"
lemma msort_time_upper_bound:
"msort_time ∈ O(λn. n * log 2 n)"
sorry
end
The problem is that the lemma is unprovable because it doesn't hold for n = 1.
The lemma is expanded into:
msort_time ∈ {h. ∃c. ∀x. ¦h x¦ ≤ c * ¦x * log 2 x¦}
But probably it should be:
msort_time ∈ {h. ∃c n. ∀x ≥ n. ¦h x¦ ≤ c * ¦x * log 2 x¦}
Maybe I can use another library for Big O notation?
I've tried to prove the following lemma instead but got stuck:
lemma sum_log:
"x > 0 ⟹ y > 0 ⟹
x * log 2 x + y * log 2 y ≤ (x + y) * log 2 (x + y)"
by (simp add: add_mono_thms_linordered_semiring(1) distrib_right)
lemma msort_time_upper_bound:
"∃c. ∀n ≥ 2. msort_time n ≤ c * n * log 2 n"
proof
let ?c = 3
show "∀n ≥ 2. msort_time n ≤ ?c * n * log 2 n"
proof
fix n :: nat
show "n ≥ 2 ⟶ msort_time n ≤ ?c * real n * log 2 n"
proof
assume "n ≥ 2"
then show "msort_time n ≤ ?c * real n * log 2 n"
proof (induct n rule: less_induct)
case (less n)
then show ?case
proof -
consider
(n1) "n < 2" |
(n2) "n = 2" |
(n3) "n = 3" |
(n4) "n ≥ 4"
by arith
then show "msort_time n ≤ ?c * real n * log 2 n"
proof (cases)
case n1
then show ?thesis by (simp add: less.prems less_le_not_le)
next
case n2
then show ?thesis by (simp add: numeral_2_eq_2)
next
case n3
then have "msort_time n = 8"
by (induct rule: msort_time.induct; simp)
also have "... < ?c * n" by (simp add: n3)
also have "... < ?c * n * log 2 n" by (simp add: n3)
finally show ?thesis by linarith
next
case n4
let ?a = "n div 2"
let ?b = "n - ?a"
have a: "2 ≤ ?a ∧ ?a < n" using n4 by simp_all
have b: "2 ≤ ?b ∧ ?b < n" using n4 by simp_all
have IH_a: "msort_time ?a ≤ ?c * real ?a * log 2 ?a"
using a less.hyps by blast
have IH_b: "msort_time ?b ≤ ?c * real ?b * log 2 ?b"
using b less.hyps by blast
have "msort_time n = msort_time ?a + msort_time ?b + n"
by (metis dual_order.strict_iff_not less.prems lessI msort_time.elims not_numeral_le_zero numeral_2_eq_2)
also have "... ≤ ?c * real ?a * log 2 ?a + ?c * real ?b * log 2 ?b + n"
using IH_a IH_b by argo
also have "... ≤ ?c * (real ?a + real ?b) * log 2 (real ?a + real ?b) + n"
using sum_log[of ?a ?b] b by linarith
also have "... ≤ ?c * n * log 2 n + n"
by simp
also have "... ≤ 2 * ?c * n * log 2 n"
using n4 mult_ge1_I by simp
finally show ?thesis
sorry
qed
qed
qed
qed
qed
qed
Could you please help to prove it?
Could you please give some suggestions on simplification of the proof. Is it possible to decrease number of nested proofs?