I did the benchmark of @eric-ouellet post, including my version (kinda same of eric's solution with a little refactoring + early loop break).
I don't fully understand how BenchMarkDotNet work behind the scene, and can't really get it why there is so much difference from means of methods to other ones.
EDIT (brief explanation) :
I did my benchmark against 100 random pairs of int[10] and 100 random pairs of string[10] arrays.
They have a probability that each of the 2 second generated ones are either simply the first one shuffled, or a new random one with a recursing probability of corrupted (a random item value set to default) datas to handle several scenarii.
Not used in my benchmark, but it could also generate a random array's length centered of the expected length.
/// The class that will be the entry point of my benchmarks
public class EnumerableComparison
{
/// Set the size and length of my randomly generated enumerable to benchmark.
private const int _batchSize = 100;
private const int _batchLength = 10;
/// Calling our (static) algorithms from this (benchmark) class.
/// Decorate each call with [Benchmark] attribute to tell
/// BenchmarkDotNet to benchmark this method.
/// Also with [ArgumentsSource(Rnd***Datas)] to tell
/// BenchMarkDotNet to provide our benchmarked methods with
/// arguments generated with the Rnd***Datas() method.
[Benchmark]
[ArgumentsSource(nameof(RndIntDatas))]
public bool OnlyOrdered_Duplicate(int[] first, int[] second)
=> CompareEnumerableInSameOrder<int>(first, second);
[Benchmark]
[ArgumentsSource(nameof(RndStringDatas))]
public bool OnlyOrdered_Duplicate(string[] first, string[] second)
=> CompareEnumerableInSameOrder<string>(first, second);
public bool CompareEnumerableInSameOrder<T>(IEnumerable<T> first, IEnumerable<T> second)
=> first.CompareEnumerableInSameOrder(second);
[Benchmark]
[ArgumentsSource(nameof(RndIntDatas))]
public bool AnyOrder_Duplicate_Pankaj(int[] first, int[] second)
=> CompareEnumerableAnyOrderPankaj<int>(first, second);
[Benchmark]
[ArgumentsSource(nameof(RndStringDatas))]
public bool AnyOrder_Duplicate_Pankaj(string[] first, string[] second)
=> CompareEnumerableAnyOrderPankaj<string>(first, second);
public bool CompareEnumerableAnyOrderPankaj<T>(IEnumerable<T> first, IEnumerable<T> second)
=> first.CompareEnumerableAnyOrderPankaj(second);
[Benchmark]
[ArgumentsSource(nameof(RndIntDatas))]
public bool AnyOrder_NoDuplicate_Selman(int[] first, int[] second)
=> CompareEnumerableAnyOrderSelman<int>(first, second);
[Benchmark]
[ArgumentsSource(nameof(RndStringDatas))]
public bool AnyOrder_NoDuplicate_Selman(string[] first, string[] second)
=> CompareEnumerableAnyOrderSelman<string>(first, second);
public bool CompareEnumerableAnyOrderSelman<T>(IEnumerable<T> first, IEnumerable<T> second)
=> first.CompareEnumerableAnyOrderSelman(second);
[Benchmark]
[ArgumentsSource(nameof(RndIntDatas))]
public bool AnyOrder_Duplicate_Sergey(int[] first, int[] second)
=> CompareEnumerableAnyOrderSergey<int>(first, second);
[Benchmark]
[ArgumentsSource(nameof(RndStringDatas))]
public bool AnyOrder_Duplicate_Sergey(string[] first, string[] second)
=> CompareEnumerableAnyOrderSergey<string>(first, second);
public bool CompareEnumerableAnyOrderSergey<T>(IEnumerable<T> first, IEnumerable<T> second)
=> first.CompareEnumerableAnyOrderSergey(second);
[Benchmark]
[ArgumentsSource(nameof(RndIntDatas))]
public bool AnyOrder_Duplicate_Guru(int[] first, int[] second)
=> CompareEnumerableAnyOrderGuru<int>(first, second);
[Benchmark]
[ArgumentsSource(nameof(RndStringDatas))]
public bool AnyOrder_Duplicate_Guru(string[] first, string[] second)
=> CompareEnumerableAnyOrderGuru<string>(first, second);
public bool CompareEnumerableAnyOrderGuru<T>(IEnumerable<T> first, IEnumerable<T> second)
=> first.CompareEnumerableAnyOrderGuru(second);
[Benchmark]
[ArgumentsSource(nameof(RndIntDatas))]
public bool AnyOrder_Duplicate_EricO(int[] first, int[] second)
=> CompareEnumerableAnyOrderEricO<int>(first, second);
[Benchmark]
[ArgumentsSource(nameof(RndStringDatas))]
public bool AnyOrder_Duplicate_EricO(string[] first, string[] second)
=> CompareEnumerableAnyOrderEricO<string>(first, second);
public bool CompareEnumerableAnyOrderEricO<T>(IEnumerable<T> first, IEnumerable<T> second)
=> first.CompareEnumerableAnyOrderEricO(second);
[Benchmark]
[ArgumentsSource(nameof(RndIntDatas))]
public bool AnyOrder_Duplicate_TRex(int[] first, int[] second)
=> CompareEnumerableAnyOrderTRexAll<int>(first, second);
[Benchmark]
[ArgumentsSource(nameof(RndStringDatas))]
public bool AnyOrder_Duplicate_TRex(string[] first, string[] second)
=> CompareEnumerableAnyOrderTRexAll<string>(first, second);
public bool CompareEnumerableAnyOrderTRexAll<T>(IEnumerable<T> first, IEnumerable<T> second)
=> first.EnumerableEqualsAllV3(second);
/// <summary>
/// Generate a pair of <typeparamref name="T"/>[] having a centered length
/// of <paramref name="batchLength"/>, that could be randomly altered
/// according to <paramref name="lengthVariation"/> value.
/// The first one is using <paramref name="selector"/> delegate
/// to generate the <typeparamref name="T"/> items
/// and second one is either the first one shuffled according to
/// <paramref name="copyProbability"/> value,
/// or a new generated one using <paramref name="selector"/> delegate again
/// Both have a <paramref name="duplicationProbability"/>
/// recursing probability to duplicate a random generated element
/// and a <paramref name="corruptProbability"/>
/// recursing probability to corrupt a random generated element.
/// </summary>
/// <typeparam name="T">
/// The type of generated pairs of <typeparamref name="T"/>[]
/// </typeparam>
/// <param name="batchSize">
/// The number of generated pairs of <typeparamref name="T"/>[]
/// </param>
/// <param name="batchLength">
/// The length of generated pairs of <typeparamref name="T"/>[]
/// </param>
/// <param name="selector">
/// The delegate to transform the index of generated
/// <see cref="IEnumerable{int}"/> into an <typeparamref name="T"/>[]
/// </param>
/// <param name="corruptProbability">
/// The probability to corrupt (set to default(<typeparamref name="T"/>))"/>
/// a random element of the generated pairs
/// </param>
/// <param name="copyProbability">
/// The probability that the second element of the generated pair is simply
/// the first element of the pair shuffled.
/// </param>
/// <param name="duplicationProbability">
/// The recursive probability that a random element of
/// a generated <typeparamref name="T"/>[] is duplicated.
/// </param>
/// <param name="lengthVariation">
/// The percentage amount to randomly modify <paramref name="batchLength"/>
/// </param>
/// <returns>
/// A <see cref="IEnumerable{object}"/> whose all item are a generated pair
/// of <typeparamref name="T"/>[]
/// </returns>
public IEnumerable<object[]> RandomizedDatas<T>(
int batchSize,
int batchLength,
Func<int, T> selector,
int corruptProbability = 5,
int copyProbability = 20,
int duplicationProbability = 50,
double lengthVariation = 0.0)
{
batchLength = Math.Abs(batchLength);
lengthVariation = Math.Clamp(lengthVariation, 0.0, 1.0);
copyProbability = Math.Clamp(copyProbability, 0, 100);
corruptProbability = Math.Clamp(corruptProbability, 0, 99);
duplicationProbability = Math.Clamp(duplicationProbability, 0, 99);
T[]? sample1, sample2;
for (int i = 0; i < batchSize; i++)
{
sample1 = Enumerable.Range(1, Math.Max(1, Helper.StaticRandom.Next((int)Math.Floor(batchLength * (1.0 - lengthVariation)), (int)Math.Ceiling(batchLength * (1.0 + lengthVariation)))))
.Select(selector).ToArray();
while (Helper.StaticRandom.Next(0, 101) < duplicationProbability)
sample1 = sample1.AddRandomDuplicate();
while (Helper.StaticRandom.Next(0, 101) < corruptProbability)
sample1 = sample1.Corrupt();
if (Helper.StaticRandom.Next(0, 101) < copyProbability)
sample2 = sample1.CopyShuffle();
else
{
sample2 = Enumerable.Range(1, Math.Max(1, Helper.StaticRandom.Next((int)Math.Floor(batchLength * (1.0 - lengthVariation)), (int)Math.Ceiling(batchLength * (1.0 + lengthVariation)))))
.Select(selector).ToArray();
while (Helper.StaticRandom.Next(0, 101) < duplicationProbability)
sample2 = sample2.AddRandomDuplicate();
while (Helper.StaticRandom.Next(0, 101) < corruptProbability)
sample2 = sample2.Corrupt();
}
yield return new object[] { sample1, sample2 };
}
}
public IEnumerable<object[]> RndIntDatas()
=> RandomizedDatas(_batchSize, _batchLength, _ => Helper.StaticRandom.Next(1, _batchLength*10));
public IEnumerable<object[]> RndStringDatas()
=> RandomizedDatas(_batchSize, _batchLength, _ => Helper.RandomString(Helper.StaticRandom.Next(1, _batchLength)));
}
And
public static class Helper
{
private static Random _staticRandom = new();
public static Random StaticRandom => _staticRandom;
private const string ALPHANUMERICAL = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
public static string RandomString(int lenght)
{
var stringChars = new char[lenght];
for (int i = 0; i < stringChars.Length; i++)
{
stringChars[i] = ALPHANUMERICAL[_staticRandom.Next(ALPHANUMERICAL.Length)];
}
return new string(stringChars);
}
public static T[] CopyShuffle<T>(this T[] values)
{
T[] shuffled = new T[values.Length];
values.CopyTo(shuffled, 0);
Random.Shared.Shuffle(shuffled);
return shuffled;
}
public static T[] Corrupt<T>(this T[] values)
{
values[StaticRandom.Next(0, values.Length)] = default;
return values;
}
public static T[] AddDuplicate<T>(this T[] values, int index)
{
if (values == null || index >= values.Length)
return values;
return values.Concat(new T[] { values[index] }).ToArray();
}
public static T[] AddRandomDuplicate<T>(this T[] values)
{
if (values == null || values.Length == 0)
return values;
return values.AddDuplicate(StaticRandom.Next(0, values.Length));
}
/// <summary>
/// Check for Equality where items should be in the same order
/// </summary>
/// <typeparam name="T"></typeparam>
/// <param name="enum1"></param>
/// <param name="enum2"></param>
/// <returns>return true if both are equals</returns>
public static bool CompareEnumerableInSameOrder<T>(this IEnumerable<T> enum1, IEnumerable<T> enum2)
=> enum1.SequenceEqual(enum2);
/// <summary>
/// Check for Equality in ANY order (SLOW).
/// </summary>
/// <typeparam name="T"></typeparam>
/// <param name="enum1"></param>
/// <param name="enum2"></param>
/// <returns>return true if both are equals</returns>
public static bool CompareEnumerableAnyOrderPankaj<T>(this IEnumerable<T> enum1, IEnumerable<T> enum2)
=> Enumerable.SequenceEqual(enum1.OrderBy(fElement => fElement), enum2.OrderBy(sElement => sElement));
/// <summary>
/// Check for Equality in ANY order (VERY SLOW). In reality, it just does not work with duplicate.
/// </summary>
/// <typeparam name="T"></typeparam>
/// <param name="enum1"></param>
/// <param name="enum2"></param>
/// <returns></returns>
public static bool CompareEnumerableAnyOrderSelman<T>(this IEnumerable<T> enum1, IEnumerable<T> enum2)
=> enum1.Count() == enum2.Count() && enum1.All(enum2.Contains);
/// <summary>
/// Check for Equality in ANY order. Supposed to be Fast (according to Author in SO) but very slow.
/// Error on NULL element !
/// </summary>
/// <typeparam name="T"></typeparam>
/// <param name="enum1"></param>
/// <param name="enum2"></param>
/// <returns>return true if both are equals</returns>
public static bool CompareEnumerableAnyOrderSergey<T>(this IEnumerable<T> enum1, IEnumerable<T> enum2)
{
var counts = enum1.GroupBy(v => v).ToDictionary(g => g.Key, g => g.Count());
var ok = true;
foreach (var n in enum2)
{
if (n == null)///ADDED to avoid crash
return false;
if (counts.TryGetValue(n, out int c))
{
counts[n] = c - 1;
}
else
{
ok = false;
break;
}
}
return ok && counts.Values.All(c => c == 0);
}
/// <summary>
/// Check for Equality in ANY order (ORDER DESTRUCTIVE on LIST). Just for fun.
/// Code based on suggestion from "Guru Stron" on comments of my first answer
/// </summary>
/// <typeparam name="T"></typeparam>
/// <param name="enum1"></param>
/// <param name="enum2"></param>
/// <returns>return true if both are equals</returns>
public static bool CompareEnumerableAnyOrderGuru<T>(this IEnumerable<T> enum1, IEnumerable<T> enum2)
{
var list1 = new List<T>(enum1);
list1.Sort();
var list2 = new List<T>(enum2);
list2.Sort();
return Enumerable.SequenceEqual(list1, list2);
}
/// <summary>
/// Check for Equality in ANY order (Fast). Support null
/// </summary>
/// <typeparam name="T"></typeparam>
/// <param name="enum1"></param>
/// <param name="enum2"></param>
/// <returns></returns>
public static bool CompareEnumerableAnyOrderEricO<T>(this IEnumerable<T> enum1, IEnumerable<T> enum2)
{
if (enum1.Count() != enum2.Count())
return false;
// null value is never insert in the map, they are calculated outside of it.
#pragma warning disable CS8714 // The type cannot be used as type parameter in the generic type or method. Nullability of type argument doesn't match 'notnull' constraint.
Dictionary<T, int> counts = new Dictionary<T, int>(enum1.Count());
#pragma warning restore CS8714 // The type cannot be used as type parameter in the generic type or method. Nullability of type argument doesn't match 'notnull' constraint.
int nullCount = 0;
foreach (var n in enum1)
{
if (n == null)
{
nullCount++;
continue;
}
if (counts.TryGetValue(n, out int c))
{
counts[n] = c + 1;
}
else
{
counts[n] = 1;
}
}
var ok = true;
foreach (var n in enum2)
{
if (n == null)
{
nullCount++;
continue;
}
if (counts.TryGetValue(n, out int c))
{
counts[n] = c - 1;
}
else
{
ok = false;
break;
}
}
return ok && nullCount == 0 && counts.Values.All(c => c == 0);
}
/// <summary>
/// Check for equality in Any order. Handle duplicate
/// </summary>
/// <typeparam name="T">The type of elements in <paramref name="first"/> and <paramref name="second"/> <see cref="IEnumerable{T}"/> to test equality </typeparam>
/// <param name="first">The first <see cref="IEnumerable{T}"/> to test equality against <paramref name="second"/></param>
/// <param name="second">The second <see cref="IEnumerable{T}"/> to test equality against <paramref name="first"/></param>
/// <returns>True if <paramref name="first"/> exactly have same elements than those in <paramref name="second"/> without taking order in consideration (duplicate are important)</returns>
public static bool EnumerableEqualsAllV3<T>(this IEnumerable<T> first, IEnumerable<T> second)
{
if (first == default && second == default)
return true;
if (first == default || second == default)
return false;
if (ReferenceEquals(first, second))
return true;
int firstCount = first.Count();
if (firstCount != second.Count())
return false;
IDictionary<T, int> mappedCount = new Dictionary<T, int>(firstCount);
int nullCount = 0;
foreach (T item in first)
{
if (item == null)
{
nullCount++;
continue;
}
if (mappedCount.ContainsKey(item))
{
mappedCount[item]++;
}
else
{
mappedCount[item] = 1;
}
}
foreach (T item in second)
{
if (item == null)
{
nullCount--;
continue;
}
if (!mappedCount.ContainsKey(item))
return false;
mappedCount[item]--;
}
return mappedCount.Values.All(count => count == 0);
}
}
Here are (pre edited) the results. (too long to be post in this post unfortunately, so is the use of pastebin site).
ids5contains duplicates. Is that intentional?[EF]tag, and make sure that the title of the new question says "comparing lists inside EF's Where clause" or something similar.