It took me a little while to build a test that I was fairly comfortable with, so that I could actually judge the numbers I got...
/**
 * JMH benchmark comparing several ways of turning a comma-separated string into a
 * sorted, de-duplicated, upper-cased, comma-joined string while dropping every
 * token whose length is exactly 4.
 *
 * <p>Each benchmark method receives a {@link StringInput} whose {@code s} field is
 * rebuilt before every iteration, and returns the resulting string so the work is
 * not eliminated as dead code.
 */
@BenchmarkMode(value = { Mode.AverageTime })
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@Warmup(iterations = 2, time = 2, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 2, time = 2, timeUnit = TimeUnit.SECONDS)
@State(Scope.Benchmark)
public class StreamVsLoop {

    /**
     * Runs every benchmark in this class. Forked JVMs are started with {@code -ea}
     * so the sanity-check assertions in {@link StringInput#tearDown()} are active.
     *
     * @param args ignored
     * @throws RunnerException if the JMH runner fails
     */
    public static void main(String[] args) throws RunnerException {
        Options opt = new OptionsBuilder().include(StreamVsLoop.class.getSimpleName())
                .jvmArgs("-ea")
                .shouldFailOnError(true)
                .build();
        new Runner(opt).run();
    }

    /**
     * Per-thread benchmark input: a comma-separated string of random lowercase
     * letters, {@code next} letters long (commas not counted).
     */
    @State(Scope.Thread)
    public static class StringInput {
        private final String[] letters = { "q", "a", "z", "w", "s", "x", "e", "d", "c", "r", "f", "v", "t", "g", "b",
                "y", "h", "n", "u", "j", "m", "i", "k", "o", "l", "p" };

        /** The input handed to the benchmarks; rebuilt by {@link #setUp()} before each iteration. */
        public String s = "";

        /** Number of letters to generate. */
        @Param(value = { "1000", "10000", "100000" })
        int next;

        /**
         * Sanity-checks the generated input (only effective with {@code -ea}) and
         * releases it. For the three configured sizes the second half of the input
         * must contain exactly {@code next / 10 - 1} tokens of length 5
         * (99, 999 and 9999 respectively).
         */
        @TearDown(Level.Iteration)
        public void tearDown() {
            if (next == 1000 || next == 10000 || next == 100000) {
                long count = Arrays.stream(s.split(",")).filter(x -> x.length() == 5).count();
                assert count == next / 10 - 1;
            }
            s = null;
        }

        /**
         * Builds a fresh input so that roughly half of the letters end up in 4-letter
         * tokens (which the benchmarks filter out) and half in 5-letter tokens (which
         * they keep).
         *
         * <p>The string is always rebuilt from scratch: {@link #tearDown()} nulls
         * {@code s}, so appending to the previous value would prefix every later
         * iteration's input with the text {@code "null"}.
         */
        @Setup(Level.Iteration)
        public void setUp() {
            int half = next / 2;
            // Letters plus at most one comma per four letters.
            StringBuilder sb = new StringBuilder(next + next / 4 + 1);
            for (int i = 0; i < next; i++) {
                int index = ThreadLocalRandom.current().nextInt(0, letters.length);
                String letter = letters[index];
                // Comma before every 4th letter in the first half and every 5th in the
                // second half. The comparisons are deliberately strict: i == half gets
                // no comma, and the counts asserted in tearDown() rely on that.
                if ((i < half && i % 4 == 0) || (i > half && i % 5 == 0)) {
                    sb.append(',');
                }
                sb.append(letter);
            }
            s = sb.toString();
        }
    }

    /**
     * Splits eagerly with {@link String#split(String)} and processes the resulting
     * array through a stream pipeline (filter, upper-case, sort, distinct, join).
     *
     * @param si the generated input
     * @return the joined result
     */
    @Benchmark
    @Fork(1) // same fork count as the other benchmarks; a bare @Fork uses JMH's default instead
    public String stream(StringInput si) {
        Stream<String> tokens = Arrays.stream(si.s.split(","));
        return tokens.filter(t -> t.length() != 4)
                .map(String::toUpperCase)
                .sorted().distinct()
                .collect(Collectors.joining(","));
    }

    /**
     * Splits eagerly with {@link String#split(String)} and collects the kept tokens
     * into a {@link TreeSet}, which sorts and de-duplicates in one step.
     *
     * @param si the generated input
     * @return the joined result
     */
    @Benchmark
    @Fork(1)
    public String loop(StringInput si) {
        String[] tokens = si.s.split(",");
        Set<String> resultSet = new TreeSet<>();
        for (String t : tokens) {
            if (t.length() != 4) {
                resultSet.add(t.toUpperCase());
            }
        }
        return String.join(",", resultSet);
    }

    /**
     * Splits lazily with {@link Pattern#splitAsStream(CharSequence)} and applies
     * {@code sorted()} before {@code distinct()}.
     *
     * @param si the generated input
     * @return the joined result
     */
    @Benchmark
    @Fork(1)
    public String sortedDistinct(StringInput si) {
        return Pattern.compile(",").splitAsStream(si.s)
                .filter(t -> t.length() != 4)
                .map(String::toUpperCase)
                .sorted()
                .distinct()
                .collect(Collectors.joining(","));
    }

    /**
     * Same as {@link #sortedDistinct(StringInput)} but applies {@code distinct()}
     * before {@code sorted()}.
     *
     * @param si the generated input
     * @return the joined result
     */
    @Benchmark
    @Fork(1)
    public String distinctSorted(StringInput si) {
        return Pattern.compile(",").splitAsStream(si.s)
                .filter(t -> t.length() != 4)
                .map(String::toUpperCase)
                .distinct()
                .sorted()
                .collect(Collectors.joining(","));
    }

    /**
     * Scans for commas by hand with {@link String#indexOf(int, int)}, so tokens of
     * length 4 are skipped without ever being materialised as substrings; kept
     * tokens go into a {@link TreeSet}.
     *
     * @param si the generated input
     * @return the joined result
     */
    @Benchmark
    @Fork(1)
    public String variant3(StringInput si) {
        String s = si.s;
        Set<String> resultSet = new TreeSet<>();
        // o = start of the current token, p = index of the comma that ends it.
        int o = 0, p;
        // The update clause also runs after `continue`, so o always advances past p.
        for (p = s.indexOf(','); p >= 0; p = s.indexOf(',', o = p + 1)) {
            if (p - o == 4) {
                continue;
            }
            resultSet.add(s.substring(o, p).toUpperCase());
        }
        // Trailing token after the last comma.
        if (s.length() - o != 4) {
            resultSet.add(s.substring(o).toUpperCase());
        }
        return String.join(",", resultSet);
    }
}
streamvsLoop.StreamVsLoop.distinctSorted 1000 0.028
streamvsLoop.StreamVsLoop.sortedDistinct 1000 0.024
streamvsLoop.StreamVsLoop.loop 1000 0.016
streamvsLoop.StreamVsLoop.stream 1000 0.020
streamvsLoop.StreamVsLoop.variant3 1000 0.012
streamvsLoop.StreamVsLoop.distinctSorted 10000 0.394
streamvsLoop.StreamVsLoop.sortedDistinct 10000 0.359
streamvsLoop.StreamVsLoop.loop 10000 0.274
streamvsLoop.StreamVsLoop.stream 10000 0.304 ± 0.006
streamvsLoop.StreamVsLoop.variant3 10000 0.234
streamvsLoop.StreamVsLoop.distinctSorted 100000 4.950
streamvsLoop.StreamVsLoop.sortedDistinct 100000 4.432
streamvsLoop.StreamVsLoop.loop 100000 5.457
streamvsLoop.StreamVsLoop.stream 100000 3.927 ± 0.048
streamvsLoop.StreamVsLoop.variant3 100000 3.595
Holger's method wins, but boy is the difference small between the other solutions, once the code is hot enough.
JMH for instance on how to correctly measure … `toUpperCase()` on the result, as natural order is: `A < ... < Z < a < ... < z`. … `.distinct()` after `.sorted()`, not before it.