// Source: tests/benchmarks/quick/qsggeometry/tst_bench_qsggeometry.cpp
// (blob 179aab46dd3dac7e432228df2f4ba2fdcee2eb0a)
// Copyright (C) 2016 The Qt Company Ltd.
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only

#include <qtest.h>
#include <QtCore/QVector>
#include <QElapsedTimer>
#include <QtQuick/QSGGeometry>

#include <array>
#include <numeric>
#include <vector>

// Qt Test fixture that benchmarks two ways of changing a QSGGeometry's
// vertex count from frame to frame: calling allocate() every frame versus
// calling allocate() once and then setVertexCount().
class GeometryBenchmark : public QObject
{
    Q_OBJECT

private slots:
    // Y values are computed arithmetically from the vertex index.
    void benchmarkVerticesInCache_data();
    void benchmarkVerticesInCache();
    // Y values are read from a rotating set of 1000 buffers; reuses the data
    // rows of benchmarkVerticesInCache_data().
    void benchmarkVerticesInSystemMemory_data();
    void benchmarkVerticesInSystemMemory();

private:
    // Parameters of one benchmark run.
    // NOTE: callers initialize this struct positionally, so the member order
    // below must not change.
    struct GeometryUpdateParams {
        // Number of vertices populated before the first frame.
        int startingVertexPosition;
        // Number of frames, i.e. number of times the vertex count is grown.
        int totalFramesOfAnimation;
        // Number of vertices added per frame.
        int vertexCountStep;
        // true: allocate() once, then setVertexCount() per frame;
        // false: allocate() on every frame and rewrite all vertices.
        bool useSetVertexCount;
    };
    // Shared benchmark driver. makeYValue is invoked with a vertex index and
    // must return the y coordinate for that vertex.
    template<typename YValueFunc>
    static void runVertexBenchmark(const GeometryUpdateParams& params,
                                     YValueFunc makeYValue);
};

// Compare allocate() and setVertexCount() for changing vertex count and
// updating vertices using cached data
//
// Benchmark results:
//
//                           Vertices   allocate()   setVertexCount()
//   i.MX6 800 MHz            2-17K  |   174 μs   |    0.75 μs
//                          20K-37K  |   579 μs   |    1.10 μs
//                          40K-57K  |   988 μs   |    1.57 μs
//
//   Intel i5-8265U 3.8GHz    2-17K  |    25 μs   |    0.05 μs
//                          20K-37K  |    79 μs   |    0.06 μs
//                          40K-57K  |   145 μs   |    0.09 μs
//
//   AMD 7945HX 5.5GHz        2-17K  |     5 μs   |    0.02 μs
//                          20K-37K  |    17 μs   |    0.03 μs
//                          40K-57K  |    29 μs   |    0.05 μs
//
// This benchmark uses two caches:
//   - caching vertex data in a float array of y-values
//   - processor cache
//
// This test is simplistic and does not reflect the more likely case that
// there would be other threads doing work and causing processor-cache misses.
//
void GeometryBenchmark::benchmarkVerticesInCache_data()
{
    // Column order here is independent of the QFETCH order in the test
    // functions; columns are looked up by name.
    QTest::addColumn<bool>("useSetVertexCount");
    QTest::addColumn<int>("totalFramesOfAnimation");
    QTest::addColumn<int>("vertexCountStep");
    QTest::addColumn<int>("startingVertexPosition");

    // Each allocate() row has a setVertexCount() counterpart with identical
    // frame count, step and starting size so the two strategies are measured
    // over exactly the same vertex range (start .. start + 17 * 1000).
    QTest::newRow("allocate() grow from 2 verts")
        << false << 1000 << 17 << 2;
    QTest::newRow("allocate() grow from 20000 vert")
        << false << 1000 << 17 << 20'000;
    QTest::newRow("allocate() grow from 40000 verts")
        << false << 1000 << 17 << 40'000;
    // Starts at 2 vertices, matching both the row tag and the allocate() row
    // it is compared against.
    QTest::newRow("setVertexCount() grow from 2 verts")
        << true  << 1000 << 17 << 2;
    QTest::newRow("setVertexCount() grow from 20000 verts")
        << true  << 1000 << 17 << 20'000;
    QTest::newRow("setVertexCount() grow from 40000 vert")
        << true  << 1000 << 17 << 40'000;
}

void GeometryBenchmark::benchmarkVerticesInCache()
{
    QFETCH(bool, useSetVertexCount);
    QFETCH(int, totalFramesOfAnimation);
    QFETCH(int, vertexCountStep);
    QFETCH(int, startingVertexPosition);

    auto makeYValue = [](int i) {
        return i/10.0f;
    };

    GeometryUpdateParams params{
        startingVertexPosition,
        totalFramesOfAnimation,
        vertexCountStep,
        useSetVertexCount
    };

    runVertexBenchmark(params, makeYValue);
}


// Compare uncached performance of allocate() and setVertexCount().
//
// This benchmark demonstrates the performance benefit of patch:
//    https://codereview.qt-project.org/c/qt/qtdeclarative/+/590520
// which implements QTBUG-126835 "Partial rendering of a geometry node"
//
// The goal of QTBUG-126835 is to provide an efficient means for changing the
// number of vertices while animating. Using allocate() to change the number
// of vertices requires updating all vertices, whereas setVertexCount() only
// requires updating new vertices.
//
// The test runs for 1000 frames (17 seconds of animation) and changes the
// number of vertices with each new frame.
//
// This benchmark eliminates the benefit of the CPU cache and is a good
// indicator of real-world performance in a heavily multithreaded environment.
//
// Benchmark results:
//
//                          Vertices   allocate()   setVertexCount()
//   i.MX6 800 MHz            2-17K  |  1.7 msecs |       7 μs
//                          20K-37K  |  6.0 msecs |      10 μs
//                          40K-57K  | 10.1 msecs |      16 μs
//
//   Intel i5-8265U 3.8GHz    2-17K  |    157 μs  |    0.21 μs
//                          20K-37K  |    654 μs  |    0.53 μs
//                          40K-57K  |   1133 μs  |    0.85 μs
//
//   AMD 7945HX 5.5GHz        2-17K  |     37 μs  |    0.16 μs
//                          20K-37K  |    147 μs  |    0.33 μs
//                          40K-57K  |    257 μs  |    0.29 μs
//
void GeometryBenchmark::benchmarkVerticesInSystemMemory_data()
{
    benchmarkVerticesInCache_data();
}

void GeometryBenchmark::benchmarkVerticesInSystemMemory()
{
    QFETCH(bool, useSetVertexCount);
    QFETCH(int, totalFramesOfAnimation);
    QFETCH(int, vertexCountStep);
    QFETCH(int, startingVertexPosition);

    // Largest vertex count reached during the run; every buffer must be able
    // to serve any vertex index below this.
    const int totalVerts = startingVertexPosition
                           + (vertexCountStep * totalFramesOfAnimation);

    // Data is stored in an array of buffers and reads rotate through the
    // buffers so that each subsequent read is targeting a location in memory
    // that is not already cached. As a result, this tests the performance of
    // reading vertex data from system memory.

    // Build the ramp 0, 1, 2, ... once; element i holds the value i.
    std::vector<float> indexRamp(totalVerts);
    std::iota(indexRamp.begin(), indexRamp.end(), 0.0f);

    // fill() copy-assigns the ramp into every slot, so each buffer owns its
    // own separately allocated, fully initialized storage.
    std::array<std::vector<float>, 1000> data;
    data.fill(indexRamp);

    // Index of the buffer the next read will come from
    size_t arrayIndex = 0;

    // Force cache misses by rolling backwards through the data arrays.
    // Every buffer holds the same ramp, so the returned value depends only on
    // vector_idx, not on which buffer happens to be current.
    auto makeYValue = [&arrayIndex, &data](size_t vector_idx) -> float {
        float value = data[arrayIndex][vector_idx];

        // Step to the previous buffer, wrapping from the first to the last.
        arrayIndex = (arrayIndex == 0) ? data.size() - 1
                                       : arrayIndex - 1;
        return value;
    };

    // Positional initialization: order follows GeometryUpdateParams' members.
    GeometryUpdateParams params{
        startingVertexPosition,
        totalFramesOfAnimation,
        vertexCountStep,
        useSetVertexCount
    };

    runVertexBenchmark(params, makeYValue);
}

// This function is shared between the benchmarkVerticesInCache() and
// benchmarkVerticesInSystemMemory() benchmark functions.
//
// The function benchmarks the efficiency of updating vertex data in a
// QSGGeometry object, comparing the performance of using setVertexCount()
// versus allocate(). It measures the time taken to update vertices over
// multiple frames and ensures data integrity by verifying that the
// geometry's data matches expected values.
//
template<typename YValueFunc>
void GeometryBenchmark::runVertexBenchmark(const GeometryUpdateParams& params,
                                           YValueFunc makeYValue)
{
    int finalVertexCount = params.startingVertexPosition
                           + (params.vertexCountStep * params.totalFramesOfAnimation);

    // Create a collection to hold Y values for verifying data integrity
    QVector<float> yValues(finalVertexCount);

    // Populate values collection with pseudo-data up to minimum vertices
    for (int i = 0; i < params.startingVertexPosition; ++i) {
        yValues[i] = makeYValue(i);
    }

    QSGGeometry geometry(QSGGeometry::defaultAttributes_Point2D(), 1, 0);

    // function to add data to geometry
    auto updateVertices = [&geometry, &makeYValue](int start, int end) {
        auto *pt = geometry.vertexDataAsPoint2D() + start;
        for (int i = start; i < end; ++i, ++pt) {
            pt->x = i;
            pt->y = makeYValue(i);
        }
    };

    int iterations = 0;
    QElapsedTimer timer;
    timer.start();
    do {
        for (int i = params.startingVertexPosition; i < finalVertexCount; i += params.vertexCountStep) {
            int newCount = qMin(finalVertexCount, i + params.vertexCountStep);

            // add data to values collection which will be added to geometry
            for (int j = i; j < newCount; ++j) {
                yValues[j] = makeYValue(j);
            }

            // set up geometry to have previous data and prepare for new data
            if (params.useSetVertexCount) {
                if (i == params.startingVertexPosition) {
                    // populate initial data
                    geometry.allocate(finalVertexCount);
                    updateVertices(0, params.startingVertexPosition);
                }

                // resizing does not invalidate previously allocated vertices
                geometry.setVertexCount(newCount);
            } else {
                geometry.allocate(newCount);
                // must repopulate because all vertices were invalidated by allocate()
                updateVertices(0, i);
            }

            // copy new data to geometry
            updateVertices(i, newCount);
        }
        ++iterations;
    } while (timer.elapsed() < 2);

    // Divide by the number of frames to get the time per frame
    qreal nanosecsPerTest = qreal(timer.nsecsElapsed()) / iterations;
    qreal nanosecs = nanosecsPerTest/params.totalFramesOfAnimation;
    QTest::setBenchmarkResult(nanosecs, QTest::WalltimeNanoseconds);

    // Verify that the geometry has expected data
    auto *pt = geometry.vertexDataAsPoint2D();
    for (int i = 0; i < finalVertexCount; ++i, ++pt) {
        QCOMPARE(pt->x, float(i));
        QCOMPARE(pt->y, yValues[i]);
    }
}

// Generates main() for this test executable, running GeometryBenchmark.
QTEST_MAIN(GeometryBenchmark)
// moc output for the Q_OBJECT class declared in this .cpp file.
#include "tst_bench_qsggeometry.moc"