Skip to content
This repository was archived by the owner on Jul 6, 2021. It is now read-only.

Commit 0f38ee2

Browse files
committed
Improve the performance of the K000 check
Profiling the K000 check showed that the code generating a separate file for each query accounts for most of the delay. In this commit: * 2 calls of jq on the full JSON are replaced by one * calculation of the query length and generation of the link are moved to SQL code * code generating the unused `readable_queryid` variable is removed. As a result, the performance of the K000 group of reports increases up to 4 times when processing 500 queries.
2 parents d99f08a + 214a590 commit 0f38ee2

File tree

1 file changed

+16
-16
lines changed

1 file changed

+16
-16
lines changed

resources/checks/K000_query_analysis.sh

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ mkdir -p "${tmp_dir}"
3434
results_cnt="0"
3535
# fname_prefix generated by formula "json_files_cnt + 1"
3636
for file in "${tmp_dir}"/[1-9]*_${ALIAS_INDEX}.json; do
37+
3738
if [[ -f "${file}" ]]; then
3839
results_cnt=$(( results_cnt + 1 ))
3940
fi
@@ -316,17 +317,24 @@ sql="
316317
${sub_sql}
317318
s1.md5 as md5,
318319
s1.obj->>'queryid' as queryid,
319-
s1.obj->>'query' as query
320+
s1.obj->>'query' as query,
321+
octet_length(s1.obj->>'query') as querylen
320322
from s1
321323
join s2 using(md5)
322-
group by s1.md5, s1.obj->>'queryid', s1.obj->>'query'
323-
), queries as (
324+
group by s1.md5, s1.obj->>'queryid', s1.obj->>'query', s1.obj->>'querylen'
325+
), num_queries as (
324326
-- K003
325327
select
326328
row_number() over(order by diff_total_time desc) as rownum,
327329
*
328330
from queries_pre
329331
order by diff_total_time desc
332+
), queries as (
333+
-- K003
334+
select
335+
*,
336+
'../../json_reports/${TIMESTAMP_DIRNAME}/K_query_groups/' || rownum::text || '_${ALIAS_INDEX}.sql' as link
337+
from num_queries
330338
), aggregated as (
331339
-- globally aggregated metrics (K001)
332340
select
@@ -384,24 +392,16 @@ SQL
384392
# for each query of K003 (of 50), generate file with query and link to the file
385393
for query_num in $(jq -r '.queries | keys | .[]' <<<${JSON}); do
386394

387-
query_text=$(jq -r '.queries."'$query_num'".query' <<<${JSON})
388-
current_bytes=$(echo "$query_text" | wc -c | awk '{ print $1 }')
389-
queryid=$(jq -r '.queries."'$query_num'".queryid' <<<${JSON})
395+
query_data=$(jq -r '.queries."'$query_num'" | [.queryid, .querylen, .query]' <<<${JSON})
396+
query_id=$(jq -r '.[0]' <<<${query_data})
397+
current_bytes=$(jq -r '.[1]' <<<${query_data})
398+
query_text=$(jq -r '.[2]' <<<${query_data})
390399

391-
# Put query into a file
392400
mkdir -p "${JSON_REPORTS_DIR}/K_query_groups" >/dev/null 2>&1 || true
393-
echo "-- queryid: ${queryid}" > "${JSON_REPORTS_DIR}/K_query_groups/${query_num}_${ALIAS_INDEX}.sql"
401+
echo "-- queryid: ${query_id}" > "${JSON_REPORTS_DIR}/K_query_groups/${query_num}_${ALIAS_INDEX}.sql"
394402
echo "-- NOTICE: the first 50k characters" >> "${JSON_REPORTS_DIR}/K_query_groups/${query_num}_${ALIAS_INDEX}.sql"
395403
echo "-- NOTICE: current query size (bytes): '${current_bytes}'" >> "${JSON_REPORTS_DIR}/K_query_groups/${query_num}_${ALIAS_INDEX}.sql"
396404
echo "$query_text" >> "${JSON_REPORTS_DIR}/K_query_groups/${query_num}_${ALIAS_INDEX}.sql"
397-
398-
# Generate link to a full text
399-
link="../../json_reports/${TIMESTAMP_DIRNAME}/K_query_groups/${query_num}_${ALIAS_INDEX}.sql"
400-
readable_queryid="${query_num}_${ALIAS_INDEX}"
401-
402-
# add link into the object
403-
JSON=$(jq --arg link $link -r '.queries."'$query_num'" += { "link": $link }' <<<${JSON})
404-
JSON=$(jq --arg readable_queryid $readable_queryid -r '.queries."'$query_num'" += { "readable_queryid": $readable_queryid }' <<<${JSON})
405405
done
406406

407407
# print resulting JSON to stdout

0 commit comments

Comments
 (0)