Improve the performance of the K000 check

dmius · dmius · commit 0f38ee25177e · 2019-07-19T18:08:22.000Z
Profiling the K000 check showed that the code generating a separate file for each query is the main source of delay. In this commit:
* 2 calls of jq over the full JSON are replaced by one
* calculation of the query length and generation of the link are moved into the SQL code
* the code generating the unused `readable_queryid` variable is removed.
As a result, the K000 group of reports shows a performance increase of up to 4 times when processing 500 queries.
diff --git a/resources/checks/K000_query_analysis.sh b/resources/checks/K000_query_analysis.sh
@@ -34,6 +34,7 @@ mkdir -p "${tmp_dir}"
 results_cnt="0"
 # fname_prefix generated by formula "json_files_cnt + 1"
 for file in "${tmp_dir}"/[1-9]*_${ALIAS_INDEX}.json; do
+
   if [[ -f "${file}" ]]; then
     results_cnt=$(( results_cnt + 1 ))
   fi
@@ -316,17 +317,24 @@ sql="
       ${sub_sql}
       s1.md5 as md5,
       s1.obj->>'queryid' as queryid,
-      s1.obj->>'query' as query
+      s1.obj->>'query' as query,
+      octet_length(s1.obj->>'query') as querylen
     from s1
     join s2 using(md5)
-    group by s1.md5, s1.obj->>'queryid', s1.obj->>'query'
-  ), queries as (
+    group by s1.md5, s1.obj->>'queryid', s1.obj->>'query', s1.obj->>'querylen'
+  ), num_queries as (
     -- K003
     select
       row_number() over(order by diff_total_time desc) as rownum,
       *
     from queries_pre
     order by diff_total_time desc
+  ), queries as (
+    -- K003
+    select
+      *,
+      '../../json_reports/${TIMESTAMP_DIRNAME}/K_query_groups/' || rownum::text || '_${ALIAS_INDEX}.sql' as link
+    from num_queries
   ), aggregated as (
     -- globally aggregated metrics (K001)
     select
@@ -384,24 +392,16 @@ SQL
 # for each query of K003 (of 50), generate file with query and link to the file
 for query_num in $(jq -r '.queries | keys | .[]' <<<${JSON}); do
 
-  query_text=$(jq -r '.queries."'$query_num'".query' <<<${JSON})
-  current_bytes=$(echo "$query_text" | wc -c | awk '{ print $1 }')
-  queryid=$(jq -r '.queries."'$query_num'".queryid' <<<${JSON})
+  query_data=$(jq -r '.queries."'$query_num'" | [.queryid, .querylen, .query]' <<<${JSON})
+  query_id=$(jq -r '.[0]' <<<${query_data})
+  current_bytes=$(jq -r '.[1]' <<<${query_data})
+  query_text=$(jq -r '.[2]' <<<${query_data})
 
-  # Put query into a file
   mkdir -p "${JSON_REPORTS_DIR}/K_query_groups" >/dev/null 2>&1 || true
-  echo "-- queryid: ${queryid}" > "${JSON_REPORTS_DIR}/K_query_groups/${query_num}_${ALIAS_INDEX}.sql"
+  echo "-- queryid: ${query_id}" > "${JSON_REPORTS_DIR}/K_query_groups/${query_num}_${ALIAS_INDEX}.sql"
   echo "-- NOTICE: the first 50k characters" >> "${JSON_REPORTS_DIR}/K_query_groups/${query_num}_${ALIAS_INDEX}.sql"
   echo "-- NOTICE: current query size (bytes): '${current_bytes}'" >> "${JSON_REPORTS_DIR}/K_query_groups/${query_num}_${ALIAS_INDEX}.sql"
   echo "$query_text" >> "${JSON_REPORTS_DIR}/K_query_groups/${query_num}_${ALIAS_INDEX}.sql"
-
-  # Generate link to a full text
-  link="../../json_reports/${TIMESTAMP_DIRNAME}/K_query_groups/${query_num}_${ALIAS_INDEX}.sql"
-  readable_queryid="${query_num}_${ALIAS_INDEX}"
-
-  # add link into the object
-  JSON=$(jq --arg link $link -r '.queries."'$query_num'" += { "link": $link }' <<<${JSON})
-  JSON=$(jq --arg readable_queryid $readable_queryid -r '.queries."'$query_num'" += { "readable_queryid": $readable_queryid }' <<<${JSON})
 done
 
 # print resulting JSON to stdout