From cc1ccddf8682361304aaded01002f7fc460d8c37 Mon Sep 17 00:00:00 2001 From: Zsolt Parragi Date: Wed, 25 Jun 2025 09:23:21 +0200 Subject: [PATCH] Proposal: change CSV format Currently the CSV format is limited by: * packing a lot of information about the test setup into a single field, instead of using separate columns for the separate features * not including some parameters used during the run, such as the PG_TDE variable This is just a draft/proposal, not a PR that is actually ready to be merged, as it only hardcodes the PG_TDE variable instead of providing something more generic; I just want to validate the idea with it first. The use case for this change is that a separate, simple CSV processing script can go through the CSV files and upload them to a database used by Grafana, without: * relying on information that does not exist in the CSV, such as shell variables * doing extensive string processing to try to recover all the information which was put into a single field Thoughts? --- db-bench/data_funcs.inc | 20 ++++++++++---------- db-bench/main_funcs.inc | 14 +++++++------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/db-bench/data_funcs.inc b/db-bench/data_funcs.inc index ef463fe..a358682 100755 --- a/db-bench/data_funcs.inc +++ b/db-bench/data_funcs.inc @@ -61,27 +61,27 @@ function print_parameters() { function diff_to_average() { local csv_file="$1" diff_output=$(awk -F ',' 'BEGIN { - for (i=2; i<=NF; i++) { + for (i=6; i<=NF; i++) { sum[i] = 0 count[i] = 0 } } { if (FNR != total_rows) { # Process all rows except the last one - for (i=2; i<=NF; i++) { + for (i=6; i<=NF; i++) { if ($i != "") { count[i]++ sum[i] += $i } } } else { # Process the last row - for (i=2; i<=NF; i++) { + for (i=6; i<=NF; i++) { last_row_data[i] = $i } } } END { - for (i=2; i<=NF; i++) { + for (i=6; i<=NF; i++) { avg[i] = (count[i] > 0) ? sum[i] / count[i] : 0 printf ", %.2f%%", (count[i] > 0) ? 
((last_row_data[i] - avg[i]) / avg[i]) * 100 : 0 } @@ -94,13 +94,13 @@ function diff_to_average() { function average() { local csv_file="$1" awk -F ',' 'BEGIN { - for (i=2; i<=NF; i++) { + for (i=6; i<=NF; i++) { sum[i] = 0 count[i] = 0 } } { - for (i=2; i<=NF; i++) { + for (i=6; i<=NF; i++) { if ($i != "") { count[i]++ sum[i] += $i @@ -108,7 +108,7 @@ function average() { } } END { - for (i=2; i<=NF; i++) { + for (i=6; i<=NF; i++) { avg[i] = (count[i] > 0) ? sum[i] / count[i] : 0 printf ", %.2f", avg[i] } @@ -120,14 +120,14 @@ function average() { function standard_deviation_percent() { local csv_file="$1" awk -F ',' 'BEGIN { - for (i=2; i<=NF; i++) { + for (i=6; i<=NF; i++) { sum[i] = 0 count[i] = 0 sumsq[i] = 0 } } { - for (i=2; i<=NF; i++) { + for (i=6; i<=NF; i++) { if ($i != "") { count[i]++ sum[i] += $i @@ -136,7 +136,7 @@ function standard_deviation_percent() { } } END { - for (i=2; i<=NF; i++) { + for (i=6; i<=NF; i++) { avg[i] = (count[i] > 0) ? sum[i] / count[i] : 0 printf ", %.2f%%", (count[i] > 0) ? 
(sqrt((sumsq[i]/count[i]) - (avg[i])**2) / avg[i]) * 100 : 0 } diff --git a/db-bench/main_funcs.inc b/db-bench/main_funcs.inc index d200def..27c791d 100755 --- a/db-bench/main_funcs.inc +++ b/db-bench/main_funcs.inc @@ -54,8 +54,8 @@ function on_exit(){ local DURATION=$((END_TIME - START_TIME)) local TIME_HMS=$(printf "%02d:%02d:%02d" $((DURATION / 3600)) $(((DURATION % 3600) / 60)) $((DURATION % 60))) - HEADER="WORKLOAD" - for num_threads in ${THREADS_LIST}; do HEADER+=", ${num_threads} THDS"; done + HEADER="WORKLOAD,MODE,MEASUREMENT,BRANCH,PG_TDE" + for num_threads in ${THREADS_LIST}; do HEADER+=",${num_threads} THDS"; done echo "Create .csv files" echo "${HEADER}" > ${LOG_BASE_FULL_RESULTS}.csv @@ -244,17 +244,17 @@ function run_sysbench() { local LOG_RESULTS_PATH="${CACHE_DIR}/${BENCH_ID}_${CONFIG_BASE^^}_$(basename "${WORKLOAD_NAMES}" .txt)_${RUN_NAME}" local LOG_RESULTS_CACHE="${LOG_RESULTS_PATH}/${WORKLOAD_NAME}_${SCALING_GOVERNOR}_${THREADS_LIST// /_}.csv" - local BENCH_WITH_CONFIG="${BENCH_ID}_${CONFIG_BASE}_${WORKLOAD_NAME}_${BENCH_NAME}" - local RESULTS_LINE="${BENCH_WITH_CONFIG}_qps" + local BENCH_WITH_CONFIG="${BENCH_ID},${CONFIG_BASE},${WORKLOAD_NAME},${BENCH_NAME},${PG_TDE}" + local RESULTS_LINE="${BENCH_WITH_CONFIG}" for number in "${result_set[@]}"; do RESULTS_LINE+=", ${number}"; done mkdir -p $LOG_RESULTS_PATH echo "${RESULTS_LINE}" > ${LOG_NAME_RESULTS} cat ${LOG_NAME_RESULTS} >> ${LOG_RESULTS_CACHE} cat ${LOG_NAME_RESULTS} >> ${LOGS_QPS}/${BENCH_ID}_${WORKLOAD_NAME}_${BENCH_NAME}_qps.csv - echo "${BENCH_WITH_CONFIG}_diff$(diff_to_average "${LOG_RESULTS_CACHE}")" >> ${LOGS_DIFF}/${BENCH_ID}_${WORKLOAD_NAME}_${BENCH_NAME}_diff.csv - echo "${BENCH_WITH_CONFIG}_stddev$(standard_deviation_percent "${LOG_RESULTS_CACHE}")" >> ${LOGS_STDDEV}/${BENCH_ID}_${WORKLOAD_NAME}_${BENCH_NAME}_stddev.csv - echo "${BENCH_WITH_CONFIG}_avg$(average "${LOG_RESULTS_CACHE}")" >> ${LOGS_AVG}/${BENCH_ID}_${WORKLOAD_NAME}_${BENCH_NAME}_avg.csv + echo 
"${BENCH_WITH_CONFIG}$(diff_to_average "${LOG_RESULTS_CACHE}")" >> ${LOGS_DIFF}/${BENCH_ID}_${WORKLOAD_NAME}_${BENCH_NAME}_diff.csv + echo "${BENCH_WITH_CONFIG}$(standard_deviation_percent "${LOG_RESULTS_CACHE}")" >> ${LOGS_STDDEV}/${BENCH_ID}_${WORKLOAD_NAME}_${BENCH_NAME}_stddev.csv + echo "${BENCH_WITH_CONFIG}$(average "${LOG_RESULTS_CACHE}")" >> ${LOGS_AVG}/${BENCH_ID}_${WORKLOAD_NAME}_${BENCH_NAME}_avg.csv unset result_set fi }