# Post-patch form of parseList() for gravity.sh, taken from commit
# 81d4531e10b07e96b863b23920d32de189b7616d ("Implement performant list checking
# routine", DL6ER, 2020-02-22). In gravity.sh this code sits directly after
# gravity_DownloadBlocklists() and before the routine that downloads a URL and
# checks its HTTP status and file content.

# Running total of lines imported by parseList() across all calls.
total_num=0

#######################################
# Import the valid domains of one downloaded list and report list quality.
#
# Every non-blank line of the source file that consists solely of the
# characters a-z, A-Z, 0-9, dot, underscore and minus is appended to the target
# file as "<line>,<adlistID>". All other lines are skipped; up to five of the
# lines containing invalid characters are printed as examples.
#
# Globals:
#   INFO      (read)    prefix used for the quality message
#   total_num (written) incremented by the number of lines imported
# Arguments:
#   $1 - adlist ID appended to every imported line
#   $2 - path of the source list file
#   $3 - path of the target file (appended to, created if missing)
# Outputs:
#   Writes the quality summary and invalid-domain examples to stdout.
#######################################
parseList() {
  local adlistID="${1}" src="${2}" target="${3}"
  local report stats incorrect_lines="" line
  local num_lines num_correct_lines
  local percentage=100 percentage_fraction=0

  # A single awk pass filters, imports and counts. Compared with the previous
  # sed/grep combination this:
  #  - rejects backslashes (inside a bracket expression "\_" also admitted "\"),
  #  - really drops blank lines (they used to be imported as ",<adlistID>"),
  #  - counts the imported lines directly instead of re-counting the whole
  #    target file and subtracting a global,
  #  - passes the ID and target path via the environment, so they are never
  #    interpreted as part of the program text,
  #  - always terminates the last imported line with a newline.
  # LC_ALL=C keeps the character ranges ASCII-only regardless of the locale.
  report="$(
    LC_ALL=C PARSELIST_ID="${adlistID}" PARSELIST_TARGET="${target}" awk '
      BEGIN {
        id = ENVIRON["PARSELIST_ID"]
        out = ENVIRON["PARSELIST_TARGET"]
        # Open the target in append mode so it exists even if nothing is imported.
        printf "" >> out
      }
      { total++ }
      /^$/ { next }
      /[^a-zA-Z0-9._-]/ {
        if (shown < 5) {
          bad[++shown] = $0
        }
        next
      }
      {
        print $0 "," id >> out
        good++
      }
      END {
        printf "%d %d\n", total, good
        for (i = 1; i <= shown; i++) {
          print bad[i]
        }
      }
    ' "${src}"
  )"

  # First line of the report holds "<total> <imported>", the remaining lines
  # (if any) are the invalid examples.
  stats="${report%%$'\n'*}"
  if [[ "${report}" == *$'\n'* ]]; then
    incorrect_lines="${report#*$'\n'}"
  fi
  read -r num_lines num_correct_lines <<< "${stats}"
  # Fall back to zero when awk produced no report (e.g. unreadable source).
  num_lines="${num_lines:-0}"
  num_correct_lines="${num_correct_lines:-0}"

  total_num="$(( total_num + num_correct_lines ))"

  # Integer arithmetic in tenths of a percent; an empty list counts as 100.0%.
  if (( num_lines > 0 )); then
    percentage="$(( 1000 * num_correct_lines / num_lines ))"
    percentage_fraction="$(( percentage % 10 ))"
    percentage="$(( percentage / 10 ))"
  fi
  printf '%s\n' " ${INFO} List quality: ${num_correct_lines} of ${num_lines} lines importable (${percentage}.${percentage_fraction}%)"

  if [[ -n "${incorrect_lines}" ]]; then
    printf '%s\n' " Example for invalid domains (showing only the first five):"
    while IFS= read -r line; do
      printf '%s\n' " - ${line}"
    done <<< "${incorrect_lines}"
  fi
}