Rename some of the variables to hopefully make the process a little clearer

Signed-off-by: Adam Warner <me@adamwarner.co.uk>
2024-11-15 02:42:58 +00:00 · 2023-01-21 23:47:19 +00:00 · 2023-01-21 23:47:19 +00:00 · 9939cf1d77
commit 9939cf1d77
parent 79f4a7cef0
1 changed files with 31 additions and 31 deletions
--- a/gravity.sh
+++ b/gravity.sh
@@ -244,7 +244,7 @@ database_adlist_number() {
    return;
  fi

-  output=$( { printf ".timeout 30000\\nUPDATE adlist SET number = %i, invalid_domains = %i WHERE id = %i;\\n" "${num_source_lines}" "${num_unusable}" "${1}" | pihole-FTL sqlite3 "${gravityDBfile}"; } 2>&1 )
+  output=$( { printf ".timeout 30000\\nUPDATE adlist SET number = %i, invalid_domains = %i WHERE id = %i;\\n" "${num_domains}" "${num_non_domains}" "${1}" | pihole-FTL sqlite3 "${gravityDBfile}"; } 2>&1 )
  status="$?"

  if [[ "${status}" -ne 0 ]]; then
@@ -519,12 +519,12 @@ gravity_DownloadBlocklists() {
  gravity_Blackbody=true
 }

-# num_target_lines does increase for every correctly added domain in pareseList()
-num_target_lines=0
-num_source_lines=0
-num_unusable=0
+# num_total_imported_domains increases for each list processed
+num_total_imported_domains=0
+num_domains=0
+num_non_domains=0
 parseList() {
-  local adlistID="${1}" src="${2}" target="${3}" unusable_lines sample_unusable_lines tmp_unusuable_lines_str false_positive
+  local adlistID="${1}" src="${2}" target="${3}" non_domains sample_non_domains tmp_non_domains_str false_positive
  # This sed does the following things:
  # 1. Remove all lines containing no domains
  # 2. Remove all domains containing invalid characters. Valid are: a-z, A-Z, 0-9, dot (.), minus (-), underscore (_)
@@ -535,9 +535,9 @@ parseList() {

  # Find lines containing no domains or with invalid characters (see above)
  # Remove duplicates from the list
-  mapfile -t unusable_lines <<< "$(sed -r "/([^\.]+\.)+[^\.]{2,}/d" < "${src}")"
-  mapfile -t -O "${#unusable_lines[@]}" unusable_lines <<< "$(sed -r "/[^a-zA-Z0-9.\_-]/!d" < "${src}")"
-  IFS=" " read -r -a unusable_lines <<< "$(tr ' ' '\n' <<< "${unusable_lines[@]}" | sort -u | tr '\n' ' ')"
+  mapfile -t non_domains <<< "$(sed -r "/([^\.]+\.)+[^\.]{2,}/d" < "${src}")"
+  mapfile -t -O "${#non_domains[@]}" non_domains <<< "$(sed -r "/[^a-zA-Z0-9.\_-]/!d" < "${src}")"
+  IFS=" " read -r -a non_domains <<< "$(tr ' ' '\n' <<< "${non_domains[@]}" | sort -u | tr '\n' ' ')"

  # A list of items of common local hostnames not to report as unusable
  # Some lists (i.e StevenBlack's) contain these as they are supposed to be used as HOST files
@@ -559,37 +559,37 @@ parseList() {
    )

  # Read the unusable lines into a string
-  tmp_unusuable_lines_str=" ${unusable_lines[*]} "
+  tmp_non_domains_str=" ${non_domains[*]} "
  for false_positive in "${false_positives[@]}"; do
-    # Remove false positives from tmp_unusuable_lines_str
-    tmp_unusuable_lines_str="${tmp_unusuable_lines_str/ ${false_positive} / }"
+    # Remove false positives from tmp_non_domains_str
+    tmp_non_domains_str="${tmp_non_domains_str/ ${false_positive} / }"
  done
  # Read the string back into an array
-  IFS=" " read -r -a unusable_lines <<< "${tmp_unusuable_lines_str}"
+  IFS=" " read -r -a non_domains <<< "${tmp_non_domains_str}"

-  # Get a sample of the incorrect lines, limited to 5 (the list should already have been de-duplicated)
-  IFS=" " read -r -a sample_unusable_lines <<< "$(tr ' ' '\n' <<< "${unusable_lines[@]}" | head -n 5 | tr '\n' ' ')"
+  # Get a sample of non-domain entries, limited to 5 (the list should already have been de-duplicated)
+  IFS=" " read -r -a sample_non_domains <<< "$(tr ' ' '\n' <<< "${non_domains[@]}" | head -n 5 | tr '\n' ' ')"

-  local num_target_lines_new num_correct_lines
-  # Get number of lines in source file
-  num_source_lines="$(grep -c "^" "${src}")"
-  # Get the new number of lines in destination file
-  num_target_lines_new="$(grep -c "^" "${target}")"
-  # Number of new correctly added lines
-  num_correct_lines="$(( num_target_lines_new-num_target_lines ))"
-  # Update number of lines in target file
-  num_target_lines="$num_target_lines_new"
-  num_unusable="${#unusable_lines[@]}"
+  local tmp_new_imported_total
+  # Get the new number of domains in destination file
+  tmp_new_imported_total="$(grep -c "^" "${target}")"
+  # Number of imported lines for this file is the difference between the new total and the old total. (Or, the number of domains we just added.)
+  num_domains="$(( tmp_new_imported_total-num_total_imported_domains ))"
+  # Replace the running total with the new total.
+  num_total_imported_domains="$tmp_new_imported_total"
+  # Get the number of non_domains (this is the number of entries left after stripping the source of comments/duplicates/false positives/domains)
+  num_non_domains="${#non_domains[@]}"

-  if [[ "${num_unusable}" -ne 0 ]]; then
-    echo "  ${INFO} Imported ${num_correct_lines} domains, ignoring ${num_unusable} non-domain entries"
+  # If there are non-domain entries, we display some information about them. This is not an error or a major cause for concern.
+  if [[ "${num_non_domains}" -ne 0 ]]; then
+    echo "  ${INFO} Imported ${num_domains} domains, ignoring ${num_non_domains} non-domain entries"
    echo "      Sample of non-domain entries:"
-    for each in "${sample_unusable_lines[@]}"
+    for each in "${sample_non_domains[@]}"
    do
        echo "        - ${each}"
    done
  else
-    echo "  ${INFO} Imported ${num_correct_lines} domains"
+    echo "  ${INFO} Imported ${num_domains} domains"
  fi
 }

@@ -745,8 +745,8 @@ gravity_DownloadBlocklistFromUrl() {
    else
      echo -e "  ${CROSS} List download failed: ${COL_LIGHT_RED}no cached list available${COL_NC}"
      # Manually reset these two numbers because we do not call parseList here
-      num_source_lines=0
-      num_unusable=0
+      num_domains=0
+      num_non_domains=0
      database_adlist_number "${adlistID}"
      database_adlist_status "${adlistID}" "4"
    fi