mirror of
https://github.com/pi-hole/pi-hole.git
synced 2024-11-15 02:42:58 +00:00
Allow limited parsing of ABP style adlists (#5179)
This commit is contained in:
commit
75a32d22a3
3 changed files with 94 additions and 42 deletions
|
@ -230,7 +230,7 @@ initialize_debug() {
|
||||||
|
|
||||||
# This is a function for visually displaying the current test that is being run.
|
# This is a function for visually displaying the current test that is being run.
|
||||||
# Accepts one variable: the name of what is being diagnosed
|
# Accepts one variable: the name of what is being diagnosed
|
||||||
# Colors do not show in the dasboard, but the icons do: [i], [✓], and [✗]
|
# Colors do not show in the dashboard, but the icons do: [i], [✓], and [✗]
|
||||||
echo_current_diagnostic() {
|
echo_current_diagnostic() {
|
||||||
# Colors are used for visually distinguishing each test in the output
|
# Colors are used for visually distinguishing each test in the output
|
||||||
# These colors do not show in the GUI, but the formatting will
|
# These colors do not show in the GUI, but the formatting will
|
||||||
|
|
|
@ -119,7 +119,7 @@ scanDatabaseTable() {
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Send prepared query to gravity database
|
# Send prepared query to gravity database
|
||||||
result="$(pihole-FTL sqlite3 "${gravityDBfile}" "${querystr}")" 2> /dev/null
|
result="$(pihole-FTL sqlite3 -separator ',' "${gravityDBfile}" "${querystr}")" 2> /dev/null
|
||||||
if [[ -z "${result}" ]]; then
|
if [[ -z "${result}" ]]; then
|
||||||
# Return early when there are no matches in this table
|
# Return early when there are no matches in this table
|
||||||
return
|
return
|
||||||
|
@ -139,8 +139,8 @@ scanDatabaseTable() {
|
||||||
# Loop over results and print them
|
# Loop over results and print them
|
||||||
mapfile -t results <<< "${result}"
|
mapfile -t results <<< "${result}"
|
||||||
for result in "${results[@]}"; do
|
for result in "${results[@]}"; do
|
||||||
domain="${result/|*}"
|
domain="${result/,*}"
|
||||||
if [[ "${result#*|}" == "0" ]]; then
|
if [[ "${result#*,}" == "0" ]]; then
|
||||||
extra=" (disabled)"
|
extra=" (disabled)"
|
||||||
else
|
else
|
||||||
extra=""
|
extra=""
|
||||||
|
@ -215,10 +215,10 @@ if [[ -n "${exact}" ]]; then
|
||||||
fi
|
fi
|
||||||
|
|
||||||
for result in "${results[@]}"; do
|
for result in "${results[@]}"; do
|
||||||
match="${result/|*/}"
|
match="${result/,*/}"
|
||||||
extra="${result#*|}"
|
extra="${result#*,}"
|
||||||
adlistAddress="${extra/|*/}"
|
adlistAddress="${extra/,*/}"
|
||||||
extra="${extra#*|}"
|
extra="${extra#*,}"
|
||||||
if [[ "${extra}" == "0" ]]; then
|
if [[ "${extra}" == "0" ]]; then
|
||||||
extra=" (disabled)"
|
extra=" (disabled)"
|
||||||
else
|
else
|
||||||
|
|
120
gravity.sh
120
gravity.sh
|
@ -137,6 +137,18 @@ update_gravity_timestamp() {
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Store in the info table whether ABP style domains were found during this gravity run
|
||||||
|
set_abp_info() {
|
||||||
|
pihole-FTL sqlite3 "${gravityDBfile}" "INSERT OR REPLACE INTO info (property,value) VALUES ('abp_domains',${abp_domains});"
|
||||||
|
status="$?"
|
||||||
|
|
||||||
|
if [[ "${status}" -ne 0 ]]; then
|
||||||
|
echo -e "\\n ${CROSS} Unable to update ABP domain status in database ${gravityDBfile}\\n ${output}"
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
# Import domains from file and store them in the specified database table
|
# Import domains from file and store them in the specified database table
|
||||||
database_table_from_file() {
|
database_table_from_file() {
|
||||||
# Define locals
|
# Define locals
|
||||||
|
@ -519,25 +531,52 @@ gravity_DownloadBlocklists() {
|
||||||
gravity_Blackbody=true
|
gravity_Blackbody=true
|
||||||
}
|
}
|
||||||
|
|
||||||
# num_total_imported_domains increases for each list processed
|
|
||||||
num_total_imported_domains=0
|
|
||||||
num_domains=0
|
|
||||||
num_non_domains=0
|
|
||||||
parseList() {
|
|
||||||
local adlistID="${1}" src="${2}" target="${3}" non_domains sample_non_domains
|
|
||||||
# This sed does the following things:
|
|
||||||
# 1. Remove all lines containing no domains
|
|
||||||
# 2. Remove all domains containing invalid characters. Valid are: a-z, A-Z, 0-9, dot (.), minus (-), underscore (_)
|
|
||||||
# 3. Append ,adlistID to every line
|
|
||||||
# 4. Remove trailing period (see https://github.com/pi-hole/pi-hole/issues/4701)
|
|
||||||
# 5. Ensures there is a newline on the last line
|
|
||||||
sed -r "/([^\.]+\.)+[^\.]{2,}/!d;/[^a-zA-Z0-9.\_-]/d;s/\.$//;s/$/,${adlistID}/;/.$/a\\" "${src}" >> "${target}"
|
|
||||||
|
|
||||||
# Find lines containing no domains or with invalid characters (see above)
|
# global variable to indicate if we found ABP style domains during the gravity run
|
||||||
|
# is saved in gravity's info table to signal FTL if such domains are available
|
||||||
|
abp_domains=0
|
||||||
|
parseList() {
|
||||||
|
local adlistID="${1}" src="${2}" target="${3}" temp_file temp_file_base non_domains sample_non_domains valid_domain_pattern abp_domain_pattern
|
||||||
|
|
||||||
|
# Create a temporary file for the sed magic instead of using "${target}" directly
|
||||||
|
# this allows to split the sed commands to improve readability
|
||||||
|
# we use a file handle here and remove the temporary file immediately so the content will be deleted in any case
|
||||||
|
# when the script stops
|
||||||
|
temp_file_base="$(mktemp -p "/tmp" --suffix=".gravity")"
|
||||||
|
exec 3>"$temp_file_base"
|
||||||
|
rm "${temp_file_base}"
|
||||||
|
temp_file="/proc/$$/fd/3"
|
||||||
|
|
||||||
|
# define valid domain patterns
|
||||||
|
# no need to include uppercase letters, as we convert to lowercase in gravity_ParseFileIntoDomains() already
|
||||||
|
# adapted from https://stackoverflow.com/a/30007882
|
||||||
|
# supported ABP style: ||subdomain.domain.tld^
|
||||||
|
|
||||||
|
valid_domain_pattern="([a-z0-9]([a-z0-9_-]{0,61}[a-z0-9]){0,1}\.)+[a-z0-9][a-z0-9-]{0,61}[a-z0-9]"
|
||||||
|
abp_domain_pattern="\|\|${valid_domain_pattern}\^"
|
||||||
|
|
||||||
|
|
||||||
|
# 1. Add all valid domains
|
||||||
|
sed -r "/^${valid_domain_pattern}$/!d" "${src}" > "${temp_file}"
|
||||||
|
|
||||||
|
# 2. Add valid ABP style domains if there is at least one such domain
|
||||||
|
if grep -E "^${abp_domain_pattern}$" -m 1 -q "${src}"; then
|
||||||
|
echo " ${INFO} List contained AdBlock Plus style domains"
|
||||||
|
abp_domains=1
|
||||||
|
sed -r "/^${abp_domain_pattern}$/!d" "${src}" >> "${temp_file}"
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
# Find lines containing no domains or with invalid characters (not matching regex above)
|
||||||
|
# This is simply everything that is not in $temp_file compared to $src
|
||||||
# Remove duplicates from the list
|
# Remove duplicates from the list
|
||||||
mapfile -t non_domains <<< "$(sed -r "/([^\.]+\.)+[^\.]{2,}/d" < "${src}")"
|
mapfile -t non_domains < <(grep -Fvf "${temp_file}" "${src}" | sort -u )
|
||||||
mapfile -t -O "${#non_domains[@]}" non_domains <<< "$(sed -r "/[^a-zA-Z0-9.\_-]/!d" < "${src}")"
|
|
||||||
IFS=" " read -r -a non_domains <<< "$(tr ' ' '\n' <<< "${non_domains[@]}" | sort -u | tr '\n' ' ')"
|
# 3. Remove trailing period (see https://github.com/pi-hole/pi-hole/issues/4701)
|
||||||
|
# 4. Append ,adlistID to every line
|
||||||
|
# 5. Ensures there is a newline on the last line
|
||||||
|
# and write everything to the target file
|
||||||
|
sed "s/\.$//;s/$/,${adlistID}/;/.$/a\\" "${temp_file}" >> "${target}"
|
||||||
|
|
||||||
# A list of items of common local hostnames not to report as unusable
|
# A list of items of common local hostnames not to report as unusable
|
||||||
# Some lists (i.e StevenBlack's) contain these as they are supposed to be used as HOST files
|
# Some lists (i.e StevenBlack's) contain these as they are supposed to be used as HOST files
|
||||||
|
@ -553,13 +592,8 @@ parseList() {
|
||||||
# Get a sample of non-domain entries, limited to 5 (the list should already have been de-duplicated)
|
# Get a sample of non-domain entries, limited to 5 (the list should already have been de-duplicated)
|
||||||
IFS=" " read -r -a sample_non_domains <<< "$(tr ' ' '\n' <<< "${non_domains[@]}" | head -n 5 | tr '\n' ' ')"
|
IFS=" " read -r -a sample_non_domains <<< "$(tr ' ' '\n' <<< "${non_domains[@]}" | head -n 5 | tr '\n' ' ')"
|
||||||
|
|
||||||
local tmp_new_imported_total
|
# Get the number of domains added
|
||||||
# Get the new number of domains in destination file
|
num_domains="$(grep -c "^" "${temp_file}")"
|
||||||
tmp_new_imported_total="$(grep -c "^" "${target}")"
|
|
||||||
# Number of imported lines for this file is the difference between the new total and the old total. (Or, the number of domains we just added.)
|
|
||||||
num_domains="$(( tmp_new_imported_total-num_total_imported_domains ))"
|
|
||||||
# Replace the running total with the new total.
|
|
||||||
num_total_imported_domains="$tmp_new_imported_total"
|
|
||||||
# Get the number of non_domains (this is the number of entries left after stripping the source of comments/duplicates/false positives/domains)
|
# Get the number of non_domains (this is the number of entries left after stripping the source of comments/duplicates/false positives/domains)
|
||||||
num_non_domains="${#non_domains[@]}"
|
num_non_domains="${#non_domains[@]}"
|
||||||
|
|
||||||
|
@ -574,6 +608,9 @@ parseList() {
|
||||||
else
|
else
|
||||||
echo " ${INFO} Imported ${num_domains} domains"
|
echo " ${INFO} Imported ${num_domains} domains"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# close file handle
|
||||||
|
exec 3<&-
|
||||||
}
|
}
|
||||||
|
|
||||||
compareLists() {
|
compareLists() {
|
||||||
|
@ -744,18 +781,30 @@ gravity_ParseFileIntoDomains() {
|
||||||
# Most of the lists downloaded are already in hosts file format but the spacing/formatting is not contiguous
|
# Most of the lists downloaded are already in hosts file format but the spacing/formatting is not contiguous
|
||||||
# This helps with that and makes it easier to read
|
# This helps with that and makes it easier to read
|
||||||
# It also helps with debugging so each stage of the script can be researched more in depth
|
# It also helps with debugging so each stage of the script can be researched more in depth
|
||||||
# 1) Remove carriage returns
|
# 1) Convert all characters to lowercase
|
||||||
# 2) Convert all characters to lowercase
|
tr '[:upper:]' '[:lower:]' < "${src}" > "${destination}"
|
||||||
# 3) Remove comments (text starting with "#", include possible spaces before the hash sign)
|
|
||||||
|
# 2) Remove carriage returns
|
||||||
|
sed -i 's/\r$//' "${destination}"
|
||||||
|
|
||||||
|
# 3a) Remove comments (text starting with "#", include possible spaces before the hash sign)
|
||||||
|
sed -i 's/\s*#.*//g' "${destination}"
|
||||||
|
|
||||||
|
# 3b) Remove ABP comments (text starting with "!", include possible spaces before the exclamation mark)
|
||||||
|
sed -i 's/\s*!.*//g' "${destination}"
|
||||||
|
|
||||||
|
# 3c) Remove ABP headers (text starting with "[", include possible spaces before the bracket)
|
||||||
|
sed -i 's/\s*\[.*//g' "${destination}"
|
||||||
|
|
||||||
# 4) Remove lines containing "/"
|
# 4) Remove lines containing "/"
|
||||||
# 5) Remove leading tabs, spaces, etc.
|
sed -i -r '/(\/).*$/d' "${destination}"
|
||||||
|
|
||||||
|
# 5) Remove leading tabs, spaces, etc. (Also removes leading IP addresses)
|
||||||
|
sed -i -r 's/^.*\s+//g' "${destination}"
|
||||||
|
|
||||||
# 6) Remove empty lines
|
# 6) Remove empty lines
|
||||||
< "${src}" tr -d '\r' | \
|
sed -i '/^$/d' "${destination}"
|
||||||
tr '[:upper:]' '[:lower:]' | \
|
|
||||||
sed 's/\s*#.*//g' | \
|
|
||||||
sed -r '/(\/).*$/d' | \
|
|
||||||
sed -r 's/^.*\s+//g' | \
|
|
||||||
sed '/^$/d'> "${destination}"
|
|
||||||
chmod 644 "${destination}"
|
chmod 644 "${destination}"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -988,6 +1037,9 @@ fi
|
||||||
# Update gravity timestamp
|
# Update gravity timestamp
|
||||||
update_gravity_timestamp
|
update_gravity_timestamp
|
||||||
|
|
||||||
|
# Set abp_domain info field
|
||||||
|
set_abp_info
|
||||||
|
|
||||||
# Ensure proper permissions are set for the database
|
# Ensure proper permissions are set for the database
|
||||||
chown pihole:pihole "${gravityDBfile}"
|
chown pihole:pihole "${gravityDBfile}"
|
||||||
chmod g+w "${piholeDir}" "${gravityDBfile}"
|
chmod g+w "${piholeDir}" "${gravityDBfile}"
|
||||||
|
|
Loading…
Reference in a new issue