pi-hole/gravity.sh

959 lines
34 KiB
Bash
Raw Normal View History

#!/usr/bin/env bash
# shellcheck disable=SC1090
# Pi-hole: A black hole for Internet advertisements
2017-02-22 17:55:20 +00:00
# (c) 2017 Pi-hole, LLC (https://pi-hole.net)
# Network-wide ad blocking via your own hardware.
#
# Usage: "pihole -g"
# Compiles a list of ad-serving domains by downloading them from multiple sources
2015-12-06 13:55:50 +00:00
#
2017-02-22 17:55:20 +00:00
# This file is copyright under the latest version of the EUPL.
# Please see LICENSE file for your rights under this license.
# Force the C locale for this script and every command it spawns.
export LC_ALL=C
# Directory the helper scripts sourced below are loaded from.
PI_HOLE_SCRIPT_DIR="/opt/pihole"
# Source utils.sh; getFTLConfigValue (used further down to read the gravity
# database and temp-directory settings) is expected to come from this file.
utilsfile="${PI_HOLE_SCRIPT_DIR}/utils.sh"
# shellcheck disable=SC1090
. "${utilsfile}"
# Source COL_TABLE.
# NOTE(review): presumably provides the colour/status variables used in
# messages below (COL_LIGHT_RED, COL_NC, INFO, TICK, CROSS, OVER) - confirm.
coltable="${PI_HOLE_SCRIPT_DIR}/COL_TABLE"
# shellcheck disable=SC1090
. "${coltable}"
# Source the gravity database migration script from the Pi-hole git checkout.
# shellcheck disable=SC1091
. "/etc/.pihole/advanced/Scripts/database_migration/gravity-db.sh"
2016-04-11 10:29:14 +00:00
# Base name from which the command path and the configuration directory
# are derived.
basename="pihole"
PIHOLE_COMMAND="/usr/local/bin/${basename}"
# Configuration directory (/etc/pihole)
piholeDir="/etc/${basename}"
# Legacy (pre v5.0) list file locations
whitelistFile="${piholeDir}/whitelist.txt"
blacklistFile="${piholeDir}/blacklist.txt"
regexFile="${piholeDir}/regex.list"
adListFile="${piholeDir}/adlists.list"
# Location of the Pi-hole git checkout (templates are read from here)
piholeGitDir="/etc/.pihole"
# Gravity database path and temporary directory, as reported by
# getFTLConfigValue for the keys files.gravity and files.gravity_tmp
GRAVITYDB=$(getFTLConfigValue files.gravity)
GRAVITY_TMPDIR=$(getFTLConfigValue files.gravity_tmp)
# SQL schema template fed to sqlite3 when a new gravity database is generated
gravityDBschema="${piholeGitDir}/advanced/Templates/gravity.db.sql"
Improve gravity performance (#3100) * Gravity performance improvements. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not move downloaded lists into migration_backup directory. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not (strictly) sort domains. Random-leaf access is faster than always-last-leaf access (on average). Signed-off-by: DL6ER <dl6er@dl6er.de> * Append instead of overwrite gravity_new collection list. Signed-off-by: DL6ER <dl6er@dl6er.de> * Rename table gravity_new to gravity_temp to clarify that this is only an intermediate table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Add timers for each of the calls to compute intense parts. They are to be removed before this finally hits the release/v5.0 branch. Signed-off-by: DL6ER <dl6er@dl6er.de> * Fix legacy list files import. It currently doesn't work when the gravity database has already been updated to using the single domainlist table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Simplify database_table_from_file(), remove all to this function for gravity lost downloads. Signed-off-by: DL6ER <dl6er@dl6er.de> * Update gravity.db.sql to version 10 to have newle created databases already reflect the most recent state. Signed-off-by: DL6ER <dl6er@dl6er.de> * Create second gravity database and swap them on success. This has a number of advantages such as instantaneous gravity updates (as seen from FTL) and always available gravity blocking. Furthermore, this saves disk space as the old database is removed on completion. * Add timing output for the database swapping SQLite3 call. Signed-off-by: DL6ER <dl6er@dl6er.de> * Explicitly generate index as a separate process. Signed-off-by: DL6ER <dl6er@dl6er.de> * Remove time measurements. Signed-off-by: DL6ER <dl6er@dl6er.de>
2020-01-24 17:39:13 +00:00
# SQL template shipped next to the schema template.
# NOTE(review): presumably used to copy data from the old into the new
# database; the consumer is not visible in this part of the file - confirm.
gravityDBcopy="${piholeGitDir}/advanced/Templates/gravity_copy.sql"
# NOTE(review): the two settings below are consumed outside the visible part
# of this file (presumably list file extension and curl connect timeout in
# seconds) - confirm.
domainsExtension="domains"
curl_connect_timeout=10
# Check gravity temp directory: it has to exist and be writable, otherwise
# warn and fall back to /tmp
if [ ! -d "${GRAVITY_TMPDIR}" ] || [ ! -w "${GRAVITY_TMPDIR}" ]; then
echo -e " ${COL_LIGHT_RED}Gravity temporary directory does not exist or is not a writeable directory, falling back to /tmp. ${COL_NC}"
GRAVITY_TMPDIR="/tmp"
fi
# Derive all database file locations from GRAVITYDB (obtained through
# getFTLConfigValue above), as the gravity database path may differ from
# the default
gravityDBfile="${GRAVITYDB}"
gravityDBfile_default="/etc/pihole/gravity.db"
# Temporary database, located right next to the live one
gravityTEMPfile="${GRAVITYDB}_temp"
gravityDIR="$(dirname -- "${gravityDBfile}")"
# Where the previous database is kept after a swap
gravityOLDfile="${gravityDIR}/gravity_old.db"
# Generate new SQLite3 file from schema template
#
# Globals:  gravityDBfile, gravityDBschema, piholeDir, CROSS (read)
# Outputs:  an error message on stdout if the database cannot be created
# Returns:  1 if pihole-FTL's sqlite3 fails, otherwise the status of the
#           final chmod
generate_gravity_database() {
  # Feed the schema template to FTL's embedded sqlite3 shell; bail out
  # early when that does not work.
  pihole-FTL sqlite3 -ni "${gravityDBfile}" <"${gravityDBschema}" || {
    echo -e " ${CROSS} Unable to create ${gravityDBfile}"
    return 1
  }

  # Hand the fresh database to the pihole user and make both the config
  # directory and the database group-writable.
  chown pihole:pihole "${gravityDBfile}"
  chmod g+w "${piholeDir}" "${gravityDBfile}"
}
# Build gravity tree
#
# Creates the idx_gravity index on the gravity table of the temporary
# database.
# Globals:  gravityTEMPfile, INFO, CROSS, TICK, OVER (read)
# Outputs:  progress and result messages on stdout
# Returns:  0 on success, 1 if the index could not be created
gravity_build_tree() {
  # Keep all working variables local so they do not leak into the caller
  local str output status
  str="Building tree"
  echo -ne " ${INFO} ${str}..."

  # The index is intentionally not UNIQUE as poor quality adlists may contain domains more than once
  # stdout and stderr are captured together so that the error text can be
  # shown to the user on failure.
  output=$({ pihole-FTL sqlite3 -ni "${gravityTEMPfile}" "CREATE INDEX idx_gravity ON gravity (domain, adlist_id);"; } 2>&1)
  status="$?"

  if [[ "${status}" -ne 0 ]]; then
    echo -e "\\n ${CROSS} Unable to build gravity tree in ${gravityTEMPfile}\\n ${output}"
    return 1
  fi

  echo -e "${OVER} ${TICK} ${str}"
}
# Copy data from old to new database file and swap them
#
# Moves the temporary database into place as the live gravity database. The
# previous database is kept as ${gravityOLDfile} when enough disk space is
# available, otherwise it is removed.
# Globals:  gravityDIR, gravityDBfile, gravityTEMPfile, gravityOLDfile,
#           INFO, CROSS, TICK, OVER (read)
# Outputs:  progress and result messages on stdout
# Returns:  0 on success, 1 if the new database could not be moved into place
gravity_swap_databases() {
  local str oldAvail availableBlocks gravityBlocks
  str="Swapping databases"
  echo -ne " ${INFO} ${str}..."

  # Swap databases and remove or conditionally rename old database
  oldAvail=false
  # Only look at the existing database if there is one; on a first run there
  # is nothing to stat, keep or remove.
  if [[ -f "${gravityDBfile}" ]]; then
    # Number of available blocks on disk
    availableBlocks=$(stat -f --format "%a" "${gravityDIR}")
    # Number of blocks, used by gravity.db
    gravityBlocks=$(stat --format "%b" "${gravityDBfile}")
    # NOTE(review): %a (with -f) and %b may be counted in different block
    # sizes (%S vs. %B), so this comparison is only approximate - confirm.

    # Only keep the old database if available disk space is at least twice the size of the existing gravity.db.
    # Better be safe than sorry...
    # Non-numeric stat output (e.g. stat failed) is treated as "not enough space".
    if [[ "${availableBlocks}" =~ ^[0-9]+$ && "${gravityBlocks}" =~ ^[0-9]+$ ]] &&
      [ "${availableBlocks}" -gt "$((gravityBlocks * 2))" ]; then
      # Only claim the old database is available if the backup really worked
      if mv "${gravityDBfile}" "${gravityOLDfile}"; then
        oldAvail=true
      fi
    else
      rm "${gravityDBfile}"
    fi
  fi

  # Do not report success when the new database did not make it into place
  if ! mv "${gravityTEMPfile}" "${gravityDBfile}"; then
    echo -e "\\n ${CROSS} Unable to move ${gravityTEMPfile} to ${gravityDBfile}"
    return 1
  fi
  echo -e "${OVER} ${TICK} ${str}"

  if [[ "${oldAvail}" == true ]]; then
    echo -e " ${TICK} The old database remains available"
  fi
  return 0
}
# Update timestamp when the gravity table was last updated successfully
#
# Writes the current Unix time into the 'updated' property of the info table
# of the temporary database.
# Globals:  gravityTEMPfile, CROSS (read)
# Outputs:  an error message on stdout on failure
# Returns:  0 on success, 1 if the database could not be updated
update_gravity_timestamp() {
  # Keep the working variables local so they do not leak into the caller
  local output status

  # '%%s' is an escaped percent sign for printf, i.e. sqlite3 receives
  # strftime('%s', 'now'). stdout and stderr are captured together so that
  # the error text can be shown to the user on failure.
  output=$({ printf ".timeout 30000\\nINSERT OR REPLACE INTO info (property,value) values ('updated',cast(strftime('%%s', 'now') as int));" | pihole-FTL sqlite3 -ni "${gravityTEMPfile}"; } 2>&1)
  status="$?"

  if [[ "${status}" -ne 0 ]]; then
    echo -e "\\n ${CROSS} Unable to update gravity timestamp in database ${gravityTEMPfile}\\n ${output}"
    return 1
  fi
  return 0
}
# Import domains from file and store them in the specified database table
database_table_from_file() {
# Define locals
local table src backup_path backup_file tmpFile list_type
table="${1}"
src="${2}"
backup_path="${piholeDir}/migration_backup"
backup_file="${backup_path}/$(basename "${2}")"
# Create a temporary file. We don't use '--suffix' here because not all
# implementations of mktemp support it, e.g. on Alpine
tmpFile="$(mktemp -p "${GRAVITY_TMPDIR}")"
mv "${tmpFile}" "${tmpFile%.*}.gravity"
tmpFile="${tmpFile%.*}.gravity"
Improve gravity performance (#3100) * Gravity performance improvements. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not move downloaded lists into migration_backup directory. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not (strictly) sort domains. Random-leaf access is faster than always-last-leaf access (on average). Signed-off-by: DL6ER <dl6er@dl6er.de> * Append instead of overwrite gravity_new collection list. Signed-off-by: DL6ER <dl6er@dl6er.de> * Rename table gravity_new to gravity_temp to clarify that this is only an intermediate table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Add timers for each of the calls to compute intense parts. They are to be removed before this finally hits the release/v5.0 branch. Signed-off-by: DL6ER <dl6er@dl6er.de> * Fix legacy list files import. It currently doesn't work when the gravity database has already been updated to using the single domainlist table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Simplify database_table_from_file(), remove all to this function for gravity lost downloads. Signed-off-by: DL6ER <dl6er@dl6er.de> * Update gravity.db.sql to version 10 to have newle created databases already reflect the most recent state. Signed-off-by: DL6ER <dl6er@dl6er.de> * Create second gravity database and swap them on success. This has a number of advantages such as instantaneous gravity updates (as seen from FTL) and always available gravity blocking. Furthermore, this saves disk space as the old database is removed on completion. * Add timing output for the database swapping SQLite3 call. Signed-off-by: DL6ER <dl6er@dl6er.de> * Explicitly generate index as a separate process. Signed-off-by: DL6ER <dl6er@dl6er.de> * Remove time measurements. Signed-off-by: DL6ER <dl6er@dl6er.de>
2020-01-24 17:39:13 +00:00
local timestamp
timestamp="$(date --utc +'%s')"
Improve gravity performance (#3100) * Gravity performance improvements. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not move downloaded lists into migration_backup directory. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not (strictly) sort domains. Random-leaf access is faster than always-last-leaf access (on average). Signed-off-by: DL6ER <dl6er@dl6er.de> * Append instead of overwrite gravity_new collection list. Signed-off-by: DL6ER <dl6er@dl6er.de> * Rename table gravity_new to gravity_temp to clarify that this is only an intermediate table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Add timers for each of the calls to compute intense parts. They are to be removed before this finally hits the release/v5.0 branch. Signed-off-by: DL6ER <dl6er@dl6er.de> * Fix legacy list files import. It currently doesn't work when the gravity database has already been updated to using the single domainlist table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Simplify database_table_from_file(), remove all to this function for gravity lost downloads. Signed-off-by: DL6ER <dl6er@dl6er.de> * Update gravity.db.sql to version 10 to have newle created databases already reflect the most recent state. Signed-off-by: DL6ER <dl6er@dl6er.de> * Create second gravity database and swap them on success. This has a number of advantages such as instantaneous gravity updates (as seen from FTL) and always available gravity blocking. Furthermore, this saves disk space as the old database is removed on completion. * Add timing output for the database swapping SQLite3 call. Signed-off-by: DL6ER <dl6er@dl6er.de> * Explicitly generate index as a separate process. Signed-off-by: DL6ER <dl6er@dl6er.de> * Remove time measurements. Signed-off-by: DL6ER <dl6er@dl6er.de>
2020-01-24 17:39:13 +00:00
local rowid
declare -i rowid
rowid=1
Improve gravity performance (#3100) * Gravity performance improvements. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not move downloaded lists into migration_backup directory. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not (strictly) sort domains. Random-leaf access is faster than always-last-leaf access (on average). Signed-off-by: DL6ER <dl6er@dl6er.de> * Append instead of overwrite gravity_new collection list. Signed-off-by: DL6ER <dl6er@dl6er.de> * Rename table gravity_new to gravity_temp to clarify that this is only an intermediate table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Add timers for each of the calls to compute intense parts. They are to be removed before this finally hits the release/v5.0 branch. Signed-off-by: DL6ER <dl6er@dl6er.de> * Fix legacy list files import. It currently doesn't work when the gravity database has already been updated to using the single domainlist table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Simplify database_table_from_file(), remove all to this function for gravity lost downloads. Signed-off-by: DL6ER <dl6er@dl6er.de> * Update gravity.db.sql to version 10 to have newle created databases already reflect the most recent state. Signed-off-by: DL6ER <dl6er@dl6er.de> * Create second gravity database and swap them on success. This has a number of advantages such as instantaneous gravity updates (as seen from FTL) and always available gravity blocking. Furthermore, this saves disk space as the old database is removed on completion. * Add timing output for the database swapping SQLite3 call. Signed-off-by: DL6ER <dl6er@dl6er.de> * Explicitly generate index as a separate process. Signed-off-by: DL6ER <dl6er@dl6er.de> * Remove time measurements. Signed-off-by: DL6ER <dl6er@dl6er.de>
2020-01-24 17:39:13 +00:00
# Special handling for domains to be imported into the common domainlist table
if [[ "${table}" == "whitelist" ]]; then
list_type="0"
Improve gravity performance (#3100) * Gravity performance improvements. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not move downloaded lists into migration_backup directory. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not (strictly) sort domains. Random-leaf access is faster than always-last-leaf access (on average). Signed-off-by: DL6ER <dl6er@dl6er.de> * Append instead of overwrite gravity_new collection list. Signed-off-by: DL6ER <dl6er@dl6er.de> * Rename table gravity_new to gravity_temp to clarify that this is only an intermediate table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Add timers for each of the calls to compute intense parts. They are to be removed before this finally hits the release/v5.0 branch. Signed-off-by: DL6ER <dl6er@dl6er.de> * Fix legacy list files import. It currently doesn't work when the gravity database has already been updated to using the single domainlist table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Simplify database_table_from_file(), remove all to this function for gravity lost downloads. Signed-off-by: DL6ER <dl6er@dl6er.de> * Update gravity.db.sql to version 10 to have newle created databases already reflect the most recent state. Signed-off-by: DL6ER <dl6er@dl6er.de> * Create second gravity database and swap them on success. This has a number of advantages such as instantaneous gravity updates (as seen from FTL) and always available gravity blocking. Furthermore, this saves disk space as the old database is removed on completion. * Add timing output for the database swapping SQLite3 call. Signed-off-by: DL6ER <dl6er@dl6er.de> * Explicitly generate index as a separate process. Signed-off-by: DL6ER <dl6er@dl6er.de> * Remove time measurements. Signed-off-by: DL6ER <dl6er@dl6er.de>
2020-01-24 17:39:13 +00:00
table="domainlist"
elif [[ "${table}" == "blacklist" ]]; then
list_type="1"
Improve gravity performance (#3100) * Gravity performance improvements. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not move downloaded lists into migration_backup directory. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not (strictly) sort domains. Random-leaf access is faster than always-last-leaf access (on average). Signed-off-by: DL6ER <dl6er@dl6er.de> * Append instead of overwrite gravity_new collection list. Signed-off-by: DL6ER <dl6er@dl6er.de> * Rename table gravity_new to gravity_temp to clarify that this is only an intermediate table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Add timers for each of the calls to compute intense parts. They are to be removed before this finally hits the release/v5.0 branch. Signed-off-by: DL6ER <dl6er@dl6er.de> * Fix legacy list files import. It currently doesn't work when the gravity database has already been updated to using the single domainlist table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Simplify database_table_from_file(), remove all to this function for gravity lost downloads. Signed-off-by: DL6ER <dl6er@dl6er.de> * Update gravity.db.sql to version 10 to have newle created databases already reflect the most recent state. Signed-off-by: DL6ER <dl6er@dl6er.de> * Create second gravity database and swap them on success. This has a number of advantages such as instantaneous gravity updates (as seen from FTL) and always available gravity blocking. Furthermore, this saves disk space as the old database is removed on completion. * Add timing output for the database swapping SQLite3 call. Signed-off-by: DL6ER <dl6er@dl6er.de> * Explicitly generate index as a separate process. Signed-off-by: DL6ER <dl6er@dl6er.de> * Remove time measurements. Signed-off-by: DL6ER <dl6er@dl6er.de>
2020-01-24 17:39:13 +00:00
table="domainlist"
elif [[ "${table}" == "regex" ]]; then
list_type="3"
Improve gravity performance (#3100) * Gravity performance improvements. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not move downloaded lists into migration_backup directory. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not (strictly) sort domains. Random-leaf access is faster than always-last-leaf access (on average). Signed-off-by: DL6ER <dl6er@dl6er.de> * Append instead of overwrite gravity_new collection list. Signed-off-by: DL6ER <dl6er@dl6er.de> * Rename table gravity_new to gravity_temp to clarify that this is only an intermediate table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Add timers for each of the calls to compute intense parts. They are to be removed before this finally hits the release/v5.0 branch. Signed-off-by: DL6ER <dl6er@dl6er.de> * Fix legacy list files import. It currently doesn't work when the gravity database has already been updated to using the single domainlist table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Simplify database_table_from_file(), remove all to this function for gravity lost downloads. Signed-off-by: DL6ER <dl6er@dl6er.de> * Update gravity.db.sql to version 10 to have newle created databases already reflect the most recent state. Signed-off-by: DL6ER <dl6er@dl6er.de> * Create second gravity database and swap them on success. This has a number of advantages such as instantaneous gravity updates (as seen from FTL) and always available gravity blocking. Furthermore, this saves disk space as the old database is removed on completion. * Add timing output for the database swapping SQLite3 call. Signed-off-by: DL6ER <dl6er@dl6er.de> * Explicitly generate index as a separate process. Signed-off-by: DL6ER <dl6er@dl6er.de> * Remove time measurements. Signed-off-by: DL6ER <dl6er@dl6er.de>
2020-01-24 17:39:13 +00:00
table="domainlist"
fi
Improve gravity performance (#3100) * Gravity performance improvements. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not move downloaded lists into migration_backup directory. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not (strictly) sort domains. Random-leaf access is faster than always-last-leaf access (on average). Signed-off-by: DL6ER <dl6er@dl6er.de> * Append instead of overwrite gravity_new collection list. Signed-off-by: DL6ER <dl6er@dl6er.de> * Rename table gravity_new to gravity_temp to clarify that this is only an intermediate table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Add timers for each of the calls to compute intense parts. They are to be removed before this finally hits the release/v5.0 branch. Signed-off-by: DL6ER <dl6er@dl6er.de> * Fix legacy list files import. It currently doesn't work when the gravity database has already been updated to using the single domainlist table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Simplify database_table_from_file(), remove all to this function for gravity lost downloads. Signed-off-by: DL6ER <dl6er@dl6er.de> * Update gravity.db.sql to version 10 to have newle created databases already reflect the most recent state. Signed-off-by: DL6ER <dl6er@dl6er.de> * Create second gravity database and swap them on success. This has a number of advantages such as instantaneous gravity updates (as seen from FTL) and always available gravity blocking. Furthermore, this saves disk space as the old database is removed on completion. * Add timing output for the database swapping SQLite3 call. Signed-off-by: DL6ER <dl6er@dl6er.de> * Explicitly generate index as a separate process. Signed-off-by: DL6ER <dl6er@dl6er.de> * Remove time measurements. Signed-off-by: DL6ER <dl6er@dl6er.de>
2020-01-24 17:39:13 +00:00
# Get MAX(id) from domainlist when INSERTing into this table
if [[ "${table}" == "domainlist" ]]; then
rowid="$(pihole-FTL sqlite3 -ni "${gravityDBfile}" "SELECT MAX(id) FROM domainlist;")"
Improve gravity performance (#3100) * Gravity performance improvements. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not move downloaded lists into migration_backup directory. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not (strictly) sort domains. Random-leaf access is faster than always-last-leaf access (on average). Signed-off-by: DL6ER <dl6er@dl6er.de> * Append instead of overwrite gravity_new collection list. Signed-off-by: DL6ER <dl6er@dl6er.de> * Rename table gravity_new to gravity_temp to clarify that this is only an intermediate table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Add timers for each of the calls to compute intense parts. They are to be removed before this finally hits the release/v5.0 branch. Signed-off-by: DL6ER <dl6er@dl6er.de> * Fix legacy list files import. It currently doesn't work when the gravity database has already been updated to using the single domainlist table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Simplify database_table_from_file(), remove all to this function for gravity lost downloads. Signed-off-by: DL6ER <dl6er@dl6er.de> * Update gravity.db.sql to version 10 to have newle created databases already reflect the most recent state. Signed-off-by: DL6ER <dl6er@dl6er.de> * Create second gravity database and swap them on success. This has a number of advantages such as instantaneous gravity updates (as seen from FTL) and always available gravity blocking. Furthermore, this saves disk space as the old database is removed on completion. * Add timing output for the database swapping SQLite3 call. Signed-off-by: DL6ER <dl6er@dl6er.de> * Explicitly generate index as a separate process. Signed-off-by: DL6ER <dl6er@dl6er.de> * Remove time measurements. Signed-off-by: DL6ER <dl6er@dl6er.de>
2020-01-24 17:39:13 +00:00
if [[ -z "$rowid" ]]; then
rowid=0
fi
rowid+=1
fi
# Loop over all domains in ${src} file
Improve gravity performance (#3100) * Gravity performance improvements. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not move downloaded lists into migration_backup directory. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not (strictly) sort domains. Random-leaf access is faster than always-last-leaf access (on average). Signed-off-by: DL6ER <dl6er@dl6er.de> * Append instead of overwrite gravity_new collection list. Signed-off-by: DL6ER <dl6er@dl6er.de> * Rename table gravity_new to gravity_temp to clarify that this is only an intermediate table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Add timers for each of the calls to compute intense parts. They are to be removed before this finally hits the release/v5.0 branch. Signed-off-by: DL6ER <dl6er@dl6er.de> * Fix legacy list files import. It currently doesn't work when the gravity database has already been updated to using the single domainlist table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Simplify database_table_from_file(), remove all to this function for gravity lost downloads. Signed-off-by: DL6ER <dl6er@dl6er.de> * Update gravity.db.sql to version 10 to have newle created databases already reflect the most recent state. Signed-off-by: DL6ER <dl6er@dl6er.de> * Create second gravity database and swap them on success. This has a number of advantages such as instantaneous gravity updates (as seen from FTL) and always available gravity blocking. Furthermore, this saves disk space as the old database is removed on completion. * Add timing output for the database swapping SQLite3 call. Signed-off-by: DL6ER <dl6er@dl6er.de> * Explicitly generate index as a separate process. Signed-off-by: DL6ER <dl6er@dl6er.de> * Remove time measurements. Signed-off-by: DL6ER <dl6er@dl6er.de>
2020-01-24 17:39:13 +00:00
# Read file line by line
grep -v '^ *#' <"${src}" | while IFS= read -r domain; do
Improve gravity performance (#3100) * Gravity performance improvements. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not move downloaded lists into migration_backup directory. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not (strictly) sort domains. Random-leaf access is faster than always-last-leaf access (on average). Signed-off-by: DL6ER <dl6er@dl6er.de> * Append instead of overwrite gravity_new collection list. Signed-off-by: DL6ER <dl6er@dl6er.de> * Rename table gravity_new to gravity_temp to clarify that this is only an intermediate table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Add timers for each of the calls to compute intense parts. They are to be removed before this finally hits the release/v5.0 branch. Signed-off-by: DL6ER <dl6er@dl6er.de> * Fix legacy list files import. It currently doesn't work when the gravity database has already been updated to using the single domainlist table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Simplify database_table_from_file(), remove all to this function for gravity lost downloads. Signed-off-by: DL6ER <dl6er@dl6er.de> * Update gravity.db.sql to version 10 to have newle created databases already reflect the most recent state. Signed-off-by: DL6ER <dl6er@dl6er.de> * Create second gravity database and swap them on success. This has a number of advantages such as instantaneous gravity updates (as seen from FTL) and always available gravity blocking. Furthermore, this saves disk space as the old database is removed on completion. * Add timing output for the database swapping SQLite3 call. Signed-off-by: DL6ER <dl6er@dl6er.de> * Explicitly generate index as a separate process. Signed-off-by: DL6ER <dl6er@dl6er.de> * Remove time measurements. Signed-off-by: DL6ER <dl6er@dl6er.de>
2020-01-24 17:39:13 +00:00
# Only add non-empty lines
if [[ -n "${domain}" ]]; then
if [[ "${table}" == "domain_audit" ]]; then
# domain_audit table format (no enable or modified fields)
echo "${rowid},\"${domain}\",${timestamp}" >>"${tmpFile}"
Improve gravity performance (#3100) * Gravity performance improvements. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not move downloaded lists into migration_backup directory. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not (strictly) sort domains. Random-leaf access is faster than always-last-leaf access (on average). Signed-off-by: DL6ER <dl6er@dl6er.de> * Append instead of overwrite gravity_new collection list. Signed-off-by: DL6ER <dl6er@dl6er.de> * Rename table gravity_new to gravity_temp to clarify that this is only an intermediate table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Add timers for each of the calls to compute intense parts. They are to be removed before this finally hits the release/v5.0 branch. Signed-off-by: DL6ER <dl6er@dl6er.de> * Fix legacy list files import. It currently doesn't work when the gravity database has already been updated to using the single domainlist table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Simplify database_table_from_file(), remove all to this function for gravity lost downloads. Signed-off-by: DL6ER <dl6er@dl6er.de> * Update gravity.db.sql to version 10 to have newle created databases already reflect the most recent state. Signed-off-by: DL6ER <dl6er@dl6er.de> * Create second gravity database and swap them on success. This has a number of advantages such as instantaneous gravity updates (as seen from FTL) and always available gravity blocking. Furthermore, this saves disk space as the old database is removed on completion. * Add timing output for the database swapping SQLite3 call. Signed-off-by: DL6ER <dl6er@dl6er.de> * Explicitly generate index as a separate process. Signed-off-by: DL6ER <dl6er@dl6er.de> * Remove time measurements. Signed-off-by: DL6ER <dl6er@dl6er.de>
2020-01-24 17:39:13 +00:00
elif [[ "${table}" == "adlist" ]]; then
# Adlist table format
echo "${rowid},\"${domain}\",1,${timestamp},${timestamp},\"Migrated from ${src}\",,0,0,0,0,0" >>"${tmpFile}"
Improve gravity performance (#3100) * Gravity performance improvements. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not move downloaded lists into migration_backup directory. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not (strictly) sort domains. Random-leaf access is faster than always-last-leaf access (on average). Signed-off-by: DL6ER <dl6er@dl6er.de> * Append instead of overwrite gravity_new collection list. Signed-off-by: DL6ER <dl6er@dl6er.de> * Rename table gravity_new to gravity_temp to clarify that this is only an intermediate table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Add timers for each of the calls to compute intense parts. They are to be removed before this finally hits the release/v5.0 branch. Signed-off-by: DL6ER <dl6er@dl6er.de> * Fix legacy list files import. It currently doesn't work when the gravity database has already been updated to using the single domainlist table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Simplify database_table_from_file(), remove all to this function for gravity lost downloads. Signed-off-by: DL6ER <dl6er@dl6er.de> * Update gravity.db.sql to version 10 to have newle created databases already reflect the most recent state. Signed-off-by: DL6ER <dl6er@dl6er.de> * Create second gravity database and swap them on success. This has a number of advantages such as instantaneous gravity updates (as seen from FTL) and always available gravity blocking. Furthermore, this saves disk space as the old database is removed on completion. * Add timing output for the database swapping SQLite3 call. Signed-off-by: DL6ER <dl6er@dl6er.de> * Explicitly generate index as a separate process. Signed-off-by: DL6ER <dl6er@dl6er.de> * Remove time measurements. Signed-off-by: DL6ER <dl6er@dl6er.de>
2020-01-24 17:39:13 +00:00
else
# White-, black-, and regexlist table format
echo "${rowid},${list_type},\"${domain}\",1,${timestamp},${timestamp},\"Migrated from ${src}\"" >>"${tmpFile}"
Improve gravity performance (#3100) * Gravity performance improvements. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not move downloaded lists into migration_backup directory. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not (strictly) sort domains. Random-leaf access is faster than always-last-leaf access (on average). Signed-off-by: DL6ER <dl6er@dl6er.de> * Append instead of overwrite gravity_new collection list. Signed-off-by: DL6ER <dl6er@dl6er.de> * Rename table gravity_new to gravity_temp to clarify that this is only an intermediate table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Add timers for each of the calls to compute intense parts. They are to be removed before this finally hits the release/v5.0 branch. Signed-off-by: DL6ER <dl6er@dl6er.de> * Fix legacy list files import. It currently doesn't work when the gravity database has already been updated to using the single domainlist table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Simplify database_table_from_file(), remove all to this function for gravity lost downloads. Signed-off-by: DL6ER <dl6er@dl6er.de> * Update gravity.db.sql to version 10 to have newle created databases already reflect the most recent state. Signed-off-by: DL6ER <dl6er@dl6er.de> * Create second gravity database and swap them on success. This has a number of advantages such as instantaneous gravity updates (as seen from FTL) and always available gravity blocking. Furthermore, this saves disk space as the old database is removed on completion. * Add timing output for the database swapping SQLite3 call. Signed-off-by: DL6ER <dl6er@dl6er.de> * Explicitly generate index as a separate process. Signed-off-by: DL6ER <dl6er@dl6er.de> * Remove time measurements. Signed-off-by: DL6ER <dl6er@dl6er.de>
2020-01-24 17:39:13 +00:00
fi
rowid+=1
fi
done
# Store domains in database table specified by ${table}
# Use printf as .mode and .import need to be on separate lines
# see https://unix.stackexchange.com/a/445615/83260
output=$({ printf ".timeout 30000\\n.mode csv\\n.import \"%s\" %s\\n" "${tmpFile}" "${table}" | pihole-FTL sqlite3 -ni "${gravityDBfile}"; } 2>&1)
status="$?"
if [[ "${status}" -ne 0 ]]; then
echo -e "\\n ${CROSS} Unable to fill table ${table}${list_type} in database ${gravityDBfile}\\n ${output}"
gravity_Cleanup "error"
fi
# Move source file to backup directory, create directory if not existing
mkdir -p "${backup_path}"
mv "${src}" "${backup_file}" 2>/dev/null ||
echo -e " ${CROSS} Unable to backup ${src} to ${backup_path}"
Improve gravity performance (#3100) * Gravity performance improvements. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not move downloaded lists into migration_backup directory. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not (strictly) sort domains. Random-leaf access is faster than always-last-leaf access (on average). Signed-off-by: DL6ER <dl6er@dl6er.de> * Append instead of overwrite gravity_new collection list. Signed-off-by: DL6ER <dl6er@dl6er.de> * Rename table gravity_new to gravity_temp to clarify that this is only an intermediate table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Add timers for each of the calls to compute intense parts. They are to be removed before this finally hits the release/v5.0 branch. Signed-off-by: DL6ER <dl6er@dl6er.de> * Fix legacy list files import. It currently doesn't work when the gravity database has already been updated to using the single domainlist table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Simplify database_table_from_file(), remove all to this function for gravity lost downloads. Signed-off-by: DL6ER <dl6er@dl6er.de> * Update gravity.db.sql to version 10 to have newle created databases already reflect the most recent state. Signed-off-by: DL6ER <dl6er@dl6er.de> * Create second gravity database and swap them on success. This has a number of advantages such as instantaneous gravity updates (as seen from FTL) and always available gravity blocking. Furthermore, this saves disk space as the old database is removed on completion. * Add timing output for the database swapping SQLite3 call. Signed-off-by: DL6ER <dl6er@dl6er.de> * Explicitly generate index as a separate process. Signed-off-by: DL6ER <dl6er@dl6er.de> * Remove time measurements. Signed-off-by: DL6ER <dl6er@dl6er.de>
2020-01-24 17:39:13 +00:00
# Delete tmpFile
rm "${tmpFile}" >/dev/null 2>&1 ||
Improve gravity performance (#3100) * Gravity performance improvements. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not move downloaded lists into migration_backup directory. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not (strictly) sort domains. Random-leaf access is faster than always-last-leaf access (on average). Signed-off-by: DL6ER <dl6er@dl6er.de> * Append instead of overwrite gravity_new collection list. Signed-off-by: DL6ER <dl6er@dl6er.de> * Rename table gravity_new to gravity_temp to clarify that this is only an intermediate table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Add timers for each of the calls to compute intense parts. They are to be removed before this finally hits the release/v5.0 branch. Signed-off-by: DL6ER <dl6er@dl6er.de> * Fix legacy list files import. It currently doesn't work when the gravity database has already been updated to using the single domainlist table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Simplify database_table_from_file(), remove all to this function for gravity lost downloads. Signed-off-by: DL6ER <dl6er@dl6er.de> * Update gravity.db.sql to version 10 to have newle created databases already reflect the most recent state. Signed-off-by: DL6ER <dl6er@dl6er.de> * Create second gravity database and swap them on success. This has a number of advantages such as instantaneous gravity updates (as seen from FTL) and always available gravity blocking. Furthermore, this saves disk space as the old database is removed on completion. * Add timing output for the database swapping SQLite3 call. Signed-off-by: DL6ER <dl6er@dl6er.de> * Explicitly generate index as a separate process. Signed-off-by: DL6ER <dl6er@dl6er.de> * Remove time measurements. Signed-off-by: DL6ER <dl6er@dl6er.de>
2020-01-24 17:39:13 +00:00
echo -e " ${CROSS} Unable to remove ${tmpFile}"
}
# Report whether the table named ${1} has a column named ${2} in the database
# at ${gravityTEMPfile}.
# Globals:
#   gravityTEMPfile (read)
#   output (written: everything the query printed, stdout and stderr combined)
# Arguments:
#   $1 - table name
#   $2 - column name
# Returns:
#   0 if the query printed exactly "1", 1 otherwise
gravity_column_exists() {
  output=$(
    {
      printf ".timeout 30000\\nSELECT EXISTS(SELECT * FROM pragma_table_info('%s') WHERE name='%s');\\n" "${1}" "${2}" |
        pihole-FTL sqlite3 -ni "${gravityTEMPfile}"
    } 2>&1
  )

  # The comparison's own exit status (0 on match, 1 otherwise) is the result
  [[ "${output}" == "1" ]]
}
# Record the domain counts of one adlist in the database at ${gravityTEMPfile}.
# NOTE(review): the previous comment said the values go into the "old" database
# because the new one is overwritten later; the statement below targets
# ${gravityTEMPfile} - confirm which database that is.
# Globals:
#   gravityTEMPfile, CROSS (read)
#   output, status (written)
# Arguments:
#   $1 - adlist id
#   $2 - value stored in column "number"
#   $3 - value stored in column "invalid_domains"
database_adlist_number() {
  # Nothing to do (and no error) when the "number" column is not there
  gravity_column_exists "adlist" "number" || return 0

  output=$(
    {
      printf ".timeout 30000\\nUPDATE adlist SET number = %i, invalid_domains = %i WHERE id = %i;\\n" "${2}" "${3}" "${1}" |
        pihole-FTL sqlite3 -ni "${gravityTEMPfile}"
    } 2>&1
  )
  status="$?"

  # A failed UPDATE is reported and handed over to the error cleanup
  if ((status != 0)); then
    echo -e "\\n ${CROSS} Unable to update number of domains in adlist with ID ${1} in database ${gravityTEMPfile}\\n ${output}"
    gravity_Cleanup "error"
  fi
}
# Record the status code of one adlist in the database at ${gravityTEMPfile}.
# NOTE(review): the previous comment said the value goes into the "old" database
# because the new one is overwritten later; the statement below targets
# ${gravityTEMPfile} - confirm which database that is.
# Globals:
#   gravityTEMPfile, CROSS (read)
#   output, status (written)
# Arguments:
#   $1 - adlist id
#   $2 - value stored in column "status"
database_adlist_status() {
  # Nothing to do (and no error) when the "status" column is not there
  gravity_column_exists "adlist" "status" || return 0

  output=$(
    {
      printf ".timeout 30000\\nUPDATE adlist SET status = %i WHERE id = %i;\\n" "${2}" "${1}" |
        pihole-FTL sqlite3 -ni "${gravityTEMPfile}"
    } 2>&1
  )
  status="$?"

  # A failed UPDATE is reported and handed over to the error cleanup
  if ((status != 0)); then
    echo -e "\\n ${CROSS} Unable to update status of adlist with ID ${1} in database ${gravityTEMPfile}\\n ${output}"
    gravity_Cleanup "error"
  fi
}
# Migrate pre-v5.0 list files to database-based Pi-hole versions.
#
# When ${gravityDBfile} does not exist yet, a new database is generated,
# upgraded, and every legacy list file that is present is imported into it.
# In all cases the database is (re)checked for pending upgrades at the end.
# Globals:
#   gravityDBfile, piholeDir, adListFile, blacklistFile, whitelistFile,
#   regexFile, INFO, CROSS (read)
# Returns:
#   1 if the new database could not be generated, otherwise the exit status
#   of the final upgrade_gravityDB call
migrate_to_database() {
  # Target table and legacy source file, index-aligned and in import order
  local -a legacy_tables=("adlist" "blacklist" "whitelist" "regex")
  local -a legacy_files=("${adListFile}" "${blacklistFile}" "${whitelistFile}" "${regexFile}")
  local idx

  # Create database file only if not present
  if [[ ! -e "${gravityDBfile}" ]]; then
    # Create new database file - note that this will be created in version 1
    echo -e " ${INFO} Creating new gravity database"
    if ! generate_gravity_database; then
      echo -e " ${CROSS} Error creating new gravity database. Please contact support."
      return 1
    fi

    # Check if gravity database needs to be updated
    upgrade_gravityDB "${gravityDBfile}" "${piholeDir}"

    # Migrate list files to new database
    # Important note: regex domains are added to the "regex" table
    # as it will only later be renamed to "regex_blacklist"!
    for idx in "${!legacy_tables[@]}"; do
      if [[ -e "${legacy_files[idx]}" ]]; then
        echo -e " ${INFO} Migrating content of ${legacy_files[idx]} into new database"
        database_table_from_file "${legacy_tables[idx]}" "${legacy_files[idx]}"
      fi
    done
  fi

  # Check if gravity database needs to be updated
  upgrade_gravityDB "${gravityDBfile}" "${piholeDir}"
}
Optimised parsing of domains on IPv6 servers * Remove WHITELIST_COMMAND * Place IPv4/IPv6 availability test underneath setupVars.conf source * Improved clarity on comments * Define default lookupDomain on local line * Use `getent hosts` instead of nslookup (faster) * Make gravity_DNSLookup() function more readable * Use bold on "Neutrino emissions detected" * Swap conditionals around on adlists file handling * Add comments to both gravity_Collapse() `awk`s * Removed unnecessary "${str}" from gravity_Pull() * Merge function variables into local line * Place .phgbp suffice on mktemp, so patternbuffers can be cleaned up all at once in gravity_Cleanup() * Removed success="false" from $httpCode case, placed empty success var in local * Reordered $httpCode case numerically because I can * Provide error if Dnsmasq format list is being parsed * Remove IPv4 check when determining URL list (too slow on large lists) * Check ${#sources[@]} to ensure we're checking the number of entries and not the character count * Define empty plural in local line, removing unnecessary plural=; * Optimised readability of gravity_Whitelist() * Removed uninformative "Nothing to blacklist"/"No wildcards used" text * Optimised parsing of domains into hosts format on IPv6 enabled servers * Ensure /etc/hostname is non-zero * Use `: >` instead of `rm` as consistent with the rest of the script * Ensured that gravity_Cleanup() removes ${localList}.tmp * Optimised readability of gravity_ParseUserDomains() * Moved dnsRestart to ${var} case statement, renaming it to dnsRestartType for readability * Set default $listType to ensure script passes "bash strict mode"
2017-09-15 12:39:17 +00:00
# Determine if DNS resolution is available before proceeding.
#
# Tries once (bounded by a 4 second timeout) to resolve ${lookupDomain}. If
# that fails, polls once per second - printing a growing row of dots - until
# a lookup succeeds, so this function only returns once DNS works.
# Globals:
#   OVER, TICK, CROSS, INFO (read)
# Returns:
#   0 once a lookup has succeeded
gravity_CheckDNSResolutionAvailable() {
  local lookupDomain="raw.githubusercontent.com"
  # Progress message; kept local so the caller's variables are not clobbered
  local str

  # Determine if $lookupDomain is resolvable
  if timeout 4 getent hosts "${lookupDomain}" &>/dev/null; then
    echo -e "${OVER} ${TICK} DNS resolution is available\\n"
    return 0
  fi
  echo -e " ${CROSS} DNS resolution is currently unavailable"

  str="Waiting until DNS resolution is available..."
  echo -ne " ${INFO} ${str}"
  # NOTE(review): the wait loop polls github.com rather than ${lookupDomain};
  # kept as-is so that hosts which cannot resolve ${lookupDomain} do not hang
  # here forever - confirm whether this asymmetry is intended.
  until getent hosts github.com &>/dev/null; do
    # Append one dot for each second waiting
    str="${str}."
    echo -ne " ${OVER} ${INFO} ${str}"
    sleep 1
  done

  # If we reach this point, DNS resolution is available
  echo -e "${OVER} ${TICK} DNS resolution is available"
}
# Retrieve blocklist URLs and parse domains from adlist.list
gravity_DownloadBlocklists() {
Optimised parsing of domains on IPv6 servers * Remove WHITELIST_COMMAND * Place IPv4/IPv6 availability test underneath setupVars.conf source * Improved clarity on comments * Define default lookupDomain on local line * Use `getent hosts` instead of nslookup (faster) * Make gravity_DNSLookup() function more readable * Use bold on "Neutrino emissions detected" * Swap conditionals around on adlists file handling * Add comments to both gravity_Collapse() `awk`s * Removed unnecessary "${str}" from gravity_Pull() * Merge function variables into local line * Place .phgbp suffice on mktemp, so patternbuffers can be cleaned up all at once in gravity_Cleanup() * Removed success="false" from $httpCode case, placed empty success var in local * Reordered $httpCode case numerically because I can * Provide error if Dnsmasq format list is being parsed * Remove IPv4 check when determining URL list (too slow on large lists) * Check ${#sources[@]} to ensure we're checking the number of entries and not the character count * Define empty plural in local line, removing unnecessary plural=; * Optimised readability of gravity_Whitelist() * Removed uninformative "Nothing to blacklist"/"No wildcards used" text * Optimised parsing of domains into hosts format on IPv6 enabled servers * Ensure /etc/hostname is non-zero * Use `: >` instead of `rm` as consistent with the rest of the script * Ensured that gravity_Cleanup() removes ${localList}.tmp * Optimised readability of gravity_ParseUserDomains() * Moved dnsRestart to ${var} case statement, renaming it to dnsRestartType for readability * Set default $listType to ensure script passes "bash strict mode"
2017-09-15 12:39:17 +00:00
echo -e " ${INFO} ${COL_BOLD}Neutrino emissions detected${COL_NC}..."
if [[ "${gravityDBfile}" != "${gravityDBfile_default}" ]]; then
echo -e " ${INFO} Storing gravity database in ${COL_BOLD}${gravityDBfile}${COL_NC}"
fi
# Retrieve source URLs from gravity database
# We source only enabled adlists, SQLite3 stores boolean values as 0 (false) or 1 (true)
mapfile -t sources <<<"$(pihole-FTL sqlite3 -ni "${gravityDBfile}" "SELECT address FROM vw_adlist;" 2>/dev/null)"
mapfile -t sourceIDs <<<"$(pihole-FTL sqlite3 -ni "${gravityDBfile}" "SELECT id FROM vw_adlist;" 2>/dev/null)"
mapfile -t sourceTypes <<<"$(pihole-FTL sqlite3 -ni "${gravityDBfile}" "SELECT type FROM vw_adlist;" 2>/dev/null)"
# Parse source domains from $sources
mapfile -t sourceDomains <<<"$(
Optimised parsing of domains on IPv6 servers * Remove WHITELIST_COMMAND * Place IPv4/IPv6 availability test underneath setupVars.conf source * Improved clarity on comments * Define default lookupDomain on local line * Use `getent hosts` instead of nslookup (faster) * Make gravity_DNSLookup() function more readable * Use bold on "Neutrino emissions detected" * Swap conditionals around on adlists file handling * Add comments to both gravity_Collapse() `awk`s * Removed unnecessary "${str}" from gravity_Pull() * Merge function variables into local line * Place .phgbp suffice on mktemp, so patternbuffers can be cleaned up all at once in gravity_Cleanup() * Removed success="false" from $httpCode case, placed empty success var in local * Reordered $httpCode case numerically because I can * Provide error if Dnsmasq format list is being parsed * Remove IPv4 check when determining URL list (too slow on large lists) * Check ${#sources[@]} to ensure we're checking the number of entries and not the character count * Define empty plural in local line, removing unnecessary plural=; * Optimised readability of gravity_Whitelist() * Removed uninformative "Nothing to blacklist"/"No wildcards used" text * Optimised parsing of domains into hosts format on IPv6 enabled servers * Ensure /etc/hostname is non-zero * Use `: >` instead of `rm` as consistent with the rest of the script * Ensured that gravity_Cleanup() removes ${localList}.tmp * Optimised readability of gravity_ParseUserDomains() * Moved dnsRestart to ${var} case statement, renaming it to dnsRestartType for readability * Set default $listType to ensure script passes "bash strict mode"
2017-09-15 12:39:17 +00:00
# Logic: Split by folder/port
awk -F '[/:]' '{
Optimised parsing of domains on IPv6 servers * Remove WHITELIST_COMMAND * Place IPv4/IPv6 availability test underneath setupVars.conf source * Improved clarity on comments * Define default lookupDomain on local line * Use `getent hosts` instead of nslookup (faster) * Make gravity_DNSLookup() function more readable * Use bold on "Neutrino emissions detected" * Swap conditionals around on adlists file handling * Add comments to both gravity_Collapse() `awk`s * Removed unnecessary "${str}" from gravity_Pull() * Merge function variables into local line * Place .phgbp suffice on mktemp, so patternbuffers can be cleaned up all at once in gravity_Cleanup() * Removed success="false" from $httpCode case, placed empty success var in local * Reordered $httpCode case numerically because I can * Provide error if Dnsmasq format list is being parsed * Remove IPv4 check when determining URL list (too slow on large lists) * Check ${#sources[@]} to ensure we're checking the number of entries and not the character count * Define empty plural in local line, removing unnecessary plural=; * Optimised readability of gravity_Whitelist() * Removed uninformative "Nothing to blacklist"/"No wildcards used" text * Optimised parsing of domains into hosts format on IPv6 enabled servers * Ensure /etc/hostname is non-zero * Use `: >` instead of `rm` as consistent with the rest of the script * Ensured that gravity_Cleanup() removes ${localList}.tmp * Optimised readability of gravity_ParseUserDomains() * Moved dnsRestart to ${var} case statement, renaming it to dnsRestartType for readability * Set default $listType to ensure script passes "bash strict mode"
2017-09-15 12:39:17 +00:00
# Remove URL protocol & optional username:password@
gsub(/(.*:\/\/|.*:.*@)/, "", $0)
if(length($1)>0){print $1}
else {print "local"}
}' <<<"$(printf '%s\n' "${sources[@]}")" 2>/dev/null
)"
local str="Pulling blocklist source list into range"
echo -e "${OVER} ${TICK} ${str}"
if [[ -z "${sources[*]}" ]] || [[ -z "${sourceDomains[*]}" ]]; then
echo -e " ${INFO} No source list found, or it is empty"
echo ""
unset sources
fi
local url domain str target compression adlist_type
echo ""
Improve gravity performance (#3100) * Gravity performance improvements. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not move downloaded lists into migration_backup directory. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not (strictly) sort domains. Random-leaf access is faster than always-last-leaf access (on average). Signed-off-by: DL6ER <dl6er@dl6er.de> * Append instead of overwrite gravity_new collection list. Signed-off-by: DL6ER <dl6er@dl6er.de> * Rename table gravity_new to gravity_temp to clarify that this is only an intermediate table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Add timers for each of the calls to compute intense parts. They are to be removed before this finally hits the release/v5.0 branch. Signed-off-by: DL6ER <dl6er@dl6er.de> * Fix legacy list files import. It currently doesn't work when the gravity database has already been updated to using the single domainlist table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Simplify database_table_from_file(), remove all to this function for gravity lost downloads. Signed-off-by: DL6ER <dl6er@dl6er.de> * Update gravity.db.sql to version 10 to have newle created databases already reflect the most recent state. Signed-off-by: DL6ER <dl6er@dl6er.de> * Create second gravity database and swap them on success. This has a number of advantages such as instantaneous gravity updates (as seen from FTL) and always available gravity blocking. Furthermore, this saves disk space as the old database is removed on completion. * Add timing output for the database swapping SQLite3 call. Signed-off-by: DL6ER <dl6er@dl6er.de> * Explicitly generate index as a separate process. Signed-off-by: DL6ER <dl6er@dl6er.de> * Remove time measurements. Signed-off-by: DL6ER <dl6er@dl6er.de>
2020-01-24 17:39:13 +00:00
# Prepare new gravity database
str="Preparing new gravity database"
echo -ne " ${INFO} ${str}..."
rm "${gravityTEMPfile}" >/dev/null 2>&1
output=$({ pihole-FTL sqlite3 -ni "${gravityTEMPfile}" <"${gravityDBschema}"; } 2>&1)
Improve gravity performance (#3100) * Gravity performance improvements. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not move downloaded lists into migration_backup directory. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not (strictly) sort domains. Random-leaf access is faster than always-last-leaf access (on average). Signed-off-by: DL6ER <dl6er@dl6er.de> * Append instead of overwrite gravity_new collection list. Signed-off-by: DL6ER <dl6er@dl6er.de> * Rename table gravity_new to gravity_temp to clarify that this is only an intermediate table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Add timers for each of the calls to compute intense parts. They are to be removed before this finally hits the release/v5.0 branch. Signed-off-by: DL6ER <dl6er@dl6er.de> * Fix legacy list files import. It currently doesn't work when the gravity database has already been updated to using the single domainlist table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Simplify database_table_from_file(), remove all to this function for gravity lost downloads. Signed-off-by: DL6ER <dl6er@dl6er.de> * Update gravity.db.sql to version 10 to have newle created databases already reflect the most recent state. Signed-off-by: DL6ER <dl6er@dl6er.de> * Create second gravity database and swap them on success. This has a number of advantages such as instantaneous gravity updates (as seen from FTL) and always available gravity blocking. Furthermore, this saves disk space as the old database is removed on completion. * Add timing output for the database swapping SQLite3 call. Signed-off-by: DL6ER <dl6er@dl6er.de> * Explicitly generate index as a separate process. Signed-off-by: DL6ER <dl6er@dl6er.de> * Remove time measurements. Signed-off-by: DL6ER <dl6er@dl6er.de>
2020-01-24 17:39:13 +00:00
status="$?"
if [[ "${status}" -ne 0 ]]; then
echo -e "\\n ${CROSS} Unable to create new database ${gravityTEMPfile}\\n ${output}"
gravity_Cleanup "error"
Improve gravity performance (#3100) * Gravity performance improvements. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not move downloaded lists into migration_backup directory. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not (strictly) sort domains. Random-leaf access is faster than always-last-leaf access (on average). Signed-off-by: DL6ER <dl6er@dl6er.de> * Append instead of overwrite gravity_new collection list. Signed-off-by: DL6ER <dl6er@dl6er.de> * Rename table gravity_new to gravity_temp to clarify that this is only an intermediate table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Add timers for each of the calls to compute intense parts. They are to be removed before this finally hits the release/v5.0 branch. Signed-off-by: DL6ER <dl6er@dl6er.de> * Fix legacy list files import. It currently doesn't work when the gravity database has already been updated to using the single domainlist table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Simplify database_table_from_file(), remove all to this function for gravity lost downloads. Signed-off-by: DL6ER <dl6er@dl6er.de> * Update gravity.db.sql to version 10 to have newle created databases already reflect the most recent state. Signed-off-by: DL6ER <dl6er@dl6er.de> * Create second gravity database and swap them on success. This has a number of advantages such as instantaneous gravity updates (as seen from FTL) and always available gravity blocking. Furthermore, this saves disk space as the old database is removed on completion. * Add timing output for the database swapping SQLite3 call. Signed-off-by: DL6ER <dl6er@dl6er.de> * Explicitly generate index as a separate process. Signed-off-by: DL6ER <dl6er@dl6er.de> * Remove time measurements. Signed-off-by: DL6ER <dl6er@dl6er.de>
2020-01-24 17:39:13 +00:00
else
echo -e "${OVER} ${TICK} ${str}"
fi
str="Creating new gravity databases"
echo -ne " ${INFO} ${str}..."
# Gravity copying SQL script
copyGravity="$(cat "${gravityDBcopy}")"
if [[ "${gravityDBfile}" != "${gravityDBfile_default}" ]]; then
# Replace default gravity script location by custom location
copyGravity="${copyGravity//"${gravityDBfile_default}"/"${gravityDBfile}"}"
fi
output=$({ pihole-FTL sqlite3 -ni "${gravityTEMPfile}" <<<"${copyGravity}"; } 2>&1)
status="$?"
if [[ "${status}" -ne 0 ]]; then
echo -e "\\n ${CROSS} Unable to copy data from ${gravityDBfile} to ${gravityTEMPfile}\\n ${output}"
return 1
fi
echo -e "${OVER} ${TICK} ${str}"
Improve gravity performance (#3100) * Gravity performance improvements. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not move downloaded lists into migration_backup directory. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not (strictly) sort domains. Random-leaf access is faster than always-last-leaf access (on average). Signed-off-by: DL6ER <dl6er@dl6er.de> * Append instead of overwrite gravity_new collection list. Signed-off-by: DL6ER <dl6er@dl6er.de> * Rename table gravity_new to gravity_temp to clarify that this is only an intermediate table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Add timers for each of the calls to compute intense parts. They are to be removed before this finally hits the release/v5.0 branch. Signed-off-by: DL6ER <dl6er@dl6er.de> * Fix legacy list files import. It currently doesn't work when the gravity database has already been updated to using the single domainlist table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Simplify database_table_from_file(), remove all to this function for gravity lost downloads. Signed-off-by: DL6ER <dl6er@dl6er.de> * Update gravity.db.sql to version 10 to have newle created databases already reflect the most recent state. Signed-off-by: DL6ER <dl6er@dl6er.de> * Create second gravity database and swap them on success. This has a number of advantages such as instantaneous gravity updates (as seen from FTL) and always available gravity blocking. Furthermore, this saves disk space as the old database is removed on completion. * Add timing output for the database swapping SQLite3 call. Signed-off-by: DL6ER <dl6er@dl6er.de> * Explicitly generate index as a separate process. Signed-off-by: DL6ER <dl6er@dl6er.de> * Remove time measurements. Signed-off-by: DL6ER <dl6er@dl6er.de>
2020-01-24 17:39:13 +00:00
2021-01-19 18:33:38 +00:00
# Use compression to reduce the amount of data that is transferred
# between the Pi-hole and the ad list provider. Use this feature
# only if it is supported by the locally available version of curl
if curl -V | grep -q "Features:.* libz"; then
compression="--compressed"
echo -e " ${INFO} Using libz compression\n"
else
compression=""
echo -e " ${INFO} Libz compression not available\n"
fi
Optimised parsing of domains on IPv6 servers * Remove WHITELIST_COMMAND * Place IPv4/IPv6 availability test underneath setupVars.conf source * Improved clarity on comments * Define default lookupDomain on local line * Use `getent hosts` instead of nslookup (faster) * Make gravity_DNSLookup() function more readable * Use bold on "Neutrino emissions detected" * Swap conditionals around on adlists file handling * Add comments to both gravity_Collapse() `awk`s * Removed unnecessary "${str}" from gravity_Pull() * Merge function variables into local line * Place .phgbp suffice on mktemp, so patternbuffers can be cleaned up all at once in gravity_Cleanup() * Removed success="false" from $httpCode case, placed empty success var in local * Reordered $httpCode case numerically because I can * Provide error if Dnsmasq format list is being parsed * Remove IPv4 check when determining URL list (too slow on large lists) * Check ${#sources[@]} to ensure we're checking the number of entries and not the character count * Define empty plural in local line, removing unnecessary plural=; * Optimised readability of gravity_Whitelist() * Removed uninformative "Nothing to blacklist"/"No wildcards used" text * Optimised parsing of domains into hosts format on IPv6 enabled servers * Ensure /etc/hostname is non-zero * Use `: >` instead of `rm` as consistent with the rest of the script * Ensured that gravity_Cleanup() removes ${localList}.tmp * Optimised readability of gravity_ParseUserDomains() * Moved dnsRestart to ${var} case statement, renaming it to dnsRestartType for readability * Set default $listType to ensure script passes "bash strict mode"
2017-09-15 12:39:17 +00:00
# Loop through $sources and download each one
for ((i = 0; i < "${#sources[@]}"; i++)); do
url="${sources[$i]}"
domain="${sourceDomains[$i]}"
id="${sourceIDs[$i]}"
if [[ "${sourceTypes[$i]}" -eq "0" ]]; then
# Gravity list
str="blocklist"
adlist_type="gravity"
else
# AntiGravity list
str="allowlist"
adlist_type="antigravity"
fi
# Save the file as list.#.domain
saveLocation="${piholeDir}/list.${id}.${domain}.${domainsExtension}"
activeDomains[$i]="${saveLocation}"
echo -e " ${INFO} Target: ${url}"
local regex check_url
# Check for characters NOT allowed in URLs
regex="[^a-zA-Z0-9:/?&%=~._()-;]"
# this will remove first @ that is after schema and before domain
# \1 is optional schema, \2 is userinfo
check_url="$(sed -re 's#([^:/]*://)?([^/]+)@#\1\2#' <<<"$url")"
if [[ "${check_url}" =~ ${regex} ]]; then
echo -e " ${CROSS} Invalid Target"
else
gravity_DownloadBlocklistFromUrl "${url}" "${sourceIDs[$i]}" "${saveLocation}" "${target}" "${compression}" "${adlist_type}" "${domain}"
fi
echo ""
done
Improve gravity performance (#3100) * Gravity performance improvements. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not move downloaded lists into migration_backup directory. Signed-off-by: DL6ER <dl6er@dl6er.de> * Do not (strictly) sort domains. Random-leaf access is faster than always-last-leaf access (on average). Signed-off-by: DL6ER <dl6er@dl6er.de> * Append instead of overwrite gravity_new collection list. Signed-off-by: DL6ER <dl6er@dl6er.de> * Rename table gravity_new to gravity_temp to clarify that this is only an intermediate table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Add timers for each of the calls to compute intense parts. They are to be removed before this finally hits the release/v5.0 branch. Signed-off-by: DL6ER <dl6er@dl6er.de> * Fix legacy list files import. It currently doesn't work when the gravity database has already been updated to using the single domainlist table. Signed-off-by: DL6ER <dl6er@dl6er.de> * Simplify database_table_from_file(), remove all to this function for gravity lost downloads. Signed-off-by: DL6ER <dl6er@dl6er.de> * Update gravity.db.sql to version 10 to have newle created databases already reflect the most recent state. Signed-off-by: DL6ER <dl6er@dl6er.de> * Create second gravity database and swap them on success. This has a number of advantages such as instantaneous gravity updates (as seen from FTL) and always available gravity blocking. Furthermore, this saves disk space as the old database is removed on completion. * Add timing output for the database swapping SQLite3 call. Signed-off-by: DL6ER <dl6er@dl6er.de> * Explicitly generate index as a separate process. Signed-off-by: DL6ER <dl6er@dl6er.de> * Remove time measurements. Signed-off-by: DL6ER <dl6er@dl6er.de>
2020-01-24 17:39:13 +00:00
gravity_Blackbody=true
}
# Detect whether a stored list changed since the previous run by means of a
# SHA-1 sidecar file ("<target>.sha1") and record the outcome for the adlist.
# Arguments:
#   $1 - adlist ID handed on to database_adlist_status
#   $2 - path of the list file to checksum
# Status recorded: "1" when the list is new or changed, "2" when unchanged.
compareLists() {
  local adlistID="${1}" target="${2}"
  local sha1File="${target}.sha1"

  # Sidecar missing or empty: nothing to compare against yet. Store a checksum
  # for the next run and treat the list as changed upstream.
  if [[ ! -s "${sha1File}" ]]; then
    sha1sum "${target}" >"${sha1File}"
    database_adlist_status "${adlistID}" "1"
    return
  fi

  # Stored checksum still matches the file on disk
  if sha1sum --check --status --strict "${sha1File}"; then
    echo " ${INFO} List stayed unchanged"
    database_adlist_status "${adlistID}" "2"
    return
  fi

  # Mismatch: the list changed upstream, so refresh the stored checksum
  sha1sum "${target}" >"${sha1File}"
  echo " ${INFO} List has been updated"
  database_adlist_status "${adlistID}" "1"
}
# Report whether the system resolver currently answers for a domain the way
# Pi-hole answers blocked domains under the configured dns.blocking.mode.
# Arguments: $1 - domain to probe
# Returns:   0 if the answer looks like a Pi-hole block, non-zero otherwise
_gravity_DomainIsBlocked() {
  local domain="${1}" ip

  case $(getFTLConfigValue dns.blocking.mode) in
  "IP-NODATA-AAAA" | "IP")
    # Get IP address of this domain and check if it matches any IP of the system
    ip="$(dig "${domain}" +short)"
    [[ -n "${ip}" && $(grep -Ec "inet(|6) ${ip}" <<<"$(ip a)") -gt 0 ]]
    ;;
  "NXDOMAIN")
    [[ $(dig "${domain}" | grep -c "NXDOMAIN") -ge 1 ]]
    ;;
  "NODATA")
    [[ $(dig "${domain}" | grep -c "NOERROR") -ge 1 ]] && [[ -z $(dig +short "${domain}") ]]
    ;;
  "NULL" | *)
    # Exact, fixed-string line match: a substring/regex match on "0.0.0.0"
    # would also accept legitimate answers such as 20.0.0.0
    [[ $(dig "${domain}" +short | grep -cxF "0.0.0.0") -ge 1 ]]
    ;;
  esac
}

# Download specified URL and perform checks on HTTP status and file content
# Arguments:
#   $1 - url:          source of the list (http, https, file, ftp, ftps, sftp)
#   $2 - adlistID:     ID passed to pihole-FTL parseList and database_adlist_*
#   $3 - saveLocation: path of the cached, parsed list
#   $4 - target:       accepted for interface compatibility, not used here
#   $5 - compression:  extra curl option(s), e.g. "--compressed" (may be empty)
#   $6 - gravity_type: first argument given to pihole-FTL
#   $7 - domain:       host part of the url, used for the "is it blocked" probe
# Globals read: GRAVITY_TMPDIR, gravityTEMPfile, curl_connect_timeout and the
#   output decorations (OVER, INFO, TICK, CROSS, COL_*)
# Adlist status recorded: set by compareLists after a fresh download, "2" on
#   HTTP 304, "3" when falling back to the cached list, "4" when no list is
#   available at all.
# NOTE(review): the permission check below only triggers for "file://" while
#   the protocol allow-list accepts any "file"* prefix - confirm that a
#   single-slash "file:/path" URL cannot reach curl unchecked.
gravity_DownloadBlocklistFromUrl() {
  local url="${1}" adlistID="${2}" saveLocation="${3}" target="${4}" compression="${5}" gravity_type="${6}" domain="${7}"
  local listCurlBuffer str httpCode success="" ip
  local file_path permissions ip_addr port upstream download=true processed="false"
  # Optional curl arguments are kept in arrays so that values containing
  # whitespace (e.g. the cache path) reach curl as a single argument
  local -a heisenbergCompensator=() cmd_ext=()

  # Create temp file to store content on disk instead of RAM
  # We don't use '--suffix' here because not all implementations of mktemp support it, e.g. on Alpine
  # Instead, mktemp's extension is swapped for .phgpb after creation
  listCurlBuffer="$(mktemp -p "${GRAVITY_TMPDIR}")"
  mv "${listCurlBuffer}" "${listCurlBuffer%.*}.phgpb"
  listCurlBuffer="${listCurlBuffer%.*}.phgpb"

  # Determine if $saveLocation has read permission
  if [[ -r "${saveLocation}" && $url != "file"* ]]; then
    # Have curl determine if a remote file has been modified since last retrieval
    # Uses "Last-Modified" header, which certain web servers do not provide (e.g: raw github urls)
    # Note: Don't do this for local files, always download them
    heisenbergCompensator=(-z "${saveLocation}")
  fi

  str="Status:"
  echo -ne " ${INFO} ${str} Pending..."

  # Check if this domain is blocked by Pi-hole but only if the domain is not a
  # local file or empty. The probe runs once, and only when it can matter.
  if [[ $url != "file"* ]] && [[ -n "${domain}" ]] && _gravity_DomainIsBlocked "${domain}"; then
    # Get first defined upstream server
    upstream="$(getFTLConfigValue dns.upstreams)"

    # Isolate first upstream server from a string like
    # [ 1.2.3.4#1234, 5.6.7.8#5678, ... ]
    upstream="${upstream%%,*}"
    upstream="${upstream##*[}"
    upstream="${upstream%%]*}"
    # Trim leading and trailing spaces and tabs
    upstream="${upstream#"${upstream%%[![:space:]]*}"}"
    upstream="${upstream%"${upstream##*[![:space:]]}"}"

    # Get IP address and port of this upstream server (port defaults to 53)
    ip_addr="${upstream%#*}"
    if [[ ${upstream} != *"#"* ]]; then
      port=53
    else
      port="${upstream#*#}"
    fi
    ip=$(dig "@${ip_addr}" -p "${port}" +short "${domain}" | tail -1)

    # From here on, port is the port curl will connect to
    if [[ "${url%%://*}" == "https" ]]; then
      port=443
    else
      port=80
    fi

    echo -e "${OVER} ${CROSS} ${str} ${domain} is blocked by one of your lists. Using DNS server ${upstream} instead"
    echo -ne " ${INFO} ${str} Pending..."

    # Only pin the address when the upstream returned one; an entry with an
    # empty address would be malformed
    if [[ -n "${ip}" ]]; then
      cmd_ext=(--resolve "${domain}:${port}:${ip}")
    fi
  fi

  # If we are going to "download" a local file, we first check if the target
  # file has a+r permission. We explicitly check for all+read because we want
  # to make sure that the file is readable by everyone and not just the user
  # running the script.
  if [[ $url == "file://"* ]]; then
    # Get the file path
    file_path="${url#file://}"
    # Check if the file exists and is a regular file (i.e. not a socket, fifo, tty, block). Might still be a symlink.
    if [[ ! -f $file_path ]]; then
      # Output that the file does not exist
      echo -e "${OVER} ${CROSS} ${file_path} does not exist"
      download=false
    else
      # Check if the file or a file referenced by the symlink has a+r permissions
      permissions=$(stat -L -c "%a" "$file_path")
      if [[ $permissions == *4 || $permissions == *5 || $permissions == *6 || $permissions == *7 ]]; then
        # Output that we are using the local file
        echo -e "${OVER} ${INFO} Using local file ${file_path}"
      else
        # Output that the file does not have the correct permissions
        echo -e "${OVER} ${CROSS} Cannot read file (file needs to have a+r permission)"
        download=false
      fi
    fi
  fi

  # Check for allowed protocols
  if [[ $url != "http"* && $url != "https"* && $url != "file"* && $url != "ftp"* && $url != "ftps"* && $url != "sftp"* ]]; then
    echo -e "${OVER} ${CROSS} ${str} Invalid protocol specified, ignoring list"
    download=false
  fi

  if [[ "${download}" == true ]]; then
    # ${compression} is left unquoted on purpose: it may be empty or hold
    # several options
    # shellcheck disable=SC2086
    httpCode=$(curl --connect-timeout "${curl_connect_timeout}" -s -L ${compression} "${cmd_ext[@]}" "${heisenbergCompensator[@]}" -w "%{http_code}" "${url}" -o "${listCurlBuffer}" 2>/dev/null)
  fi

  case $url in
  # Did we "download" a local file?
  "file"*)
    if [[ -s "${listCurlBuffer}" ]]; then
      echo -e "${OVER} ${TICK} ${str} Retrieval successful"
      success=true
    else
      echo -e "${OVER} ${CROSS} ${str} Retrieval failed / empty list"
    fi
    ;;
  # Did we "download" a remote file?
  *)
    # Determine "Status:" output based on HTTP response
    case "${httpCode}" in
    "200")
      echo -e "${OVER} ${TICK} ${str} Retrieval successful"
      success=true
      ;;
    "304")
      echo -e "${OVER} ${TICK} ${str} No changes detected"
      success=true
      ;;
    "000") echo -e "${OVER} ${CROSS} ${str} Connection Refused" ;;
    "403") echo -e "${OVER} ${CROSS} ${str} Forbidden" ;;
    "404") echo -e "${OVER} ${CROSS} ${str} Not found" ;;
    "408") echo -e "${OVER} ${CROSS} ${str} Time-out" ;;
    "451") echo -e "${OVER} ${CROSS} ${str} Unavailable For Legal Reasons" ;;
    "500") echo -e "${OVER} ${CROSS} ${str} Internal Server Error" ;;
    "504") echo -e "${OVER} ${CROSS} ${str} Connection Timed Out (Gateway)" ;;
    "521") echo -e "${OVER} ${CROSS} ${str} Web Server Is Down (Cloudflare)" ;;
    "522") echo -e "${OVER} ${CROSS} ${str} Connection Timed Out (Cloudflare)" ;;
    *) echo -e "${OVER} ${CROSS} ${str} ${url} (${httpCode})" ;;
    esac
    ;;
  esac

  # Determine if the blocklist was downloaded and saved correctly
  if [[ "${success}" == true ]]; then
    if [[ "${httpCode}" == "304" ]]; then
      # Not modified: re-import the cached copy
      # Add domains to database table file
      pihole-FTL "${gravity_type}" parseList "${saveLocation}" "${gravityTEMPfile}" "${adlistID}"
      database_adlist_status "${adlistID}" "2"
      processed="true"
    # Check if $listCurlBuffer is a non-zero length file
    elif [[ -s "${listCurlBuffer}" ]]; then
      # Determine if blocklist is non-standard and parse as appropriate
      gravity_ParseFileIntoDomains "${listCurlBuffer}" "${saveLocation}"
      # Remove curl buffer file after its use
      rm "${listCurlBuffer}"
      # Add domains to database table file
      pihole-FTL "${gravity_type}" parseList "${saveLocation}" "${gravityTEMPfile}" "${adlistID}"
      # Compare lists, are they identical?
      compareLists "${adlistID}" "${saveLocation}"
      processed="true"
    else
      # Fall back to previously cached list if $listCurlBuffer is empty
      echo -e " ${INFO} Received empty file"
    fi
  fi

  # Do we need to fall back to a cached list (if available)?
  if [[ "${processed}" != "true" ]]; then
    # Determine if cached list has read permission
    if [[ -r "${saveLocation}" ]]; then
      echo -e " ${CROSS} List download failed: ${COL_LIGHT_GREEN}using previously cached list${COL_NC}"
      # Add domains to database table file
      pihole-FTL "${gravity_type}" parseList "${saveLocation}" "${gravityTEMPfile}" "${adlistID}"
      database_adlist_status "${adlistID}" "3"
    else
      echo -e " ${CROSS} List download failed: ${COL_LIGHT_RED}no cached list available${COL_NC}"
      # Manually reset these two numbers because we do not call parseList here
      database_adlist_number "${adlistID}" 0 0
      database_adlist_status "${adlistID}" "4"
    fi
  fi
}
Optimised parsing of domains on IPv6 servers * Remove WHITELIST_COMMAND * Place IPv4/IPv6 availability test underneath setupVars.conf source * Improved clarity on comments * Define default lookupDomain on local line * Use `getent hosts` instead of nslookup (faster) * Make gravity_DNSLookup() function more readable * Use bold on "Neutrino emissions detected" * Swap conditionals around on adlists file handling * Add comments to both gravity_Collapse() `awk`s * Removed unnecessary "${str}" from gravity_Pull() * Merge function variables into local line * Place .phgbp suffice on mktemp, so patternbuffers can be cleaned up all at once in gravity_Cleanup() * Removed success="false" from $httpCode case, placed empty success var in local * Reordered $httpCode case numerically because I can * Provide error if Dnsmasq format list is being parsed * Remove IPv4 check when determining URL list (too slow on large lists) * Check ${#sources[@]} to ensure we're checking the number of entries and not the character count * Define empty plural in local line, removing unnecessary plural=; * Optimised readability of gravity_Whitelist() * Removed uninformative "Nothing to blacklist"/"No wildcards used" text * Optimised parsing of domains into hosts format on IPv6 enabled servers * Ensure /etc/hostname is non-zero * Use `: >` instead of `rm` as consistent with the rest of the script * Ensured that gravity_Cleanup() removes ${localList}.tmp * Optimised readability of gravity_ParseUserDomains() * Moved dnsRestart to ${var} case statement, renaming it to dnsRestartType for readability * Set default $listType to ensure script passes "bash strict mode"
2017-09-15 12:39:17 +00:00
# Parse source files into domains format
gravity_ParseFileIntoDomains() {
  # Normalise a downloaded list into one lowercase entry per line.
  #   $1 - file to read
  #   $2 - file to (over)write with the cleaned-up result; left with mode 644
  local input_file="${1}" output_file="${2}"

  # Clean-up steps, applied by sed to every line in the order listed
  local -a cleanup_steps=(
    # Strip a trailing carriage return
    -e 's/\r$//'
    # Cut everything from a "!" onwards (ABP comments), incl. preceding whitespace
    -e 's/\s*!.*//g'
    # Cut everything from a "[" onwards (ABP header), incl. preceding whitespace
    -e 's/\s*\[.*//g'
    # Drop lines with ABP extended CSS selectors ("##", "#$#", "#@#", "#?#") or
    # Adguard JavaScript ("#%#") that directly follow a letter
    -e '/[a-z]\#[$?@%]{0,3}\#/d'
    # Cut "#" comments together with any whitespace in front of the hash sign
    -e 's/\s*#.*//g'
    # Keep only what follows the last run of whitespace
    # (removes leading blanks as well as leading IP addresses)
    -e 's/^.*\s+//g'
    # Drop lines that ended up empty
    -e '/^$/d'
  )

  # Lowercase first: the selector filter above only looks for [a-z]
  tr '[:upper:]' '[:lower:]' <"${input_file}" >"${output_file}"

  # Run all clean-up steps in place on the lowercased copy
  sed -i -r "${cleanup_steps[@]}" "${output_file}"

  chmod 644 "${output_file}"
}
# Report number of entries in a table
gravity_Table_Count() {
  # Print how many rows a table/view of the temporary gravity database holds.
  #   $1 - table or view to count
  #   $2 - human readable description used in the printed message
  local table="${1}"
  local label="${2}"
  local total distinct

  total="$(pihole-FTL sqlite3 -ni "${gravityTEMPfile}" "SELECT COUNT(*) FROM ${table};")"

  # Everything except the gravity table only needs the plain row count
  if [[ "${table}" != "gravity" ]]; then
    echo -e " ${INFO} Number of ${label}: ${total}"
    return
  fi

  # For gravity, additionally report the number of distinct domains and
  # store that number in the info table under 'gravity_count'
  distinct="$(pihole-FTL sqlite3 -ni "${gravityTEMPfile}" "SELECT COUNT(*) FROM (SELECT DISTINCT domain FROM ${table});")"
  echo -e " ${INFO} Number of ${label}: ${total} (${COL_BOLD}${distinct} unique domains${COL_NC})"
  pihole-FTL sqlite3 -ni "${gravityTEMPfile}" "INSERT OR REPLACE INTO info (property,value) VALUES ('gravity_count',${distinct});"
}
# Output count of blacklisted domains and regex filters
gravity_ShowCount() {
  # Print the size of gravity followed by the sizes of the four domain list views.
  local entry

  # The table "gravity" is counted instead of the view "vw_gravity" for speed.
  # This is considered safe because, right after a gravity run, both are
  # expected to contain exactly the same number of domains.
  gravity_Table_Count "gravity" "gravity domains" ""

  # Remaining counts as "<view>|<description>" pairs, reported in this order
  for entry in \
    "vw_blacklist|exact denied domains" \
    "vw_regex_blacklist|regex denied filters" \
    "vw_whitelist|exact allowed domains" \
    "vw_regex_whitelist|regex allowed filters"; do
    gravity_Table_Count "${entry%%|*}" "${entry#*|}"
  done
}
2015-11-26 23:48:52 +00:00
# Trap Ctrl-C
gravity_Trap() {
  # Install the SIGINT (Ctrl-C) handler: announce the abort, then run the
  # cleanup in its error mode.
  # The handler text is kept in single quotes on purpose so that the variables
  # are expanded when the signal arrives, not when the trap is installed.
  # shellcheck disable=SC2016
  local on_interrupt='echo -e "\\n\\n ${INFO} ${COL_LIGHT_RED}User-abort detected${COL_NC}"; gravity_Cleanup "error"'

  trap "${on_interrupt}" INT
}
Optimised parsing of domains on IPv6 servers * Remove WHITELIST_COMMAND * Place IPv4/IPv6 availability test underneath setupVars.conf source * Improved clarity on comments * Define default lookupDomain on local line * Use `getent hosts` instead of nslookup (faster) * Make gravity_DNSLookup() function more readable * Use bold on "Neutrino emissions detected" * Swap conditionals around on adlists file handling * Add comments to both gravity_Collapse() `awk`s * Removed unnecessary "${str}" from gravity_Pull() * Merge function variables into local line * Place .phgbp suffice on mktemp, so patternbuffers can be cleaned up all at once in gravity_Cleanup() * Removed success="false" from $httpCode case, placed empty success var in local * Reordered $httpCode case numerically because I can * Provide error if Dnsmasq format list is being parsed * Remove IPv4 check when determining URL list (too slow on large lists) * Check ${#sources[@]} to ensure we're checking the number of entries and not the character count * Define empty plural in local line, removing unnecessary plural=; * Optimised readability of gravity_Whitelist() * Removed uninformative "Nothing to blacklist"/"No wildcards used" text * Optimised parsing of domains into hosts format on IPv6 enabled servers * Ensure /etc/hostname is non-zero * Use `: >` instead of `rm` as consistent with the rest of the script * Ensured that gravity_Cleanup() removes ${localList}.tmp * Optimised readability of gravity_ParseUserDomains() * Moved dnsRestart to ${var} case statement, renaming it to dnsRestartType for readability * Set default $listType to ensure script passes "bash strict mode"
2017-09-15 12:39:17 +00:00
# Clean up after Gravity upon exit or cancellation
gravity_Cleanup() {
  # Clean up after Gravity upon exit or cancellation.
  #   $1 - optional; any non-empty value marks an abnormal termination, in
  #        which case the Pi-hole status is printed and the script exits with 1
  # Reads the globals piholeDir, GRAVITY_TMPDIR, gravity_Blackbody,
  # domainsExtension, activeDomains and PIHOLE_COMMAND.
  local error="${1:-}"
  local str="Cleaning up stray matter"
  local file

  echo -ne " ${INFO} ${str}..."

  # Delete tmp content generated by Gravity.
  # Best effort: "nothing to delete" and similar problems are deliberately
  # silenced, hence -f and the discarded stderr.
  rm -f -- "${piholeDir}"/pihole.*.txt 2>/dev/null
  rm -f -- "${piholeDir}"/*.tmp 2>/dev/null
  # listCurlBuffer location
  rm -f -- "${GRAVITY_TMPDIR}"/*.phgpb 2>/dev/null
  # invalid_domains location
  rm -f -- "${GRAVITY_TMPDIR}"/*.ph-non-domains 2>/dev/null

  # Only prune .domains files once gravity_Blackbody has been set to true
  # (per the original comment: after gravity_SetDownloadOptions() has completed)
  if [[ "${gravity_Blackbody:-}" == true ]]; then
    # Remove any unused .domains files
    for file in "${piholeDir}"/*."${domainsExtension}"; do
      # If the path does not occur in the active array, remove the file
      if [[ "${activeDomains[*]}" != *"${file}"* ]]; then
        rm -f -- "${file}" 2>/dev/null ||
          echo -e " ${CROSS} Failed to remove ${file##*/}"
      fi
    done
  fi

  echo -e "${OVER} ${TICK} ${str}"

  # Print Pi-hole status if an error occurred
  if [[ -n "${error}" ]]; then
    "${PIHOLE_COMMAND}" status
    exit 1
  fi
}
database_recovery() {
  # Check the existing gravity database and try to rebuild it when needed.
  #   $1 - optional; "force" runs the recovery even if both checks pass
  # Runs "PRAGMA integrity_check" and, if that reports "ok",
  # "PRAGMA foreign_key_check" against ${gravityDBfile}. If both checks pass
  # and "force" was not given, the function returns without touching anything.
  # Otherwise the database is dumped with ".recover" into
  # ${gravityDBfile}.recovered, the original is kept as ${gravityDBfile}.old
  # and the recovered file takes its place. Exits with 1 if recovery fails.
  local result line
  local str="Checking integrity of existing gravity database (this can take a while)"
  local option="${1:-}"

  echo -ne " ${INFO} ${str}..."
  result="$(pihole-FTL sqlite3 -ni "${gravityDBfile}" "PRAGMA integrity_check" 2>&1)"

  if [[ ${result} = "ok" ]]; then
    echo -e "${OVER} ${TICK} ${str} - no errors found"

    str="Checking foreign keys of existing gravity database (this can take a while)"
    echo -ne " ${INFO} ${str}..."
    result="$(pihole-FTL sqlite3 -ni "${gravityDBfile}" "PRAGMA foreign_key_check" 2>&1)"

    if [[ -z ${result} ]]; then
      echo -e "${OVER} ${TICK} ${str} - no errors found"
      # Healthy database: only continue when recovery was explicitly forced
      if [[ "${option}" != "force" ]]; then
        return
      fi
    else
      echo -e "${OVER} ${CROSS} ${str} - errors found:"
      while IFS= read -r line; do echo " - $line"; done <<<"$result"
    fi
  else
    echo -e "${OVER} ${CROSS} ${str} - errors found:"
    while IFS= read -r line; do echo " - $line"; done <<<"$result"
  fi

  str="Trying to recover existing gravity database"
  echo -ne " ${INFO} ${str}..."
  # We have to remove any possibly existing recovery database or this will fail
  rm -f "${gravityDBfile}.recovered" >/dev/null 2>&1

  # pipefail is enabled inside the command substitution only, so a failing
  # ".recover" dump is no longer hidden behind the exit status of the importer.
  # Without it a failed dump would still be treated as success and the
  # existing database would be moved aside.
  if result="$(
    set -o pipefail
    pihole-FTL sqlite3 -ni "${gravityDBfile}" ".recover" |
      pihole-FTL sqlite3 -ni "${gravityDBfile}.recovered" 2>&1
  )"; then
    echo -e "${OVER} ${TICK} ${str} - success"
    mv "${gravityDBfile}" "${gravityDBfile}.old"
    mv "${gravityDBfile}.recovered" "${gravityDBfile}"
    # First message ends its own line; the second one is terminated by the
    # final echo at the end of the function
    echo -e " ${INFO} ${gravityDBfile} has been recovered"
    echo -ne " ${INFO} The old ${gravityDBfile} has been moved to ${gravityDBfile}.old"
  else
    echo -e "${OVER} ${CROSS} ${str} - the following errors happened:"
    while IFS= read -r line; do echo " - $line"; done <<<"$result"
    echo -e " ${CROSS} Recovery failed. Try \"pihole -r recreate\" instead."
    exit 1
  fi
  echo ""
}
helpFunc() {
  # Print the usage summary for "pihole -g" and terminate the script successfully
  printf '%s\n' \
    "Usage: pihole -g" \
    "Update domains from blocklists specified in adlists.list" \
    "Options:" \
    "-f, --force Force the download of all specified blocklists" \
    "-h, --help Show this help dialog"
  exit 0
}
repairSelector() {
  # Select the database repair mode requested on the command line.
  #   $1 - "recover" sets the global recover_database=true,
  #        "recreate" sets the global recreate_database=true,
  #        anything else prints the repair usage text and exits with 0
  if [[ "$1" == "recover" ]]; then
    recover_database=true
  elif [[ "$1" == "recreate" ]]; then
    recreate_database=true
  else
    printf '%s\n' \
      "Usage: pihole -g -r {recover,recreate}" \
      "Attempt to repair gravity database" \
      "Available options:" \
      "pihole -g -r recover Try to recover a damaged gravity database file." \
      "Pi-hole tries to restore as much as possible" \
      "from a corrupted gravity database." \
      "pihole -g -r recover force Pi-hole will run the recovery process even when" \
      "no damage is detected. This option is meant to be" \
      "a last resort. Recovery is a fragile task" \
      "consuming a lot of resources and shouldn't be" \
      "performed unnecessarily." \
      "pihole -g -r recreate Create a new gravity database file from scratch." \
      "This will remove your existing gravity database" \
      "and create a new file from scratch. If you still" \
      "have the migration backup created when migrating" \
      "to Pi-hole v5.0, Pi-hole will import these files."
    exit 0
  fi
}
# Parse command line arguments.
# NOTE(review): the repair mode is always read from the third positional
# parameter (and the optional "force" flag from the fourth, further down),
# no matter where "-r" appears. Presumably the script is invoked as
# "-g -r <mode> [force]" by the pihole wrapper - confirm before changing this.
for var in "$@"; do
  case "${var}" in
  "-f" | "--force") forceDelete=true ;;
  "-r" | "--repair") repairSelector "$3" ;;
  "-u" | "--upgrade")
    upgrade_gravityDB "${gravityDBfile}" "${piholeDir}"
    exit 0
    ;;
  "-h" | "--help") helpFunc ;;
  esac
done

# Remove OLD (backup) gravity file, if it exists
if [[ -f "${gravityOLDfile}" ]]; then
  rm "${gravityOLDfile}"
fi

# Trap Ctrl-C
gravity_Trap

if [[ "${recreate_database:-}" == true ]]; then
  str="Recreating gravity database from migration backup"
  echo -ne "${INFO} ${str}..."
  # -f: an already missing database must not clutter a recreate run with errors
  rm -f "${gravityDBfile}"
  # Copy the migration backup back into the Pi-hole directory
  pushd "${piholeDir}" >/dev/null || exit
  cp migration_backup/* .
  popd >/dev/null || exit
  echo -e "${OVER} ${TICK} ${str}"
fi

if [[ "${recover_database:-}" == true ]]; then
  # $4 carries the optional "force" flag (see the note on argument positions above)
  database_recovery "$4"
fi

# Move possibly existing legacy files to the gravity database
if ! migrate_to_database; then
  echo -e " ${CROSS} Unable to migrate to database. Please contact support."
  exit 1
fi

if [[ "${forceDelete:-}" == true ]]; then
  str="Deleting existing list cache"
  echo -ne "${INFO} ${str}..."
  # Best effort: it is fine if there is no cached list to delete
  rm -f "${piholeDir}"/list.* 2>/dev/null || true
  echo -e "${OVER} ${TICK} ${str}"
fi

# Gravity downloads blocklists next
if ! gravity_CheckDNSResolutionAvailable; then
  echo -e " ${CROSS} Can not complete gravity update, no DNS is available. Please contact support."
  exit 1
fi

if ! gravity_DownloadBlocklists; then
  echo -e " ${CROSS} Unable to create gravity database. Please try again later. If the problem persists, please contact support."
  exit 1
fi

# Update gravity timestamp
update_gravity_timestamp

# Ensure proper permissions are set for the database
chown pihole:pihole "${gravityTEMPfile}"
chmod g+w "${piholeDir}" "${gravityTEMPfile}"

# Build the tree
gravity_build_tree

# Compute numbers to be displayed (do this after building the tree to get the
# numbers quickly from the tree instead of having to scan the whole database)
gravity_ShowCount

# Migrate rest of the data from old to new database
# IMPORTANT: Swapping the databases must be the last step before the cleanup
if ! gravity_swap_databases; then
  echo -e " ${CROSS} Unable to create database. Please contact support."
  exit 1
fi

gravity_Cleanup
echo ""
echo " ${TICK} Done."
# "${PIHOLE_COMMAND}" status