Store domains without sorting and unifying them first. This allows us to preserve the relationship of the individual domains to the lists they came from.

Signed-off-by: DL6ER <dl6er@dl6er.de>
This commit is contained in:
DL6ER 2019-09-04 23:14:29 +02:00
parent ab90ff565a
commit a8af2e1837
No known key found for this signature in database
GPG key ID: FB60471F0575164A
2 changed files with 86 additions and 113 deletions

View file

@ -39,4 +39,9 @@ upgrade_gravityDB(){
sqlite3 "${database}" < "/etc/.pihole/advanced/Scripts/database_migration/gravity/2_to_3.sql" sqlite3 "${database}" < "/etc/.pihole/advanced/Scripts/database_migration/gravity/2_to_3.sql"
version=3 version=3
fi fi
if [[ "$version" == "3" ]]; then
# This migration script upgrades ...
sqlite3 "${database}" < "/etc/.pihole/advanced/Scripts/database_migration/gravity/3_to_4.sql"
version=3
fi
} }

View file

@ -97,25 +97,39 @@ update_gravity_timestamp() {
if [[ "${status}" -ne 0 ]]; then if [[ "${status}" -ne 0 ]]; then
echo -e "\\n ${CROSS} Unable to update gravity timestamp in database ${gravityDBfile}\\n ${output}" echo -e "\\n ${CROSS} Unable to update gravity timestamp in database ${gravityDBfile}\\n ${output}"
return 1
fi fi
return 0
} }
# Import domains from file and store them in the specified database table database_truncate_table() {
database_table_from_file() { local table
# Define locals
local table source backup_path backup_file
table="${1}" table="${1}"
source="${2}"
backup_path="${piholeDir}/migration_backup"
backup_file="${backup_path}/$(basename "${2}")"
# Truncate table
output=$( { sqlite3 "${gravityDBfile}" <<< "DELETE FROM ${table};"; } 2>&1 ) output=$( { sqlite3 "${gravityDBfile}" <<< "DELETE FROM ${table};"; } 2>&1 )
status="$?" status="$?"
if [[ "${status}" -ne 0 ]]; then if [[ "${status}" -ne 0 ]]; then
echo -e "\\n ${CROSS} Unable to truncate ${table} database ${gravityDBfile}\\n ${output}" echo -e "\\n ${CROSS} Unable to truncate ${table} database ${gravityDBfile}\\n ${output}"
gravity_Cleanup "error" gravity_Cleanup "error"
return 1
fi
return 0
}
# Import domains from file and store them in the specified database table
database_table_from_file() {
# Define locals
local table source backup_path backup_file arg
table="${1}"
source="${2}"
arg="${3}"
backup_path="${piholeDir}/migration_backup"
backup_file="${backup_path}/$(basename "${2}")"
# Truncate table only if not gravity (we add multiple times to this table)
if [[ "${table}" != "gravity" ]]; then
database_truncate_table "${table}"
fi fi
local tmpFile local tmpFile
@ -123,31 +137,30 @@ database_table_from_file() {
local timestamp local timestamp
timestamp="$(date --utc +'%s')" timestamp="$(date --utc +'%s')"
local inputfile local inputfile
if [[ "${table}" == "gravity" ]]; then # Apply format for white-, blacklist, regex, and adlist tables
# No need to modify the input data for the gravity table # Read file line by line
inputfile="${source}" local rowid
else declare -i rowid
# Apply format for white-, blacklist, regex, and adlist tables rowid=1
# Read file line by line grep -v '^ *#' < "${source}" | while IFS= read -r domain
local rowid do
declare -i rowid # Only add non-empty lines
rowid=1 if [[ -n "${domain}" ]]; then
grep -v '^ *#' < "${source}" | while IFS= read -r domain if [[ "${table}" == "domain_audit" ]]; then
do # domain_audit table format (no enable or modified fields)
# Only add non-empty lines echo "${rowid},\"${domain}\",${timestamp}" >> "${tmpFile}"
if [[ -n "${domain}" ]]; then elif [[ "${table}" == "gravity" ]]; then
if [[ "${table}" == "domain_audit" ]]; then # gravity table format
# domain_audit table format (no enable or modified fields) echo "\"${domain}\",${arg}" >> "${tmpFile}"
echo "${rowid},\"${domain}\",${timestamp}" >> "${tmpFile}" else
else # White-, black-, and regexlist format
# White-, black-, and regexlist format echo "${rowid},\"${domain}\",1,${timestamp},${timestamp},\"Migrated from ${source}\"" >> "${tmpFile}"
echo "${rowid},\"${domain}\",1,${timestamp},${timestamp},\"Migrated from ${source}\"" >> "${tmpFile}"
fi
rowid+=1
fi fi
done rowid+=1
inputfile="${tmpFile}" fi
fi done
inputfile="${tmpFile}"
# Store domains in database table specified by ${table} # Store domains in database table specified by ${table}
# Use printf as .mode and .import need to be on separate lines # Use printf as .mode and .import need to be on separate lines
# see https://unix.stackexchange.com/a/445615/83260 # see https://unix.stackexchange.com/a/445615/83260
@ -263,12 +276,13 @@ gravity_CheckDNSResolutionAvailable() {
} }
# Retrieve blocklist URLs and parse domains from adlist.list # Retrieve blocklist URLs and parse domains from adlist.list
gravity_GetBlocklistUrls() { gravity_DownloadBlocklists() {
echo -e " ${INFO} ${COL_BOLD}Neutrino emissions detected${COL_NC}..." echo -e " ${INFO} ${COL_BOLD}Neutrino emissions detected${COL_NC}..."
# Retrieve source URLs from gravity database # Retrieve source URLs from gravity database
# We source only enabled adlists, sqlite3 stores boolean values as 0 (false) or 1 (true) # We source only enabled adlists, sqlite3 stores boolean values as 0 (false) or 1 (true)
mapfile -t sources <<< "$(sqlite3 "${gravityDBfile}" "SELECT address FROM vw_adlist;" 2> /dev/null)" mapfile -t sources <<< "$(sqlite3 "${gravityDBfile}" "SELECT address FROM vw_adlist;" 2> /dev/null)"
mapfile -t sourceIDs <<< "$(sqlite3 "${gravityDBfile}" "SELECT id FROM vw_adlist;" 2> /dev/null)"
# Parse source domains from $sources # Parse source domains from $sources
mapfile -t sourceDomains <<< "$( mapfile -t sourceDomains <<< "$(
@ -285,21 +299,23 @@ gravity_GetBlocklistUrls() {
if [[ -n "${sources[*]}" ]] && [[ -n "${sourceDomains[*]}" ]]; then if [[ -n "${sources[*]}" ]] && [[ -n "${sourceDomains[*]}" ]]; then
echo -e "${OVER} ${TICK} ${str}" echo -e "${OVER} ${TICK} ${str}"
return 0
else else
echo -e "${OVER} ${CROSS} ${str}" echo -e "${OVER} ${CROSS} ${str}"
echo -e " ${INFO} No source list found, or it is empty" echo -e " ${INFO} No source list found, or it is empty"
echo "" echo ""
return 1 return 1
fi fi
}
# Define options for when retrieving blocklists
gravity_SetDownloadOptions() {
local url domain agent cmd_ext str local url domain agent cmd_ext str
echo "" echo ""
# Flush gravity table once before looping over sources
str="Flushing gravity table"
echo -ne " ${INFO} ${str}..."
if database_truncate_table "gravity"; then
echo -e "${OVER} ${TICK} ${str}"
fi
# Loop through $sources and download each one # Loop through $sources and download each one
for ((i = 0; i < "${#sources[@]}"; i++)); do for ((i = 0; i < "${#sources[@]}"; i++)); do
url="${sources[$i]}" url="${sources[$i]}"
@ -319,7 +335,7 @@ gravity_SetDownloadOptions() {
esac esac
echo -e " ${INFO} Target: ${domain} (${url##*/})" echo -e " ${INFO} Target: ${domain} (${url##*/})"
gravity_DownloadBlocklistFromUrl "${url}" "${cmd_ext}" "${agent}" gravity_DownloadBlocklistFromUrl "${url}" "${cmd_ext}" "${agent}" "${sourceIDs[$i]}"
echo "" echo ""
done done
gravity_Blackbody=true gravity_Blackbody=true
@ -327,7 +343,7 @@ gravity_SetDownloadOptions() {
# Download specified URL and perform checks on HTTP status and file content # Download specified URL and perform checks on HTTP status and file content
gravity_DownloadBlocklistFromUrl() { gravity_DownloadBlocklistFromUrl() {
local url="${1}" cmd_ext="${2}" agent="${3}" heisenbergCompensator="" patternBuffer str httpCode success="" local url="${1}" cmd_ext="${2}" agent="${3}" adlistID="${4}" heisenbergCompensator="" patternBuffer str httpCode success=""
# Create temp file to store content on disk instead of RAM # Create temp file to store content on disk instead of RAM
patternBuffer=$(mktemp -p "/tmp" --suffix=".phgpb") patternBuffer=$(mktemp -p "/tmp" --suffix=".phgpb")
@ -408,11 +424,20 @@ gravity_DownloadBlocklistFromUrl() {
# Determine if the blocklist was downloaded and saved correctly # Determine if the blocklist was downloaded and saved correctly
if [[ "${success}" == true ]]; then if [[ "${success}" == true ]]; then
if [[ "${httpCode}" == "304" ]]; then if [[ "${httpCode}" == "304" ]]; then
: # Do not attempt to re-parse file # Add domains to database table
str="Adding to database table"
echo -ne " ${INFO} ${str}..."
database_table_from_file "gravity" "${saveLocation}" "${adlistID}"
echo -e "${OVER} ${TICK} ${str}"
# Check if $patternbuffer is a non-zero length file # Check if $patternbuffer is a non-zero length file
elif [[ -s "${patternBuffer}" ]]; then elif [[ -s "${patternBuffer}" ]]; then
# Determine if blocklist is non-standard and parse as appropriate # Determine if blocklist is non-standard and parse as appropriate
gravity_ParseFileIntoDomains "${patternBuffer}" "${saveLocation}" gravity_ParseFileIntoDomains "${patternBuffer}" "${saveLocation}"
# Add domains to database table
str="Adding to database table"
echo -ne " ${INFO} ${str}..."
database_table_from_file "gravity" "${saveLocation}" "${adlistID}"
echo -e "${OVER} ${TICK} ${str}"
else else
# Fall back to previously cached list if $patternBuffer is empty # Fall back to previously cached list if $patternBuffer is empty
echo -e " ${INFO} Received empty file: ${COL_LIGHT_GREEN}using previously cached list${COL_NC}" echo -e " ${INFO} Received empty file: ${COL_LIGHT_GREEN}using previously cached list${COL_NC}"
@ -421,6 +446,11 @@ gravity_DownloadBlocklistFromUrl() {
# Determine if cached list has read permission # Determine if cached list has read permission
if [[ -r "${saveLocation}" ]]; then if [[ -r "${saveLocation}" ]]; then
echo -e " ${CROSS} List download failed: ${COL_LIGHT_GREEN}using previously cached list${COL_NC}" echo -e " ${CROSS} List download failed: ${COL_LIGHT_GREEN}using previously cached list${COL_NC}"
# Add domains to database table
str="Adding to database table"
echo -ne " ${INFO} ${str}..."
database_table_from_file "gravity" "${saveLocation}" "${adlistID}"
echo -e "${OVER} ${TICK} ${str}"
else else
echo -e " ${CROSS} List download failed: ${COL_LIGHT_RED}no cached list available${COL_NC}" echo -e " ${CROSS} List download failed: ${COL_LIGHT_RED}no cached list available${COL_NC}"
fi fi
@ -432,7 +462,7 @@ gravity_ParseFileIntoDomains() {
local source="${1}" destination="${2}" firstLine abpFilter local source="${1}" destination="${2}" firstLine abpFilter
# Determine if we are parsing a consolidated list # Determine if we are parsing a consolidated list
if [[ "${source}" == "${piholeDir}/${matterAndLight}" ]]; then #if [[ "${source}" == "${piholeDir}/${matterAndLight}" ]]; then
# Remove comments and print only the domain name # Remove comments and print only the domain name
# Most of the lists downloaded are already in hosts file format but the spacing/formating is not contigious # Most of the lists downloaded are already in hosts file format but the spacing/formating is not contigious
# This helps with that and makes it easier to read # This helps with that and makes it easier to read
@ -449,7 +479,7 @@ gravity_ParseFileIntoDomains() {
sed -r '/([^\.]+\.)+[^\.]{2,}/!d' > "${destination}" sed -r '/([^\.]+\.)+[^\.]{2,}/!d' > "${destination}"
chmod 644 "${destination}" chmod 644 "${destination}"
return 0 return 0
fi #fi
# Individual file parsing: Keep comments, while parsing domains from each line # Individual file parsing: Keep comments, while parsing domains from each line
# We keep comments to respect the list maintainer's licensing # We keep comments to respect the list maintainer's licensing
@ -536,80 +566,23 @@ gravity_ParseFileIntoDomains() {
fi fi
} }
# Create (unfiltered) "Matter and Light" consolidated list
gravity_ConsolidateDownloadedBlocklists() {
local str lastLine
str="Consolidating blocklists"
echo -ne " ${INFO} ${str}..."
# Empty $matterAndLight if it already exists, otherwise, create it
: > "${piholeDir}/${matterAndLight}"
chmod 644 "${piholeDir}/${matterAndLight}"
# Loop through each *.domains file
for i in "${activeDomains[@]}"; do
# Determine if file has read permissions, as download might have failed
if [[ -r "${i}" ]]; then
# Remove windows CRs from file, convert list to lower case, and append into $matterAndLight
tr -d '\r' < "${i}" | tr '[:upper:]' '[:lower:]' >> "${piholeDir}/${matterAndLight}"
# Ensure that the first line of a new list is on a new line
lastLine=$(tail -1 "${piholeDir}/${matterAndLight}")
if [[ "${#lastLine}" -gt 0 ]]; then
echo "" >> "${piholeDir}/${matterAndLight}"
fi
fi
done
echo -e "${OVER} ${TICK} ${str}"
}
# Parse consolidated list into (filtered, unique) domains-only format
gravity_SortAndFilterConsolidatedList() {
local str num
str="Extracting domains from blocklists"
echo -ne " ${INFO} ${str}..."
# Parse into file
gravity_ParseFileIntoDomains "${piholeDir}/${matterAndLight}" "${piholeDir}/${parsedMatter}"
# Format $parsedMatter line total as currency
num=$(printf "%'.0f" "$(wc -l < "${piholeDir}/${parsedMatter}")")
echo -e "${OVER} ${TICK} ${str}"
echo -e " ${INFO} Gravity pulled in ${COL_BLUE}${num}${COL_NC} domains"
str="Removing duplicate domains"
echo -ne " ${INFO} ${str}..."
sort -u "${piholeDir}/${parsedMatter}" > "${piholeDir}/${preEventHorizon}"
chmod 644 "${piholeDir}/${preEventHorizon}"
echo -e "${OVER} ${TICK} ${str}"
# Format $preEventHorizon line total as currency
num=$(printf "%'.0f" "$(wc -l < "${piholeDir}/${preEventHorizon}")")
str="Storing ${COL_BLUE}${num}${COL_NC} unique blocking domains in database"
echo -ne " ${INFO} ${str}..."
database_table_from_file "gravity" "${piholeDir}/${preEventHorizon}"
echo -e "${OVER} ${TICK} ${str}"
}
# Report number of entries in a table # Report number of entries in a table
gravity_Table_Count() { gravity_Table_Count() {
local table="${1}" local table="${1}"
local str="${2}" local str="${2}"
local extra="${3}"
local num local num
num="$(sqlite3 "${gravityDBfile}" "SELECT COUNT(*) FROM ${table} WHERE enabled = 1;")" num="$(sqlite3 "${gravityDBfile}" "SELECT COUNT(*) FROM ${table} ${extra};")"
echo -e " ${INFO} Number of ${str}: ${num}" echo -e " ${INFO} Number of ${str}: ${num}"
} }
# Output count of blacklisted domains and regex filters # Output count of blacklisted domains and regex filters
gravity_ShowCount() { gravity_ShowCount() {
gravity_Table_Count "blacklist" "exact blacklisted domains" gravity_Table_Count "gravity" "gravity domains" ""
gravity_Table_Count "regex_blacklist" "regex blacklist filters" gravity_Table_Count "blacklist" "exact blacklisted domains" "WHERE enabled = 1"
gravity_Table_Count "whitelist" "exact whitelisted domains" gravity_Table_Count "regex_blacklist" "regex blacklist filters" "WHERE enabled = 1"
gravity_Table_Count "regex_whitelist" "regex whitelist filters" gravity_Table_Count "whitelist" "exact whitelisted domains" "WHERE enabled = 1"
gravity_Table_Count "regex_whitelist" "regex whitelist filters" "WHERE enabled = 1"
} }
# Parse list of domains into hosts format # Parse list of domains into hosts format
@ -748,12 +721,7 @@ fi
# Gravity downloads blocklists next # Gravity downloads blocklists next
gravity_CheckDNSResolutionAvailable gravity_CheckDNSResolutionAvailable
if gravity_GetBlocklistUrls; then gravity_DownloadBlocklists
gravity_SetDownloadOptions
# Build preEventHorizon
gravity_ConsolidateDownloadedBlocklists
gravity_SortAndFilterConsolidatedList
fi
# Create local.list # Create local.list
gravity_generateLocalList gravity_generateLocalList