Merge pull request #2236 from pi-hole/new/regex-lists

Use regex lists instead of wildcards for blocking
This commit is contained in:
Dan Schaper 2018-07-14 06:10:44 -07:00 committed by GitHub
commit dbc82cfb6a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 106 additions and 68 deletions

View file

@ -13,10 +13,11 @@ basename=pihole
piholeDir=/etc/"${basename}"
whitelist="${piholeDir}"/whitelist.txt
blacklist="${piholeDir}"/blacklist.txt
readonly wildcardlist="/etc/dnsmasq.d/03-pihole-wildcard.conf"
readonly regexlist="/etc/pihole/regex.list"
reload=false
addmode=true
verbose=true
wildcard=false
domList=()
@ -31,9 +32,9 @@ helpFunc() {
if [[ "${listMain}" == "${whitelist}" ]]; then
param="w"
type="white"
elif [[ "${listMain}" == "${wildcardlist}" ]]; then
elif [[ "${listMain}" == "${regexlist}" ]]; then
param="wild"
type="wildcard black"
type="regex black"
else
param="b"
type="black"
@ -57,7 +58,8 @@ Options:
EscapeRegexp() {
# This way we may safely insert an arbitrary
# string in our regular expressions
# Also remove leading "." if present
# This sed is intentionally executed in three steps to ease maintainability
# The first sed removes any amount of leading dots
echo $* | sed 's/^\.*//' | sed "s/[]\.|$(){}?+*^]/\\\\&/g" | sed "s/\\//\\\\\//g"
}
@ -65,10 +67,14 @@ HandleOther() {
# Convert to lowercase
domain="${1,,}"
# Check validity of domain
# Check validity of domain (don't check for regex entries)
if [[ "${#domain}" -le 253 ]]; then
validDomain=$(grep -P "^((-|_)*[a-z\d]((-|_)*[a-z\d])*(-|_)*)(\.(-|_)*([a-z\d]((-|_)*[a-z\d])*))*$" <<< "${domain}") # Valid chars check
validDomain=$(grep -P "^[^\.]{1,63}(\.[^\.]{1,63})*$" <<< "${validDomain}") # Length of each label
if [[ "${listMain}" == "${regexlist}" && "${wildcard}" == false ]]; then
validDomain="${domain}"
else
validDomain=$(grep -P "^((-|_)*[a-z\\d]((-|_)*[a-z\\d])*(-|_)*)(\\.(-|_)*([a-z\\d]((-|_)*[a-z\\d])*))*$" <<< "${domain}") # Valid chars check
validDomain=$(grep -P "^[^\\.]{1,63}(\\.[^\\.]{1,63})*$" <<< "${validDomain}") # Length of each label
fi
fi
if [[ -n "${validDomain}" ]]; then
@ -94,9 +100,6 @@ PoplistFile() {
if ${addmode}; then
AddDomain "${dom}" "${listMain}"
RemoveDomain "${dom}" "${listAlt}"
if [[ "${listMain}" == "${whitelist}" || "${listMain}" == "${blacklist}" ]]; then
RemoveDomain "${dom}" "${wildcardlist}"
fi
else
RemoveDomain "${dom}" "${listMain}"
fi
@ -109,7 +112,6 @@ AddDomain() {
[[ "${list}" == "${whitelist}" ]] && listname="whitelist"
[[ "${list}" == "${blacklist}" ]] && listname="blacklist"
[[ "${list}" == "${wildcardlist}" ]] && listname="wildcard blacklist"
if [[ "${list}" == "${whitelist}" || "${list}" == "${blacklist}" ]]; then
[[ "${list}" == "${whitelist}" && -z "${type}" ]] && type="--whitelist-only"
@ -121,7 +123,7 @@ AddDomain() {
if [[ "${bool}" == false ]]; then
# Domain not found in the whitelist file, add it!
if [[ "${verbose}" == true ]]; then
echo -e " ${INFO} Adding $1 to $listname..."
echo -e " ${INFO} Adding ${1} to ${listname}..."
fi
reload=true
# Add it to the list we want to add it to
@ -131,28 +133,26 @@ AddDomain() {
echo -e " ${INFO} ${1} already exists in ${listname}, no need to add!"
fi
fi
elif [[ "${list}" == "${wildcardlist}" ]]; then
source "${piholeDir}/setupVars.conf"
# Remove the /* from the end of the IP addresses
IPV4_ADDRESS=${IPV4_ADDRESS%/*}
IPV6_ADDRESS=${IPV6_ADDRESS%/*}
elif [[ "${list}" == "${regexlist}" ]]; then
[[ -z "${type}" ]] && type="--wildcard-only"
bool=true
domain="${1}"
[[ "${wildcard}" == true ]] && domain="((^)|(\\.))${domain//\./\\.}$"
# Is the domain in the list?
grep -e "address=\/${domain}\/" "${wildcardlist}" > /dev/null 2>&1 || bool=false
# Search only for exactly matching lines
grep -Fx "${domain}" "${regexlist}" > /dev/null 2>&1 || bool=false
if [[ "${bool}" == false ]]; then
if [[ "${verbose}" == true ]]; then
echo -e " ${INFO} Adding $1 to wildcard blacklist..."
echo -e " ${INFO} Adding ${domain} to regex list..."
fi
reload="restart"
echo "address=/$1/${IPV4_ADDRESS}" >> "${wildcardlist}"
if [[ "${#IPV6_ADDRESS}" > 0 ]]; then
echo "address=/$1/${IPV6_ADDRESS}" >> "${wildcardlist}"
fi
echo "$domain" >> "${regexlist}"
else
if [[ "${verbose}" == true ]]; then
echo -e " ${INFO} ${1} already exists in wildcard blacklist, no need to add!"
echo -e " ${INFO} ${domain} already exists in regex list, no need to add!"
fi
fi
fi
@ -164,7 +164,6 @@ RemoveDomain() {
[[ "${list}" == "${whitelist}" ]] && listname="whitelist"
[[ "${list}" == "${blacklist}" ]] && listname="blacklist"
[[ "${list}" == "${wildcardlist}" ]] && listname="wildcard blacklist"
if [[ "${list}" == "${whitelist}" || "${list}" == "${blacklist}" ]]; then
bool=true
@ -174,7 +173,7 @@ RemoveDomain() {
grep -Ex -q "${domain}" "${list}" > /dev/null 2>&1 || bool=false
if [[ "${bool}" == true ]]; then
# Remove it from the other one
echo -e " ${INFO} Removing $1 from $listname..."
echo -e " ${INFO} Removing $1 from ${listname}..."
# /I flag: search case-insensitive
sed -i "/${domain}/Id" "${list}"
reload=true
@ -183,20 +182,25 @@ RemoveDomain() {
echo -e " ${INFO} ${1} does not exist in ${listname}, no need to remove!"
fi
fi
elif [[ "${list}" == "${wildcardlist}" ]]; then
elif [[ "${list}" == "${regexlist}" ]]; then
[[ -z "${type}" ]] && type="--wildcard-only"
domain="${1}"
[[ "${wildcard}" == true ]] && domain="((^)|(\\.))${domain//\./\\.}$"
bool=true
# Is it in the list?
grep -e "address=\/${domain}\/" "${wildcardlist}" > /dev/null 2>&1 || bool=false
grep -Fx "${domain}" "${regexlist}" > /dev/null 2>&1 || bool=false
if [[ "${bool}" == true ]]; then
# Remove it from the other one
echo -e " ${INFO} Removing $1 from $listname..."
# /I flag: search case-insensitive
sed -i "/address=\/${domain}/Id" "${list}"
echo -e " ${INFO} Removing $domain from regex list..."
local lineNumber
lineNumber=$(grep -Fnx "$domain" "${list}" | cut -f1 -d:)
sed -i "${lineNumber}d" "${list}"
reload=true
else
if [[ "${verbose}" == true ]]; then
echo -e " ${INFO} ${1} does not exist in ${listname}, no need to remove!"
echo -e " ${INFO} ${domain} does not exist in regex list, no need to remove!"
fi
fi
fi
@ -218,7 +222,7 @@ Displaylist() {
verbose=false
echo -e "Displaying $string:\n"
count=1
while IFS= read -r RD; do
while IFS= read -r RD || [ -n "${RD}" ]; do
echo " ${count}: ${RD}"
count=$((count+1))
done < "${listMain}"
@ -241,7 +245,8 @@ for var in "$@"; do
case "${var}" in
"-w" | "whitelist" ) listMain="${whitelist}"; listAlt="${blacklist}";;
"-b" | "blacklist" ) listMain="${blacklist}"; listAlt="${whitelist}";;
"-wild" | "wildcard" ) listMain="${wildcardlist}";;
"--wild" | "wildcard" ) listMain="${regexlist}"; wildcard=true;;
"--regex" | "regex" ) listMain="${regexlist}";;
"-nr"| "--noreload" ) reload=false;;
"-d" | "--delmode" ) addmode=false;;
"-q" | "--quiet" ) verbose=false;;

View file

@ -0,0 +1,28 @@
#!/bin/bash
# Pi-hole: A black hole for Internet advertisements
# (c) 2017 Pi-hole, LLC (https://pi-hole.net)
# Network-wide ad blocking via your own hardware.
#
# Provides an automated migration subroutine to convert Pi-hole v3.x wildcard domains to Pi-hole v4.x regex filters
#
# This file is copyright under the latest version of the EUPL.
# Please see LICENSE file for your rights under this license.
# regexFile set in gravity.sh
wildcardFile="/etc/dnsmasq.d/03-pihole-wildcard.conf"
convert_wildcard_to_regex() {
if [ ! -f "${wildcardFile}" ]; then
return
fi
local addrlines domains uniquedomains
# Obtain wildcard domains from old file
addrlines="$(grep -oE "/.*/" ${wildcardFile})"
# Strip "/" from domain names and convert "." to regex-compatible "\."
domains="$(sed 's/\///g;s/\./\\./g' <<< "${addrlines}")"
# Remove repeated domains (may have been inserted two times due to A and AAAA blocking)
uniquedomains="$(uniq <<< "${domains}")"
# Automatically generate regex filters and remove old wildcards file
awk '{print "((^)|(\\.))"$0"$"}' <<< "${uniquedomains}" >> "${regexFile:?}" && rm "${wildcardFile}"
}

View file

@ -15,6 +15,8 @@ export LC_ALL=C
coltable="/opt/pihole/COL_TABLE"
source "${coltable}"
regexconverter="/opt/pihole/wildcard_regex_converter.sh"
source "${regexconverter}"
basename="pihole"
PIHOLE_COMMAND="/usr/local/bin/${basename}"
@ -26,7 +28,7 @@ adListDefault="${piholeDir}/adlists.default"
whitelistFile="${piholeDir}/whitelist.txt"
blacklistFile="${piholeDir}/blacklist.txt"
wildcardFile="/etc/dnsmasq.d/03-pihole-wildcard.conf"
regexFile="${piholeDir}/regex.list"
adList="${piholeDir}/gravity.list"
blackList="${piholeDir}/black.list"
@ -452,7 +454,7 @@ gravity_Whitelist() {
echo -e "${OVER} ${INFO} ${str}"
}
# Output count of blacklisted domains and wildcards
# Output count of blacklisted domains and regex filters
gravity_ShowBlockCount() {
local num
@ -461,13 +463,9 @@ gravity_ShowBlockCount() {
echo -e " ${INFO} Number of blacklisted domains: ${num}"
fi
if [[ -f "${wildcardFile}" ]]; then
num=$(grep -c "^" "${wildcardFile}")
# If IPv4 and IPv6 is used, divide total wildcard count by 2
if [[ -n "${IPV4_ADDRESS}" ]] && [[ -n "${IPV6_ADDRESS}" ]];then
num=$(( num/2 ))
fi
echo -e " ${INFO} Number of wildcard blocked domains: ${num}"
if [[ -f "${regexFile}" ]]; then
num=$(grep -c "^(?!#)" "${regexFile}")
echo -e " ${INFO} Number of regex filters: ${num}"
fi
}
@ -645,6 +643,12 @@ if [[ "${skipDownload}" == false ]] || [[ "${listType}" == "whitelist" ]]; then
gravity_Whitelist
fi
# Set proper permissions on the regex file
touch "${regexFile}"
chown pihole:www-data "${regexFile}"
chmod 664 "${regexFile}"
convert_wildcard_to_regex
gravity_ShowBlockCount
# Perform when downloading blocklists, or modifying the white/blacklist (not wildcards)

View file

@ -5,7 +5,7 @@ Pi-hole : A black-hole for internet advertisements
.br
.SH "SYNOPSIS"
\fBpihole\fR (\fB-w\fR|\fB-b\fR|\fB-wild\fR) [options] domain(s)
\fBpihole\fR (\fB-w\fR|\fB-b\fR|\fB--wild\fR|\fB--regex\fR) [options] domain(s)
.br
\fBpihole -a\fR \fB-p\fR password
.br
@ -66,9 +66,14 @@ Available commands and options:
Adds or removes specified domain or domains to the blacklist
.br
\fB-wild, wildcard\fR [options] [<domain1> <domain2 ...>]
\fB--wild, wildcard\fR [options] [<domain1> <domain2 ...>]
.br
Add or removes specified domain, and all subdomains to the blacklist
Add or removes specified domain to the wildcard blacklist
.br
\fB--regex, regex\fR [options] [<regex1> <regex2 ...>]
.br
Add or removes specified regex filter to the regex blacklist
.br
(Whitelist/Blacklist manipulation options):
@ -167,9 +172,9 @@ Available commands and options:
Show a help dialog
.br
\fB-l, logging\fR [on|off|off noflush]
\fB-l, logging\fR [on|off|off noflush]
.br
Specify whether the Pi-hole log should be used
Specify whether the Pi-hole log should be used
.br
(Logging options):
@ -193,7 +198,7 @@ Available commands and options:
.br
Show installed versions of Pi-hole, Web Interface &amp; FTL
.br
.br
(repo options):
.br
@ -232,7 +237,7 @@ Available commands and options:
Disable Pi-hole subsystems, optionally for a set duration
.br
(time options):
(time options):
.br
#s Disable Pi-hole functionality for # second(s)
.br
@ -273,11 +278,15 @@ Some usage examples
Whitelist/blacklist manipulation
.br
\fBpihole -w iloveads.example.com\fR Add "iloveads.example.com" to whitelist
\fBpihole -w iloveads.example.com\fR Add "iloveads.example.com" to whitelist
.br
\fBpihole -b -d noads.example.com\fR Remove "noads.example.com" from blacklist
\fBpihole -b -d noads.example.com\fR Remove "noads.example.com" from blacklist
.br
\fBpihole -wild example.com\fR Add "example.com" as wildcard - would block ads.example.com, www.example.com etc.
\fBpihole --wild example.com\fR Add example.com as a wildcard - would
block all subdomains of example.com, including example.com itself.
.br
\fBpihole --regex "ad.*\.example\.com$"\fR Add "ad.*\.example\.com$" to the regex
blacklist - would block all subdomains of example.com which start with "ad"
.br
Changing the Web Interface password

22
pihole
View file

@ -33,17 +33,7 @@ webpageFunc() {
exit 0
}
whitelistFunc() {
"${PI_HOLE_SCRIPT_DIR}"/list.sh "$@"
exit 0
}
blacklistFunc() {
"${PI_HOLE_SCRIPT_DIR}"/list.sh "$@"
exit 0
}
wildcardFunc() {
listFunc() {
"${PI_HOLE_SCRIPT_DIR}"/list.sh "$@"
exit 0
}
@ -386,7 +376,8 @@ Add '-h' after specific commands for more information on usage
Whitelist/Blacklist Options:
-w, whitelist Whitelist domain(s)
-b, blacklist Blacklist domain(s)
-wild, wildcard Blacklist domain(s), and all its subdomains
--wild, wildcard Wildcard blacklist domain(s)
--regex, regex Regex blacklist domains(s)
Add '-h' for more info on whitelist/blacklist usage
Debugging Options:
@ -428,9 +419,10 @@ fi
# Handle redirecting to specific functions based on arguments
case "${1}" in
"-w" | "whitelist" ) whitelistFunc "$@";;
"-b" | "blacklist" ) blacklistFunc "$@";;
"-wild" | "wildcard" ) wildcardFunc "$@";;
"-w" | "whitelist" ) listFunc "$@";;
"-b" | "blacklist" ) listFunc "$@";;
"--wild" | "wildcard" ) listFunc "$@";;
"--regex" | "regex" ) listFunc "$@";;
"-d" | "debug" ) debugFunc "$@";;
"-f" | "flush" ) flushFunc "$@";;
"-up" | "updatePihole" ) updatePiholeFunc "$@";;