better, faster, stronger

- curl does not re-download a list when the local copy is newer than the Last-Modified date reported by the server (option -z)
- extraction of domain list from file 'hosts' standardized and shortened
- temporary files are not overwritten if not necessary.
- domains with no dots (e.g. localhost) are removed automatically from the list
- the code to reduce duplicates and create the output is now shorter
- "/etc/dnsmasq.d/adList.conf" is not overwritten and dnsmasq is not restarted when it is not necessary
This commit is contained in:
pizzafritta 2015-04-12 18:01:21 +02:00
parent 5f931553b9
commit 7dedfbb77a

View file

@ -6,12 +6,19 @@ piholeIP="127.0.0.1"
#piholeIP=$(hostname -I)
# Config file to hold URL rules
eventHorizion="/etc/dnsmasq.d/adList.conf"
eventHorizon="/etc/dnsmasq.d/adList.conf"
piholeDir='/etc/pihole/'
blacklist=$piholeDir'blacklist.txt'
whitelist=$piholeDir'whitelist.txt'
tmpDir='./tmp/'
tmpAdPrefix=$tmpDir'matter.pihole'
tmpAdList=$tmpAdPrefix'.txt'
tmpConf=$tmpDir'andLight.pihole.txt'
tmpWhiteList=$tmpDir'yang.pihole.txt'
tmpBlackList=$tmpDir'yin.pihole.txt'
# Create the pihole resource directory if it doesn't exist. Future files will be stored here
if [[ -d $piholeDir ]];then
:
@ -20,64 +27,90 @@ else
sudo mkdir $piholeDir
fi
tmpDir='/tmp/'
tmpAdList=$tmpDir'matter.pihole.txt'
tmpConf=$tmpDir'andLight.pihole.txt'
tmpWhiteList=$tmpDir'yang.pihole.txt'
tmpBlackList=$tmpDir'yin.pihole.txt'
# Write stdin to the file named by $1, but only if stdin is not empty.
# Arguments: $1 - path of the file to (over)write
# Returns:   1 without touching $1 when stdin is empty; otherwise the status
#            of the write.
writeifne () {
  local pipe
  # IFS= and -r keep the first line verbatim (no whitespace trimming, no
  # backslash processing). read returns non-zero at EOF even when it has
  # captured an unterminated final line, so non-empty content still counts.
  IFS= read -r pipe || [[ -n $pipe ]] || return 1
  # Re-emit the line consumed by read, then stream the rest of stdin.
  { printf '%s\n' "$pipe"; cat; } > "$1"
}
echo "Getting yoyo ad list..." # Approximately 2452 domains at the time of writing
curl -s -d mimetype=plaintext -d hostformat=unixhosts http://pgl.yoyo.org/adservers/serverlist.php? | sort > $tmpAdList
curl -s -d mimetype=plaintext -d hostformat=unixhosts http://pgl.yoyo.org/adservers/serverlist.php? -z $tmpAdPrefix."yoyo.txt" -o $tmpAdPrefix."yoyo.txt"
echo "Getting winhelp2002 ad list..." # 12985 domains
curl -s http://winhelp2002.mvps.org/hosts.txt | grep -v "#" | grep -v "127.0.0.1" | sed '/^$/d' | sed 's/\ /\\ /g' | awk '{print $2}' >> $tmpAdList
curl -s http://winhelp2002.mvps.org/hosts.txt -z $tmpAdPrefix."winhelp2002.txt" | awk '/^[0-9]{1,3}(\.[0-9]{1,3}){3}/ {print $2}' | sed $'s/\r$//' | writeifne $tmpAdPrefix."winhelp2002.txt"
echo "Getting adaway ad list..." # 445 domains
curl -s https://adaway.org/hosts.txt | grep -v "#" | grep -v "::1" | sed '/^$/d' | sed 's/\ /\\ /g' | awk '{print $2}' | grep -v '^\\' | grep -v '\\$' >> $tmpAdList
curl -s https://adaway.org/hosts.txt -z $tmpAdPrefix."adaway.txt" | awk '/^[0-9]{1,3}(\.[0-9]{1,3}){3}/ {print $2}' | writeifne $tmpAdPrefix."adaway.txt"
echo "Getting hosts-file ad list..." # 28050 domains
curl -s http://hosts-file.net/.%5Cad_servers.txt | grep -v "#" | grep -v "::1" | sed '/^$/d' | sed 's/\ /\\ /g' | awk '{print $2}' | grep -v '^\\' | grep -v '\\$' >> $tmpAdList
curl -s http://hosts-file.net/.%5Cad_servers.txt -z $tmpAdPrefix."hosts-file.txt" | awk '/^[0-9]{1,3}(\.[0-9]{1,3}){3}/ {print $2}' | sed $'s/\r$//' | writeifne $tmpAdPrefix."hosts-file.txt"
echo "Getting malwaredomainlist ad list..." # 1352 domains
curl -s http://www.malwaredomainlist.com/hostslist/hosts.txt | grep -v "#" | sed '/^$/d' | sed 's/\ /\\ /g' | awk '{print $3}' | grep -v '^\\' | grep -v '\\$' >> $tmpAdList
curl -s http://www.malwaredomainlist.com/hostslist/hosts.txt -z $tmpAdPrefix."malwaredomainlist.txt" | awk '/^[0-9]{1,3}(\.[0-9]{1,3}){3}/ {print $2}' | sed $'s/\r$//' | writeifne $tmpAdPrefix."malwaredomainlist.txt"
echo "Getting adblock.gjtech ad list..." # 696 domains
curl -s http://adblock.gjtech.net/?format=unix-hosts | grep -v "#" | sed '/^$/d' | sed 's/\ /\\ /g' | awk '{print $2}' | grep -v '^\\' | grep -v '\\$' >> $tmpAdList
curl -s http://adblock.gjtech.net/?format=unix-hosts -z $tmpAdPrefix."gjtech.txt" | awk '/^[0-9]{1,3}(\.[0-9]{1,3}){3}/ {print $2}' | sed $'s/\r$//' | writeifne $tmpAdPrefix."gjtech.txt"
echo "Getting someone who cares ad list..." # 10600
curl -s http://someonewhocares.org/hosts/hosts | grep -v "#" | sed '/^$/d' | sed 's/\ /\\ /g' | grep -v '^\\' | grep -v '\\$' | awk '{print $2}' | grep -v '^\\' | grep -v '\\$' >> $tmpAdList
curl -s http://someonewhocares.org/hosts/hosts -z $tmpAdPrefix."someonewhocares.txt" | awk '/^[0-9]{1,3}(\.[0-9]{1,3}){3}/ {print $2}' | sed $'s/\r$//' | writeifne $tmpAdPrefix."someonewhocares.txt"
echo "Getting Mother of All Ad Blocks list..." # 102168 domains!! Thanks Kacy
curl -A 'Mozilla/5.0 (X11; Linux x86_64; rv:30.0) Gecko/20100101 Firefox/30.0' -e http://forum.xda-developers.com/ http://adblock.mahakala.is/ | grep -v "#" | awk '{print $2}' >> $tmpAdList
curl -A 'Mozilla/5.0 (X11; Linux x86_64; rv:30.0) Gecko/20100101 Firefox/30.0' -e http://forum.xda-developers.com/ http://adblock.mahakala.is/ -z $tmpAdPrefix."mahakala.txt" | awk '/^[0-9]{1,3}(\.[0-9]{1,3}){3}/ {print $2}' | sed $'s/\r$//' | writeifne $tmpAdPrefix."mahakala.txt"
# Add entries from the local blacklist file if it exists in $piholeDir directory
echo -n "" > $tmpBlackList
if [[ -f $blacklist ]];then
# Merge temporary files. Remove lines with no dots (i.e. localhost, localdomain, etc)
echo -n "" > $tmpAdList
for i in $tmpAdPrefix.*.txt
do
grep '\.' $i >> $tmpAdList
done
# If newer, add entries from the local blacklist file if it exists in $piholeDir directory
# Remove empty lines and comments
if [[ -f $blacklist ]] && [[ $tmpBlackList -ot $blacklist ]]; then
echo "Getting the local blacklist from $piholeDir directory"
awk -F'[# \t]' 'NF>0&&$1!="" {print $1}' $blacklist > $tmpBlackList
cat $tmpBlackList >> $tmpAdList
else
if [[ -f $tmpBlackList ]]; then
echo "No need to update temporary blacklist."
else
echo -n "" > $tmpBlackList
fi
fi
cat $tmpBlackList >> $tmpAdList
# Clean-up entries from the local whitelist file if it exists in $piholeDir directory
echo -n "^$" > $tmpWhiteList
if [[ -f $whitelist ]];then
# If newer, clean-up entries from the local whitelist file if it exists in $piholeDir directory
# Remove empty lines and comments
if [[ -f $whitelist ]] && [[ $tmpWhiteList -ot $whitelist ]]; then
echo "Getting the local whitelist from $piholeDir directory"
awk -F'[# \t]' 'NF>0&&$1!="" {print $1"$"}' $whitelist > $tmpWhiteList
else
if [[ -f $tmpWhiteList ]]; then
echo "No need to update temporary whitelist."
else
echo -n "^$" > $tmpWhiteList
fi
fi
# Sort the aggregated results and remove any duplicates
# Remove entries from the whitelist file if it exists in $piholeDir folder
# Remove all subdomains if domain is already in list
echo "Removing duplicates, whitelisting, and formatting the list of domains..."
grep -vhE "^\s*(#|$)" $tmpAdList|
sed $'s/\r$//'|
awk -F. '{for (i=NF; i>1; --i) printf "%s.",$i;print $1}'|
awk -F. '{for (i=NF; i>1; --i) printf "%s.",$i;print $1}' $tmpAdList |
sort -t'.' -k1,2| uniq |
awk -F. 'NR!=1&&substr($0,1,length(p))==p {next} {p=$0".";for (i=NF; i>1; --i) printf "%s.",$i;print $1}'|
grep -vwf $tmpWhiteList |
awk -v "IP=$piholeIP" '{sub(/\r$/,""); print "address=/"$0"/"IP}' > $tmpConf
# Count how many entries from blacklist/whitelist were added so it can be displayed to the user
numberOfSitesWhitelisted=$(cat $tmpWhiteList | wc -l | sed 's/^[ \t]*//')
numberOfSitesBlacklisted=$(cat $tmpBlackList | wc -l | sed 's/^[ \t]*//')
echo "$numberOfSitesWhitelisted domain(s) whitelisted, $numberOfSitesBlacklisted domain(s) blacklisted."
# Count how many domains/whitelists were added so it can be displayed to the user
# Count how many domains were added so it can be displayed to the user
numberOfAdsBlocked=$(cat $tmpConf | wc -l | sed 's/^[ \t]*//')
echo "$numberOfAdsBlocked ad domains blocked."
# Turn the file into a dnsmasq config file
sudo mv $tmpConf $eventHorizion
# Restart DNS
sudo service dnsmasq restart
# Turn the file into a dnsmasq config file if necessary.
# The freshly generated list is installed only when it differs from the
# active config; an unchanged list means no file write and no DNS restart.
# cmp -s exits non-zero when the files differ (or cannot be compared), which
# sends execution to the update branch.
if cmp -s "$tmpConf" "$eventHorizon"
then
  echo "dnsmasq config file: $eventHorizon doesn't need to be updated"
  rm -- "$tmpConf"
else
  echo "...updating configuration file and restarting dnsmasq"
  # Restart DNS only when the new config was actually moved into place
  sudo mv -- "$tmpConf" "$eventHorizon" &&
    sudo service dnsmasq restart
fi