From 46c1009aa8cf3d02a207e59c34494b7e7f9fab4c Mon Sep 17 00:00:00 2001 From: pizzafritta Date: Mon, 23 Mar 2015 12:40:20 +0100 Subject: [PATCH] Update gravity-adv.sh What's new: - whitelist and blacklist bug fix - subdomains of items in whitelist are whitelisted too - subdomains of items in blacklist are blacklisted too - comments (lines starting with #) and blank lines are allowed in whitelist and in blacklist - definition of variables at the beginning of the code - smarter algorithm that reduces the number of entries in the dnsmasq configuration file (e.g. if adsite.com is blocked there is no need to block www.adsite.com ad1.adsite.com whatever.adsite.com etc.) this reduces the number of entries in the output from ~ 140k down to ~ 92k. - domain list is sorted in reverse (right to left) order: TLD first, then domain name and subdomains at the end. --- gravity-adv.sh | 73 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 47 insertions(+), 26 deletions(-) diff --git a/gravity-adv.sh b/gravity-adv.sh index 331af4a2..ba5cbff8 100755 --- a/gravity-adv.sh +++ b/gravity-adv.sh @@ -1,58 +1,79 @@ #!/bin/bash -# The Pi-hole now blocks over 120,000 ad domains +# The Pi-hole now blocks over 90,000 ad domains # Address to send ads to (the RPi) -piholeIP="127.0.0.1" +piholeIP="192.168.1.110" # Optionally, uncomment to automatically detect the address. Thanks Gregg #piholeIP=$(ifconfig eth0 | awk '/inet addr/{print substr($2,6)}') # Config file to hold URL rules -eventHorizion="/etc/dnsmasq.d/adList.conf" -whitelist=/etc/pihole/whitelist.txt -blacklist=/etc/pihole/blacklist.txt +piholeDir='/etc/pihole/' +eventHorizion='/etc/dnsmasq.d/adList.conf' + + +whitelist=$piholeDir'whitelist.txt' +blacklist=$piholeDir'blacklist.txt' # Create the pihole resource directory if it doesn't exist. Future files will be stored here -if [[ -d /etc/pihole/ ]];then +if [[ -d $piholeDir ]];then : else echo "Forming pihole directory..."
- sudo mkdir /etc/pihole + sudo mkdir "$piholeDir" +fi + +tmpDir='/tmp/' +tmpAdList=$tmpDir'matter.pihole.txt' +tmpConf=$tmpDir'andLight.pihole.txt' +tmpWhiteList=$tmpDir'yang.pihole.txt' +tmpBlackList=$tmpDir'yin.pihole.txt' + + +echo -n "" > $tmpWhiteList +if [[ -f $whitelist ]];then + grep -vE "^\s*(#|$)" $whitelist | sed "s|$|\$|" > $tmpWhiteList +fi + +echo -n "" > $tmpBlackList +if [[ -f $blacklist ]];then + grep -vE "^\s*(#|$)" $blacklist > $tmpBlackList fi echo "Getting yoyo ad list..." # Approximately 2452 domains at the time of writing -curl -s -d mimetype=plaintext -d hostformat=unixhosts http://pgl.yoyo.org/adservers/serverlist.php? | sort > /tmp/matter.txt +curl -s -d mimetype=plaintext -d hostformat=unixhosts http://pgl.yoyo.org/adservers/serverlist.php? | sort > $tmpAdList echo "Getting winhelp2002 ad list..." # 12985 domains -curl -s http://winhelp2002.mvps.org/hosts.txt | grep -v "#" | grep -v "127.0.0.1" | sed '/^$/d' | sed 's/\ /\\ /g' | awk '{print $2}' | sort >> /tmp/matter.txt +curl -s http://winhelp2002.mvps.org/hosts.txt | grep -v "#" | grep -v "127.0.0.1" | sed '/^$/d' | sed 's/\ /\\ /g' | awk '{print $2}' >> $tmpAdList echo "Getting adaway ad list..." # 445 domains -curl -s https://adaway.org/hosts.txt | grep -v "#" | grep -v "::1" | sed '/^$/d' | sed 's/\ /\\ /g' | awk '{print $2}' | grep -v '^\\' | grep -v '\\$' | sort >> /tmp/matter.txt +curl -s https://adaway.org/hosts.txt | grep -v "#" | grep -v "::1" | sed '/^$/d' | sed 's/\ /\\ /g' | awk '{print $2}' | grep -v '^\\' | grep -v '\\$' >> $tmpAdList echo "Getting hosts-file ad list..."
# 28050 domains -curl -s http://hosts-file.net/.%5Cad_servers.txt | grep -v "#" | grep -v "::1" | sed '/^$/d' | sed 's/\ /\\ /g' | awk '{print $2}' | grep -v '^\\' | grep -v '\\$' | sort >> /tmp/matter.txt +curl -s http://hosts-file.net/.%5Cad_servers.txt | grep -v "#" | grep -v "::1" | sed '/^$/d' | sed 's/\ /\\ /g' | awk '{print $2}' | grep -v '^\\' | grep -v '\\$' >> $tmpAdList echo "Getting malwaredomainlist ad list..." # 1352 domains -curl -s http://www.malwaredomainlist.com/hostslist/hosts.txt | grep -v "#" | sed '/^$/d' | sed 's/\ /\\ /g' | awk '{print $3}' | grep -v '^\\' | grep -v '\\$' | sort >> /tmp/matter.txt +curl -s http://www.malwaredomainlist.com/hostslist/hosts.txt | grep -v "#" | sed '/^$/d' | sed 's/\ /\\ /g' | awk '{print $3}' | grep -v '^\\' | grep -v '\\$' >> $tmpAdList echo "Getting adblock.gjtech ad list..." # 696 domains -curl -s http://adblock.gjtech.net/?format=unix-hosts | grep -v "#" | sed '/^$/d' | sed 's/\ /\\ /g' | awk '{print $2}' | grep -v '^\\' | grep -v '\\$' | sort >> /tmp/matter.txt +curl -s http://adblock.gjtech.net/?format=unix-hosts | grep -v "#" | sed '/^$/d' | sed 's/\ /\\ /g' | awk '{print $2}' | grep -v '^\\' | grep -v '\\$' >> $tmpAdList echo "Getting someone who cares ad list..." # 10600 -curl -s http://someonewhocares.org/hosts/hosts | grep -v "#" | sed '/^$/d' | sed 's/\ /\\ /g' | grep -v '^\\' | grep -v '\\$' | awk '{print $2}' | grep -v '^\\' | grep -v '\\$' | sort >> /tmp/matter.txt +curl -s http://someonewhocares.org/hosts/hosts | grep -v "#" | sed '/^$/d' | sed 's/\ /\\ /g' | grep -v '^\\' | grep -v '\\$' | awk '{print $2}' | grep -v '^\\' | grep -v '\\$' >> $tmpAdList echo "Getting Mother of All Ad Blocks list..." # 102168 domains!! 
Thanks Kacy -curl -A 'Mozilla/5.0 (X11; Linux x86_64; rv:30.0) Gecko/20100101 Firefox/30.0' -e http://forum.xda-developers.com/ http://adblock.mahakala.is/ | grep -v "#" | awk '{print $2}' | sort >> /tmp/matter.txt +curl -A 'Mozilla/5.0 (X11; Linux x86_64; rv:30.0) Gecko/20100101 Firefox/30.0' -e http://forum.xda-developers.com/ http://adblock.mahakala.is/ | grep -v "#" | awk '{print $2}' >> $tmpAdList # Sort the aggregated results and remove any duplicates # Remove entries from the whitelist file if it exists at the root of the current user's home folder -if [[ -f $whitelist ]];then - echo "Removing duplicates, whitelisting, and formatting the list of domains..." - cat /tmp/matter.txt $blacklist| sed $'s/\r$//' | awk -F "." '{for(i=NF; i > 1; i--) printf "%s.", $i; print $1}' | sort | awk -F "." '{for(i=NF; i > 1; i--) printf "%s.", $i; print $1}' | uniq | sed '/^$/d' | grep -vwf $whitelist | awk -v "IP=$piholeIP" '{sub(/\r$/,""); print "address=/"$0"/"IP}' > /tmp/andLight.txt - numberOfSitesWhitelisted=$(cat $whitelist | wc -l | sed 's/^[ \t]*//') - echo "$numberOfSitesWhitelisted domains whitelisted." -else - echo "Removing duplicates and formatting the list of domains..." - cat /tmp/matter.txt | sed $'s/\r$//' | sort | uniq | sed '/^$/d' | awk -v "IP=$piholeIP" '{sub(/\r$/,""); print "address=/"$0"/"IP}' > /tmp/andLight.txt -fi +echo "Removing duplicates, whitelisting, and formatting the list of domains..." +grep -vhE "^\s*(#|$)" $tmpAdList $tmpBlackList | + sed $'s/\r$//'| grep -vwf $tmpWhiteList | + awk -F. '{for (i=NF; i>1; --i) printf "%s.",$i;print $1}'| + sort -t'.' -k1,2| uniq | + awk -F.
'NR!=1&&substr($0,1,length(p))==p{next} {p=$0".";for (i=NF; i>1; --i) printf "%s.",$i;print $1}'| + awk -v "IP=$piholeIP" '{sub(/\r$/,""); print "address=/"$0"/"IP}' > $tmpConf +numberOfSitesWhitelisted=$(cat $tmpWhiteList | wc -l | sed 's/^[ \t]*//') +numberOfSitesBlacklisted=$(cat $tmpBlackList | wc -l | sed 's/^[ \t]*//') +echo "$numberOfSitesWhitelisted domain(s) whitelisted, $numberOfSitesBlacklisted domain(s) blacklisted." # Count how many domains/whitelists were added so it can be displayed to the user -numberOfAdsBlocked=$(cat /tmp/andLight.txt | wc -l | sed 's/^[ \t]*//') +numberOfAdsBlocked=$(cat $tmpConf | wc -l | sed 's/^[ \t]*//') echo "$numberOfAdsBlocked ad domains blocked." # Turn the file into a dnsmasq config file -sudo mv /tmp/andLight.txt $eventHorizion +sudo mv $tmpConf $eventHorizion # Restart DNS sudo service dnsmasq restart