Update gravity-adv.sh

What's new:
- whitelist and blacklist bug fix
- subdomains of items in whitelist are whitelisted too
- subdomains of items in blacklist are blacklisted too
- comments (lines starting with #) and blank lines are allowed in whitelist and in blacklist
- definition of variables at the beginning of the code
- smarter algorithm that reduces the number of entries in the dnsmasq configuration file (i.e. if adsite.com is blocked, there is no need to block www.adsite.com, ad1.adsite.com, whatever.adsite.com, etc.). This reduces the number of entries in the output from ~140k down to ~92k.
- domain list is sorted in reverse (right to left) order: TLD first, then domain name and subdomains at the end.
This commit is contained in:
pizzafritta 2015-03-23 12:40:20 +01:00
parent 594036d808
commit 46c1009aa8

View file

@ -1,58 +1,79 @@
#!/bin/bash #!/bin/bash
# The Pi-hole now blocks over 120,000 ad domains # The Pi-hole now blocks over 90,000 ad domains
# Address to send ads to (the RPi) # Address to send ads to (the RPi)
piholeIP="127.0.0.1" piholeIP="192.168.1.110"
# Optionally, uncomment to automatically detect the address. Thanks Gregg # Optionally, uncomment to automatically detect the address. Thanks Gregg
#piholeIP=$(ifconfig eth0 | awk '/inet addr/{print substr($2,6)}') #piholeIP=$(ifconfig eth0 | awk '/inet addr/{print substr($2,6)}')
# Config file to hold URL rules # Config file to hold URL rules
eventHorizion="/etc/dnsmasq.d/adList.conf" piholeDir='/etc/pihole/'
whitelist=/etc/pihole/whitelist.txt eventHorizion='/etc/dnsmasq.d/adList.conf'
blacklist=/etc/pihole/blacklist.txt
whitelist=$piholeDir'whitelist.txt'
blacklist=$piholeDir'blacklist.txt'
# Create the pihole resource directory if it doesn't exist. Future files will be stored here # Create the pihole resource directory if it doesn't exist. Future files will be stored here
if [[ -d /etc/pihole/ ]];then if [[ -d $piholeDir ]];then
: :
else else
echo "Forming pihole directory..." echo "Forming pihole directory..."
sudo mkdir /etc/pihole sudo mkdir piholeDir
fi
tmpDir='/tmp/'
tmpAdList=$tmpDir'matter.pihole.txt'
tmpConf=$tmpDir'andLight.pihole.txt'
tmpWhiteList=$tmpDir'yang.pihole.txt'
tmpBlackList=$tmpDir'yin.pihole.txt'
echo -n "" > $tmpWhiteList
if [[ -f $whitelist ]];then
grep -vE "^\s*(#|$)" $whitelist | sed "s|$|\$|" > $tmpWhiteList
fi
echo -n "" > $tmpBlackList
if [[ -f $blacklist ]];then
grep -vE "^\s*(#|$)" $blacklist > $tmpBlackList
fi fi
echo "Getting yoyo ad list..." # Approximately 2452 domains at the time of writing echo "Getting yoyo ad list..." # Approximately 2452 domains at the time of writing
curl -s -d mimetype=plaintext -d hostformat=unixhosts http://pgl.yoyo.org/adservers/serverlist.php? | sort > /tmp/matter.txt curl -s -d mimetype=plaintext -d hostformat=unixhosts http://pgl.yoyo.org/adservers/serverlist.php? | sort > $tmpAdList
echo "Getting winhelp2002 ad list..." # 12985 domains echo "Getting winhelp2002 ad list..." # 12985 domains
curl -s http://winhelp2002.mvps.org/hosts.txt | grep -v "#" | grep -v "127.0.0.1" | sed '/^$/d' | sed 's/\ /\\ /g' | awk '{print $2}' | sort >> /tmp/matter.txt curl -s http://winhelp2002.mvps.org/hosts.txt | grep -v "#" | grep -v "127.0.0.1" | sed '/^$/d' | sed 's/\ /\\ /g' | awk '{print $2}' >> $tmpAdList
echo "Getting adaway ad list..." # 445 domains echo "Getting adaway ad list..." # 445 domains
curl -s https://adaway.org/hosts.txt | grep -v "#" | grep -v "::1" | sed '/^$/d' | sed 's/\ /\\ /g' | awk '{print $2}' | grep -v '^\\' | grep -v '\\$' | sort >> /tmp/matter.txt curl -s https://adaway.org/hosts.txt | grep -v "#" | grep -v "::1" | sed '/^$/d' | sed 's/\ /\\ /g' | awk '{print $2}' | grep -v '^\\' | grep -v '\\$' >> $tmpAdList
echo "Getting hosts-file ad list..." # 28050 domains echo "Getting hosts-file ad list..." # 28050 domains
curl -s http://hosts-file.net/.%5Cad_servers.txt | grep -v "#" | grep -v "::1" | sed '/^$/d' | sed 's/\ /\\ /g' | awk '{print $2}' | grep -v '^\\' | grep -v '\\$' | sort >> /tmp/matter.txt curl -s http://hosts-file.net/.%5Cad_servers.txt | grep -v "#" | grep -v "::1" | sed '/^$/d' | sed 's/\ /\\ /g' | awk '{print $2}' | grep -v '^\\' | grep -v '\\$' >> $tmpAdList
echo "Getting malwaredomainlist ad list..." # 1352 domains echo "Getting malwaredomainlist ad list..." # 1352 domains
curl -s http://www.malwaredomainlist.com/hostslist/hosts.txt | grep -v "#" | sed '/^$/d' | sed 's/\ /\\ /g' | awk '{print $3}' | grep -v '^\\' | grep -v '\\$' | sort >> /tmp/matter.txt curl -s http://www.malwaredomainlist.com/hostslist/hosts.txt | grep -v "#" | sed '/^$/d' | sed 's/\ /\\ /g' | awk '{print $3}' | grep -v '^\\' | grep -v '\\$' >> $tmpAdList
echo "Getting adblock.gjtech ad list..." # 696 domains echo "Getting adblock.gjtech ad list..." # 696 domains
curl -s http://adblock.gjtech.net/?format=unix-hosts | grep -v "#" | sed '/^$/d' | sed 's/\ /\\ /g' | awk '{print $2}' | grep -v '^\\' | grep -v '\\$' | sort >> /tmp/matter.txt curl -s http://adblock.gjtech.net/?format=unix-hosts | grep -v "#" | sed '/^$/d' | sed 's/\ /\\ /g' | awk '{print $2}' | grep -v '^\\' | grep -v '\\$' >> $tmpAdList
echo "Getting someone who cares ad list..." # 10600 echo "Getting someone who cares ad list..." # 10600
curl -s http://someonewhocares.org/hosts/hosts | grep -v "#" | sed '/^$/d' | sed 's/\ /\\ /g' | grep -v '^\\' | grep -v '\\$' | awk '{print $2}' | grep -v '^\\' | grep -v '\\$' | sort >> /tmp/matter.txt curl -s http://someonewhocares.org/hosts/hosts | grep -v "#" | sed '/^$/d' | sed 's/\ /\\ /g' | grep -v '^\\' | grep -v '\\$' | awk '{print $2}' | grep -v '^\\' | grep -v '\\$' >> $tmpAdList
echo "Getting Mother of All Ad Blocks list..." # 102168 domains!! Thanks Kacy echo "Getting Mother of All Ad Blocks list..." # 102168 domains!! Thanks Kacy
curl -A 'Mozilla/5.0 (X11; Linux x86_64; rv:30.0) Gecko/20100101 Firefox/30.0' -e http://forum.xda-developers.com/ http://adblock.mahakala.is/ | grep -v "#" | awk '{print $2}' | sort >> /tmp/matter.txt curl -A 'Mozilla/5.0 (X11; Linux x86_64; rv:30.0) Gecko/20100101 Firefox/30.0' -e http://forum.xda-developers.com/ http://adblock.mahakala.is/ | grep -v "#" | awk '{print $2}' >> $tmpAdList
# Sort the aggregated results and remove any duplicates # Sort the aggregated results and remove any duplicates
# Remove entries from the whitelist file if it exists at the root of the current user's home folder # Remove entries from the whitelist file if it exists at the root of the current user's home folder
if [[ -f $whitelist ]];then
echo "Removing duplicates, whitelisting, and formatting the list of domains..." echo "Removing duplicates, whitelisting, and formatting the list of domains..."
cat /tmp/matter.txt $blacklist| sed $'s/\r$//' | awk -F "." '{for(i=NF; i > 1; i--) printf "%s.", $i; print $1}' | sort | awk -F "." '{for(i=NF; i > 1; i--) printf "%s.", $i; print $1}' | uniq | sed '/^$/d' | grep -vwf $whitelist | awk -v "IP=$piholeIP" '{sub(/\r$/,""); print "address=/"$0"/"IP}' > /tmp/andLight.txt grep -vhE "^\s*(#|$)" $tmpAdList $tmpBlackList |
numberOfSitesWhitelisted=$(cat $whitelist | wc -l | sed 's/^[ \t]*//') sed $'s/\r$//'|
echo "$numberOfSitesWhitelisted domains whitelisted." awk -F. '{for (i=NF; i>1; --i) printf "%s.",$i;print $1}'|
else sort -t'.' -k1,2| uniq | grep -vwf $tmpWhiteList |
echo "Removing duplicates and formatting the list of domains..." awk -F. 'NR!=1&&substr($0,0,length(p))==p{next} {p=$0".";for (i=NF; i>1; --i) printf "%s.",$i;print $1}'|
cat /tmp/matter.txt | sed $'s/\r$//' | sort | uniq | sed '/^$/d' | awk -v "IP=$piholeIP" '{sub(/\r$/,""); print "address=/"$0"/"IP}' > /tmp/andLight.txt awk -v "IP=$piholeIP" '{sub(/\r$/,""); print "address=/"$0"/"IP}' > $tmpConf
fi numberOfSitesWhitelisted=$(cat $tmpWhiteList | wc -l | sed 's/^[ \t]*//')
numberOfSitesBlacklisted=$(cat $tmpBlackList | wc -l | sed 's/^[ \t]*//')
echo "$numberOfSitesWhitelisted domain(s) whitelisted, $numberOfSitesBlacklisted domain(s) blacklisted."
# Count how many domains/whitelists were added so it can be displayed to the user # Count how many domains/whitelists were added so it can be displayed to the user
numberOfAdsBlocked=$(cat /tmp/andLight.txt | wc -l | sed 's/^[ \t]*//') numberOfAdsBlocked=$(cat $tmpConf | wc -l | sed 's/^[ \t]*//')
echo "$numberOfAdsBlocked ad domains blocked." echo "$numberOfAdsBlocked ad domains blocked."
# Turn the file into a dnsmasq config file # Turn the file into a dnsmasq config file
sudo mv /tmp/andLight.txt $eventHorizion sudo mv $tmpConf $eventHorizion
# Restart DNS # Restart DNS
sudo service dnsmasq restart sudo service dnsmasq restart