#!/bin/bash
# http://pi-hole.net
# Compiles a list of ad-serving domains by downloading them from multiple sources
# Work out which IP address blocked domains should resolve to.
# Sets: piholeIPfile (hand-off file path) and piholeIP (the address).
piholeIPfile=/tmp/piholeIP
if [[ -f "$piholeIPfile" ]]; then
  # If the file exists, it means it was exported from the installation script
  # and we should use that value instead of detecting it in this script
  piholeIP=$(<"$piholeIPfile")
  # The hand-off file is single-use; remove it once its value has been read
  rm -- "$piholeIPfile"
else
  # Otherwise, the IP address can be taken directly from the machine, which
  # will happen when the script is run by the user and not the installation
  # script. awk prints the first dotted quad found on each line of output,
  # sed drops the lines that had none, and grep discards the loopback address.
  # NOTE(review): this looks like it yields one address per line when several
  # non-loopback IPv4 addresses are configured -- confirm that is acceptable.
  piholeIP=$(ip -4 addr show \
    | awk '{match($0,/[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+/); ip = substr($0,RSTART,RLENGTH); print ip}' \
    | sed '/^\s*$/d' \
    | grep -v "127.0.0.1")
fi
# Ad-list sources--one per line in single quotes
# The mahakala source is commented out due to many users having issues with it
# blocking legitimate domains. Uncomment at your own risk
sources=(
  'https://adaway.org/hosts.txt'
  'http://adblock.gjtech.net/?format=unix-hosts'
  #'http://adblock.mahakala.is/'
  'http://hosts-file.net/.%5Cad_servers.txt'
  'http://www.malwaredomainlist.com/hostslist/hosts.txt'
  'http://pgl.yoyo.org/adservers/serverlist.php?'
  'http://someonewhocares.org/hosts/hosts'
  'http://winhelp2002.mvps.org/hosts.txt'
)
# Variables for various stages of downloading and formatting the list
# Final list location (the copy dnsmasq reads)
adList=/etc/pihole/gravity.list
# Working directory for downloaded and intermediate files
origin=/etc/pihole
# Directory holding the user-editable lists and pihole.conf
piholeDir=/etc/pihole
# Extension given to each downloaded, domains-only list
justDomainsExtension=domains
# Intermediate file names, in the order the stages produce them
matter=pihole.0.matter.txt
andLight=pihole.1.andLight.txt
supernova=pihole.2.supernova.txt
eventHorizon=pihole.3.eventHorizon.txt
accretionDisc=pihole.4.accretionDisc.txt
eyeOfTheNeedle=pihole.5.wormhole.txt
# User lists and their processed ("latent") counterparts
blacklist=$piholeDir/blacklist.txt
latentBlacklist=$origin/latentBlacklist.txt
whitelist=$piholeDir/whitelist.txt
latentWhitelist=$origin/latentWhitelist.txt
# After setting defaults, check if there are local overrides.
# A readable pihole.conf is sourced into this shell, so any variable it
# assigns replaces the value set earlier in the script.
if [[ -r "$piholeDir/pihole.conf" ]]; then
  echo "** Local calibration requested..."
  # shellcheck source=/dev/null
  . "$piholeDir/pihole.conf"
fi
echo "** Neutrino emissions detected..."
# Create the pihole resource directory if it doesn't exist. Future files will
# be stored here
if [[ ! -d "$piholeDir" ]]; then
  echo "** Creating pihole directory..."
  # -p also creates missing parents and does not fail if the directory
  # appears between the test above and this call
  sudo mkdir -p -- "$piholeDir"
fi
# Add additional swap to prevent the "Error fork: unable to allocate memory" message: https://github.com/jacobsalmela/pi-hole/issues/37
#######################################
# Replace /etc/dphys-swapfile with the copy published in the pi-hole
# repository and rebuild the swap file from it.
# Arguments: none
# Outputs:   one progress message on stdout
# Returns:   exit status of the final `dphys-swapfile swapon`
#######################################
createSwapFile() {
  local swapConfig=/etc/dphys-swapfile
  local swapConfigUrl=https://raw.githubusercontent.com/jacobsalmela/pi-hole/master/advanced/dphys-swapfile
  local step

  echo "** Creating more swap space to accomodate large solar masses..."
  # Swap is switched off before its configuration file is overwritten
  sudo dphys-swapfile swapoff
  sudo curl -s -o "$swapConfig" "$swapConfigUrl"
  # Rebuild the swap file from the new configuration, then re-enable it
  for step in setup swapon; do
    sudo dphys-swapfile "$step"
  done
}
# Decide whether the swap file needs to be (re)created.
#   - $noSwap non-empty: leave swap untouched
#   - /etc/dphys-swapfile present: recreate unless CONF_SWAPSIZE is already 500
#   - otherwise: create a swap file from scratch
if [[ -n "$noSwap" ]]; then
  # if $noSwap is set, don't do anything
  :
elif [[ -f /etc/dphys-swapfile ]]; then
  # Value of the first CONF_SWAPSIZE line (everything after the '=')
  swapSize=$(grep -m1 CONF_SWAPSIZE /etc/dphys-swapfile | cut -d'=' -f2)
  if [[ "$swapSize" != 500 ]]; then
    # Keep the previous configuration as .orig before it is replaced.
    # sudo matches how createSwapFile writes this same file.
    sudo mv /etc/dphys-swapfile /etc/dphys-swapfile.orig
    echo "** Current swap size is $swapSize"
    createSwapFile
  fi
else
  echo "** No swap file found. Creating one..."
  createSwapFile
fi
# Loop through the source list. Download each one, remove commented lines
# (lines whose first field contains '#') and blank lines, and keep only the
# domain column.
for i in "${!sources[@]}"; do
  url=${sources[$i]}
  # Get just the domain from the URL (third '/'-separated field)
  domain=$(cut -d'/' -f3 <<<"$url")

  # Save the file as list.#.domain
  saveLocation=$origin/list.$i.$domain.$justDomainsExtension

  echo -n " Getting $domain list... "
  # Use a case statement to download lists that need special cURL commands to
  # complete properly. Every variant passes -z "$saveLocation", curl's time
  # condition against the previously saved copy; an empty $data is treated
  # below as "nothing new".
  case "$domain" in
    "adblock.mahakala.is")
      data=$(curl -s \
        -A 'Mozilla/5.0 (X11; Linux x86_64; rv:30.0) Gecko/20100101 Firefox/30.0' \
        -e http://forum.xda-developers.com/ \
        -z "$saveLocation" "$url")
      ;;
    "pgl.yoyo.org")
      data=$(curl -s -d mimetype=plaintext -d hostformat=hosts \
        -z "$saveLocation" "$url")
      ;;
    *)
      data=$(curl -s -z "$saveLocation" -A "Mozilla/10.0" "$url")
      ;;
  esac

  if [[ -n "$data" ]]; then
    # Remove comments and print only the domain name
    # Most of the lists downloaded are already in hosts file format but the
    # spacing/formatting is not contiguous
    # This helps with that and makes it easier to read
    # It also helps with debugging so each stage of the script can be
    # researched more in depth
    #   awk : skip blank/comment lines; print field 2 of "IP domain" lines,
    #         or field 1 when the line holds a bare domain
    #   sed : trim leading/trailing dots and whitespace, collapse ".." runs
    #   grep: keep only entries that still contain a dot
    printf '%s\n' "$data" \
      | awk 'NF {if ($1 !~ "#") { if (NF>1) {print $2} else {print $1}}}' \
      | sed -e 's/^[. \t]*//' -e 's/\.\.\+/./g' -e 's/[. \t]*$//' \
      | grep "\." > "$saveLocation"
    echo "Done."
  else
    echo "Skipping list because it does not have any new entries."
  fi
done
# Find all files with the .domains extension and compile them into one file
# and remove CRs
echo "** Aggregating list of domains..."
find "$origin/" -type f -name "*.$justDomainsExtension" -exec cat {} + \
  | tr -d '\r' > "$origin/$matter"

# Append blacklist entries if they exist
if [[ -f "$blacklist" ]]; then
  # Count only the non-blank lines for the progress message
  numberOf=$(sed '/^\s*$/d' "$blacklist" | wc -l)
  echo "** Blacklisting $numberOf domain(s)..."
  cat -- "$blacklist" >> "$origin/$matter"
fi
#######################################
# Turn the whitelisted domain list into the hosts-format file dnsmasq reads,
# then signal dnsmasq.
# Globals (read): origin, andLight, supernova, eventHorizon, accretionDisc,
#                 piholeIP, adList
# Arguments: none
# Outputs:   progress messages on stdout; writes $origin/$supernova,
#            $origin/$eventHorizon, $origin/$accretionDisc and $adList
#######################################
gravity_advanced() {
  local numberOf dnsmasqPids

  numberOf=$(sed '/^\s*$/d' "$origin/$andLight" | wc -l)
  echo "** $numberOf domains being pulled in by gravity..."

  # Remove carriage returns and blank lines
  sed -e $'s/\r$//' -e '/^\s*$/d' "$origin/$andLight" > "$origin/$supernova"

  # Sort and remove duplicates
  sort -u "$origin/$supernova" > "$origin/$eventHorizon"
  numberOf=$(sed '/^\s*$/d' "$origin/$eventHorizon" | wc -l)
  echo "** $numberOf unique domains trapped in the event horizon."

  # Format domain list as "192.168.x.x domain.com"
  echo "** Formatting domains into a HOSTS file..."
  # The address is handed to awk with -v instead of being spliced into the
  # program text, so it can never be parsed as awk code
  awk -v ip="$piholeIP" '{sub(/\r$/,""); print ip " " $0}' \
    "$origin/$eventHorizon" > "$origin/$accretionDisc"

  # Copy the file over as /etc/pihole/gravity.list so dnsmasq can use it
  sudo cp "$origin/$accretionDisc" "$adList"

  # Send SIGHUP to every running dnsmasq; skip the signal when pidof finds
  # none, since `kill -HUP` without a PID only prints a usage error
  if dnsmasqPids=$(pidof dnsmasq); then
    # shellcheck disable=SC2086 -- word splitting intended: one PID per word
    kill -HUP $dnsmasqPids
  fi
}
# Whitelist (if applicable) then remove duplicates and format for dnsmasq
if [[ -f "$whitelist" ]]; then
  # Remove whitelist entries
  numberOf=$(sed '/^\s*$/d' "$whitelist" | wc -l)
  plural=
  if [[ "$numberOf" != "1" ]]; then
    plural=s
  fi
  echo "** Whitelisting $numberOf domain${plural}..."
  # Append a "$" to the end, prepend a "^" to the beginning, and
  # replace "." with "\." of each line to turn each entry into a
  # regexp so it can be parsed out with grep -x
  # (awk keeps only the text before the first '#', space or tab)
  awk -F '[# \t]' 'NF>0&&$1!="" {print "^"$1"$"}' "$whitelist" \
    | sed 's/\./\\./g' > "$latentWhitelist"
else
  # No user whitelist: start from an empty pattern file. -f keeps this quiet
  # when no pattern file was left behind by an earlier run.
  rm -f -- "$latentWhitelist"
fi

# Prevent our sources from being pulled into the hole
plural=
if [[ "${#sources[@]}" != "1" ]]; then
  plural=s
fi
echo "** Whitelisting ${#sources[@]} ad list source${plural}..."
for url in "${sources[@]}"; do
  # The host part of the URL (third '/'-separated field) becomes an anchored,
  # dot-escaped pattern
  awk -F '/' '{print "^"$3"$"}' <<<"$url" | sed 's/\./\\./g' >> "$latentWhitelist"
done

# Drop every aggregated domain that matches a whitelist pattern as a whole
# line, then hand the result to the formatting stage
grep -vxf "$latentWhitelist" "$origin/$matter" > "$origin/$andLight"

gravity_advanced