From f02c37ec07a33c60588823c0420a9e9888d4af7f Mon Sep 17 00:00:00 2001
From: Dan Schaper
Date: Sun, 22 Nov 2015 23:49:38 -0800
Subject: [PATCH 1/9] Start code refactor, eliminate some repetitive code

Begin code refactor preparation; eliminate repetition (DRY) where possible.
---
 gravity.sh | 174 ++++++++++++++++++++---------------------------------
 1 file changed, 66 insertions(+), 108 deletions(-)

diff --git a/gravity.sh b/gravity.sh
index 4c191c39..62a2cf2e 100755
--- a/gravity.sh
+++ b/gravity.sh
@@ -1,11 +1,18 @@
-#!/bin/bash
+#!/usr/bin/env bash
 # http://pi-hole.net
 # Compiles a list of ad-serving domains by downloading them from multiple sources
-
-# This script should only be run after you have a static IP address set on the Pi
-piholeIP=$(hostname -I)
+piholeIPfile=/tmp/piholeIP
+if [[ -f $piholeIPfile ]];then
+    # If the file exists, it means it was exported from the installation script and we should use that value instead of detecting it in this script
+    piholeIP=$(cat $piholeIPfile)
+    rm $piholeIPfile
+else
+    # Otherwise, the IP address can be taken directly from the machine, which will happen when the script is run by the user and not the installation script
+    piholeIP=$(ip -4 addr show | awk '{match($0,/[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+/); ip = substr($0,RSTART,RLENGTH); print ip}' | sed '/^\s*$/d' | grep -v "127.0.0.1" | (head -n1))
+fi

 # Ad-list sources--one per line in single quotes
+# The mahakala source is commented out due to many users having issues with it blocking legitimate domains. Uncomment at your own risk
 sources=('https://adaway.org/hosts.txt'
 'http://adblock.gjtech.net/?format=unix-hosts'
 #'http://adblock.mahakala.is/'
@@ -16,139 +23,90 @@ sources=('https://adaway.org/hosts.txt'
 'http://winhelp2002.mvps.org/hosts.txt')

 # Variables for various stages of downloading and formatting the list
-adList=/etc/pihole/gravity.list
-origin=/etc/pihole
-piholeDir=/etc/pihole
-justDomainsExtension=domains
-matter=pihole.0.matter.txt
-andLight=pihole.1.andLight.txt
-supernova=pihole.2.supernova.txt
-eventHorizon=pihole.3.eventHorizon.txt
-accretionDisc=pihole.4.accretionDisc.txt
-eyeOfTheNeedle=pihole.5.wormhole.txt
+basename=pihole
+piholeDir=/etc/$basename
+adList=$piholeDir/gravity.list
 blacklist=$piholeDir/blacklist.txt
 whitelist=$piholeDir/whitelist.txt
-latentWhitelist=$origin/latentWhitelist.txt
+latentWhitelist=$piholeDir/latentWhitelist.txt
+justDomainsExtension=domains
+matter=$basename.0.matter.txt
+andLight=$basename.1.andLight.txt
+supernova=$basename.2.supernova.txt
+eventHorizon=$basename.3.eventHorizon.txt
+accretionDisc=$basename.4.accretionDisc.txt
+eyeOfTheNeedle=$basename.5.wormhole.txt

 # After setting defaults, check if there's local overrides
 if [[ -r $piholeDir/pihole.conf ]];then
 	echo "** Local calibration requested..."
-	. $piholeDir/pihole.conf
+    . $piholeDir/pihole.conf
 fi

 echo "** Neutrino emissions detected..."

 # Create the pihole resource directory if it doesn't exist. Future files will be stored here
 if [[ -d $piholeDir ]];then
-	:
+    :
 else
-	echo "** Creating pihole directory..."
-	sudo mkdir $piholeDir
+    echo "** Creating pihole directory..."
+    sudo mkdir $piholeDir
 fi

 # Loop through domain list. Download each one and remove commented lines (lines beginning with '# 'or '/') and blank lines
 for ((i = 0; i < "${#sources[@]}"; i++))
 do
-	url=${sources[$i]}
-	# Get just the domain from the URL
-	domain=$(echo "$url" | cut -d'/' -f3)
+    url=${sources[$i]}
+    # Get just the domain from the URL
+    domain=$(echo "$url" | cut -d'/' -f3)

-	# Save the file as list.#.domain
-	saveLocation=$origin/list.$i.$domain.$justDomainsExtension
+    # Save the file as list.#.domain
+    saveLocation=$piholeDir/list.$i.$domain.$justDomainsExtension

-	agent="Mozilla/10.0"
+    agent="Mozilla/10.0"

-	echo -n "Getting $domain list... "
+    echo -n "Getting $domain list... "

-	# Use a case statement to download lists that need special cURL commands
-	# to complete properly and reset the user agent when required
-	case "$domain" in
-		"adblock.mahakala.is")
-			agent='Mozilla/5.0 (X11; Linux x86_64; rv:30.0) Gecko/20100101 Firefox/30.0'
-			cmd="curl -e http://forum.xda-developers.com/"
-			;;
+    # Use a case statement to download lists that need special cURL commands
+    # to complete properly and reset the user agent when required
+    case "$domain" in
+        "adblock.mahakala.is")
+            agent='Mozilla/5.0 (X11; Linux x86_64; rv:30.0) Gecko/20100101 Firefox/30.0'
+            cmd_ext="-e http://forum.xda-developers.com/"
+            ;;

-		"pgl.yoyo.org")
-			cmd="curl -d mimetype=plaintext -d hostformat=hosts"
-			;;
+        "pgl.yoyo.org")
+    echo "** $numberOf domains being pulled in by gravity..."

-		# Default is a simple curl request
-		*) cmd="curl"
-	esac
+    # Remove carriage returns and preceding whitespace
+    # not really needed anymore?
+    cp $piholeDir/$andLight $piholeDir/$supernova

-	# tmp file, so we don't have to store the (long!) lists in RAM
-	patternBuffer=$(mktemp)
-	heisenbergCompensator=""
-	if [[ -r $saveLocation ]]; then
-		heisenbergCompensator="-z $saveLocation"
-	fi
-	CMD="$cmd -s $heisenbergCompensator -A '$agent' $url > $patternBuffer"
-	$cmd -s $heisenbergCompensator -A "$agent" $url > $patternBuffer
+    # Sort and remove duplicates
+    sort -u $piholeDir/$supernova > $piholeDir/$eventHorizon
+    numberOf=$(wc -l < $piholeDir/$eventHorizon)
+    echo "** $numberOf unique domains trapped in the event horizon."
-
-	if [[ -s "$patternBuffer" ]];then
-		# Remove comments and print only the domain name
-		# Most of the lists downloaded are already in hosts file format but the spacing/formating is not contigious
-		# This helps with that and makes it easier to read
-		# It also helps with debugging so each stage of the script can be researched more in depth
-		awk '($1 !~ /^#/) { if (NF>1) {print $2} else {print $1}}' $patternBuffer | \
-		sed -nr -e 's/\.{2,}/./g' -e '/\./p' > $saveLocation
-		echo "Done."
-	else
-		echo "Skipping pattern because transporter logic detected no changes..."
-	fi
-
-	# Cleanup
-	rm -f $patternBuffer
-done
-
-# Find all files with the .domains extension and compile them into one file and remove CRs
-echo "** Aggregating list of domains..."
-find $origin/ -type f -name "*.$justDomainsExtension" -exec cat {} \; | tr -d '\r' > $origin/$matter
-
-# Append blacklist entries if they exist
-if [[ -r $blacklist ]];then
-	numberOf=$(cat $blacklist | sed '/^\s*$/d' | wc -l)
-	echo "** Blacklisting $numberOf domain(s)..."
-	cat $blacklist >> $origin/$matter
-fi
-
-###########################
-function gravity_advanced() {
-
-	numberOf=$(wc -l < $origin/$andLight)
-	echo "** $numberOf domains being pulled in by gravity..."
-
-	# Remove carriage returns and preceding whitespace
-	# not really needed anymore?
-	cp $origin/$andLight $origin/$supernova
-
-	# Sort and remove duplicates
-	sort -u $origin/$supernova > $origin/$eventHorizon
-	numberOf=$(wc -l < $origin/$eventHorizon)
-	echo "** $numberOf unique domains trapped in the event horizon."
-
-	# Format domain list as "192.168.x.x domain.com"
-	echo "** Formatting domains into a HOSTS file..."
-	awk '{print "'"$piholeIP"'" $1}' $origin/$eventHorizon > $origin/$accretionDisc
-
-	# Copy the file over as /etc/pihole/gravity.list so dnsmasq can use it
-	sudo cp $origin/$accretionDisc $adList
-	kill -HUP $(pidof dnsmasq)
+    # Format domain list as "192.168.x.x domain.com"
+    echo "** Formatting domains into a HOSTS file..."
+    cat $piholeDir/$eventHorizon | awk '{sub(/\r$/,""); print "'"$piholeIP"' " $0}' > $piholeDir/$accretionDisc
+    # Copy the file over as /etc/pihole/gravity.list so dnsmasq can use it
+    sudo cp $piholeDir/$accretionDisc $adList
+    kill -HUP $(pidof dnsmasq)
 }

 # Whitelist (if applicable) then remove duplicates and format for dnsmasq
 if [[ -r $whitelist ]];then
-	# Remove whitelist entries
-	numberOf=$(cat $whitelist | sed '/^\s*$/d' | wc -l)
-	plural=; [[ "$numberOf" != "1" ]] && plural=s
-	echo "** Whitelisting $numberOf domain${plural}..."
+    # Remove whitelist entries
+    numberOf=$(cat $whitelist | sed '/^\s*$/d' | wc -l)
+    plural=; [[ "$numberOf" != "1" ]] && plural=s
+    echo "** Whitelisting $numberOf domain${plural}..."

-	# Append a "$" to the end, prepend a "^" to the beginning, and
-	# replace "." with "\." of each line to turn each entry into a
-	# regexp so it can be parsed out with grep -x
-	awk -F '[# \t]' 'NF>0&&$1!="" {print "^"$1"$"}' $whitelist | sed 's/\./\\./g' > $latentWhitelist
+    # Append a "$" to the end, prepend a "^" to the beginning, and
+    # replace "." with "\." of each line to turn each entry into a
+    # regexp so it can be parsed out with grep -x
+    awk -F '[# \t]' 'NF>0&&$1!="" {print "^"$1"$"}' $whitelist | sed 's/\./\\./g' > $latentWhitelist
 else
-	rm $latentWhitelist
+    rm $latentWhitelist
 fi

 # Prevent our sources from being pulled into the hole
@@ -156,10 +114,10 @@ plural=; [[ "${#sources[@]}" != "1" ]] && plural=s
 echo "** Whitelisting ${#sources[@]} ad list source${plural}..."
 for url in ${sources[@]}
 do
-	echo "$url" | awk -F '/' '{print "^"$3"$"}' | sed 's/\./\\./g' >> $latentWhitelist
+    echo "$url" | awk -F '/' '{print "^"$3"$"}' | sed 's/\./\\./g' >> $latentWhitelist
 done

 # Remove whitelist entries from deduped list
-grep -vxf $latentWhitelist $origin/$matter > $origin/$andLight
+grep -vxf $latentWhitelist $piholeDir/$matter > $piholeDir/$andLight

 gravity_advanced
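
[Editor's note: patch 1's headline change is how the Pi's address is found. Instead of `hostname -I`, the script now scrapes `ip -4 addr show`. Below is a standalone sketch of that pipeline, runnable on any Linux host with iproute2; the printed address is machine-dependent and nothing in the sketch is part of the patch itself.]

    #!/usr/bin/env bash
    # Find the first non-loopback IPv4 address on any interface.
    # awk's match() sets RSTART/RLENGTH, which substr() uses to cut out the hit;
    # lines with no match yield an empty string, which the sed then removes.
    ip -4 addr show |
      awk '{match($0,/[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+/); print substr($0,RSTART,RLENGTH)}' |
      sed '/^\s*$/d' |        # drop lines that contained no IPv4 address
      grep -v "127.0.0.1" |   # ignore loopback
      head -n1                # the first remaining address wins

[One caveat the script accepts as-is: on a multi-homed machine, `head -n1` simply takes whichever interface `ip` happens to list first.]
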
From 668d14e32d6b1f506d3ff34c41a561aa34855bfc Mon Sep 17 00:00:00 2001
From: Dan Schaper
Date: Mon, 23 Nov 2015 00:36:01 -0800
Subject: [PATCH 2/9] Add functions

Start to add functions for code reuse
---
 gravity.sh | 53 +++++++++++++++++++++++++++--------------------------
 1 file changed, 27 insertions(+), 26 deletions(-)

diff --git a/gravity.sh b/gravity.sh
index 62a2cf2e..eb4c150f 100755
--- a/gravity.sh
+++ b/gravity.sh
@@ -46,36 +46,35 @@ echo "** Neutrino emissions detected..."

 # Create the pihole resource directory if it doesn't exist. Future files will be stored here
 if [[ -d $piholeDir ]];then
-    :
+    # Temporary hack to allow non-root access to pihole directory
+    # Will update later, needed for existing installs, new installs should
+    # create this directory as non-root
+    sudo chmod 777 $piholeDir
+    find "$piholeDir" -type f -exec sudo chmod 666 {} \;
 else
     echo "** Creating pihole directory..."
-    sudo mkdir $piholeDir
+    mkdir $piholeDir
 fi

-# Loop through domain list. Download each one and remove commented lines (lines beginning with '# 'or '/') and blank lines
-for ((i = 0; i < "${#sources[@]}"; i++))
-do
-    url=${sources[$i]}
-    # Get just the domain from the URL
-    domain=$(echo "$url" | cut -d'/' -f3)
+###########################
+function gravity_patterncheck() {

-    # Save the file as list.#.domain
-    saveLocation=$piholeDir/list.$i.$domain.$justDomainsExtension
+    patternBuffer=$1

-    agent="Mozilla/10.0"
-
-    echo -n "Getting $domain list... "
-
-    # Use a case statement to download lists that need special cURL commands
-    # to complete properly and reset the user agent when required
-    case "$domain" in
-        "adblock.mahakala.is")
-            agent='Mozilla/5.0 (X11; Linux x86_64; rv:30.0) Gecko/20100101 Firefox/30.0'
-            cmd_ext="-e http://forum.xda-developers.com/"
-            ;;
-
-        "pgl.yoyo.org")
-    echo "** $numberOf domains being pulled in by gravity..."
+    # check if the patternbuffer is a non-zero length file
+    if [[ -s "$patternBuffer" ]];then
+        # Remove comments and print only the domain name
+        # Most of the lists downloaded are already in hosts file format but the spacing/formating is not contigious
+        # This helps with that and makes it easier to read
+        # It also helps with debugging so each stage of the script can be researched more in depth
+        awk '($1 !~ /^#/) { if (NF>1) {print $2} else {print $1}}' $patternBuffer | \
+        sed -nr -e 's/\.{2,}/./g' -e '/\./p' > $saveLocation
+        echo "Done."
+    else
+        # curl didn't download any host files, probably because of the date check
+        echo "Transporter logic detected no changes, pattern skipped..."
+    fi
+}

 # Remove carriage returns and preceding whitespace
 # not really needed anymore?
@@ -90,10 +89,12 @@ do
 echo "** Formatting domains into a HOSTS file..."
 cat $piholeDir/$eventHorizon | awk '{sub(/\r$/,""); print "'"$piholeIP"' " $0}' > $piholeDir/$accretionDisc
 # Copy the file over as /etc/pihole/gravity.list so dnsmasq can use it
-    sudo cp $piholeDir/$accretionDisc $adList
-    kill -HUP $(pidof dnsmasq)
+    cp $piholeDir/$accretionDisc $adList
+    sudo kill -HUP $(pidof dnsmasq)
 }

+gravity_spinup
+
 # Whitelist (if applicable) then remove duplicates and format for dnsmasq
 if [[ -r $whitelist ]];then
     # Remove whitelist entries
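
[Editor's note: the awk/sed pair that patch 2 folds into gravity_patterncheck is the workhorse of the whole script. It accepts both hosts-format lines ("IP domain") and bare-domain lines, drops comments, and collapses runs of dots. A self-contained sketch with invented sample data follows; GNU sed is assumed for the -r flag.]

    #!/usr/bin/env bash
    printf '%s\n' \
        '# a comment line' \
        '127.0.0.1 ads.example.com' \
        'tracker.example.net' \
        'bad..domain.example' |
    awk '($1 !~ /^#/) { if (NF>1) {print $2} else {print $1}}' |
    sed -nr -e 's/\.{2,}/./g' -e '/\./p'
    # prints:
    #   ads.example.com
    #   tracker.example.net
    #   bad.domain.example

[NF>1 is what distinguishes a hosts line (print field 2, the domain) from a bare domain list (print field 1); the final /\./p quietly discards anything that contains no dot at all.]
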
From a1e609d0059b1db0212494621cad0f824bb52315 Mon Sep 17 00:00:00 2001
From: Dan Schaper
Date: Mon, 23 Nov 2015 01:16:00 -0800
Subject: [PATCH 3/9] Ugly hack permissions

Temporary hack for /etc/pihole directory and file permissions to pull out
the sudo requirement. (The irony is that it requires sudo.) This will be
fixed later, but for now it's needed for existing installs so they don't
break.
---
 gravity.sh | 58 +++++++++++++++++++++++++++---------------------------
 1 file changed, 29 insertions(+), 29 deletions(-)

diff --git a/gravity.sh b/gravity.sh
index eb4c150f..f3b8f828 100755
--- a/gravity.sh
+++ b/gravity.sh
@@ -57,10 +57,10 @@ else
 fi

 ###########################
-function gravity_patterncheck() {
-
+# patternCheck - check to see if curl downloaded any new files, and then process those
+# files so they are in host format.
+function gravity_patternCheck() {
     patternBuffer=$1
-
     # check if the patternbuffer is a non-zero length file
     if [[ -s "$patternBuffer" ]];then
         # Remove comments and print only the domain name
@@ -76,6 +76,32 @@ function gravity_patterncheck() {
     fi
 }

+    # Use a case statement to download lists that need special cURL commands
+    # to complete properly and reset the user agent when required
+    case "$domain" in
+        "adblock.mahakala.is")
+            agent='Mozilla/5.0 (X11; Linux x86_64; rv:30.0) Gecko/20100101 Firefox/30.0'
+            cmd_ext="-e http://forum.xda-developers.com/"
+            ;;
+
+        "pgl.yoyo.org")
+            cmd_ext="-d mimetype=plaintext -d hostformat=hosts"
+            ;;
+
+        # Default is a simple request
+        *) cmd_ext=""
+    esac
+    gravity_transport $url $cmd_ext $agent
+done
+    # Compress files to singularity and white/blacklist
+    gravity_Schwarzchild
+}
+
+function gravity_advanced() {
+
+    numberOf=$(wc -l < $piholeDir/$andLight)
+    echo "** $numberOf domains being pulled in by gravity..."
+
 # Remove carriage returns and preceding whitespace
 # not really needed anymore?
 cp $piholeDir/$andLight $piholeDir/$supernova
@@ -95,30 +121,4 @@ function gravity_patterncheck() {

 gravity_spinup

-# Whitelist (if applicable) then remove duplicates and format for dnsmasq
-if [[ -r $whitelist ]];then
-    # Remove whitelist entries
-    numberOf=$(cat $whitelist | sed '/^\s*$/d' | wc -l)
-    plural=; [[ "$numberOf" != "1" ]] && plural=s
-    echo "** Whitelisting $numberOf domain${plural}..."
-
-    # Append a "$" to the end, prepend a "^" to the beginning, and
-    # replace "." with "\." of each line to turn each entry into a
-    # regexp so it can be parsed out with grep -x
-    awk -F '[# \t]' 'NF>0&&$1!="" {print "^"$1"$"}' $whitelist | sed 's/\./\\./g' > $latentWhitelist
-else
-    rm $latentWhitelist
-fi
-
-# Prevent our sources from being pulled into the hole
-plural=; [[ "${#sources[@]}" != "1" ]] && plural=s
-echo "** Whitelisting ${#sources[@]} ad list source${plural}..."
-for url in ${sources[@]}
-do
-    echo "$url" | awk -F '/' '{print "^"$3"$"}' | sed 's/\./\\./g' >> $latentWhitelist
-done
-
-# Remove whitelist entries from deduped list
-grep -vxf $latentWhitelist $piholeDir/$matter > $piholeDir/$andLight
-
 gravity_advanced
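
[Editor's note: alongside the permissions hack, patch 3 finishes splitting the cURL invocation from its per-source options. The case statement now yields only an options string (cmd_ext), so one shared download call can serve every source. A condensed sketch of that dispatch follows; the echo stands in for the real transport call, and only the two special-cased domains come from the patch.]

    #!/usr/bin/env bash
    for domain in adblock.mahakala.is pgl.yoyo.org adaway.org; do
        agent="Mozilla/10.0"
        case "$domain" in
            "adblock.mahakala.is")
                # this source expects a browser-like user agent and referer
                agent='Mozilla/5.0 (X11; Linux x86_64; rv:30.0) Gecko/20100101 Firefox/30.0'
                cmd_ext="-e http://forum.xda-developers.com/"
                ;;
            "pgl.yoyo.org")
                # ask the list generator for plain hosts-format output
                cmd_ext="-d mimetype=plaintext -d hostformat=hosts"
                ;;
            *) cmd_ext=""   # plain request for everything else
        esac
        echo "would run: curl -s $cmd_ext -A '$agent' http://$domain/..."
    done
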
From fe46afd08d2058899f34722e5113b67f8b8a320f Mon Sep 17 00:00:00 2001
From: Dan Schaper
Date: Mon, 23 Nov 2015 01:47:24 -0800
Subject: [PATCH 4/9] More functions

More refactoring
---
 gravity.sh | 73 +++++++++++++++++++++++++++---------------------------
 1 file changed, 37 insertions(+), 36 deletions(-)

diff --git a/gravity.sh b/gravity.sh
index f3b8f828..9cb017d0 100755
--- a/gravity.sh
+++ b/gravity.sh
@@ -42,6 +42,10 @@ if [[ -r $piholeDir/pihole.conf ]];then
     echo "** Local calibration requested..."
     . $piholeDir/pihole.conf
 fi
+###########################
+# collapse - begin formation of pihole
+function gravity_collapse() {
+
 echo "** Neutrino emissions detected..."

 # Create the pihole resource directory if it doesn't exist. Future files will be stored here
 if [[ -d $piholeDir ]];then
@@ -55,46 +59,42 @@ else
     echo "** Creating pihole directory..."
     mkdir $piholeDir
 fi
-
-###########################
-# patternCheck - check to see if curl downloaded any new files, and then process those
-# files so they are in host format.
-function gravity_patternCheck() {
-    patternBuffer=$1
-    # check if the patternbuffer is a non-zero length file
-    if [[ -s "$patternBuffer" ]];then
-        # Remove comments and print only the domain name
-        # Most of the lists downloaded are already in hosts file format but the spacing/formating is not contigious
-        # This helps with that and makes it easier to read
-        # It also helps with debugging so each stage of the script can be researched more in depth
-        awk '($1 !~ /^#/) { if (NF>1) {print $2} else {print $1}}' $patternBuffer | \
-        sed -nr -e 's/\.{2,}/./g' -e '/\./p' > $saveLocation
-        echo "Done."
-    else
-        # curl didn't download any host files, probably because of the date check
-        echo "Transporter logic detected no changes, pattern skipped..."
-    fi
 }

+# spinup - main gravity function
+function gravity_spinup() {
+
+# Loop through domain list. Download each one and remove commented lines (lines beginning with '# 'or '/') and blank lines
+for ((i = 0; i < "${#sources[@]}"; i++))
+do
+    url=${sources[$i]}
+    # Get just the domain from the URL
+# Whitelist (if applicable) domains
+if [[ -r $whitelist ]];then
+    # Remove whitelist entries
+    numberOf=$(cat $whitelist | sed '/^\s*$/d' | wc -l)
+    plural=; [[ "$numberOf" != "1" ]] && plural=s
+    echo "** Whitelisting $numberOf domain${plural}..."
+
+    # Append a "$" to the end, prepend a "^" to the beginning, and
+    # replace "." with "\." of each line to turn each entry into a
+    # regexp so it can be parsed out with grep -x
+    awk -F '[# \t]' 'NF>0&&$1!="" {print "^"$1"$"}' $whitelist | sed 's/\./\\./g' > $latentWhitelist
+else
+    rm $latentWhitelist
+fi
+
+# Prevent our sources from being pulled into the hole
+plural=; [[ "${#sources[@]}" != "1" ]] && plural=s
+echo "** Whitelisting ${#sources[@]} ad list source${plural}..."
+for url in ${sources[@]}
+do
+    echo "$url" | awk -F '/' '{print "^"$3"$"}' | sed 's/\./\\./g' >> $latentWhitelist
 done
+
+# Remove whitelist entries from list
+grep -vxf $latentWhitelist $piholeDir/$matter > $piholeDir/$andLight
+
 }

 function gravity_advanced() {
@@ -120,5 +120,6 @@ function gravity_advanced() {
 }

 gravity_spinup
-
+gravity_transport
+gravity_Schwartzchild
 gravity_advanced
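
[Editor's note: the whitelist subtraction that patch 4 relocates deserves a close look. Each whitelist entry becomes an anchored, dot-escaped regexp so `grep -vx` can remove exact-domain matches only. A self-contained sketch with invented domains and temp files:]

    #!/usr/bin/env bash
    whitelist=$(mktemp); matter=$(mktemp); latent=$(mktemp)
    printf '%s\n' 'good.example.com' 'cdn.example.net' > "$whitelist"
    printf '%s\n' 'ads.example.org' 'good.example.com' 'cdn.example.net' > "$matter"

    # -F '[# \t]' splits off trailing comments; NF>0 skips blank lines;
    # the sed escapes dots so "." cannot match arbitrary characters
    awk -F '[# \t]' 'NF>0&&$1!="" {print "^"$1"$"}' "$whitelist" | sed 's/\./\\./g' > "$latent"

    grep -vxf "$latent" "$matter"   # prints only ads.example.org
    rm -f "$whitelist" "$matter" "$latent"

[The dot-escaping matters: unescaped, the pattern good.example.com would also match lookalike lines such as goodXexample.com, and -x plus the anchors restrict each pattern to whole-line matches.]
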
---
 gravity.sh | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 90 insertions(+), 6 deletions(-)

diff --git a/gravity.sh b/gravity.sh
index 9cb017d0..f8f91bd3 100755
--- a/gravity.sh
+++ b/gravity.sh
@@ -1,4 +1,7 @@
 #!/usr/bin/env bash
+# Pi-hole: A black hole for Internet advertisements
+# (c) 2015 by Jacob Salmela GPL 2.0
+# Network-wide ad blocking via your Raspberry Pi
 # http://pi-hole.net
 # Compiles a list of ad-serving domains by downloading them from multiple sources
 piholeIPfile=/tmp/piholeIP
@@ -45,7 +48,6 @@ fi
 ###########################
 # collapse - begin formation of pihole
 function gravity_collapse() {
-
 echo "** Neutrino emissions detected..."

 # Create the pihole resource directory if it doesn't exist. Future files will be stored here
@@ -61,6 +63,44 @@ else
 fi
 }

+# patternCheck - check to see if curl downloaded any new files, and then process those
+# files so they are in host format.
+function gravity_patternCheck() {
+    patternBuffer=$1
+    # check if the patternbuffer is a non-zero length file
+    if [[ -s "$patternBuffer" ]];then
+        # Some of the blocklists are copyright, they need to be downloaded
+        # and stored as is. They can be processed for content after they
+        # have been saved.
+        cp $patternBuffer $saveLocation
+        echo "Done."
+    else
+        # curl didn't download any host files, probably because of the date check
+        echo "Transporter logic detected no changes, pattern skipped..."
+    fi
+}
+
+# transport - curl the specified url with any needed command extentions, then patternCheck
+function gravity_transport() {
+    url=$1
+    cmd_ext=$2
+    agent=$3
+    # tmp file, so we don't have to store the (long!) lists in RAM
+    patternBuffer=$(mktemp)
+    heisenbergCompensator=""
+    if [[ -r $saveLocation ]]; then
+        # if domain has been saved, add file for date check to only download newer
+        heisenbergCompensator="-z $saveLocation"
+    fi
+    # Silently curl url
+    curl -s $cmd_ext $heisenbergCompensator -A "$agent" $url > $patternBuffer
+
+    gravity_patternCheck $patternBuffer
+
+    # Cleanup
+    rm -f $patternBuffer
+
+}
 # spinup - main gravity function
 function gravity_spinup() {

@@ -69,6 +109,48 @@ for ((i = 0; i < "${#sources[@]}"; i++))
 do
     url=${sources[$i]}
     # Get just the domain from the URL
+    domain=$(echo "$url" | cut -d'/' -f3)
+
+    # Save the file as list.#.domain
+    saveLocation=$piholeDir/list.$i.$domain.$justDomainsExtension
+
+    agent="Mozilla/10.0"
+
+    echo -n "Getting $domain list... "
+
+    # Use a case statement to download lists that need special cURL commands
+    # to complete properly and reset the user agent when required
+    case "$domain" in
+        "adblock.mahakala.is")
+            agent='Mozilla/5.0 (X11; Linux x86_64; rv:30.0) Gecko/20100101 Firefox/30.0'
+            cmd_ext="-e http://forum.xda-developers.com/"
+            ;;
+
+        "pgl.yoyo.org")
+            cmd_ext="-d mimetype=plaintext -d hostformat=hosts"
+            ;;
+
+        # Default is a simple request
+        *) cmd_ext=""
+    esac
+    gravity_transport $url $cmd_ext $agent
+done
+}
+
+# Schwarzchild - aggregate domains to one list, and then white/blacklist unified list
+function gravity_Schwarzchild() {
+
+# Find all files with the .domains extension and compile them into one file and remove CRs
+echo "** Aggregating list of domains..."
+find $piholeDir/ -type f -name "*.$justDomainsExtension" -exec cat {} \; | tr -d '\r' > $piholeDir/$matter
+
+# Append blacklist entries if they exist
+if [[ -r $blacklist ]];then
+    numberOf=$(cat $blacklist | sed '/^\s*$/d' | wc -l)
+    echo "** Blacklisting $numberOf domain(s)..."
+    cat $blacklist >> $piholeDir/$matter
+fi
+
 # Whitelist (if applicable) domains
 if [[ -r $whitelist ]];then
     # Remove whitelist entries
@@ -102,9 +184,12 @@ function gravity_advanced() {
 numberOf=$(wc -l < $piholeDir/$andLight)
 echo "** $numberOf domains being pulled in by gravity..."

-    # Remove carriage returns and preceding whitespace
-    # not really needed anymore?
-    cp $piholeDir/$andLight $piholeDir/$supernova
+    # Remove comments and print only the domain name
+    # Most of the lists downloaded are already in hosts file format but the spacing/formating is not contigious
+    # This helps with that and makes it easier to read
+    # It also helps with debugging so each stage of the script can be researched more in depth
+    awk '($1 !~ /^#/) { if (NF>1) {print $2} else {print $1}}' $piholeDir/$andLight | \
+    sed -nr -e 's/\.{2,}/./g' -e '/\./p' > $piholeDir/$supernova

 # Sort and remove duplicates
 sort -u $piholeDir/$supernova > $piholeDir/$eventHorizon
@@ -120,6 +205,5 @@ function gravity_advanced() {
 }

 gravity_spinup
-gravity_transport
-gravity_Schwartzchild
+gravity_Schwarzchild
 gravity_advanced

From 4f1bb12d36b64c239cb1ed2bb3f117bb64995498 Mon Sep 17 00:00:00 2001
From: Dan Schaper
Date: Mon, 23 Nov 2015 03:11:16 -0800
Subject: [PATCH 6/9] More functions

Splitting into more functions
---
 gravity.sh | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/gravity.sh b/gravity.sh
index f8f91bd3..4a57112f 100755
--- a/gravity.sh
+++ b/gravity.sh
@@ -179,6 +179,19 @@ grep -vxf $latentWhitelist $piholeDir/$matter > $piholeDir/$andLight

 }

+function gravity_unique() {
+    # Sort and remove duplicates
+    sort -u $piholeDir/$supernova > $piholeDir/$eventHorizon
+    numberOf=$(wc -l < $piholeDir/$eventHorizon)
+    echo "** $numberOf unique domains trapped in the event horizon."
+}
+function gravity_hostFormat() {
+    # Format domain list as "192.168.x.x domain.com"
+    echo "** Formatting domains into a HOSTS file..."
+    cat $piholeDir/$eventHorizon | awk '{sub(/\r$/,""); print "'"$piholeIP"' " $0}' > $piholeDir/$accretionDisc
+    # Copy the file over as /etc/pihole/gravity.list so dnsmasq can use it
+    cp $piholeDir/$accretionDisc $adList
+}
 function gravity_advanced() {

     numberOf=$(wc -l < $piholeDir/$andLight)
@@ -191,19 +204,11 @@ function gravity_advanced() {
     awk '($1 !~ /^#/) { if (NF>1) {print $2} else {print $1}}' $piholeDir/$andLight | \
     sed -nr -e 's/\.{2,}/./g' -e '/\./p' > $piholeDir/$supernova

-    # Sort and remove duplicates
-    sort -u $piholeDir/$supernova > $piholeDir/$eventHorizon
-    numberOf=$(wc -l < $piholeDir/$eventHorizon)
-    echo "** $numberOf unique domains trapped in the event horizon."
-
-    # Format domain list as "192.168.x.x domain.com"
-    echo "** Formatting domains into a HOSTS file..."
-    cat $piholeDir/$eventHorizon | awk '{sub(/\r$/,""); print "'"$piholeIP"' " $0}' > $piholeDir/$accretionDisc
-    # Copy the file over as /etc/pihole/gravity.list so dnsmasq can use it
-    cp $piholeDir/$accretionDisc $adList
     sudo kill -HUP $(pidof dnsmasq)
 }

 gravity_spinup
 gravity_Schwarzchild
+gravity_unique
+gravity_hostFormat
 gravity_advanced
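
[Editor's note: gravity_transport, restored in patch 5, leans on cURL's time-conditional download to avoid re-fetching unchanged lists. Here is a minimal sketch of that pattern outside the script; the URL and paths are placeholders, not values from the patch.]

    #!/usr/bin/env bash
    url="http://example.com/hosts.txt"   # placeholder source
    saveLocation="/tmp/list.example"     # placeholder saved copy

    patternBuffer=$(mktemp)              # temp file keeps long lists out of memory
    heisenbergCompensator=""
    if [[ -r $saveLocation ]]; then
        # -z makes curl send If-Modified-Since based on the file's mtime;
        # an unchanged server answers 304 and the body stays empty
        heisenbergCompensator="-z $saveLocation"
    fi
    curl -s $heisenbergCompensator -A "Mozilla/10.0" "$url" > "$patternBuffer"

    if [[ -s $patternBuffer ]]; then
        echo "fetched $(wc -l < "$patternBuffer") lines"
    else
        echo "no change upstream; nothing downloaded"
    fi
    rm -f "$patternBuffer"

[An empty buffer is exactly what gravity_patternCheck's -s test keys on, which is why the "transporter logic" message means "not modified" rather than "error".]
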
---
 gravity.sh | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/gravity.sh b/gravity.sh
index 4a57112f..af1e98dc 100755
--- a/gravity.sh
+++ b/gravity.sh
@@ -137,7 +137,7 @@ do
 done
 }

-# Schwarzchild - aggregate domains to one list, and then white/blacklist unified list
+# Schwarzchild - aggregate domains to one list and add blacklisted domains
 function gravity_Schwarzchild() {

 # Find all files with the .domains extension and compile them into one file and remove CRs
@@ -150,6 +150,9 @@ if [[ -r $blacklist ]];then
     echo "** Blacklisting $numberOf domain(s)..."
     cat $blacklist >> $piholeDir/$matter
 fi
+}
+
+function gravity_pulsar() {

 # Whitelist (if applicable) domains
 if [[ -r $whitelist ]];then
@@ -176,7 +179,6 @@ done

 # Remove whitelist entries from list
 grep -vxf $latentWhitelist $piholeDir/$matter > $piholeDir/$andLight
-
 }

 function gravity_unique() {
@@ -194,9 +196,6 @@ function gravity_hostFormat() {
 }

 function gravity_advanced() {
-
-    numberOf=$(wc -l < $piholeDir/$andLight)
-    echo "** $numberOf domains being pulled in by gravity..."
-
     # Remove comments and print only the domain name
     # Most of the lists downloaded are already in hosts file format but the spacing/formating is not contigious
     # This helps with that and makes it easier to read
@@ -204,11 +203,16 @@ function gravity_advanced() {
     awk '($1 !~ /^#/) { if (NF>1) {print $2} else {print $1}}' $piholeDir/$andLight | \
     sed -nr -e 's/\.{2,}/./g' -e '/\./p' > $piholeDir/$supernova

+    numberOf=$(wc -l < $piholeDir/$supernova)
+    echo "** $numberOf domains being pulled in by gravity..."
+
+    gravity_unique
     sudo kill -HUP $(pidof dnsmasq)
 }

 gravity_spinup
 gravity_Schwarzchild
-gravity_unique
+gravity_pulsar
 gravity_hostFormat
 gravity_advanced
+

From e2b518568ea1477068a90587c4dedf0d5859ee9b Mon Sep 17 00:00:00 2001
From: Dan Schaper
Date: Mon, 23 Nov 2015 12:39:47 -0800
Subject: [PATCH 8/9] Forgot to start the directory check function

---
 gravity.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gravity.sh b/gravity.sh
index af1e98dc..e3337e42 100755
--- a/gravity.sh
+++ b/gravity.sh
@@ -210,6 +210,7 @@ function gravity_advanced() {
     sudo kill -HUP $(pidof dnsmasq)
 }

+gravity_collapse
 gravity_spinup
 gravity_Schwarzchild
 gravity_pulsar
 gravity_hostFormat
 gravity_advanced
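
[Editor's note: after patches 7 and 8 the main flow reads as a straight pipeline: gravity_collapse, gravity_spinup, gravity_Schwarzchild, gravity_pulsar, gravity_hostFormat, gravity_advanced. One wrinkle worth flagging: gravity_hostFormat is still invoked before gravity_advanced, yet it formats $eventHorizon, which gravity_advanced (via gravity_unique) produces, so the call order mirrors the file layout more than the data flow. The HOSTS formatting itself is a one-liner worth seeing in isolation; the IP and domains below are invented.]

    #!/usr/bin/env bash
    piholeIP="192.168.1.2"   # stand-in; the real script detects this
    printf 'ads.example.com\r\ntracker.example.net\n' |
    awk '{sub(/\r$/,""); print "'"$piholeIP"' " $0}'
    # prints:
    #   192.168.1.2 ads.example.com
    #   192.168.1.2 tracker.example.net

[The quote dance "'"$piholeIP"' " steps out of the single-quoted awk program just long enough for the shell to splice the address in, and sub(/\r$/,"") strips any Windows-style carriage return first.]
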
From 20399b82620a67a2322a4da51feb79e1620062e4 Mon Sep 17 00:00:00 2001
From: Dan Schaper
Date: Mon, 23 Nov 2015 13:12:11 -0800
Subject: [PATCH 9/9] Build array of active domains to concatenate

Fix #86

Builds an array of active domains, then loops through that list to pick
files to concatenate after truncating Matter.
---
 gravity.sh | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/gravity.sh b/gravity.sh
index e3337e42..ae20d439 100755
--- a/gravity.sh
+++ b/gravity.sh
@@ -113,6 +113,7 @@ do

     # Save the file as list.#.domain
     saveLocation=$piholeDir/list.$i.$domain.$justDomainsExtension
+    activeDomains[$i]=$saveLocation

     agent="Mozilla/10.0"

@@ -140,9 +141,13 @@ done
 # Schwarzchild - aggregate domains to one list and add blacklisted domains
 function gravity_Schwarzchild() {

-# Find all files with the .domains extension and compile them into one file and remove CRs
+# Find all active domains and compile them into one file and remove CRs
 echo "** Aggregating list of domains..."
-find $piholeDir/ -type f -name "*.$justDomainsExtension" -exec cat {} \; | tr -d '\r' > $piholeDir/$matter
+truncate -s 0 $piholeDir/$matter
+for i in "${activeDomains[@]}"
+do
+    cat $i |tr -d '\r' >> $piholeDir/$matter
+done

 # Append blacklist entries if they exist
 if [[ -r $blacklist ]];then
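
[Editor's note: patch 9's fix for stale lists is that only files recorded in activeDomains during this run are concatenated, and $matter is truncated first so repeated runs don't accumulate. A minimal sketch of the same pattern with invented files:]

    #!/usr/bin/env bash
    piholeDir=$(mktemp -d)
    printf 'a.example\r\n' > "$piholeDir/list.0.one.domains"
    printf 'b.example\n'   > "$piholeDir/list.1.two.domains"

    # in gravity.sh each entry is set as its list is saved: activeDomains[$i]=$saveLocation
    activeDomains=("$piholeDir/list.0.one.domains" "$piholeDir/list.1.two.domains")

    matter=$piholeDir/pihole.0.matter.txt
    truncate -s 0 "$matter"                 # start clean; the >> below appends
    for i in "${activeDomains[@]}"; do
        cat "$i" | tr -d '\r' >> "$matter"  # strip CRs as each file is folded in
    done
    cat "$matter"   # a.example, b.example, and no stray leftover lists

[The old find-based aggregation swept up every *.domains file in /etc/pihole, including lists from sources that had since been removed from the sources array; the array closes that gap, which is the issue (#86) the commit references.]
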