diff --git a/gravity.sh b/gravity.sh index 71b194dd..2bef29b7 100755 --- a/gravity.sh +++ b/gravity.sh @@ -72,30 +72,33 @@ fi # Loop through domain list. Download each one and remove commented lines (lines beginning with '# 'or '/') and blank lines for ((i = 0; i < "${#sources[@]}"; i++)) do + url=${sources[$i]} # Get just the domain from the URL - domain=$(echo "${sources[$i]}" | cut -d'/' -f3) + domain=$(echo "$url" | cut -d'/' -f3) # Save the file as list.#.domain - saveLocation=$origin/"list"."$i"."$domain" + saveLocation=$origin/list.$i.$domain.$justDomainsExtension + echo -n "Getting $domain list... " # Use a case statement to download lists that need special cURL commands to complete properly case "$domain" in - "adblock.mahakala.is") data=$(curl -s -A 'Mozilla/5.0 (X11; Linux x86_64; rv:30.0) Gecko/20100101 Firefox/30.0' -e http://forum.xda-developers.com/ -z $saveLocation."$justDomainsExtension" "${sources[$i]}");; + "adblock.mahakala.is") data=$(curl -s -A 'Mozilla/5.0 (X11; Linux x86_64; rv:30.0) Gecko/20100101 Firefox/30.0' -e http://forum.xda-developers.com/ -z $saveLocation $url);; - "pgl.yoyo.org") data=$(curl -s -d mimetype=plaintext -d hostformat=hosts -z $saveLocation."$justDomainsExtension" "${sources[$i]}");; + "pgl.yoyo.org") data=$(curl -s -d mimetype=plaintext -d hostformat=hosts -z $saveLocation $url);; - *) data=$(curl -s -z $saveLocation."$justDomainsExtension" -A "Mozilla/10.0" "${sources[$i]}");; + *) data=$(curl -s -z $saveLocation -A "Mozilla/10.0" $url);; esac if [[ -n "$data" ]];then - echo "Getting $domain list..." # Remove comments and print only the domain name # Most of the lists downloaded are already in hosts file format but the spacing/formating is not contigious # This helps with that and makes it easier to read # It also helps with debugging so each stage of the script can be researched more in depth - echo "$data" | awk 'NF {if ($1 !~ "#") { if (NF>1) {print $2} else {print $1}}}' > $saveLocation."$justDomainsExtension" + echo "$data" | awk 'NF {if ($1 !~ "#") { if (NF>1) {print $2} else {print $1}}}' | \ + sed -e 's/^[. \t]*//' -e 's/\.\.\+/./g' -e 's/[. \t]*$//' | grep "\." > $saveLocation + echo "Done." else - echo "Skipping $domain list because it does not have any new entries..." + echo "Skipping list because it does not have any new entries." fi done @@ -105,11 +108,11 @@ find $origin/ -type f -name "*.$justDomainsExtension" -exec cat {} \; | tr -d '\ # Append blacklist entries if they exist if [[ -f $blacklist ]];then - numberOf=$(cat $blacklist | sed '/^\s*$/d' | wc -l) - echo "** Blacklisting $numberOf domain(s)..." - cat $blacklist >> $origin/$matter + numberOf=$(cat $blacklist | sed '/^\s*$/d' | wc -l) + echo "** Blacklisting $numberOf domain(s)..." + cat $blacklist >> $origin/$matter else - : + : fi function gravity_advanced() @@ -135,13 +138,24 @@ function gravity_advanced() if [[ -f $whitelist ]];then # Remove whitelist entries numberOf=$(cat $whitelist | sed '/^\s*$/d' | wc -l) - echo "** Whitelisting $numberOf domain(s)..." - # Append a "$" to the end of each line so it can be parsed out with grep -w - echo -n "^$" > $latentWhitelist - awk -F '[# \t]' 'NF>0&&$1!="" {print $1"$"}' $whitelist > $latentWhitelist - cat $origin/$matter | grep -vwf $latentWhitelist > $origin/$andLight - gravity_advanced + plural=; [[ "$numberOf" != "1" ]] && plural=s + echo "** Whitelisting $numberOf domain${plural}..." + # Append a "$" to the end, prepend a "^" to the beginning, and + # replace "." with "\." of each line to turn each entry into a + # regexp so it can be parsed out with grep -x + awk -F '[# \t]' 'NF>0&&$1!="" {print "^"$1"$"}' $whitelist | sed 's/\./\\./g' > $latentWhitelist else - cat $origin/$matter > $origin/$andLight - gravity_advanced + rm $latentWhitelist fi + +# Prevent our sources from being pulled into the hole +plural=; [[ "${#sources[@]}" != "1" ]] && plural=s +echo "** Whitelisting ${#sources[@]} ad list source${plural}..." +for url in ${sources[@]} +do + echo "$url" | awk -F '/' '{print "^"$3"$"}' | sed 's/\./\\./g' >> $latentWhitelist +done + +grep -vxf $latentWhitelist $origin/$matter > $origin/$andLight + +gravity_advanced