Merge pull request #38 from korhadris/master

Fixes #32 and fixes #35
This commit is contained in:
Jacob Salmela 2015-09-06 10:11:39 -05:00
commit e19a6c3624

View file

@ -72,30 +72,33 @@ fi
# Loop through domain list. Download each one and remove commented lines (lines beginning with '#' or '/') and blank lines # Loop through domain list. Download each one and remove commented lines (lines beginning with '#' or '/') and blank lines
for ((i = 0; i < "${#sources[@]}"; i++)) for ((i = 0; i < "${#sources[@]}"; i++))
do do
url=${sources[$i]}
# Get just the domain from the URL # Get just the domain from the URL
domain=$(echo "${sources[$i]}" | cut -d'/' -f3) domain=$(echo "$url" | cut -d'/' -f3)
# Save the file as list.#.domain # Save the file as list.#.domain
saveLocation=$origin/"list"."$i"."$domain" saveLocation=$origin/list.$i.$domain.$justDomainsExtension
echo -n "Getting $domain list... "
# Use a case statement to download lists that need special cURL commands to complete properly # Use a case statement to download lists that need special cURL commands to complete properly
case "$domain" in case "$domain" in
"adblock.mahakala.is") data=$(curl -s -A 'Mozilla/5.0 (X11; Linux x86_64; rv:30.0) Gecko/20100101 Firefox/30.0' -e http://forum.xda-developers.com/ -z $saveLocation."$justDomainsExtension" "${sources[$i]}");; "adblock.mahakala.is") data=$(curl -s -A 'Mozilla/5.0 (X11; Linux x86_64; rv:30.0) Gecko/20100101 Firefox/30.0' -e http://forum.xda-developers.com/ -z $saveLocation $url);;
"pgl.yoyo.org") data=$(curl -s -d mimetype=plaintext -d hostformat=hosts -z $saveLocation."$justDomainsExtension" "${sources[$i]}");; "pgl.yoyo.org") data=$(curl -s -d mimetype=plaintext -d hostformat=hosts -z $saveLocation $url);;
*) data=$(curl -s -z $saveLocation."$justDomainsExtension" -A "Mozilla/10.0" "${sources[$i]}");; *) data=$(curl -s -z $saveLocation -A "Mozilla/10.0" $url);;
esac esac
if [[ -n "$data" ]];then if [[ -n "$data" ]];then
echo "Getting $domain list..."
# Remove comments and print only the domain name # Remove comments and print only the domain name
# Most of the lists downloaded are already in hosts file format but the spacing/formatting is not contiguous # Most of the lists downloaded are already in hosts file format but the spacing/formatting is not contiguous
# This helps with that and makes it easier to read # This helps with that and makes it easier to read
# It also helps with debugging so each stage of the script can be researched more in depth # It also helps with debugging so each stage of the script can be researched more in depth
echo "$data" | awk 'NF {if ($1 !~ "#") { if (NF>1) {print $2} else {print $1}}}' > $saveLocation."$justDomainsExtension" echo "$data" | awk 'NF {if ($1 !~ "#") { if (NF>1) {print $2} else {print $1}}}' | \
sed -e 's/^[. \t]*//' -e 's/\.\.\+/./g' -e 's/[. \t]*$//' | grep "\." > $saveLocation
echo "Done."
else else
echo "Skipping $domain list because it does not have any new entries..." echo "Skipping list because it does not have any new entries."
fi fi
done done
@ -135,13 +138,24 @@ function gravity_advanced()
if [[ -f $whitelist ]];then if [[ -f $whitelist ]];then
# Remove whitelist entries # Remove whitelist entries
numberOf=$(cat $whitelist | sed '/^\s*$/d' | wc -l) numberOf=$(cat $whitelist | sed '/^\s*$/d' | wc -l)
echo "** Whitelisting $numberOf domain(s)..." plural=; [[ "$numberOf" != "1" ]] && plural=s
# Append a "$" to the end of each line so it can be parsed out with grep -w echo "** Whitelisting $numberOf domain${plural}..."
echo -n "^$" > $latentWhitelist # Append a "$" to the end, prepend a "^" to the beginning, and
awk -F '[# \t]' 'NF>0&&$1!="" {print $1"$"}' $whitelist > $latentWhitelist # replace "." with "\." of each line to turn each entry into a
cat $origin/$matter | grep -vwf $latentWhitelist > $origin/$andLight # regexp so it can be parsed out with grep -x
gravity_advanced awk -F '[# \t]' 'NF>0&&$1!="" {print "^"$1"$"}' $whitelist | sed 's/\./\\./g' > $latentWhitelist
else else
cat $origin/$matter > $origin/$andLight rm $latentWhitelist
gravity_advanced
fi fi
# Prevent our sources from being pulled into the hole
plural=; [[ "${#sources[@]}" != "1" ]] && plural=s
echo "** Whitelisting ${#sources[@]} ad list source${plural}..."
for url in ${sources[@]}
do
echo "$url" | awk -F '/' '{print "^"$3"$"}' | sed 's/\./\\./g' >> $latentWhitelist
done
grep -vxf $latentWhitelist $origin/$matter > $origin/$andLight
gravity_advanced