From 54bfaa28c1ac41fc8839c935003a1900320195c2 Mon Sep 17 00:00:00 2001 From: MMotti Date: Mon, 3 Jun 2019 19:23:27 +0100 Subject: [PATCH 1/6] Fix for regexp queries through pihole -q Taking inspiration from: https://github.com/pi-hole/pi-hole/pull/2346 We will use awk to iterate through each regexp and print any that match the domain query. Signed-off-by: MMotti --- advanced/Scripts/query.sh | 64 +++++++++++++++------------------------ 1 file changed, 25 insertions(+), 39 deletions(-) diff --git a/advanced/Scripts/query.sh b/advanced/Scripts/query.sh index 9cb298df..840c03da 100755 --- a/advanced/Scripts/query.sh +++ b/advanced/Scripts/query.sh @@ -12,7 +12,7 @@ # Globals piholeDir="/etc/pihole" gravityDBfile="${piholeDir}/gravity.db" -wildcardlist="/etc/dnsmasq.d/03-pihole-wildcard.conf" +regexlist="/etc/pihole/regex.list" options="$*" adlist="" all="" @@ -23,27 +23,11 @@ matchType="match" colfile="/opt/pihole/COL_TABLE" source "${colfile}" -# Print each subdomain -# e.g: foo.bar.baz.com = "foo.bar.baz.com bar.baz.com baz.com com" -processWildcards() { - IFS="." read -r -a array <<< "${1}" - for (( i=${#array[@]}-1; i>=0; i-- )); do - ar="" - for (( j=${#array[@]}-1; j>${#array[@]}-i-2; j-- )); do - if [[ $j == $((${#array[@]}-1)) ]]; then - ar="${array[$j]}" - else - ar="${array[$j]}.${ar}" - fi - done - echo "${ar}" - done -} - +# Scan an array of files for matching strings # Scan an array of files for matching strings scanList(){ # Escape full stops - local domain="${1//./\\.}" lists="${2}" type="${3:-}" + local domain="${1}" esc_domain="${1//./\\.}" lists="${2}" type="${3:-}" # Prevent grep from printing file path cd "$piholeDir" || exit 1 @@ -54,9 +38,9 @@ scanList(){ # /dev/null forces filename to be printed when only one list has been generated # shellcheck disable=SC2086 case "${type}" in - "exact" ) grep -i -E -l "(^|(?/dev/null;; - "wc" ) grep -i -o -m 1 "/${domain}/" ${lists} 2>/dev/null;; - * ) grep -i "${domain}" ${lists} /dev/null 2>/dev/null;; + "exact" ) grep -i -E -l "(^|(?/dev/null;; + "rx" ) awk 'NR==FNR{regexps[$0]}{for (r in regexps)if($0 ~ r)print r}' ${lists} <(echo "$domain") 2>/dev/null;; + * ) grep -i "${esc_domain}" ${lists} /dev/null 2>/dev/null;; esac } @@ -145,24 +129,26 @@ scanDatabaseTable() { scanDatabaseTable "${domainQuery}" "whitelist" "${exact}" scanDatabaseTable "${domainQuery}" "blacklist" "${exact}" -# Scan Wildcards -if [[ -e "${wildcardlist}" ]]; then - # Determine all subdomains, domain and TLDs - mapfile -t wildcards <<< "$(processWildcards "${domainQuery}")" - for match in "${wildcards[@]}"; do - # Search wildcard list for matches - mapfile -t results <<< "$(scanList "${match}" "${wildcardlist}" "wc")" - if [[ -n "${results[*]}" ]]; then - if [[ -z "${wcMatch:-}" ]] && [[ -z "${blockpage}" ]]; then - wcMatch=true - echo " ${matchType^} found in ${COL_BOLD}Wildcards${COL_NC}:" - fi - case "${blockpage}" in - true ) echo "π ${wildcardlist##*/}"; exit 0;; - * ) echo " *.${match}";; - esac +# Scan Regex +if [[ -e "${regexlist}" ]]; then + # Return portion(s) of string that is found in the regex list + mapfile -t results <<< "$(scanList "${domainQuery}" "${regexlist}" "rx")" + + if [[ -n "${results[*]}" ]]; then + # A result is found + str="Phrase ${matchType}ed within ${COL_BOLD}regex list${COL_NC}" + result="${COL_BOLD}$(printf '%s\n' ${results[*]})${COL_NC}" + + if [[ -z "${blockpage}" ]]; then + wcMatch=true + echo " $str" fi - done + + case "${blockpage}" in + true ) echo "π ${regexlist##*/}"; exit 0;; + * ) awk '{print " "$0}' <<< "${result}";; + esac + fi fi # Get version sorted *.domains filenames (without dir path) From 97df6d7415bfc2daf10b401a8c8712a47af0ab2d Mon Sep 17 00:00:00 2001 From: MMotti Date: Mon, 3 Jun 2019 19:55:29 +0100 Subject: [PATCH 2/6] Stickler fix Signed-off-by: MMotti --- advanced/Scripts/query.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/advanced/Scripts/query.sh b/advanced/Scripts/query.sh index 840c03da..827f97a3 100755 --- a/advanced/Scripts/query.sh +++ b/advanced/Scripts/query.sh @@ -137,7 +137,7 @@ if [[ -e "${regexlist}" ]]; then if [[ -n "${results[*]}" ]]; then # A result is found str="Phrase ${matchType}ed within ${COL_BOLD}regex list${COL_NC}" - result="${COL_BOLD}$(printf '%s\n' ${results[*]})${COL_NC}" + result="${COL_BOLD}$(IFS=$'\n'; echo "${results[*]}")${COL_NC}" if [[ -z "${blockpage}" ]]; then wcMatch=true From 09532638d5d6cbbf82aa44ec0e04a67e64402905 Mon Sep 17 00:00:00 2001 From: MMotti Date: Mon, 3 Jun 2019 23:59:58 +0100 Subject: [PATCH 3/6] Read from DB instead of regex.list Signed-off-by: MMotti --- advanced/Scripts/query.sh | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/advanced/Scripts/query.sh b/advanced/Scripts/query.sh index 827f97a3..a4cf02b1 100755 --- a/advanced/Scripts/query.sh +++ b/advanced/Scripts/query.sh @@ -39,7 +39,7 @@ scanList(){ # shellcheck disable=SC2086 case "${type}" in "exact" ) grep -i -E -l "(^|(?/dev/null;; - "rx" ) awk 'NR==FNR{regexps[$0]}{for (r in regexps)if($0 ~ r)print r}' ${lists} <(echo "$domain") 2>/dev/null;; + "rx" ) awk 'NR==FNR{regexps[$0]}{for (r in regexps)if($0 ~ r)print r}' <(echo "$lists") <(echo "$domain") 2>/dev/null;; * ) grep -i "${esc_domain}" ${lists} /dev/null 2>/dev/null;; esac } @@ -97,8 +97,9 @@ scanDatabaseTable() { # behavior. The "ESCAPE '\'" clause specifies that an underscore preceded by an '\' should be matched # as a literal underscore character. We pretreat the $domain variable accordingly to escape underscores. case "${type}" in - "exact" ) querystr="SELECT domain FROM vw_${table} WHERE domain = '${domain}'";; - * ) querystr="SELECT domain FROM vw_${table} WHERE domain LIKE '%${domain//_/\\_}%' ESCAPE '\\'";; + "exact" ) querystr="SELECT domain FROM vw_${table} WHERE domain = '${domain}'";; + "retrievetable" ) querystr="SELECT domain FROM vw_${table}";; + * ) querystr="SELECT domain FROM vw_${table} WHERE domain LIKE '%${domain//_/\\_}%' ESCAPE '\\'";; esac # Send prepared query to gravity database @@ -108,6 +109,13 @@ scanDatabaseTable() { return fi + # If we are only retrieving the table + # Just output and return + if [[ "${type}" == "retrievetable" ]]; then + echo "${result[*]}" + return + fi + # Mark domain as having been white-/blacklist matched (global variable) wbMatch=true @@ -129,14 +137,21 @@ scanDatabaseTable() { scanDatabaseTable "${domainQuery}" "whitelist" "${exact}" scanDatabaseTable "${domainQuery}" "blacklist" "${exact}" -# Scan Regex -if [[ -e "${regexlist}" ]]; then +# Scan Regex table +regexlist=$(scanDatabaseTable "" "regex" "retrievetable") + +if [[ -n "${regexlist}" ]]; then # Return portion(s) of string that is found in the regex list mapfile -t results <<< "$(scanList "${domainQuery}" "${regexlist}" "rx")" - if [[ -n "${results[*]}" ]]; then - # A result is found - str="Phrase ${matchType}ed within ${COL_BOLD}regex list${COL_NC}" + # If a result is found + if [[ -n "${results[*]}" ]]; then + # Count the matches + regexCount=${#results[@]} + # Determine plural string + [[ $regexCount -gt 1 ]] && plu="es" + # Form output strings + str="${COL_BOLD}${regexCount}${COL_NC} ${matchType}${plu:-} found in ${COL_BOLD}regex${COL_NC} table" result="${COL_BOLD}$(IFS=$'\n'; echo "${results[*]}")${COL_NC}" if [[ -z "${blockpage}" ]]; then From b49c702f331c5bfecc2b4622039740f8b32aa247 Mon Sep 17 00:00:00 2001 From: MMotti Date: Tue, 4 Jun 2019 00:03:37 +0100 Subject: [PATCH 4/6] Consistency tweak Signed-off-by: MMotti --- advanced/Scripts/query.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/advanced/Scripts/query.sh b/advanced/Scripts/query.sh index a4cf02b1..f4aed05b 100755 --- a/advanced/Scripts/query.sh +++ b/advanced/Scripts/query.sh @@ -39,7 +39,7 @@ scanList(){ # shellcheck disable=SC2086 case "${type}" in "exact" ) grep -i -E -l "(^|(?/dev/null;; - "rx" ) awk 'NR==FNR{regexps[$0]}{for (r in regexps)if($0 ~ r)print r}' <(echo "$lists") <(echo "$domain") 2>/dev/null;; + "rx" ) awk 'NR==FNR{regexps[$0]}{for (r in regexps)if($0 ~ r)print r}' <(echo "${lists}") <(echo "${domain}") 2>/dev/null;; * ) grep -i "${esc_domain}" ${lists} /dev/null 2>/dev/null;; esac } From 7613e94ef6f987d12203dc13e26b5637a5e31d48 Mon Sep 17 00:00:00 2001 From: MMotti Date: Tue, 4 Jun 2019 06:06:17 +0100 Subject: [PATCH 5/6] Minor tweaks Mainly for consistency Signed-off-by: MMotti --- advanced/Scripts/query.sh | 44 +++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 25 deletions(-) diff --git a/advanced/Scripts/query.sh b/advanced/Scripts/query.sh index f4aed05b..5b8baa63 100755 --- a/advanced/Scripts/query.sh +++ b/advanced/Scripts/query.sh @@ -12,7 +12,6 @@ # Globals piholeDir="/etc/pihole" gravityDBfile="${piholeDir}/gravity.db" -regexlist="/etc/pihole/regex.list" options="$*" adlist="" all="" @@ -23,7 +22,6 @@ matchType="match" colfile="/opt/pihole/COL_TABLE" source "${colfile}" -# Scan an array of files for matching strings # Scan an array of files for matching strings scanList(){ # Escape full stops @@ -39,7 +37,12 @@ scanList(){ # shellcheck disable=SC2086 case "${type}" in "exact" ) grep -i -E -l "(^|(?/dev/null;; - "rx" ) awk 'NR==FNR{regexps[$0]}{for (r in regexps)if($0 ~ r)print r}' <(echo "${lists}") <(echo "${domain}") 2>/dev/null;; + # Create array of regexps + # Iterate through each regexp and check whether it matches the domainQuery + # If it does, print the matching regexp and continue looping + # Input 1 - regexps | Input 2 - domainQuery + "regex" ) awk 'NR==FNR{regexps[$0]}{for (r in regexps)if($0 ~ r)print r}' \ + <(echo "${lists}") <(echo "${domain}") 2>/dev/null;; * ) grep -i "${esc_domain}" ${lists} /dev/null 2>/dev/null;; esac } @@ -97,9 +100,8 @@ scanDatabaseTable() { # behavior. The "ESCAPE '\'" clause specifies that an underscore preceded by an '\' should be matched # as a literal underscore character. We pretreat the $domain variable accordingly to escape underscores. case "${type}" in - "exact" ) querystr="SELECT domain FROM vw_${table} WHERE domain = '${domain}'";; - "retrievetable" ) querystr="SELECT domain FROM vw_${table}";; - * ) querystr="SELECT domain FROM vw_${table} WHERE domain LIKE '%${domain//_/\\_}%' ESCAPE '\\'";; + "exact" ) querystr="SELECT domain FROM vw_${table} WHERE domain = '${domain}'";; + * ) querystr="SELECT domain FROM vw_${table} WHERE domain LIKE '%${domain//_/\\_}%' ESCAPE '\\'";; esac # Send prepared query to gravity database @@ -109,13 +111,6 @@ scanDatabaseTable() { return fi - # If we are only retrieving the table - # Just output and return - if [[ "${type}" == "retrievetable" ]]; then - echo "${result[*]}" - return - fi - # Mark domain as having been white-/blacklist matched (global variable) wbMatch=true @@ -138,20 +133,19 @@ scanDatabaseTable "${domainQuery}" "whitelist" "${exact}" scanDatabaseTable "${domainQuery}" "blacklist" "${exact}" # Scan Regex table -regexlist=$(scanDatabaseTable "" "regex" "retrievetable") +mapfile -t regexlist <<< "$(sqlite3 "${gravityDBfile}" "SELECT domain FROM vw_regex" 2> /dev/null)" +# Split results over new line and store in a string +# ready for processing +str_regexlist=$(IFS=$'\n'; echo "${regexlist[*]}") +# If there are regexps in the DB +if [[ -n "${str_regexlist}" ]]; then + # Return any regexps that match the domainQuery + mapfile -t results <<< "$(scanList "${domainQuery}" "${str_regexlist}" "regex")" -if [[ -n "${regexlist}" ]]; then - # Return portion(s) of string that is found in the regex list - mapfile -t results <<< "$(scanList "${domainQuery}" "${regexlist}" "rx")" - - # If a result is found + # If there are matches to the domain query if [[ -n "${results[*]}" ]]; then - # Count the matches - regexCount=${#results[@]} - # Determine plural string - [[ $regexCount -gt 1 ]] && plu="es" # Form output strings - str="${COL_BOLD}${regexCount}${COL_NC} ${matchType}${plu:-} found in ${COL_BOLD}regex${COL_NC} table" + str="${matchType^} found in ${COL_BOLD}regex list${COL_NC}" result="${COL_BOLD}$(IFS=$'\n'; echo "${results[*]}")${COL_NC}" if [[ -z "${blockpage}" ]]; then @@ -160,7 +154,7 @@ if [[ -n "${regexlist}" ]]; then fi case "${blockpage}" in - true ) echo "π ${regexlist##*/}"; exit 0;; + true ) echo "π regex list"; exit 0;; * ) awk '{print " "$0}' <<< "${result}";; esac fi From cf21efa10350cfbb257d0428c6be08594e8139ae Mon Sep 17 00:00:00 2001 From: MMotti Date: Wed, 5 Jun 2019 14:36:43 +0100 Subject: [PATCH 6/6] Minor grammar changes Signed-off-by: MMotti --- advanced/Scripts/query.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/advanced/Scripts/query.sh b/advanced/Scripts/query.sh index 5b8baa63..9134dc0f 100755 --- a/advanced/Scripts/query.sh +++ b/advanced/Scripts/query.sh @@ -145,7 +145,7 @@ if [[ -n "${str_regexlist}" ]]; then # If there are matches to the domain query if [[ -n "${results[*]}" ]]; then # Form output strings - str="${matchType^} found in ${COL_BOLD}regex list${COL_NC}" + str="${matchType^} found in ${COL_BOLD}Regex list${COL_NC}" result="${COL_BOLD}$(IFS=$'\n'; echo "${results[*]}")${COL_NC}" if [[ -z "${blockpage}" ]]; then @@ -154,7 +154,7 @@ if [[ -n "${str_regexlist}" ]]; then fi case "${blockpage}" in - true ) echo "π regex list"; exit 0;; + true ) echo "π Regex list"; exit 0;; * ) awk '{print " "$0}' <<< "${result}";; esac fi