Merge pull request #760 from matrix-org/matthew/preview_url_ip_whitelist

add a url_preview_ip_range_whitelist config param
Matthew Hodgson 2016-05-16 13:13:26 +01:00
commit 2d98c960ec
4 changed files with 57 additions and 41 deletions

synapse/config/repository.py

@@ -100,8 +100,13 @@ class ContentRepositoryConfig(Config):
                 "to work"
             )

-        if "url_preview_url_blacklist" in config:
-            self.url_preview_url_blacklist = config["url_preview_url_blacklist"]
+        self.url_preview_ip_range_whitelist = IPSet(
+            config.get("url_preview_ip_range_whitelist", ())
+        )
+
+        self.url_preview_url_blacklist = config.get(
+            "url_preview_url_blacklist", ()
+        )

     def default_config(self, **kwargs):
         media_store = self.default_path("media_store")
@@ -162,6 +167,15 @@ class ContentRepositoryConfig(Config):
         # - '10.0.0.0/8'
         # - '172.16.0.0/12'
         # - '192.168.0.0/16'
+        #
+        # List of IP address CIDR ranges that the URL preview spider is allowed
+        # to access even if they are specified in url_preview_ip_range_blacklist.
+        # This is useful for specifying exceptions to wide-ranging blacklisted
+        # target IP ranges - e.g. for enabling URL previews for a specific private
+        # website only visible in your network.
+        #
+        # url_preview_ip_range_whitelist:
+        # - '192.168.1.1'

         # Optional list of URL matches that the URL preview spider is
         # denied from accessing. You should use url_preview_ip_range_blacklist
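
A note on how the two new config reads behave: below is a minimal standalone sketch in which the dict literal stands in for a parsed homeserver.yaml and the CIDR values are illustrative; only the config.get() fallbacks and the IPSet construction mirror the hunk above.

from netaddr import IPAddress, IPSet

# Illustrative stand-in for a parsed homeserver.yaml (values are examples).
config = {
    "url_preview_ip_range_blacklist": ["10.0.0.0/8", "172.16.0.0/12", "192.168.0.0/16"],
    "url_preview_ip_range_whitelist": ["192.168.1.1"],
}

# Same pattern as the hunk above: absent keys fall back to an empty tuple,
# so both attributes always exist after config parsing.
url_preview_ip_range_whitelist = IPSet(
    config.get("url_preview_ip_range_whitelist", ())
)
url_preview_url_blacklist = config.get("url_preview_url_blacklist", ())

print(IPAddress("192.168.1.1") in url_preview_ip_range_whitelist)  # True
print(url_preview_url_blacklist)  # () - key not present in this example

Because the whitelist is always built (an empty IPSet when the key is absent), downstream code can test membership unconditionally.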

synapse/http/client.py

@@ -380,13 +380,14 @@ class CaptchaServerHttpClient(SimpleHttpClient):
 class SpiderEndpointFactory(object):
     def __init__(self, hs):
         self.blacklist = hs.config.url_preview_ip_range_blacklist
+        self.whitelist = hs.config.url_preview_ip_range_whitelist
         self.policyForHTTPS = hs.get_http_client_context_factory()

     def endpointForURI(self, uri):
         logger.info("Getting endpoint for %s", uri.toBytes())
         if uri.scheme == "http":
             return SpiderEndpoint(
-                reactor, uri.host, uri.port, self.blacklist,
+                reactor, uri.host, uri.port, self.blacklist, self.whitelist,
                 endpoint=TCP4ClientEndpoint,
                 endpoint_kw_args={
                     'timeout': 15
@@ -395,7 +396,7 @@ class SpiderEndpointFactory(object):
         elif uri.scheme == "https":
             tlsPolicy = self.policyForHTTPS.creatorForNetloc(uri.host, uri.port)
             return SpiderEndpoint(
-                reactor, uri.host, uri.port, self.blacklist,
+                reactor, uri.host, uri.port, self.blacklist, self.whitelist,
                 endpoint=SSL4ClientEndpoint,
                 endpoint_kw_args={
                     'sslContextFactory': tlsPolicy,
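
For context on where this factory is consumed: any object exposing endpointForURI(uri) can be handed to twisted.web.client.Agent via Agent.usingEndpointFactory, and the agent then asks it for an endpoint per request. A minimal sketch of that wiring, assuming SpiderEndpointFactory is importable from synapse.http.client; this is illustrative, not quoted from SpiderHttpClient.

from twisted.internet import reactor
from twisted.web.client import Agent

from synapse.http.client import SpiderEndpointFactory

def build_preview_agent(hs):
    # Every request made through this agent goes via SpiderEndpoint, so the
    # blacklist/whitelist check runs before any connection is attempted.
    return Agent.usingEndpointFactory(reactor, SpiderEndpointFactory(hs))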

synapse/http/endpoint.py

@@ -79,12 +79,13 @@ class SpiderEndpoint(object):
     """An endpoint which refuses to connect to blacklisted IP addresses
     Implements twisted.internet.interfaces.IStreamClientEndpoint.
     """
-    def __init__(self, reactor, host, port, blacklist,
+    def __init__(self, reactor, host, port, blacklist, whitelist,
                  endpoint=TCP4ClientEndpoint, endpoint_kw_args={}):
         self.reactor = reactor
         self.host = host
         self.port = port
         self.blacklist = blacklist
+        self.whitelist = whitelist
         self.endpoint = endpoint
         self.endpoint_kw_args = endpoint_kw_args

@@ -93,7 +94,10 @@ class SpiderEndpoint(object):
         address = yield self.reactor.resolve(self.host)

         from netaddr import IPAddress
-        if IPAddress(address) in self.blacklist:
-            raise ConnectError(
-                "Refusing to spider blacklisted IP address %s" % address
-            )
+        ip_address = IPAddress(address)
+
+        if ip_address in self.blacklist:
+            if self.whitelist is None or ip_address not in self.whitelist:
+                raise ConnectError(
+                    "Refusing to spider blacklisted IP address %s" % address
+                )
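
The check added to connect() reads as: resolve the host, then refuse blacklisted addresses unless the whitelist re-allows them. A synchronous sketch of the same decision follows; check_spider_target and the use of socket.gethostbyname in place of reactor.resolve are assumptions for illustration, not part of this file.

import socket

from netaddr import IPAddress, IPSet
from twisted.internet.error import ConnectError

def check_spider_target(host, blacklist, whitelist):
    # Resolve the hostname (the real code uses reactor.resolve and a Deferred).
    address = socket.gethostbyname(host)
    ip_address = IPAddress(address)
    # Blacklisted addresses are refused unless the whitelist explicitly
    # re-allows them; an empty or missing whitelist rescues nothing.
    if ip_address in blacklist:
        if whitelist is None or ip_address not in whitelist:
            raise ConnectError(
                "Refusing to spider blacklisted IP address %s" % address
            )
    return address

# Example with the sample ranges from the default config:
# check_spider_target("intranet.local", IPSet(["192.168.0.0/16"]), IPSet(["192.168.1.1"]))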

synapse/rest/media/v1/preview_url_resource.py

@@ -56,7 +56,6 @@ class PreviewUrlResource(Resource):
         self.client = SpiderHttpClient(hs)
         self.media_repo = media_repo

-        if hasattr(hs.config, "url_preview_url_blacklist"):
-            self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist
+        self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist

         # simple memory cache mapping urls to OG metadata
@@ -86,8 +85,6 @@ class PreviewUrlResource(Resource):
         else:
             ts = self.clock.time_msec()

-        # impose the URL pattern blacklist
-        if hasattr(self, "url_preview_url_blacklist"):
-            url_tuple = urlparse.urlsplit(url)
-            for entry in self.url_preview_url_blacklist:
-                match = True
+        url_tuple = urlparse.urlsplit(url)
+        for entry in self.url_preview_url_blacklist:
+            match = True
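
The hunk ends at match = True, so the body of the matching loop is not shown here. Below is a self-contained sketch of this style of per-component URL blacklisting, where each blacklist entry maps urlsplit attributes to glob patterns; the function name, the fnmatch semantics and the example entry are assumptions for illustration rather than this file's exact logic.

import fnmatch

try:
    import urlparse  # Python 2, as used by this file
except ImportError:
    from urllib import parse as urlparse  # Python 3 fallback for this sketch

def url_is_blacklisted(url, url_preview_url_blacklist):
    url_tuple = urlparse.urlsplit(url)
    for entry in url_preview_url_blacklist:
        match = True
        # Every attribute named in the entry (scheme, netloc, path, ...) must
        # match its pattern for the entry as a whole to match.
        for attrib, pattern in entry.items():
            if not fnmatch.fnmatch(getattr(url_tuple, attrib), pattern):
                match = False
                break
        if match:
            return True
    return False

# Example: block previews for every host under example.com.
# url_is_blacklisted("http://evil.example.com/page", [{"netloc": "*.example.com"}])  -> True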