diff --git a/goose/network.py b/goose/network.py
index 666a7d61..cd7cd2f6 100644
--- a/goose/network.py
+++ b/goose/network.py
@@ -21,6 +21,8 @@
 limitations under the License.
 """
 import urllib2
+import StringIO
+import gzip
 
 
 class HtmlFetcher(object):
@@ -48,9 +50,18 @@ def get_html(self, url):
                         headers=self.headers)
         # do request
         try:
-            self.result = urllib2.urlopen(
+            response = urllib2.urlopen(
                             self.request,
                             timeout=self.config.http_timeout)
+            # when the response is gzip-encoded, expose it through a
+            # GzipFile so that reading it yields the decompressed data
+            if response.info().get('Content-Encoding') == 'gzip':
+                buf = StringIO.StringIO(response.read())
+                f = gzip.GzipFile(fileobj=buf)
+                self.result = f
+            else:
+                self.result = response
+
         except Exception:
             self.result = None
 