From ae4a9b986e0c9a93ae74697eb806a6932bf420d8 Mon Sep 17 00:00:00 2001
From: Maks Snegov
Date: Tue, 17 Jun 2014 22:31:02 +0400
Subject: [PATCH] add gzip support

---
 nevernote.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/nevernote.py b/nevernote.py
index 626c570..8baeffa 100755
--- a/nevernote.py
+++ b/nevernote.py
@@ -5,6 +5,7 @@ import http.client
 import html.parser
 import sys
 from urllib.parse import urlparse
+import zlib
 
 
 class TitleParser(html.parser.HTMLParser):
@@ -52,12 +53,20 @@ def get_page(url):
     if not c_type.startswith('text'):
         raise ValueError('incorrect Content-Type for HTML page: %s' % c_type)
 
+    c_encoding = response.getheader('Content-Encoding')
+    if c_encoding:
+        if c_encoding == 'gzip':
+            page_binary = zlib.decompress(response.read(), 16+zlib.MAX_WBITS)
+        else:
+            raise NotImplementedError(
+                'content encoding %s is not implemented' % c_encoding)
+    else:
+        page_binary = response.read()
+
     charset = 'iso-8859-1'
     ct_spl = c_type.split('; ')
     if len(ct_spl) > 1:
         charset = ct_spl[1].split('=')[1]
-
-    page_binary = response.read()
     page = page_binary.decode(charset)
 
     return page