Add support for gzip Content-Encoding in get_page

2014-06-17 22:31:02 +04:00
parent 2666d7911a
commit ae4a9b986e
1 changed files with 11 additions and 2 deletions
--- a/nevernote.py
+++ b/nevernote.py
@@ -5,6 +5,7 @@ import http.client
 import html.parser
 import sys
 from urllib.parse import urlparse
 import zlib
 class TitleParser(html.parser.HTMLParser):
@@ -52,12 +53,20 @@ def get_page(url):
    if not c_type.startswith('text'):
        raise ValueError('incorrect Content-Type for HTML page: %s' % c_type)
    c_encoding = response.getheader('Content-Encoding')
    if c_encoding:
        if c_encoding == 'gzip':
            page_binary = zlib.decompress(response.read(), 16+zlib.MAX_WBITS)
        else:
            raise NotImplementedError(
                'content encoding %s is not implemented' % c_encoding)
    else:
        page_binary = response.read()
    charset = 'iso-8859-1'
    ct_spl = c_type.split('; ')
    if len(ct_spl) > 1:
        charset = ct_spl[1].split('=')[1]
    page_binary = response.read()
    page = page_binary.decode(charset)
    return page