fix charset from response header

There can be headers without a charset, like
Content-Type: text/html
This commit is contained in:
Maks Snegov 2013-11-09 22:39:35 +04:00
parent 5818b0e096
commit 67b7dc81e9

View File

@ -29,10 +29,12 @@ def get_page(url):
conn.request("GET", up.path, None, headers) conn.request("GET", up.path, None, headers)
response = conn.getresponse() response = conn.getresponse()
# determine page charset # get page charset from response header
contenttype = response.getheader('Content-Type') contenttype = response.getheader('Content-Type')
if contenttype: if contenttype:
charset = contenttype.split('; ')[1].split('=')[1] ct_spl = contenttype.split('; ')
if len(ct_spl) > 1:
charset = ct_spl[1].split('=')[1]
page_binary = response.read() page_binary = response.read()
page = page_binary.decode(charset) page = page_binary.decode(charset)