fix charset from response header
There can be headers without a charset, like Content-Type: text/html
This commit is contained in:
parent
5818b0e096
commit
67b7dc81e9
@ -29,10 +29,12 @@ def get_page(url):
|
|||||||
conn.request("GET", up.path, None, headers)
|
conn.request("GET", up.path, None, headers)
|
||||||
response = conn.getresponse()
|
response = conn.getresponse()
|
||||||
|
|
||||||
# determine page charset
|
# get page charset from response header
|
||||||
contenttype = response.getheader('Content-Type')
|
contenttype = response.getheader('Content-Type')
|
||||||
if contenttype:
|
if contenttype:
|
||||||
charset = contenttype.split('; ')[1].split('=')[1]
|
ct_spl = contenttype.split('; ')
|
||||||
|
if len(ct_spl) > 1:
|
||||||
|
charset = ct_spl[1].split('=')[1]
|
||||||
|
|
||||||
page_binary = response.read()
|
page_binary = response.read()
|
||||||
page = page_binary.decode(charset)
|
page = page_binary.decode(charset)
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user