Use UTF-8 as the default charset if the Content-Type header does not specify one

This commit is contained in:
Maks Snegov 2014-07-20 13:31:20 +04:00
parent 45f30ca9de
commit 514b39d287

View File

@@ -45,9 +45,11 @@ def get_text(url, content='text/html'):
raise RuntimeError('None content type for %s' % url)
if not ctype.startswith(content):
raise RuntimeError('Incorrect content-type for %s: %s' % (url, ctype))
encoding = ctype.split(';')[1].split('=')[1].lower()
# get charset from 'Content-type' header
charset = ctype.split(';')[1].split('=')[1] if 'charset' in ctype else 'utf-8'
data = u.read()
page = data.decode(encoding)
page = data.decode(charset.lower())
return page