From 514b39d28756eb56ad71be53365586a5907cd8c6 Mon Sep 17 00:00:00 2001 From: Maks Snegov Date: Sun, 20 Jul 2014 13:31:20 +0400 Subject: [PATCH] use default charset utf-8 if not set in headers --- nevernote.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/nevernote.py b/nevernote.py index a913a0a..dcfeabc 100755 --- a/nevernote.py +++ b/nevernote.py @@ -45,9 +45,11 @@ def get_text(url, content='text/html'): raise RuntimeError('None content type for %s' % url) if not ctype.startswith(content): raise RuntimeError('Incorrect content-type for %s: %s' % (url, ctype)) - encoding = ctype.split(';')[1].split('=')[1].lower() + + # get charset from 'Content-type' header + charset = ctype.split(';')[1].split('=')[1] if 'charset' in ctype else 'utf-8' data = u.read() - page = data.decode(encoding) + page = data.decode(charset.lower()) return page