Use a set of content types for checking

This commit is contained in:
Maks Snegov 2014-07-23 08:45:12 +04:00
parent fbf52e9544
commit cf626546e7

View File

@@ -54,7 +54,7 @@ def charset_header(content_type):
return None return None
def get_text(url, content='text/html', charset='utf-8'): def get_text(url, content={'text/html'}, charset='utf-8'):
response = urlopen(url) response = urlopen(url)
if response.status != 200: if response.status != 200:
raise urllib.error.HTTPError( raise urllib.error.HTTPError(
@@ -65,7 +65,10 @@ def get_text(url, content='text/html', charset='utf-8'):
ctype = response.headers.get('content-type') ctype = response.headers.get('content-type')
if ctype is None: if ctype is None:
raise RuntimeError('None content type for %s' % url) raise RuntimeError('None content type for %s' % url)
if not ctype.startswith(content): for cnt in content:
if ctype.startswith(cnt):
break
else:
raise RuntimeError('Incorrect content-type for %s: %s' % (url, ctype)) raise RuntimeError('Incorrect content-type for %s: %s' % (url, ctype))
# get charset from 'Content-type' header # get charset from 'Content-type' header
@@ -118,7 +121,7 @@ def embed_css(page, css_urls, base_url=None):
css_start = page.rindex('<', 0, page.index(url)) css_start = page.rindex('<', 0, page.index(url))
css_end = page.index('>', css_start) + 1 css_end = page.index('>', css_start) + 1
css_tag = ('<style media="screen" type="text/css">%s</style>' % get_text( css_tag = ('<style media="screen" type="text/css">%s</style>' % get_text(
complete_url(url, base_url), content='text/css',charset=base_char)) complete_url(url, base_url), content={'text/css'}, charset=base_char))
page = page[:css_start] + css_tag + page[css_end:] page = page[:css_start] + css_tag + page[css_end:]
return page return page