Skip HTTP error pages
This commit is contained in:
parent
09346f4a70
commit
fb3870e9dd
@@ -157,12 +157,18 @@ def process_url(url):
|
||||
except UrlDuplicateError as e:
|
||||
print(e)
|
||||
return
|
||||
|
||||
try:
|
||||
page = get_text(url)
|
||||
parser = TitleParser(strict=False)
|
||||
parser.feed(page)
|
||||
|
||||
page = embed_pictures(page, parser.images, base_url=url)
|
||||
page = embed_css(page, parser.css, base_url=url)
|
||||
except urllib.error.HTTPError as e:
|
||||
print('Error with URL "%s": %s' % (url,e))
|
||||
return False
|
||||
|
||||
write_file(page, parser.title, comment=url)
|
||||
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user