Refactor code
This commit is contained in:
parent
44b8a17841
commit
91cddfab7c
27
nevernote.py
27
nevernote.py
@ -10,8 +10,6 @@ from urllib.parse import urlparse
|
|||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
|
||||||
class UrlDuplicateError(Exception): pass
|
|
||||||
URLDUP = re.compile(r'^<!-- URL: (.*) -->$')
|
URLDUP = re.compile(r'^<!-- URL: (.*) -->$')
|
||||||
|
|
||||||
|
|
||||||
@ -48,8 +46,8 @@ def get_text(url):
|
|||||||
return response.text
|
return response.text
|
||||||
|
|
||||||
|
|
||||||
def embedded_image(url):
|
def get_embedded_binary(url):
|
||||||
'''Download content from URL and return bytes if target is image'''
|
"""Download content from URL and return bytes if target is image"""
|
||||||
response = requests.get(url)
|
response = requests.get(url)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
ctype = response.headers.get('Content-Type')
|
ctype = response.headers.get('Content-Type')
|
||||||
@ -59,17 +57,19 @@ def embedded_image(url):
|
|||||||
|
|
||||||
|
|
||||||
def embed_pictures(page, pict_urls, base_url=None):
|
def embed_pictures(page, pict_urls, base_url=None):
|
||||||
|
"""Write all pictures in HTML file"""
|
||||||
for url in pict_urls:
|
for url in pict_urls:
|
||||||
print('New picture: %s' % url)
|
print('New picture: %s' % url)
|
||||||
try:
|
try:
|
||||||
page = page.replace(
|
page = page.replace(
|
||||||
url, embedded_image(complete_url(url, base_url)))
|
url, get_embedded_binary(complete_url(url, base_url)))
|
||||||
except requests.exceptions.HTTPError:
|
except requests.exceptions.HTTPError:
|
||||||
pass
|
pass
|
||||||
return page
|
return page
|
||||||
|
|
||||||
|
|
||||||
def embed_css(page, css_urls, base_url=None):
|
def embed_css(page, css_urls, base_url=None):
|
||||||
|
"""Write all CSS's in HTML file"""
|
||||||
for url in css_urls:
|
for url in css_urls:
|
||||||
if not url:
|
if not url:
|
||||||
continue
|
continue
|
||||||
@ -83,18 +83,20 @@ def embed_css(page, css_urls, base_url=None):
|
|||||||
|
|
||||||
|
|
||||||
def embed_scripts(page, script_urls, base_url=None):
|
def embed_scripts(page, script_urls, base_url=None):
|
||||||
|
"""Write all scripts in HTML file"""
|
||||||
for url in script_urls:
|
for url in script_urls:
|
||||||
print('New script: %s' % url)
|
print('New script: %s' % url)
|
||||||
try:
|
try:
|
||||||
page = page.replace(
|
page = page.replace(
|
||||||
url, embedded_image(complete_url(url, base_url)))
|
url, get_embedded_binary(complete_url(url, base_url)))
|
||||||
except requests.exceptions.HTTPError:
|
except requests.exceptions.HTTPError:
|
||||||
pass
|
pass
|
||||||
return page
|
return page
|
||||||
|
|
||||||
|
|
||||||
def url_duplicate(url):
|
def url_duplicate(url):
|
||||||
for htmlfile in os.listdir():
|
"""Check if url was already downloaded"""
|
||||||
|
for htmlfile in os.listdir(path='.'):
|
||||||
if not htmlfile.endswith('.html'):
|
if not htmlfile.endswith('.html'):
|
||||||
continue
|
continue
|
||||||
with open(htmlfile) as h:
|
with open(htmlfile) as h:
|
||||||
@ -105,6 +107,7 @@ def url_duplicate(url):
|
|||||||
|
|
||||||
|
|
||||||
def write_file(page, title, comment=None):
|
def write_file(page, title, comment=None):
|
||||||
|
"""Save HTML to file on a disk"""
|
||||||
write_inc = lambda i: '_%d' % i if i > 1 else ''
|
write_inc = lambda i: '_%d' % i if i > 1 else ''
|
||||||
inc = 0
|
inc = 0
|
||||||
while True:
|
while True:
|
||||||
@ -120,7 +123,8 @@ def write_file(page, title, comment=None):
|
|||||||
a_file.write(page)
|
a_file.write(page)
|
||||||
|
|
||||||
|
|
||||||
def complete_url(url, base_url):
|
def complete_url(url, base_url=None):
|
||||||
|
"""Create absolute URL from relative paths"""
|
||||||
base_up = urlparse(base_url)
|
base_up = urlparse(base_url)
|
||||||
if base_url is not None:
|
if base_url is not None:
|
||||||
up = urlparse(url)
|
up = urlparse(url)
|
||||||
@ -132,6 +136,7 @@ def complete_url(url, base_url):
|
|||||||
|
|
||||||
|
|
||||||
def process_url(url):
|
def process_url(url):
|
||||||
|
"""Save single URL to a file"""
|
||||||
print('Processing URL: %s' % url)
|
print('Processing URL: %s' % url)
|
||||||
try:
|
try:
|
||||||
url_duplicate(url)
|
url_duplicate(url)
|
||||||
@ -158,7 +163,7 @@ def main():
|
|||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
description='Nevernote - download pages locally.')
|
description='Nevernote - download pages locally.')
|
||||||
parser.add_argument('urls', metavar='URL', type=str, nargs='+',
|
parser.add_argument('urls', metavar='URL', type=str, nargs='+',
|
||||||
help='URL of page to download')
|
help='URL of page to download')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
for arg in args.urls:
|
for arg in args.urls:
|
||||||
@ -170,5 +175,9 @@ def main():
|
|||||||
process_url(arg)
|
process_url(arg)
|
||||||
|
|
||||||
|
|
||||||
|
class UrlDuplicateError(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
sys.exit(main())
|
sys.exit(main())
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user