add url as comment to saved pages

This commit is contained in:
Maks Snegov 2014-06-21 20:23:25 +04:00
parent e2009e7f08
commit 5837451ed7

View File

@@ -3,6 +3,7 @@
import argparse import argparse
import http.client import http.client
import html.parser import html.parser
import os.path
import sys import sys
from urllib.parse import urlparse from urllib.parse import urlparse
import zlib import zlib
@@ -74,22 +75,24 @@ def get_page(url):
return page return page
def write_file(page): def write_file(page, comment=None):
parser = TitleParser(strict=False) parser = TitleParser(strict=False)
parser.feed(page) parser.feed(page)
fname = parser.title.replace('/', '_') + '.html' fname = parser.title.replace('/', '_') + '.html'
inc = 1 inc = 1
while True: while True:
try: if not os.path.exists(fname):
with open(fname, 'x') as a_file:
print('Saving in file "%s"' % fname)
a_file.write(page)
break break
except FileExistsError:
inc += 1 inc += 1
fname = parser.title.replace('/', '_') + '_%d.html' % inc fname = parser.title.replace('/', '_') + '_%d.html' % inc
with open(fname, 'x', newline='\n') as a_file:
print('Saving in file "%s"' % fname)
if comment:
a_file.write('<!-- URL: %s -->' % comment)
a_file.write(page)
def main(): def main():
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
@@ -101,7 +104,7 @@ def main():
for url in args.urls: for url in args.urls:
page = get_page(url) page = get_page(url)
write_file(page) write_file(page, comment=url)
if __name__ == '__main__': if __name__ == '__main__':