add url as comment to saved pages

This commit is contained in:
Maks Snegov 2014-06-21 20:23:25 +04:00
parent e2009e7f08
commit 5837451ed7

View File

@ -3,6 +3,7 @@
import argparse import argparse
import http.client import http.client
import html.parser import html.parser
import os.path
import sys import sys
from urllib.parse import urlparse from urllib.parse import urlparse
import zlib import zlib
@ -74,21 +75,23 @@ def get_page(url):
return page return page
def write_file(page, comment=None):
    """Save *page* into an HTML file named after the page's title.

    The file name is the parsed title with every '/' replaced by '_' and
    '.html' appended.  If that name is already taken, a numeric suffix is
    tried instead ('<title>_2.html', '<title>_3.html', ...) until a free
    name is found.

    Args:
        page: text of the page; it is fed to TitleParser and written out.
        comment: optional text (main() passes the page URL) written as an
            HTML comment in front of the page contents.  Skipped when
            empty or None.
    """
    parser = TitleParser(strict=False)
    parser.feed(page)
    base = parser.title.replace('/', '_')

    fname = base + '.html'
    inc = 1
    while True:
        try:
            # Mode 'x' creates the file and fails if it already exists in
            # a single step, so nothing can slip in between an existence
            # check and the creation (unlike os.path.exists() + open()).
            a_file = open(fname, 'x', newline='\n')
        except FileExistsError:
            inc += 1
            fname = base + '_%d.html' % inc
        else:
            break

    with a_file:
        print('Saving in file "%s"' % fname)
        if comment:
            # '--' inside an HTML comment can terminate it early (e.g. a
            # URL containing '-->'); percent-encoding the hyphens keeps
            # the URL equivalent while making the comment safe.
            safe_comment = comment.replace('--', '%2D%2D')
            a_file.write('<!-- URL: %s -->' % safe_comment)
        a_file.write(page)
def main(): def main():
@ -101,7 +104,7 @@ def main():
for url in args.urls: for url in args.urls:
page = get_page(url) page = get_page(url)
write_file(page) write_file(page, comment=url)
if __name__ == '__main__': if __name__ == '__main__':