set result file name by page title

This commit is contained in:
Maks Snegov 2013-12-24 23:00:43 +04:00
parent fe61491292
commit 6cbfec5067

View File

@ -4,6 +4,7 @@ import argparse
import http.client import http.client
import sys import sys
from bs4 import BeautifulSoup
from urllib.parse import urlparse from urllib.parse import urlparse
def get_page(url): def get_page(url):
@ -28,6 +29,8 @@ def get_page(url):
conn.request("GET", up.path, None, headers) conn.request("GET", up.path, None, headers)
response = conn.getresponse() response = conn.getresponse()
# follow redirects
if (response.status == http.client.MOVED_PERMANENTLY) \ if (response.status == http.client.MOVED_PERMANENTLY) \
or (response.status == http.client.FOUND): or (response.status == http.client.FOUND):
new_url = response.getheader('Location') new_url = response.getheader('Location')
@ -47,8 +50,14 @@ def get_page(url):
return page return page
def get_title(page):
    """Return the text of the page's <title> element.

    page -- HTML document as a string.

    Returns the title with surrounding whitespace stripped, or None when
    the document has no <title> element or the title is empty.
    """
    # Name the parser explicitly: without it bs4 picks whichever parser
    # happens to be installed, so results can differ between machines.
    soup = BeautifulSoup(page, 'html.parser')
    if soup.title is None:
        return None
    # get_text() also covers titles with nested markup, where .string
    # would be None.
    title = soup.title.get_text().strip()
    return title or None
def write_file(page):
    """Save the page to '<title>.html' in the current directory.

    page -- HTML document as a string.

    The title is arbitrary text taken from the page, so it is sanitized
    before being used as a file name. Falls back to 'tmp.html' when no
    usable title can be extracted.
    """
    try:
        title = get_title(page)
    except AttributeError:
        # Raised when the page has no <title> element to read from.
        title = None

    # Replace path separators, characters that are invalid in file names on
    # common file systems, and control characters, so the title can neither
    # break open() nor point outside the current directory.
    forbidden = '\\/:*?"<>|'
    safe = ''.join(
        '_' if (ch in forbidden or ord(ch) < 32) else ch
        for ch in (title or '')
    )
    # Leading/trailing dots and spaces would give hidden or awkward names
    # (and '.'/'..' are directory references), so drop them.
    safe = safe.strip(' .')

    fname = (safe or 'tmp') + '.html'
    with open(fname, 'w') as a_file:
        a_file.write(page)