set result file name by page title
This commit is contained in:
parent
fe61491292
commit
6cbfec5067
11
nevernote.py
11
nevernote.py
@@ -4,6 +4,7 @@ import argparse
|
|||||||
import http.client
|
import http.client
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
def get_page(url):
|
def get_page(url):
|
||||||
@@ -28,6 +29,8 @@ def get_page(url):
|
|||||||
|
|
||||||
conn.request("GET", up.path, None, headers)
|
conn.request("GET", up.path, None, headers)
|
||||||
response = conn.getresponse()
|
response = conn.getresponse()
|
||||||
|
|
||||||
|
# follow redirects
|
||||||
if (response.status == http.client.MOVED_PERMANENTLY) \
|
if (response.status == http.client.MOVED_PERMANENTLY) \
|
||||||
or (response.status == http.client.FOUND):
|
or (response.status == http.client.FOUND):
|
||||||
new_url = response.getheader('Location')
|
new_url = response.getheader('Location')
|
||||||
@@ -47,8 +50,14 @@ def get_page(url):
|
|||||||
return page
|
return page
|
||||||
|
|
||||||
|
|
||||||
|
def get_title(page):
|
||||||
|
soup = BeautifulSoup(page)
|
||||||
|
return soup.title.string
|
||||||
|
|
||||||
|
|
||||||
def write_file(page):
|
def write_file(page):
|
||||||
with open('tmp.html', 'w') as a_file:
|
fname = get_title(page) + '.html'
|
||||||
|
with open(fname, 'w') as a_file:
|
||||||
a_file.write(page)
|
a_file.write(page)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user