# Source: web-programming/solutions/python/basics/exercise8.py (kbalog/web-programming on GitHub)
"""
Write a function to parse a website's title (e.g., [this](http://www.pythonscraping.com/pages/page1.html)).
Use try-except to handle exceptions, e.g., when the page cannot be opened or the title does not exist.
It is recommended that you make use of existing packages (BeautifulSoup or urllib.request) in this exercise.
"""
from urllib.request import urlopen
from urllib.error import HTTPError
from bs4 import BeautifulSoup


def getTitle(url):
    """Fetch the page at *url* and return its ``<title>`` element.

    Args:
        url: Address of the page to download, as accepted by ``urlopen``.

    Returns:
        The title tag as parsed by BeautifulSoup, or ``None`` when the page
        cannot be opened (HTTP error, unreachable host, missing resource,
        malformed URL) or the title cannot be extracted.
    """
    try:
        # The context manager closes the response even if read() fails.
        with urlopen(url) as response:
            html = response.read()
    except (OSError, ValueError):
        # HTTPError and URLError are both OSError subclasses, so this also
        # covers unreachable hosts and dropped connections; urlopen raises
        # ValueError for a string that is not a valid URL.
        return None
    try:
        title = BeautifulSoup(html, "lxml").title
    except AttributeError:
        # Kept from the original as a defensive guard around the lookup.
        return None
    return title


# Demo: fetch the sample page and report its title.
title = getTitle("http://www.pythonscraping.com/pages/page1.html")
# Identity comparison is the reliable None check; `==` would dispatch to the
# returned object's own __eq__ implementation.
if title is None:
    print("Title could not be found!")
else:
    print(title)