2.2 BeautifulSoup 解析网页: CSS
Last updated
Last updated
from bs4 import BeautifulSoup
from urllib.request import urlopen
# if has Chinese, apply decode()
html = urlopen("https://morvanzhou.github.io/static/scraping/list.html").read().decode('utf-8')
print(html)<head>
...
<style>
.jan {
background-color: yellow;
}
...
.month {
color: red;
}
</style>
</head>
<body>
...
<ul>
<li class="month">一月</li>
<ul class="jan">
<li>一月一号</li>
<li>一月二号</li>
<li>一月三号</li>
</ul>
...
</ul>
</body>soup = BeautifulSoup(html, features='lxml')
# use class to narrow search
month = soup.find_all('li', {"class": "month"})
for m in month:
print(m.get_text())
"""
一月
二月
三月
四月
五月
"""jan = soup.find('ul', {"class": 'jan'})
d_jan = jan.find_all('li') # use jan as a parent
for d in d_jan:
print(d.get_text())
"""
一月一号
一月二号
一月三号
"""