1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43
import urllib.parse
import urllib.request

from bs4 import BeautifulSoup

# Page whose markup the examples below explore.
url = 'https://blog.xinhaojin.top'

# Fetch the page. The context manager closes the HTTP response even if
# reading it fails, so the connection is not leaked.
with urllib.request.urlopen(url) as resp:
    data = resp.read()
    # Use the charset declared in the Content-Type header when there is one;
    # fall back to UTF-8, which is what a bare decode() would assume.
    charset = resp.headers.get_content_charset() or 'utf-8'
html = data.decode(charset)

# Parse the markup with the lxml parser.
soup = BeautifulSoup(html, 'lxml')

# find() yields a single element: the first <title> in the document.
tag = soup.find('title')
print(type(tag), tag)

# find_all() yields every match: here, all <a> elements.
tags = soup.find_all('a')
for tag in tags:
    print(tag)

# Restrict the search to <div> elements with the given class attribute value.
tags = soup.find_all('div', attrs={'class': "entry excerpt entry-summary"})
for tag in tags:
    print(tag)

# Same attribute filter, but name=None places no constraint on the tag name.
tags = soup.find_all(name=None, attrs={'class': "entry excerpt entry-summary"})
for tag in tags:
    print(tag)

# Show the class attribute and the text of the last match. Selecting it
# explicitly (instead of relying on the leftover loop variable) avoids
# inspecting a stale element from an earlier search when nothing matched.
if tags:
    tag = tags[-1]
    print(tag['class'])
    print(tag.text)
def myFilter(tag):
    """Tell whether *tag* is an ``<a>`` element linking to the 2020/08 URL.

    Written as a predicate that can be handed to ``find_all()``.

    Args:
        tag: An element exposing ``name``, ``has_attr()`` and item access
            to its attributes.

    Returns:
        True only when the element is named ``a``, carries an ``href``
        attribute, and that attribute equals
        ``https://blog.xinhaojin.top/2020/08/`` exactly; otherwise False.
    """
    # Reject anything that is not an anchor element.
    if tag.name != 'a':
        return False
    # Anchors without an href cannot match; checking first avoids a KeyError.
    if not tag.has_attr("href"):
        return False
    return tag['href'] == "https://blog.xinhaojin.top/2020/08/"
# Search with a callable: find_all() is given myFilter instead of a tag name.
# NOTE: despite its singular name, `tag` holds the whole find_all() result.
tag = soup.find_all(myFilter)
print(tag)
|