import time
import random
from bs4 import BeautifulSoup as bs
import urllib
import urllib.request as url
# Fetch one Yelp business page and print the text of every review on it.
page_url = 'https://www.yelp.com/biz/the-stillery-nashville?osq=Restaurants+Nashville+Tn'

# Use the response as a context manager so the connection is closed as soon
# as the body has been read.
with urllib.request.urlopen(page_url) as response:
    html = response.read().decode('utf-8')

soup = bs(html, 'html.parser')

# Each review body is looked up as a <p> carrying exactly this class string.
# NOTE(review): the class names look build-generated; presumably they change
# when the site is redeployed, in which case this returns an empty list.
relevant = soup.find_all('p', class_='comment__09f24__gu0rG css-qgunke')

for paragraph in relevant:
    # The review text sits in a nested <span> inside the paragraph.
    for span in paragraph.find_all('span', class_="raw__09f24__T4Ezm"):
        review = span.get_text()
        print(review)
上面这段代码可以正常运行,但下面这段不行。我不明白为什么第二段不起作用。
import time
import random
from bs4 import BeautifulSoup
import urllib
import urllib.request as url
import html
# Fetch one Yelp business page with a browser-like User-Agent and print the
# text of every review on it.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
}

# Named page_url rather than url so it does not rebind the
# `import urllib.request as url` alias defined by the imports.
page_url = 'https://www.yelp.com/biz/the-hampton-social-nashville-nashville-2?osq=Restaurants+Nashville+Tn'

# The script never imports `requests`, so calling requests.get() raised
# NameError before any request was sent. urllib.request is already imported
# and can send the same custom headers through a Request object.
request = urllib.request.Request(page_url, headers=headers)

# urlopen raises urllib.error.HTTPError for an error status instead of
# returning an error page; the context manager closes the connection.
with urllib.request.urlopen(request) as response:
    page_html = response.read().decode('utf-8')

soup2 = BeautifulSoup(page_html, 'html.parser')

# Each review body is looked up as a <p> carrying exactly this class string.
# NOTE(review): the class names look build-generated; presumably they change
# when the site is redeployed, in which case this returns an empty list.
relevant2 = soup2.find_all('p', class_='comment__09f24__gu0rG css-qgunke')

for paragraph in relevant2:
    # The review text sits in a nested <span> inside the paragraph.
    for span in paragraph.find_all('span', class_="raw__09f24__T4Ezm"):
        review2 = span.get_text()
        print(review2)
我期待得到评论。
我尝试了上面列出的从 Yelp 数据集中抓取餐馆评论的代码。
1条答案
按热度按时间jc3wubiy1#
参考这个回答:https://stackoverflow.com/a/38114548
使用
requests.get(url, headers=headers, verify=False)