Python/Python Crawling

01. 뷰티풀 수프 BeautifulSoup 02

HicKee 2023. 2. 13. 23:45
import requests as requests
from bs4 import BeautifulSoup
import re

HTTP 프로토콜(네트워크 통신 규약)
클라이언트가 요청(request)을 보내면 서버가 응답(response)을 돌려준다.

 

네이버 뉴스 속보

# Fetch the Naver breaking-news list page and print the text of every lede span.
URL = 'https://news.naver.com/main/list.nhn'
# The timeout stops the script from hanging forever if the server never answers.
res = requests.get(URL, headers={'User-Agent': 'Mozilla/5.0'}, timeout=10)
# Surface HTTP errors (4xx/5xx) instead of silently parsing an error page.
res.raise_for_status()
html = res.text
soup = BeautifulSoup(html, 'html.parser')
# Dump the whole parsed document so the page structure can be inspected.
print(soup)

# Attribute selector: <span> elements whose class attribute equals "lede".
for lede in soup.select('span[class=lede]'):
    # .text drops the markup; strip() removes the surrounding whitespace.
    print(lede.text.strip())
# Fetch the Naver breaking-news list page again and print the matched lede
# elements themselves (tags included) rather than only their text.
URL = 'https://news.naver.com/main/list.nhn'
# The timeout stops the script from hanging forever if the server never answers.
res = requests.get(URL, headers={'User-Agent': 'Mozilla/5.0'}, timeout=10)
# Surface HTTP errors (4xx/5xx) instead of silently parsing an error page.
res.raise_for_status()
html = res.text
soup = BeautifulSoup(html, 'html.parser')
# Dump the whole parsed document so the page structure can be inspected.
print(soup)

# Work with the raw matched elements: each printed item is the Tag as-is.
for lede in soup.select('span[class=lede]'):
    print(lede)

네이버 영화 조회순 순위

 

# find: searches for a tag and extracts the part you want
# select: searches for Tag objects (via a CSS selector) and extracts them

# Fetch the Naver movie ranking page and collect the titles in page order,
# both as a list and as a {rank: title} dict with ranks starting at 1.
URL = 'https://movie.naver.com/movie/sdb/rank/rmovie.nhn'
# The timeout stops the script from hanging forever if the server never answers.
res = requests.get(URL, headers={'User-Agent': 'Mozilla/5.0'}, timeout=10)
# Surface HTTP errors (4xx/5xx) instead of silently parsing an error page.
res.raise_for_status()
html = res.text
soup = BeautifulSoup(html, 'html.parser')
movielist = []
moviedic = {}

# Alternative selector spelling using the class shorthand: 'div.tit3 > a'.
for rank, link in enumerate(soup.select('div[class=tit3] > a'), start=1):
    title = link.text
    print(title)
    movielist.append(title)
    moviedic[rank] = title

print(movielist)
# Guard the first-element lookup: the selector may match nothing (for example
# if the page layout changed), and indexing an empty list raises IndexError.
if movielist:
    print(movielist[0])

print(moviedic)

for key, value in moviedic.items():
    print(key, value)

네이버 영화 평점순 순위

# Fetch the Naver movie ranking page for the given query string, collect the
# titles and the rating cells, pair them up by position and print one line
# per movie as "<index> <title> <score>".
URL = 'https://movie.naver.com/movie/sdb/rank/rmovie.naver?sel=cur&date=20230115'
# The timeout stops the script from hanging forever if the server never answers.
res = requests.get(URL, headers={'User-Agent': 'Mozilla/5.0'}, timeout=10)
# Surface HTTP errors (4xx/5xx) instead of silently parsing an error page.
res.raise_for_status()
html = res.text
soup = BeautifulSoup(html, 'html.parser')
movieTitlelist = []
movieScorelist = []
moviedic = {}

# Titles: the <a> directly inside each <div class="tit5">.
for link in soup.select('div[class=tit5] > a'):
    print(link.text)
    movieTitlelist.append(link.text)

# Scores: the text of each <td class="point">.
for cell in soup.select('td[class=point]'):
    print(cell.text)
    movieScorelist.append(cell.text)

# Pair titles with scores by position. zip() stops at the shorter list, so a
# page that yields fewer scores than titles no longer raises IndexError.
# Keys stay 0-based, as in the original output.
for idx, (title, score) in enumerate(zip(movieTitlelist, movieScorelist)):
    moviedic[idx] = {title: score}

# Read the title/score straight from each one-entry dict instead of slicing
# the repr of dict_keys/dict_values, which mangles any title that repr()
# has to escape (backslashes, or both quote characters).
for key, value in moviedic.items():
    for pkey, pvalue in value.items():
        print(f'{key} {pkey} {pvalue}')

'Python > Python Crawling' 카테고리의 다른 글

04. 판다스 (pandas) 02  (0) 2023.03.05
03. 판다스 (pandas) 01  (0) 2023.02.24
02. 셀레니움 selenium 02  (0) 2023.02.19
02. 셀레니움 selenium 01  (0) 2023.02.15
01. 뷰티풀 수프 BeautifulSoup 01  (0) 2023.02.12