본문 바로가기
● 크롤링, 자동화/Selenium

Colab에서 Selenium 사용하는 법

by 0ver-grow 2021. 8. 12.
반응형

Colab에서는 Chrome WebDriver(chromium-chromedriver)를 별도로 설치해야 합니다.

!pip install selenium
!apt-get update
!apt install chromium-chromedriver
import time
import urllib.request
from urllib.parse import quote_plus
from urllib.request import urlopen

from bs4 import BeautifulSoup as bs
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# Naver image-search endpoint; the percent-encoded search term is appended
# directly after the trailing ``query=``.
url = 'https://search.naver.com/search.naver?where=image&sm=tab_jum&query='

# Ask the user for the search term (the prompt text is shown as-is).
kword = input('검색어를 입력하세요 : ')

# quote_plus() escapes spaces and non-ASCII characters so that the term is
# safe to embed in the query string.
base_url = f'{url}{quote_plus(kword)}'

# Bare expression: in a notebook cell this echoes the final URL for checking.
base_url

# Build the Chrome options for a headless session (no browser window).
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
# NOTE(review): --no-sandbox and --disable-dev-shm-usage are the flags usually
# needed when Chrome runs as root inside a container such as Colab — confirm
# they are still required in your runtime.
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')

# Pass the options through the ``options=`` keyword. The positional driver
# path and the ``chrome_options=`` keyword used previously are not accepted by
# current Selenium 4 releases, while ``options=`` works on both Selenium 3.8+
# and 4.x. The chromedriver binary is located via PATH.
driver = webdriver.Chrome(options=chrome_options)

# Open the image-search result page built from the user's search term.
driver.get(base_url)

# Grab <body> so that key presses can be sent to the page as a whole.
# find_element(By..., ...) replaces the find_element_by_* helpers, which are
# no longer available in current Selenium 4 releases.
body = driver.find_element(By.CSS_SELECTOR, 'body')

# Page down repeatedly so that more images get loaded into the result list;
# the one-second pause gives the page time to fetch the next batch.
for i in range(20):
    body.send_keys(Keys.PAGE_DOWN)
    time.sleep(1)

# Collect every <img> element carrying the ``_img`` class.
imgs = driver.find_elements(By.CSS_SELECTOR, 'img._img')

# Download each image as "<search term><index>.jpg" into the Drive folder.
# The target directory must already exist (and Drive must be mounted).
for idx, img in enumerate(imgs):
    imgUrl = img.get_attribute('src')
    if not imgUrl:
        # An element without a ``src`` has nothing to download; skipping it
        # keeps one bad element from aborting the whole run.
        continue
    imgName = '/content/drive/My Drive/Colab Notebooks/crawling/' + kword + str(idx) + '.jpg'
    # Needs ``import urllib.request`` at file level; importing only
    # ``urlopen`` from it does not bind the name ``urllib``.
    urllib.request.urlretrieve(imgUrl, imgName)

자료 출처

반응형