Seleniumを利用してGoogle検索結果の画像を保存する

python selenium

import os
import time
from selenium import webdriver
import chromedriver_binary
from PIL import Image
import io
import requests
import hashlib
import os.path

# Text file that lists the search keywords, one per line.
filename = 'search_list.txt'

# load doc into memory
def load_doc(filename):
    """Return the entire contents of a text file as one string.

    Args:
        filename: Path of the file to read.

    Returns:
        The text of the file, exactly as read in text mode.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager closes the handle even when read() raises,
    # which the previous explicit open()/close() pair did not guarantee.
    with open(filename, 'r') as file:
        return file.read()

# load the distinct entries listed in a text file
def load_set(filename):
    """Return the set of entries named in the given text file.

    The file is read through ``load_doc`` and split on newlines. Empty
    lines are ignored; every other line contributes the text that precedes
    its first '.' (the whole line when it contains no '.').

    Args:
        filename: Path of the list file.

    Returns:
        A set of strings, so duplicates collapse and order is not kept.
    """
    return {
        row.partition('.')[0]
        for row in load_doc(filename).split('\n')
        if row
    }

def run(search_keyword):
    """Search Google Images for a keyword and save the first results as JPEGs.

    Opens Chrome through Selenium, clicks the leading thumbnails of the
    result page to reveal the full-size image URLs, then downloads every
    collected URL and writes it as a JPEG (quality 90) into a folder named
    after the keyword. Each file is named with the first 10 hex digits of
    the SHA-1 of the downloaded bytes. Progress and failures are printed;
    a failure for one image does not stop the others.

    Args:
        search_keyword: Query text. It is also used, unmodified, as the
            name of the output folder.
    """
    from urllib.parse import quote_plus

    # Seconds to wait after a click or similar interaction.
    sleep_between_interactions = 2
    # Number of thumbnails to open.
    download_num = 3
    # Seconds before a download is abandoned, so that one stalled server
    # cannot hang the whole run.
    download_timeout = 10
    # Search term.
    query = search_keyword

    # URL template for the image search.
    search_url = "https://www.google.com/search?safe=off&site=&tbm=isch&source=hp&q={q}&oq={q}&gs_l=img"

    # NOTE(review): chrome_options= and find_elements_by_* are the older
    # Selenium API style; newer Selenium releases may require options= and
    # find_elements(By..., ...) instead. Confirm against the installed version.
    options = webdriver.ChromeOptions()
    wd = webdriver.Chrome(chrome_options=options)
    image_urls = set()
    try:
        # Percent-encode the keyword so characters such as '&', '#' or '+'
        # stay inside the q= parameter instead of altering the URL.
        wd.get(search_url.format(q=quote_plus(query)))
        # Collect the thumbnail elements (if this fails, inspect the page
        # and adjust the selector).
        thumbnail_results = wd.find_elements_by_css_selector("img.rg_i")

        # Click each thumbnail and gather the URLs of the full-size images.
        for img in thumbnail_results[:download_num]:
            try:
                img.click()
                time.sleep(sleep_between_interactions)
            except Exception:
                # Best effort: a thumbnail that cannot be clicked is skipped.
                continue
            # The URL cannot be read in one step, so list the candidates
            # and filter them afterwards.
            # The 'n3VNCb' class name changes from time to time; inspect
            # the clicked image element and update it when needed.
            url_candidates = wd.find_elements_by_class_name('n3VNCb')
            for candidate in url_candidates:
                url = candidate.get_attribute('src')
                if url and 'https' in url:
                    image_urls.add(url)
        # The session did not end cleanly without a short pause, so wait
        # three extra seconds before quitting.
        time.sleep(sleep_between_interactions + 3)
    finally:
        # Always close the browser, even when navigation or scraping raised.
        wd.quit()

    # Download the images.
    image_save_folder_path = query
    os.makedirs(image_save_folder_path, exist_ok=True)

    for url in image_urls:
        try:
            response = requests.get(url, timeout=download_timeout)
            # Report HTTP errors as download failures rather than trying
            # to decode an error page as an image.
            response.raise_for_status()
            image_content = response.content
        except Exception as e:
            print(f"ERROR - Could not download {url} - {e}")
            # Skip the save step: there is either no data at all or only
            # the bytes left over from the previous URL.
            continue

        try:
            image_file = io.BytesIO(image_content)
            image = Image.open(image_file).convert('RGB')
            file_name = hashlib.sha1(image_content).hexdigest()[:10] + '.jpg'
            file_path = os.path.join(image_save_folder_path, file_name)
            with open(file_path, 'wb') as f:
                image.save(f, "JPEG", quality=90)
            print(f"SUCCESS - saved {url} - as {file_path}")
        except Exception as e:
            print(f"ERROR - Could not save {url} - {e}")

# Script entry: read the keywords from the list file and scrape images for
# each one. load_set returns a set, so the keywords are processed in no
# particular order.
search_keywords = load_set(filename)
for search_keyword in search_keywords:
    run(search_keyword)