2024年2月21日 星期三

Python 開發筆記 - 使用 Selenium / undetected_chromedriver / ChatGoogleGenerativeAI / gemini-pro-vision 完成自動登入網站的流程(含 retry 驗證碼架構)

有些工作任務需要去下載表單做一些自動化應用,因此有了要自動登入的需求,當然也會碰到認證碼辨識問題。此篇是延續 Python 開發筆記 - 使用 Google AI, Generative Language API, gemini-pro-vision 辨識圖片認證碼

整個處理原理:
  1. 使用 Selenium 去偵測網頁的狀態,取得登入要用的帳號, 密碼, 認證碼圖片, 認證碼數值, 登入按鈕
  2. 使用 ChatGoogleGenerativeAI/gemini-pro-vision 分析圖片內容,設法分析出認證碼數值
  3. 觸發 登入按鈕 送出表單
  4. 檢視登入流程,檢查是否有登入失敗的訊息,或是反過來思考怎樣判斷登入成功,若登入失敗重回 (1) 去取得新的認證碼圖片 
引入的函式庫:

import getpass
import os
import sys
import time
import json
import base64
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
 
from langchain_core.messages import HumanMessage
from langchain_google_genai import ChatGoogleGenerativeAI

先採用 undetected_chromedriver 來包裝一下取得 browser driver:

def getBrowserDriver():
    option = uc.ChromeOptions()
    option.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36')
    #option.add_argument('--window-size=%d,%d' % self.res)
    #option.add_argument('--headless')
    driver = uc.Chrome(options=option)
    return driver

辨別圖片文字靠 ChatGoogleGenerativeAI model="gemini-pro-vision":

def codeDetection(imageBase64URL: str):
    llm = ChatGoogleGenerativeAI(model="gemini-pro-vision")
    message = HumanMessage(
        content=[
            {
                "type": "text",
                "text": "Please identify the English or numbers appearing in the image. The output format is 'The answer is: XXXX'"
,
            },
            {"type": "image_url", "image_url": imageBase64URL},
        ]
    )
    result = llm.invoke([message])
    return result

處理流程:

if __name__ == '__main__':
    if "GOOGLE_API_KEY" not in os.environ:
        os.environ["GOOGLE_API_KEY"] = getpass.getpass("Provide your Google API Key: ")
    if not os.environ["GOOGLE_API_KEY"]:
        print('ERROR, no GOOGLE_API_KEY info')
        sys.exit(1)

    output = {
        'status': False,
        'time': [],
    }

    browser = getBrowserDriver()
    start_time = time.time()

    browser.get(LOGIN_URL)

    # 15s timeout
    wait = WebDriverWait(browser, 15)

    # 等待關鍵的表單資料
    conditions = [
        EC.presence_of_element_located((By.ID, "input_user")),
        EC.presence_of_element_located((By.ID, "input_password")),
        EC.presence_of_element_located((By.ID, "input_velidation_code")),
        EC.presence_of_element_located((By.ID, "velidation_code_image")),
        EC.presence_of_element_located((By.ID, "login_button")),
    ]
    if wait.until(lambda driver: all(condition(driver) for condition in conditions)):
        output['status'] = True
    output['time'].append( time.time() - start_time )

    # 取得圖片元素
    imageElement = wait.until(EC.presence_of_element_located((By.ID, "velidation_code_image")))

    # 取得圖片的 HTML code
    imageHTMLCode = imageElement.get_attribute("outerHTML")
    print("Image HTML Code:", imageHTMLCode)

    # 取得圖片的 URL
    imageSrcURL = imageElement.get_attribute("src")
    print("Image URL:", imageSrcURL)

    # 透過 JavaScript 監聽 src 屬性變化
    script = f"""
        var target = document.getElementById('velidation_code_image');
        var observer = new MutationObserver(function(mutations) {{
            mutations.forEach(function(mutation) {{
                if (mutation.attributeName === 'src') {{
                    console.log('src attribute changed:', target.getAttribute('src'));
                }}
            }});
        }});
    
        var config = {{ attributes: true }};
        observer.observe(target, config);
    """
    
    # 執行 JavaScript 代碼
    browser.execute_script(script)

    # 等待一段時間,確保有足夠的時間監聽 src 屬性的變化
    time.sleep(5)

    # 取得更新後的圖片的 URL
    updatedImageSrc = imageElement.get_attribute("src")
    print("Updated Image URL:", updatedImageSrc)

    if not updatedImageSrc:
        print('ERROR, velidation_code_image not found')
        sys.exit(1)

    result = codeDetection(updatedImageSrc)
    print(result.content)
    loginCode = ''
    for c in result.content.split(':', 2)[1]:
        if c == '' or c == ' ':
            continue
        loginCode += c

    print(f"LoginCode: {loginCode}")
    element = wait.until(EC.presence_of_element_located((By.ID, "input_user")))
    element.send_keys('YourAccountName')
    element = wait.until(EC.presence_of_element_located((By.ID, "input_password")))
    element.send_keys('YourPassword')
    element = wait.until(EC.presence_of_element_located((By.ID, "input_velidation_code")))
    element.send_keys(loginCode)
    element = wait.until(EC.presence_of_element_located((By.ID, "login_button")))

    start_time = time.time()
    element.click()

    loginDone = False
    loginRetry = 0
    while loginDone == False and loginRetry <= 3:
        try:
            wait = WebDriverWait(browser, 5)
            element = wait.until(EC.presence_of_element_located((By.ID, "WebsiteErrorMessage")))
            div_element = element.find_element(By.TAG_NAME, "div")
            span_element = div_element.find_element(By.TAG_NAME, "span")
            inner_html = span_element.get_attribute('innerHTML')
            # 驗證碼輸入錯誤
            print(f"retry: {loginRetry}, inner HTML: {inner_html}")

            # 關閉錯誤訊息
            element = wait.until(EC.presence_of_element_located((By.ID, "WebsiteErrorMessageWindow")))
            div_element = element.find_element(By.TAG_NAME, "div")
            button_element = element.find_element(By.TAG_NAME, "button")
            button_element.click()

            loginRetry += 1

            updatedImageSrc = imageElement.get_attribute("src")
            print("Updated Image URL:", updatedImageSrc)
            result = codeDetection(updatedImageSrc)
            print(result.content)
            loginCode = ''
            for c in result.content.split(':', 2)[1]:
                if c == '' or c == ' ':
                    continue
                loginCode += c

            print(f"LoginCode: {loginCode}")
            element = wait.until(EC.presence_of_element_located((By.ID, "input_user")))
            element.clear()
            element.send_keys('YourAccountName')
            time.sleep(1)
            element = wait.until(EC.presence_of_element_located((By.ID, "input_password"))) 
            element.clear()
            element.send_keys('YourPassword')
            time.sleep(1)
            element = wait.until(EC.presence_of_element_located((By.ID, "input_velidation_code")))
            element.clear()
            element.send_keys(loginCode)
            time.sleep(1)
            element = wait.until(EC.presence_of_element_located((By.ID, "login_button")))
            element.click()
        except:
            loginDone = True

    output['time'].append( time.time() - start_time )

    if loginDone:
        print("Login Successful")
    else:
        print(f"Login Failed with retry times: {loginRetry}")
 
    print(json.dumps(output, indent=4))
    while True:
        time.sleep(1)

沒有留言:

張貼留言