整個處理原理:
- 使用 Selenium 偵測網頁狀態,取得登入表單所需的元素:帳號欄位、密碼欄位、認證碼圖片、認證碼輸入欄位、登入按鈕
- 使用 ChatGoogleGenerativeAI/gemini-pro-vision 分析圖片內容,設法分析出認證碼數值
- 觸發 登入按鈕 送出表單
- 檢視登入流程,檢查是否出現登入失敗的訊息(或反過來思考如何判斷登入成功);若登入失敗,則回到第一步,重新取得新的認證碼圖片
引入的函式庫:
import getpass
import os
import sys
import time
import json
import base64
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from langchain_core.messages import HumanMessage
from langchain_google_genai import ChatGoogleGenerativeAI
先採用 undetected_chromedriver 來包裝一下取得 browser driver:
def getBrowserDriver():
    """Start Chrome through undetected_chromedriver and return the driver.

    The only option passed is a fixed desktop Chrome user-agent string;
    no window-size or headless argument is set.
    """
    userAgent = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/80.0.3987.163 Safari/537.36'
    )
    chromeOptions = uc.ChromeOptions()
    chromeOptions.add_argument('--user-agent=' + userAgent)
    return uc.Chrome(options=chromeOptions)
辨別圖片文字靠 ChatGoogleGenerativeAI model="gemini-pro-vision":
def codeDetection(imageBase64URL: str):
    """Ask the gemini-pro-vision model to read the characters in an image.

    Args:
        imageBase64URL: Value forwarded as the message's ``image_url`` part.
            Presumably a base64 data URL, judging by the name -- confirm
            against the caller.

    Returns:
        The object returned by ``llm.invoke``; callers read its ``content``
        attribute, which is requested in the form 'The answer is: XXXX'.
    """
    llm = ChatGoogleGenerativeAI(model="gemini-pro-vision")
    promptPart = {
        "type": "text",
        "text": "Please identify the English or numbers appearing in the image. The output format is 'The answer is: XXXX'",
    }
    imagePart = {"type": "image_url", "image_url": imageBase64URL}
    request = HumanMessage(content=[promptPart, imagePart])
    return llm.invoke([request])
處理流程:
def parseLoginCode(answer) -> str:
    """Extract the verification code from a 'The answer is: XXXX' reply.

    The text between the first and the second ':' is taken and all
    whitespace is removed from it.

    Args:
        answer: Reply text produced by the vision model.

    Returns:
        The extracted code, or '' when ``answer`` is not a string or does
        not contain a ':' separator (instead of raising IndexError).
    """
    if not isinstance(answer, str):
        return ''
    parts = answer.split(':', 2)
    if len(parts) < 2:
        return ''
    # Drop every whitespace character, not only spaces: a stray newline
    # would otherwise be typed into the input field by send_keys().
    return ''.join(parts[1].split())


def recognizeLoginCode(imageSrc: str) -> str:
    """Run codeDetection on the image source, log the reply, and return the parsed code."""
    result = codeDetection(imageSrc)
    print(result.content)
    loginCode = parseLoginCode(result.content)
    print(f"LoginCode: {loginCode}")
    return loginCode


def fillLoginForm(wait, loginCode: str, clearFirst: bool = False, pause: float = 0) -> None:
    """Type the account, password and verification code into the login form.

    Args:
        wait: WebDriverWait used to locate each input element.
        loginCode: Verification code to type into the code field.
        clearFirst: Clear each field before typing (used on retries).
        pause: Seconds to sleep after filling each field.
    """
    fieldValues = (
        ("input_user", 'YourAccountName'),
        ("input_password", 'YourPassword'),
        ("input_velidation_code", loginCode),
    )
    for elementId, value in fieldValues:
        field = wait.until(EC.presence_of_element_located((By.ID, elementId)))
        if clearFirst:
            field.clear()
        field.send_keys(value)
        if pause:
            time.sleep(pause)


if __name__ == '__main__':
    # Imported locally so this section brings its own dependency into scope.
    from selenium.common.exceptions import TimeoutException, WebDriverException

    if "GOOGLE_API_KEY" not in os.environ:
        os.environ["GOOGLE_API_KEY"] = getpass.getpass("Provide your Google API Key: ")
    if not os.environ["GOOGLE_API_KEY"]:
        print('ERROR, no GOOGLE_API_KEY info')
        sys.exit(1)

    output = {
        'status': False,
        'time': [],
    }

    browser = getBrowserDriver()

    start_time = time.time()
    # NOTE(review): LOGIN_URL is not defined in this section; it must be
    # provided as a module-level constant elsewhere in the file.
    browser.get(LOGIN_URL)

    # 15s timeout while waiting for the key form elements to appear.
    wait = WebDriverWait(browser, 15)
    conditions = [
        EC.presence_of_element_located((By.ID, "input_user")),
        EC.presence_of_element_located((By.ID, "input_password")),
        EC.presence_of_element_located((By.ID, "input_velidation_code")),
        EC.presence_of_element_located((By.ID, "velidation_code_image")),
        EC.presence_of_element_located((By.ID, "login_button")),
    ]
    if wait.until(lambda driver: all(condition(driver) for condition in conditions)):
        output['status'] = True
    output['time'].append(time.time() - start_time)

    # Inspect the verification-code image element.
    imageElement = wait.until(EC.presence_of_element_located((By.ID, "velidation_code_image")))
    print("Image HTML Code:", imageElement.get_attribute("outerHTML"))
    print("Image URL:", imageElement.get_attribute("src"))

    # Diagnostic only: log src changes of the image to the browser console.
    script = """
    var target = document.getElementById('velidation_code_image');
    var observer = new MutationObserver(function(mutations) {
        mutations.forEach(function(mutation) {
            if (mutation.attributeName === 'src') {
                console.log('src attribute changed:', target.getAttribute('src'));
            }
        });
    });
    var config = { attributes: true };
    observer.observe(target, config);
    """
    browser.execute_script(script)

    # Give the page some time to update the image src before reading it.
    time.sleep(5)

    updatedImageSrc = imageElement.get_attribute("src")
    print("Updated Image URL:", updatedImageSrc)
    if not updatedImageSrc:
        print('ERROR, velidation_code_image not found')
        sys.exit(1)

    # First login attempt.
    fillLoginForm(wait, recognizeLoginCode(updatedImageSrc))
    loginButton = wait.until(EC.presence_of_element_located((By.ID, "login_button")))
    start_time = time.time()
    loginButton.click()

    maxLoginRetry = 4
    loginDone = False
    loginRetry = 0
    retryWait = WebDriverWait(browser, 5)
    while True:
        # Only the absence of the error banner within 5s counts as success;
        # any other failure must not be mistaken for a successful login.
        try:
            errorElement = retryWait.until(
                EC.presence_of_element_located((By.ID, "WebsiteErrorMessage"))
            )
        except TimeoutException:
            loginDone = True
            break

        try:
            # Report the error text (e.g. wrong verification code).
            span_element = errorElement.find_element(By.TAG_NAME, "div").find_element(By.TAG_NAME, "span")
            inner_html = span_element.get_attribute('innerHTML')
            print(f"retry: {loginRetry}, inner HTML: {inner_html}")

            # Close the error message window.
            errorWindow = retryWait.until(
                EC.presence_of_element_located((By.ID, "WebsiteErrorMessageWindow"))
            )
            errorWindow.find_element(By.TAG_NAME, "button").click()

            # Stop only after the outcome of the last submission was checked.
            if loginRetry >= maxLoginRetry:
                break
            loginRetry += 1

            # Re-locate the image so a re-rendered element is not read stale.
            imageElement = retryWait.until(
                EC.presence_of_element_located((By.ID, "velidation_code_image"))
            )
            updatedImageSrc = imageElement.get_attribute("src")
            print("Updated Image URL:", updatedImageSrc)

            fillLoginForm(retryWait, recognizeLoginCode(updatedImageSrc), clearFirst=True, pause=1)
            retryWait.until(EC.presence_of_element_located((By.ID, "login_button"))).click()
        except WebDriverException as err:
            # A browser-side failure during the retry is a failed login,
            # not a successful one.
            print(f"ERROR, login retry aborted: {err}")
            break

    output['time'].append(time.time() - start_time)

    if loginDone:
        print("Login Successful")
    else:
        print(f"Login Failed with retry times: {loginRetry}")

    print(json.dumps(output, indent=4))

    # Keep the browser open until interrupted, then close it cleanly.
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        browser.quit()