有些工作任務需要去下載表單做一些自動化應用,因此有了要自動登入的需求,當然也會碰到認證碼辨識問題。此篇是延續 Python 開發筆記 - 使用 Google AI, Generative Language API, gemini-pro-vision 辨識圖片認證碼 。
整個處理原理:
- 使用 Selenium 偵測網頁的載入狀態,並定位登入表單所需的元素:帳號欄位、密碼欄位、認證碼圖片、認證碼輸入欄位、登入按鈕
- 使用 ChatGoogleGenerativeAI/gemini-pro-vision 分析圖片內容,設法分析出認證碼數值
- 觸發 登入按鈕 送出表單
- 檢視登入結果:檢查是否出現登入失敗的訊息(或是反過來思考怎樣判斷登入成功);若登入失敗,則回到第一步重新取得新的認證碼圖片並再次辨識
引入的函式庫:
import base64
import getpass
import json
import os
import sys
import time

import undetected_chromedriver as uc
from langchain_core.messages import HumanMessage
from langchain_google_genai import ChatGoogleGenerativeAI
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
先採用 undetected_chromedriver 來包裝一下取得 browser driver:
def getBrowserDriver():
    """Build a Chrome driver through undetected_chromedriver.

    The only option set is a fixed desktop Chrome user-agent string.

    Returns:
        The ``uc.Chrome`` driver instance.
    """
    chromeOptions = uc.ChromeOptions()
    chromeOptions.add_argument(
        '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/80.0.3987.163 Safari/537.36'
    )
    # Optional flags intentionally left disabled: '--window-size=W,H' and
    # '--headless'.
    return uc.Chrome(options=chromeOptions)
辨別圖片文字靠 ChatGoogleGenerativeAI model="gemini-pro-vision":
def codeDetection(imageBase64URL: str):
    """Ask the ``gemini-pro-vision`` chat model to read the text in an image.

    A single human message is sent, made of a fixed text instruction followed
    by the image reference.

    Args:
        imageBase64URL: Value forwarded unchanged as the ``image_url`` part of
            the message.

    Returns:
        Whatever ``llm.invoke`` returns; callers in this file read its
        ``content`` attribute, expected to look like 'The answer is: XXXX'.
    """
    visionModel = ChatGoogleGenerativeAI(model="gemini-pro-vision")

    instructionPart = {
        "type": "text",
        "text": "Please identify the English or numbers appearing in the image. The output format is 'The answer is: XXXX'",
    }
    imagePart = {"type": "image_url", "image_url": imageBase64URL}

    request = HumanMessage(content=[instructionPart, imagePart])
    return visionModel.invoke([request])
處理流程:
def extractLoginCode(answer):
    """Extract the captcha text from a model answer.

    The model is asked to reply as 'The answer is: XXXX'. The text between the
    first and second colon is used, and all whitespace (spaces, tabs,
    newlines) is removed from it. When the answer contains no colon at all,
    the whole answer is used instead of raising ``IndexError``.

    Args:
        answer: The text returned by the model.

    Returns:
        The captcha code with every whitespace character removed.
    """
    pieces = answer.split(':', 2)
    segment = pieces[1] if len(pieces) > 1 else answer
    return ''.join(segment.split())


def _readLoginCode(imageElement):
    """Read the captcha image ``src``, send it to codeDetection, return the code.

    Exits the process with status 1 when the image has no ``src``.
    """
    imageSrc = imageElement.get_attribute("src")
    print("Updated Image URL:", imageSrc)
    if not imageSrc:
        print('ERROR, velidation_code_image not found')
        sys.exit(1)

    result = codeDetection(imageSrc)
    print(result.content)

    loginCode = extractLoginCode(result.content)
    print(f"LoginCode: {loginCode}")
    return loginCode


def _fillLoginForm(wait, loginCode, pause=0):
    """Type the account, password and captcha code into the login form.

    Args:
        wait: ``WebDriverWait`` used to locate each input.
        loginCode: Captcha text to type into the validation-code input.
        pause: Seconds to sleep after each field (0 disables the pause).
    """
    fields = (
        ("input_user", 'YourAccountName'),
        ("input_password", 'YourPassword'),
        ("input_velidation_code", loginCode),
    )
    for elementId, value in fields:
        element = wait.until(EC.presence_of_element_located((By.ID, elementId)))
        # Clearing first keeps a retry from appending to the previous input.
        element.clear()
        element.send_keys(value)
        if pause:
            time.sleep(pause)


def _clickLoginButton(wait):
    """Locate the login button and click it."""
    button = wait.until(EC.presence_of_element_located((By.ID, "login_button")))
    button.click()


if __name__ == '__main__':
    if "GOOGLE_API_KEY" not in os.environ:
        os.environ["GOOGLE_API_KEY"] = getpass.getpass("Provide your Google API Key: ")
    if not os.environ["GOOGLE_API_KEY"]:
        print('ERROR, no GOOGLE_API_KEY info')
        sys.exit(1)

    output = {
        'status': False,
        'time': [],
    }

    browser = getBrowserDriver()
    start_time = time.time()
    # NOTE(review): LOGIN_URL is not defined in the visible listing; it must be
    # set to the address of the login page before running.
    browser.get(LOGIN_URL)

    # 15s timeout
    wait = WebDriverWait(browser, 15)

    # Wait until every key form element is present in the page.
    conditions = [
        EC.presence_of_element_located((By.ID, "input_user")),
        EC.presence_of_element_located((By.ID, "input_password")),
        EC.presence_of_element_located((By.ID, "input_velidation_code")),
        EC.presence_of_element_located((By.ID, "velidation_code_image")),
        EC.presence_of_element_located((By.ID, "login_button")),
    ]
    if wait.until(lambda driver: all(condition(driver) for condition in conditions)):
        output['status'] = True
    output['time'].append(time.time() - start_time)

    # Captcha image element, plus some debugging output about it.
    imageElement = wait.until(EC.presence_of_element_located((By.ID, "velidation_code_image")))
    print("Image HTML Code:", imageElement.get_attribute("outerHTML"))
    print("Image URL:", imageElement.get_attribute("src"))

    # Install a MutationObserver that logs changes of the image's src
    # attribute to the browser console.
    script = """
        var target = document.getElementById('velidation_code_image');
        var observer = new MutationObserver(function(mutations) {
            mutations.forEach(function(mutation) {
                if (mutation.attributeName === 'src') {
                    console.log('src attribute changed:', target.getAttribute('src'));
                }
            });
        });
        var config = { attributes: true };
        observer.observe(target, config);
    """
    browser.execute_script(script)

    # Give the page some time to update the image src before reading it.
    time.sleep(5)

    loginCode = _readLoginCode(imageElement)
    _fillLoginForm(wait, loginCode)
    start_time = time.time()
    _clickLoginButton(wait)

    loginDone = False
    loginRetry = 0
    # Shorter timeout for everything that happens after a login click.
    shortWait = WebDriverWait(browser, 5)
    while True:
        # Only the absence of the error box within the timeout counts as a
        # successful login; any other exception is a real failure and is
        # allowed to propagate instead of being reported as success.
        try:
            errorBox = shortWait.until(
                EC.presence_of_element_located((By.ID, "WebsiteErrorMessage"))
            )
        except TimeoutException:
            loginDone = True
            break

        # The error box is shown, e.g. because the validation code was wrong.
        div_element = errorBox.find_element(By.TAG_NAME, "div")
        span_element = div_element.find_element(By.TAG_NAME, "span")
        inner_html = span_element.get_attribute('innerHTML')
        print(f"retry: {loginRetry}, inner HTML: {inner_html}")

        # Close the error message window.
        errorWindow = shortWait.until(
            EC.presence_of_element_located((By.ID, "WebsiteErrorMessageWindow"))
        )
        errorWindow.find_element(By.TAG_NAME, "button").click()

        # Stop once the retry budget is used up. The check sits after the
        # outcome test above so the result of the last click is also examined.
        if loginRetry > 3:
            break
        loginRetry += 1

        loginCode = _readLoginCode(imageElement)
        _fillLoginForm(shortWait, loginCode, pause=1)
        _clickLoginButton(shortWait)

    output['time'].append(time.time() - start_time)

    if loginDone:
        print("Login Successful")
    else:
        print(f"Login Failed with retry times: {loginRetry}")

    print(json.dumps(output, indent=4))

    # Idle forever; presumably this keeps the browser session open for
    # further work after login.
    while True:
        time.sleep(1)
沒有留言:
張貼留言