# modules/or_address_handler.py
import os
import time

import pandas as pd
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


def process_addresses(driver, *, wait_timeout=20, delay=2) -> None:
    """Fill the ``main_address`` column of ``data/o-address.csv`` from Etherscan.

    For every row whose ``main_address`` is still empty, open
    ``https://etherscan.io/address/<address>`` with *driver*, wait for the
    element with id ``mainaddress`` and store its text in ``main_address``.
    Rows where the element cannot be read are marked ``"Not Found"``; because
    that marker is a non-empty value, such rows are skipped on later runs.

    The CSV is rewritten after each processed row so that an interrupted run
    keeps its progress, and once more after the loop.

    :param driver: Selenium WebDriver instance used to load the pages.
    :param wait_timeout: Maximum number of seconds to wait for the
        ``#mainaddress`` element on each page.
    :param delay: Seconds to sleep after each processed row, to avoid
        sending requests too frequently.
    :return: None. If the CSV file or its ``address`` column is missing, an
        error is printed and nothing is modified.
    """
    csv_file_path = os.path.join('data', 'o-address.csv')

    # 1. Load the CSV file.
    if not os.path.exists(csv_file_path):
        print(f"错误: 文件未找到 at {csv_file_path}")
        return

    df = pd.read_csv(csv_file_path)
    print(f"成功读取 {len(df)} 条地址。")

    if 'address' not in df.columns:
        # Without this guard the failure would surface as a bare KeyError.
        print(f"错误: {csv_file_path} 缺少 address 列")
        return

    # Create the result column up front and force object dtype:
    #  * creating it lazily inside the loop would leave the already-captured
    #    row snapshots of iterrows() without that key (KeyError on row two);
    #  * an all-empty column is parsed as float64, and writing a string into
    #    it is an upcast that recent pandas versions reject.
    if 'main_address' not in df.columns:
        df['main_address'] = None
    df['main_address'] = df['main_address'].astype(object)

    # The wait helper only depends on the driver and timeout, so build it once.
    wait = WebDriverWait(driver, wait_timeout)

    # 2. Process each address in turn.
    for index, address in df['address'].items():
        # Skip rows that already hold a non-missing, non-empty value.
        existing = df.at[index, 'main_address']
        if pd.notna(existing) and existing:
            print(f"地址 {address} 已有 main_address,跳过。")
            continue

        url = f"https://etherscan.io/address/{address}"
        print(f"正在访问: {url}")
        # Navigation errors are deliberately not caught here: they propagate
        # to the caller instead of being recorded as "Not Found".
        driver.get(url)

        try:
            main_address_element = wait.until(
                EC.presence_of_element_located((By.ID, "mainaddress"))
            )
            main_address_value = main_address_element.text
        except WebDriverException as exc:
            # Covers the wait timing out as well as other Selenium errors
            # raised while reading the element.
            print(f"  -> 无法为地址 {address} 找到 main_address。")
            print(f"     原因: {type(exc).__name__}")
            df.at[index, 'main_address'] = "Not Found"
        else:
            df.at[index, 'main_address'] = main_address_value
            print(f"  -> 成功获取 main_address: {main_address_value}")

        # Persist after every row so an interruption does not lose progress.
        df.to_csv(csv_file_path, index=False)

        # Short pause to avoid sending requests too frequently.
        time.sleep(delay)

    # 3. Final save once every row has been handled.
    df.to_csv(csv_file_path, index=False)
    print(f"\n处理完成!结果已保存到 {csv_file_path}")