代码优化
This commit is contained in:
96
coinbus/Macroeconomic_USTreasuriesYields_v2.py
Normal file
96
coinbus/Macroeconomic_USTreasuriesYields_v2.py
Normal file
@@ -0,0 +1,96 @@
|
||||
import pymysql
|
||||
import time
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
from selenium.webdriver.chrome.service import Service
|
||||
from w3lib.html import remove_tags
|
||||
from bs4 import BeautifulSoup
|
||||
from datetime import datetime
|
||||
|
||||
# 配置 Selenium
|
||||
chrome_options = Options()
|
||||
chrome_options.add_argument("--headless")
|
||||
chrome_options.add_argument('--no-sandbox')
|
||||
chrome_options.add_argument('--disable-gpu')
|
||||
chrome_options.add_argument('--disable-dev-shm-usage')
|
||||
chrome_options.add_argument('blink-settings=imagesEnabled=false')
|
||||
|
||||
# 在 Selenium 3 中,直接指定 chrome_options 参数即可
|
||||
browser = webdriver.Chrome(executable_path="chromedriver", options=chrome_options)
|
||||
# 将月份映射整理成一个字典
|
||||
MONTH_MAPPING = {
|
||||
"Jan": "/1/", "Feb": "/2/", "Mar": "/3/", "Apr": "/4/",
|
||||
"May": "/5/", "Jun": "/6/", "Jul": "/7/", "Aug": "/8/",
|
||||
"Sep": "/9/", "Oct": "/10/", "Nov": "/11/", "Dec": "/12/"
|
||||
}
|
||||
|
||||
# 数据库连接配置
|
||||
DB_CONFIG = {
|
||||
"host": "127.0.0.1",
|
||||
"user": "root",
|
||||
"password": "2GS@bPYcgiMyL14A",
|
||||
"database": "Macroeconomics",
|
||||
"port": 4423
|
||||
}
|
||||
|
||||
def fetch_web_data():
|
||||
"""抓取网页数据并解析日期和利率数据"""
|
||||
browser.get("https://www.federalreserve.gov/releases/h15/")
|
||||
soup = BeautifulSoup(browser.page_source, 'html.parser')
|
||||
|
||||
# 获取日期
|
||||
date_text = soup.find_all('th', class_="colhead sticky sticky-row-cell")[-1].get_text(strip=True)
|
||||
for month, replacement in MONTH_MAPPING.items():
|
||||
date_text = date_text.replace(month, replacement)
|
||||
date = datetime.strptime(date_text.replace('*', ''), '%Y/%m/%d')
|
||||
|
||||
# 获取利率数据
|
||||
data = [remove_tags(str(td)).strip() for td in soup.find_all('td', class_="data")]
|
||||
return date, data
|
||||
|
||||
def get_latest_db_date():
|
||||
"""从数据库获取最新日期"""
|
||||
with pymysql.connect(**DB_CONFIG) as conn:
|
||||
with conn.cursor() as cursor:
|
||||
cursor.execute("SELECT MAX(date) FROM USTreasuriesYields")
|
||||
result = cursor.fetchone()
|
||||
return result[0] if result[0] else None
|
||||
|
||||
def insert_data(date, rates, mprime, dpcredit):
|
||||
"""插入数据到数据库"""
|
||||
with pymysql.connect(**DB_CONFIG) as conn:
|
||||
with conn.cursor() as cursor:
|
||||
sql_treasuries = """
|
||||
INSERT INTO USTreasuriesYields (date, 1_Mo, 3_Mo, 6_Mo, 1_Yr, 2_Yr, 5_Yr, 10_Yr, 20_Yr, 30_Yr)
|
||||
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
|
||||
"""
|
||||
sql_interest_rate = """
|
||||
INSERT INTO InterestRate (date, name, _value)
|
||||
VALUES (%s, %s, %s)
|
||||
"""
|
||||
cursor.execute(sql_treasuries, [date] + rates)
|
||||
cursor.execute(sql_interest_rate, (date, 'BPL', mprime))
|
||||
cursor.execute(sql_interest_rate, (date, 'DWPC', dpcredit))
|
||||
conn.commit()
|
||||
|
||||
def main():
|
||||
while True:
|
||||
new_date, data = fetch_web_data()
|
||||
old_date = get_latest_db_date()
|
||||
|
||||
if old_date and new_date <= old_date:
|
||||
time.sleep(21600) # 6小时
|
||||
continue
|
||||
|
||||
# 提取利率数据
|
||||
rates = [
|
||||
data[i].replace('ND', 'NULL') if i < len(data) else 'NULL'
|
||||
for i in [104, 109, 114, 119, 124, 134, 144, 149, 154]
|
||||
]
|
||||
mprime = data[54] if len(data) > 54 else 'NULL'
|
||||
dpcredit = data[59] if len(data) > 59 else 'NULL'
|
||||
|
||||
insert_data(new_date, rates, mprime, dpcredit)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user