注意事项
API 调用有频率限制,请注意控制请求频率。妥善保管 API 密钥,不要将其硬编码在代码中。处理好异常情况,确保程序稳定运行。获取大量数据时,请考虑分批处理。
数据API参考
# Tushare Pro quick start: authenticate, then fetch the stock list, daily
# bars, financial indicators and index constituents.
import os

import tushare as ts
import pandas as pd

# Read the token from the environment so a real credential never has to be
# written into the source; the placeholder is used only when TUSHARE_TOKEN
# is not set.
ts.set_token(os.environ.get('TUSHARE_TOKEN', 'your_token'))
pro = ts.pro_api()

# Stock list
stock_list = pro.stock_basic(exchange='', list_status='L')
print(stock_list.head())

# Daily bars
df = pro.daily(
    ts_code='000001.SZ',
    start_date='20240101',
    end_date='20241231',
)
print(df.head())

# Financial indicators
indicators = pro.fina_indicator(
    ts_code='000001.SZ',
    start_date='20240101',
    end_date='20241231',
)
print(indicators.head())

# Index constituents
index_stocks = pro.index_weight(
    index_code='000300.SH',
    start_date='20240101',
    end_date='20240131',
)
print(index_stocks.head())
# JoinQuant API imports
from joinquant import api
from joinquant import query
# NOTE(review): `valuation` was referenced in the query below without ever
# being defined. It is imported here on the assumption that it is exported
# from the same package as `query` — confirm against the installed package.
from joinquant import valuation

# Price data
prices = api.get_price(
    '000001.XSHE',
    start_date='2024-01-01',
    end_date='2024-12-31',
    fields=['open', 'high', 'low', 'close', 'volume'],
)
print(prices.head())

# Fundamental data. The result gets its own name so it does not overwrite the
# `valuation` object that the query is built from.
valuation_df = api.get_fundamentals(
    query(valuation).filter(valuation.code == '000001.XSHE')
)
print(valuation_df.head())

# Industry constituents
industry_stocks = api.get_industry_stocks('医药生物')
print(industry_stocks[:10])

# Technical indicator computed from the close prices fetched above
ma = api.MA(prices['close'], 20)
print(ma.tail())
# Uqer data API
from uqer import DataAPI

# Daily bars
df = DataAPI.MktEqudGet(secID='000001.XSHE', beginDate='20240101', endDate='20241231')
print(df.head())

# Adjusted daily bars
df_adj = DataAPI.MktEqudAdjGet(secID='000001.XSHE', beginDate='20240101', endDate='20241231')
print(df_adj.head())

# Financial indicators
indicators = DataAPI.FinIndicatorGet(secID='000001.XSHE', beginDate='20240101', endDate='20241231')
print(indicators.head())

# Industry classification
industry = DataAPI.SecIndustryGet(secID='000001.XSHE')
print(industry.head())
# Install yfinance first:
#   pip install yfinance
import yfinance as yf

# Ticker object that all queries below go through
stock = yf.Ticker('AAPL')

# Historical data
hist = stock.history(period='1y')
print(hist.head())

# Company information
info = stock.info
print(f"公司名称: {info.get('longName')}")
print(f"市值: {info.get('marketCap')}")

# Financial statements (attributes are read in the same order as before)
financials, balance_sheet, cash_flow = (
    stock.financials,
    stock.balance_sheet,
    stock.cashflow,
)

# Analyst recommendations
recommendations = stock.recommendations
print(recommendations.head())
# Call a REST API with requests and load the payload into a DataFrame.
import os

import requests
import json

# Endpoint and query parameters
url = "https://api.example.com/stock/data"
params = {
    'symbol': 'AAPL',
    'start_date': '2024-01-01',
    'end_date': '2024-12-31',
}

# Authentication headers. The bearer token is read from the environment so a
# real credential is never written into the source; the placeholder is used
# only when STOCK_API_TOKEN is not set.
api_token = os.environ.get('STOCK_API_TOKEN', 'your_token')
headers = {
    'Authorization': f'Bearer {api_token}',
    'Content-Type': 'application/json',
}

# Send the request. A timeout is passed explicitly: without one, requests
# waits indefinitely on a server that never responds.
response = requests.get(url, params=params, headers=headers, timeout=10)

# Check the response
if response.status_code == 200:
    data = response.json()
    df = pd.DataFrame(data['data'])
    print(df.head())
else:
    print(f"请求失败: {response.status_code}")
    print(response.text)
# Receive real-time data over a WebSocket.
import asyncio
import websockets
import json


async def subscribe_ticker():
    """Subscribe to the ticker stream and print every message received.

    Sends a subscribe request for AAPL and GOOGL, then keeps reading
    messages. The first exception raised while receiving or handling a
    message is printed and ends the subscription.
    """
    endpoint = "wss://api.example.com/stream"
    async with websockets.connect(endpoint) as ws:
        # Subscription request
        request = {"action": "subscribe", "symbols": ["AAPL", "GOOGL"]}
        await ws.send(json.dumps(request))

        # One handler around the whole receive loop: any failure is reported
        # once and the loop stops.
        try:
            while True:
                data = json.loads(await ws.recv())
                print(f"收到数据: {data}")
                # Messages carrying a price get a compact symbol/price line.
                if 'price' in data:
                    print(f"{data['symbol']}: {data['price']}")
        except Exception as e:
            print(f"接收错误: {e}")


# Run the subscription
asyncio.run(subscribe_ticker())
# Persist data to a database and to flat files.
import os

import pandas as pd
from sqlalchemy import create_engine, text

# Database connection. The URL (which embeds credentials) is read from the
# environment; the placeholder URL is used only when DATABASE_URL is not set.
engine = create_engine(
    os.environ.get('DATABASE_URL', 'mysql+pymysql://user:password@host/database')
)

# Save to MySQL; an existing `stock_data` table is replaced.
df.to_sql('stock_data', engine, if_exists='replace', index=False)

# Read back from the database. The symbol is passed as a bound parameter
# rather than inlined as a double-quoted literal, which is non-standard SQL
# and is read as an identifier when ANSI quoting is in effect.
df_read = pd.read_sql(
    text('SELECT * FROM stock_data WHERE symbol = :symbol'),
    engine,
    params={'symbol': 'AAPL'},
)
print(df_read.head())

# Save to CSV
df.to_csv('stock_data.csv', index=False, encoding='utf-8')

# Save to Excel
df.to_excel('stock_data.xlsx', index=False)
# Data processing pipeline: clean, transform, filter and aggregate price data.
import pandas as pd
import numpy as np

# 1. Cleaning
df = pd.read_csv('raw_data.csv')
# Remove duplicate rows
df = df.drop_duplicates()
# Drop rows with missing values (alternative: df.ffill() to forward-fill)
df = df.dropna()

# 2. Transformation
df['date'] = pd.to_datetime(df['date'])
# Order rows chronologically before any row-order-dependent computation:
# pct_change and rolling work on row position, and label slicing on a
# DatetimeIndex needs a monotonic index. The stable sort keeps rows sharing
# a date in their original relative order.
df = df.set_index('date').sort_index(kind='mergesort')

# Returns
df['returns'] = df['close'].pct_change()

# Technical indicators: 20-row moving average and bands at two standard
# deviations either side of it
df['ma20'] = df['close'].rolling(20).mean()
df['std20'] = df['close'].rolling(20).std()
df['upper_band'] = df['ma20'] + 2 * df['std20']
df['lower_band'] = df['ma20'] - 2 * df['std20']

# 3. Filtering
# Rows within a date range (explicit label-based selection)
df_filtered = df.loc['2024-01-01':'2024-12-31']
# Rows with a positive return
df_positive = df[df['returns'] > 0]

# 4. Aggregation: mean return per calendar month. An offset object is used
# instead of the 'M' string alias, which pandas 2.2 deprecated in favour of
# 'ME'; the object form is accepted on both sides of that change.
monthly_returns = df['returns'].resample(pd.offsets.MonthEnd()).mean()
print(monthly_returns)
API 调用有频率限制,请注意控制请求频率。妥善保管 API 密钥,不要将其硬编码在代码中。处理好异常情况,确保程序稳定运行。获取大量数据时,请考虑分批处理。