Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

V0.9.59 更新一批代码 #213

Merged
merged 11 commits into from
Sep 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/pythonpackage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ name: Python package

on:
push:
branches: [ master, V0.9.58 ]
branches: [ master, V0.9.59 ]
pull_request:
branches: [ master ]

Expand Down
9 changes: 7 additions & 2 deletions czsc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
from czsc.utils import (
mac_address,
overlap,
to_arrow,

format_standard_kline,

Expand All @@ -65,6 +66,7 @@
save_json,
get_sub_elements,
get_py_namespace,
code_namespace,
freqs_sorted,
x_round,
import_by_name,
Expand Down Expand Up @@ -155,6 +157,7 @@
show_strategies_recent,
show_factor_value,
show_code_editor,
show_classify,
)

from czsc.utils.bi_info import (
Expand Down Expand Up @@ -204,13 +207,15 @@
cross_sectional_strategy,
judge_factor_direction,
monotonicity,
min_max_limit,
rolling_layers,
)


__version__ = "0.9.58"
__version__ = "0.9.59"
__author__ = "zengbin93"
__email__ = "[email protected]"
__date__ = "20240808"
__date__ = "20240901"


def welcome():
Expand Down
63 changes: 63 additions & 0 deletions czsc/eda.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,3 +159,66 @@ def monotonicity(sequence):
"""
from scipy.stats import spearmanr
return spearmanr(sequence, range(len(sequence)))[0]


def min_max_limit(x, min_val, max_val, digits=4):
    """Clamp ``x`` into the range ``[min_val, max_val]`` and round the result.

    :param x: float, value to clamp
    :param min_val: float, lower bound
    :param max_val: float, upper bound
    :param digits: int, number of decimal places kept by the final rounding
    :return: the clamped value, rounded to ``digits`` decimal places
    """
    # The upper bound is applied first and the lower bound second, so the
    # lower bound wins if the two bounds are ever passed in inverted order.
    upper_capped = x if x < max_val else max_val
    clamped = upper_capped if upper_capped > min_val else min_val
    return round(clamped, digits)


def rolling_layers(df, factor, n=5, **kwargs):
    """Assign each row of a time series to a rolling quantile layer.

    A column named ``f"{factor}分层"`` is added to ``df`` in place, holding
    labels of the form ``第01层`` .. ``第0n层``. Rows whose rolling window has
    fewer than ``min_periods`` observations get the label ``第00层``.

    :param df: factor data; must contain a ``dt`` column (dates, no
        duplicates) and the ``factor`` column (no NaN, no +/-inf)
    :param factor: name of the factor column
    :param n: number of layers, default 5
    :param kwargs:

        - window: rolling window size, default 600
        - min_periods: minimum observations required in a window, default 300
        - mode: str, {'loose', 'strict'}, layering mode, default 'loose';
          'loose' ranks the latest value inside the rolling window
          (rolling + rank) and cuts the percentile rank into ``n`` equal bins;
          'strict' runs ``pd.qcut`` on every rolling window (rolling + qcut)
          and keeps the bucket of the latest value, which is slower.
          NOTE(review): the original author states that 'loose' may carry a
          small amount of look-ahead information while 'strict' does not;
          confirm before relying on it.

    :return: the same ``df`` object, with the ``f"{factor}分层"`` column added
    :raises AssertionError: on too few distinct factor values, NaN factor
        values, duplicated ``dt`` values, or an unknown ``mode``
    :raises ValueError: if the factor column contains inf / -inf
    """
    assert df[factor].nunique() > n * 2, "因子值的取值数量必须大于分层数量"
    na_count = df[factor].isna().sum()
    assert na_count == 0, "因子有缺失值,缺失数量为:{}".format(na_count)
    dup_count = df['dt'].duplicated().sum()
    assert dup_count == 0, f"dt 列不能有重复值,存在重复值数量:{dup_count}"

    window = kwargs.get("window", 600)
    min_periods = kwargs.get("min_periods", 300)
    mode = kwargs.get("mode", "loose")

    # inf / -inf are rejected up front, before any ranking or binning
    if df[factor].isin([float("inf"), float("-inf")]).any():
        raise ValueError(f"存在 {factor} 为 inf / -inf 的数据")

    roller = df[factor].rolling(window=window, min_periods=min_periods)

    # Intermediate results are kept in local Series instead of scratch columns
    # on ``df``, so caller columns that happen to be named ``pct_rank``,
    # ``pct_rank_cut`` or ``{factor}_qcut`` are never overwritten or dropped.
    if mode == "loose":
        pct_rank = roller.rank(pct=True, ascending=True)
        bins = [i / n for i in range(n + 1)]
        layer_idx = pd.cut(pct_rank, bins=bins, labels=False)
    else:
        assert mode == "strict", f"mode must be 'loose' or 'strict', got {mode!r}"
        layer_idx = roller.apply(
            lambda x: pd.qcut(x, q=n, labels=False, duplicates="drop", retbins=False).values[-1],
            raw=False,
        )

    # Missing layers (window not yet filled) become -1, i.e. label 第00层
    layer_idx = layer_idx.fillna(-1)
    df[f"{factor}分层"] = layer_idx.apply(lambda x: f"第{str(int(x+1)).zfill(2)}层")
    return df
3 changes: 2 additions & 1 deletion czsc/traders/rwc.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ def publish_dataframe(self, df, overwrite=False, batch_size=10000):
:param df: pandas.DataFrame, 必需包含['symbol', 'dt', 'weight']列,
可选['price', 'ref']列, 如没有price则写0, dtype同publish方法
:param overwrite: boolean, 是否覆盖已有记录
:param batch_size: int, 每次发布的最大数量
:return: 成功发布信号的条数
"""
df = df.copy()
Expand Down Expand Up @@ -392,7 +393,7 @@ def get_hist_weights(self, symbol, sdt, edt) -> pd.DataFrame:
price = price if price is None else float(price)
try:
ref = json.loads(ref)
except Exception:
except Exception as e:
ref = ref
weights.append((self.strategy_name, symbol, dt, weight, price, ref))

Expand Down
4 changes: 3 additions & 1 deletion czsc/traders/weight_backtest.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,7 @@ def __init__(self, dfw, digits=2, **kwargs) -> None:
"""
self.kwargs = kwargs
self.dfw = dfw.copy()
self.dfw["dt"] = pd.to_datetime(self.dfw["dt"])
if self.dfw.isnull().sum().sum() > 0:
raise ValueError("dfw 中存在空值, 请先处理")
self.digits = digits
Expand Down Expand Up @@ -553,9 +554,10 @@ def backtest(self, n_jobs=1):
dret = pd.concat([v["daily"] for k, v in res.items() if k in symbols], ignore_index=True)
dret = pd.pivot_table(dret, index="date", columns="symbol", values="return").fillna(0)
dret["total"] = dret[list(res.keys())].mean(axis=1)
dret = dret.round(4).reset_index()
res["品种等权日收益"] = dret

stats = {"开始日期": dret.index.min().strftime("%Y%m%d"), "结束日期": dret.index.max().strftime("%Y%m%d")}
stats = {"开始日期": dret["date"].min().strftime("%Y%m%d"), "结束日期": dret["date"].max().strftime("%Y%m%d")}
stats.update(daily_performance(dret["total"]))
dfp = pd.concat([v["pairs"] for k, v in res.items() if k in symbols], ignore_index=True)
pairs_stats = evaluate_pairs(dfp)
Expand Down
27 changes: 27 additions & 0 deletions czsc/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# coding: utf-8
import os
import pandas as pd
from typing import List, Union

from . import qywx
Expand Down Expand Up @@ -95,6 +96,20 @@ def get_py_namespace(file_py: str, keys: list = []) -> dict:
return namespace


def code_namespace(code: str, keys: Union[list, None] = None) -> dict:
    """Execute a piece of Python source and return the resulting namespace.

    WARNING: ``code`` is run with ``exec``, so it can do anything the current
    process can do. Only pass source that comes from a trusted origin.

    :param code: Python source code to execute
    :param keys: names of the objects to keep; when empty or None, the whole
        namespace is returned (including the ``code`` entry and whatever
        ``exec`` itself adds, such as ``__builtins__``)
    :return: dict mapping names to the objects defined by ``code``
    """
    # The source text is seeded into the namespace under the "code" key, so
    # the executed code (and the caller) can read it back.
    namespace = {"code": code}
    exec(code, namespace)
    if keys:
        namespace = {k: v for k, v in namespace.items() if k in keys}
    return namespace


def import_by_name(name):
"""通过字符串导入模块、类、函数

Expand Down Expand Up @@ -199,3 +214,15 @@ def mac_address():
x = uuid.UUID(int=uuid.getnode()).hex[-12:].upper()
x = "-".join([x[i : i + 2] for i in range(0, 11, 2)])
return x


def to_arrow(df: pd.DataFrame) -> bytes:
    """Serialize a pandas.DataFrame to Arrow IPC file-format bytes.

    The frame is first converted to a ``pyarrow.Table`` and then written with
    ``pyarrow.ipc.new_file`` into an in-memory buffer; the buffer content is
    what gets returned, not the table itself.

    :param df: the DataFrame to serialize
    :return: bytes holding the Arrow IPC file produced from ``df``
    """
    import io
    import pyarrow as pa

    table = pa.Table.from_pandas(df)
    with io.BytesIO() as sink:
        with pa.ipc.new_file(sink, table.schema) as writer:
            writer.write_table(table)
        # Read the buffer only after the writer context has exited and
        # before the BytesIO itself is closed.
        return sink.getvalue()
1 change: 1 addition & 0 deletions czsc/utils/bar_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def is_trading_time(dt: datetime = datetime.now(), market="A股"):
def get_intraday_times(freq="1分钟", market="A股"):
"""获取指定市场的交易时间段

:param freq: K线周期,如 1分钟、5分钟、15分钟、30分钟、60分钟
:param market: 市场名称,可选值:A股、期货、默认
:return: 交易时间段列表
"""
Expand Down
Loading
Loading