wance_data/src/utils/backtest_until.py

import json
from datetime import datetime

import numpy as np
from xtquant import xtdata
import pandas as pd

# 数据的列名
columns = ['open', 'high', 'low', 'close', 'volume', 'amount', 'settelmentPrice',
           'openInterest', 'preClose', 'suspendFlag']


# 获取本地数据并进行处理
async def get_local_data(field_list: list, stock_list: list, period: str, start_time: str, end_time: str,
                         count: int, dividend_type: str, fill_data: bool, data_dir: str):
    result = xtdata.get_local_data(field_list=field_list, stock_list=stock_list, period=period, start_time=start_time,
                                   end_time=end_time, count=count, dividend_type=dividend_type, fill_data=fill_data,
                                   data_dir=data_dir)
    return await data_processing(result)


async def data_processing(result_local):
    # 初始化一个空的列表，用于存储每个股票的数据框
    df_list = []

    # 遍历字典中的 DataFrame
    for stock_code, df in result_local.items():
        # 确保 df 是一个 DataFrame
        if isinstance(df, pd.DataFrame):
            # 将时间戳转换为日期时间格式，只保留年-月-日
            df['time'] = pd.to_datetime(df['time'], unit='ms').dt.date
            # 将 'time' 列设置为索引，保留为日期格式
            df.set_index('time', inplace=True)
            # 将 'close' 列重命名为 'close_股票代码'
            df.rename(columns={'close': f'close_{stock_code}'}, inplace=True)
            # 将所有列添加到列表中
            df_list.append(df)  # 保留所有字段，包括重命名后的 'close_股票代码'
        else:
            print(f"数据格式错误: {stock_code} 不包含 DataFrame")

    # 使用 pd.concat() 将所有 DataFrame 合并为一个大的 DataFrame，保留所有列
    combined_df = pd.concat(df_list, axis=1)

    # 确保返回的 DataFrame 索引是日期格式
    combined_df.index = pd.to_datetime(combined_df.index)

    return combined_df


def convert_pandas_to_json_serializable(data: pd.Series) -> str:
    """
    将 Pandas Series 或 DataFrame 中的 Timestamp 索引转换为字符串，并返回 JSON 可序列化的结果。

    参数:
    data: pd.Series 或 pd.DataFrame, 带有时间戳索引的 pandas 数据

    返回:
    JSON 字符串，键为日期字符串，值为原数据的值。
    """
    # 判断数据类型
    if isinstance(data, (pd.Series, pd.DataFrame)):
        # 如果索引是时间戳类型，则转换为 YYYYMMDD 格式
        if isinstance(data.index, pd.DatetimeIndex):
            data.index = data.index.strftime('%Y%m%d')

        # 处理 NaN 和 None 的情况，替换为 0 或其他合适的默认值
        data = data.replace([np.nan, None], 0)

        # 将索引重置为普通列，然后转换为字典
        json_serializable_data = data.rename_axis('date').reset_index().to_dict(orient='records')

        # 将字典转换为 JSON 格式字符串
        json_string = json.dumps(json_serializable_data)
        return json_string
    else:
        raise ValueError("输入必须为 Pandas Series 或 DataFrame")


def clean_value(value):
    """清理数据值，使其适合 JSON 序列化。"""
    if pd.isna(value) or np.isinf(value):
        return 0  # 或者可以使用 None 代替
    return value


def to_json_serializable(data):
    """
    将 Pandas DataFrame 转换为 JSON 可序列化的格式。
    - 将索引（如果是时间戳）转换为 'YYYYMMDD' 格式。
    - 将数据按时间升序排列。
    - 返回数组形式的结构，内部仍使用字典存放每一行数据。
    """
    result = []

    if isinstance(data, pd.DataFrame):
        # 将索引（时间戳）转换为 YYYYMMDD 格式
        data.index = data.index.strftime('%Y%m%d')

        # 按时间（索引）升序排序
        data = data.sort_index(ascending=True)

        # 遍历 DataFrame 的每一行
        for idx, row in data.iterrows():
            # 创建一个字典来存放当前行的数据
            row_dict = {'date': idx}  # 将日期作为键
            # 清理行中的每个值
            for col in row.index:
                row_dict[col] = clean_value(row[col])  # 清理每个值
            result.append(row_dict)  # 将字典添加到结果数组中

        return result

    elif isinstance(data, pd.Series):
        # 将 Series 转换为数组形式（每一行的数据以字典形式存放）
        for idx in data.index:
            result.append({str(idx.strftime('%Y%m%d')): clean_value(data[idx])})
        return result

    else:
        raise TypeError("输入数据必须是 Pandas 的 DataFrame 或 Series")

async def data_check(field_list: list,
                     stock_list: list,
                     period: str = '1d',
                     start_time: str = '',
                     end_time: str = '',
                     count: int = -1,
                     dividend_type: str = 'none',
                     fill_data: bool = True,
                     data_dir: str = '', ):
    result_data =  xtdata.get_local_data(field_list=[], stock_list=stock_list, period=period, start_time=start_time,
                          end_time=end_time, count=count, dividend_type=dividend_type, fill_data=fill_data,
                          data_dir=data_dir)
    time_now = int(datetime.now().strftime('%Y%m%d'))
    for i in stock_list:
        close = int(result_data.get(i).index[-1])
        if close != 0 and close < time_now:
            xtdata.download_history_data(stock_code=i,
                                                period='1d',
                                                start_time='',
                                                end_time='',
                                                incrementally=True)