{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# coding=utf-8\n",
    "import string\n",
    "\n",
    "import pandas as pd\n",
    "import alphalens\n",
    "\n",
    "# Threshold an IC observation must exceed to count towards IC_lgtratio\n",
    "IC_LGT_STANDARD = 0.02\n",
    "\n",
    "# Scoring thresholds used by get_result(): one point per criterion met\n",
    "MEAN_FACTOR_RETURN_STANDARD = 0.001  # minimum |mean factor return|\n",
    "IC_MEAN_STANDARD = 0.015  # minimum |mean IC|\n",
    "IC_STD_STANDARD = 0.1  # maximum IC standard deviation\n",
    "IC_LGTRATIO_STANDARD = 0.5  # minimum share of IC values above IC_LGT_STANDARD\n",
    "IR_STANDARD = 0.15  # minimum |IR|\n",
    "\n",
    "start_date = \"20170101\"\n",
    "end_date = \"20180101\"\n",
    "stock_list = industry(\"C27\")\n",
    "\n",
    "factor_list = [\n",
    "    \"eod_derivative_indicator.pe_ratio\",\n",
    "    \"eod_derivative_indicator.pcf_ratio\",\n",
    "    \"eod_derivative_indicator.pb_ratio\",\n",
    "    \"eod_derivative_indicator.market_cap\",\n",
    "    \"eod_derivative_indicator.ps_ratio\",\n",
    "    \"financial_indicator.return_on_invested_capital\",\n",
    "    \"financial_indicator.du_return_on_equity\",\n",
    "    \"financial_indicator.return_on_asset_net_profit\",\n",
    "    \"financial_indicator.return_on_equity\",\n",
    "    \"financial_indicator.return_on_asset\",\n",
    "    \"financial_indicator.earnings_per_share\",\n",
    "    \"financial_indicator.net_profit_to_revenue\",\n",
    "    \"financial_indicator.inc_revenue\",\n",
    "    \"financial_indicator.inc_total_asset\",\n",
    "    \"financial_indicator.inc_net_profit\",\n",
    "    \"financial_indicator.inc_earnings_per_share\",\n",
    "    \"financial_indicator.inc_operating_revenue\",\n",
    "]\n",
    "\n",
    "\n",
    "def get_cls(factor_str):\n",
    "    '''Resolve a \"<table>.<field>\" factor name to the attribute fundamentals.<table>.<field>.\n",
    "\n",
    "    :param factor_str: factor name containing exactly one dot\n",
    "    :return: the attribute object looked up on `fundamentals`\n",
    "    '''\n",
    "    table_name, field_name = factor_str.split(\".\")\n",
    "    table = getattr(fundamentals, table_name)\n",
    "    return getattr(table, field_name)\n",
    "\n",
    "\n",
    "def build_factor_list():\n",
    "    '''List the candidate factor names exposed by four `fundamentals` tables.\n",
    "\n",
    "    Every public attribute (name not starting with \"_\") of each table is collected as\n",
    "    \"<table>.<field>\"; names ending in one of the excluded endings or in a digit are dropped.\n",
    "\n",
    "    :return: list of factor name strings, in table order\n",
    "    '''\n",
    "    table_names = [\n",
    "        \"eod_derivative_indicator\",\n",
    "        \"income_statement\",\n",
    "        \"financial_indicator\",\n",
    "        \"income_statement_TTM\",\n",
    "    ]\n",
    "    # Endings excluded by the original author (presumably non-factor or failing fields -- confirm)\n",
    "    excluded_endings = (\n",
    "        \"rpt_quarter\",\n",
    "        \"rpt_year\",\n",
    "        \"stockcode\",\n",
    "        \"date\",\n",
    "        \"metadata\",\n",
    "        \"enterprise_expansion_reserve\",\n",
    "        \"exchange_gains_or_losses\",\n",
    "    ) + tuple(string.digits)\n",
    "\n",
    "    candidates = []\n",
    "    for table_name in table_names:\n",
    "        table = getattr(fundamentals, table_name)\n",
    "        candidates.extend(\n",
    "            \"{}.{}\".format(table_name, attr) for attr in dir(table) if not attr.startswith(\"_\")\n",
    "        )\n",
    "\n",
    "    # str.endswith accepts a tuple, so a single call tests every excluded ending\n",
    "    return [name for name in candidates if not name.endswith(excluded_endings)]\n",
    "\n",
    "\n",
    "# 1. Prepare the factor data\n",
    "def prepar_factor(start_date, end_date, factor, stock_list):\n",
    "    '''\n",
    "    Query one fundamentals factor for the stock pool.\n",
    "\n",
    "    :param start_date: start date\n",
    "    :param end_date: end date\n",
    "    :param factor: factor name, \"<table>.<field>\"\n",
    "    :param stock_list: stock pool\n",
    "    :return: factor data, a MultiIndex Series\n",
    "    '''\n",
    "    trading_dates = get_trading_dates(start_date=start_date, end_date=end_date)\n",
    "    q = query(get_cls(factor)).filter(fundamentals.stockcode.in_(stock_list))\n",
    "    # The interval is one \"d\" unit per trading date in the window, anchored at end_date\n",
    "    fund = get_fundamentals(q, entry_date=end_date, interval=\"{}d\".format(len(trading_dates)))\n",
    "    # Select the entry named after the field, then stack it into a Series\n",
    "    fund = fund[factor.split(\".\")[-1], :, :].stack()\n",
    "    return fund\n",
    "\n",
    "\n",
    "# 2. Prepare the prices\n",
    "def prepar_price(start_date, end_date, stock_list):\n",
    "    '''Return get_price(...) restricted to the \"close\" field for stock_list over the date range.'''\n",
    "    return get_price(stock_list, start_date=start_date, end_date=end_date, fields=\"close\")\n",
    "\n",
    "\n",
    "# 3. 
Build the alphalens input (MultiIndex Series), then compute the IC and the factor returns\n",
    "def get_IC(factor, prices):\n",
    "    '''Run the alphalens IC and factor-return computations for one factor.\n",
    "\n",
    "    :param factor: factor values handed to alphalens (in this notebook, the result of prepar_factor)\n",
    "    :param prices: prices handed to alphalens (in this notebook, the result of prepar_price)\n",
    "    :return: (factor_data, IC, factor_return) as produced by alphalens\n",
    "    '''\n",
    "    factor_data = alphalens.utils.get_clean_factor_and_forward_returns(factor, prices)\n",
    "    IC = alphalens.performance.factor_information_coefficient(factor_data)\n",
    "    factor_return = alphalens.performance.factor_returns(factor_data)\n",
    "    return factor_data, IC, factor_return\n",
    "\n",
    "\n",
    "def get_result(IC, factor_return):\n",
    "    '''\n",
    "    Summarise one factor and score it against the module-level thresholds.\n",
    "\n",
    "    Only the first column of each input is used.\n",
    "\n",
    "    :param IC: the factor's IC\n",
    "    :param factor_return: the factor's returns\n",
    "    :return: mean_factor_return, IC_mean, IC_std, IC_lgtratio, IR, score\n",
    "    '''\n",
    "    ic_values = IC.iloc[:, 0]  # taken once instead of re-indexing for every statistic\n",
    "\n",
    "    # Mean of the factor returns\n",
    "    mean_factor_return = factor_return.iloc[:, 0].mean()\n",
    "    # Mean and standard deviation of the IC\n",
    "    IC_mean = ic_values.mean()\n",
    "    IC_std = ic_values.std()\n",
    "    # Share of IC observations above IC_LGT_STANDARD\n",
    "    # NOTE(review): only positive IC counts here, while the other criteria use abs() -- confirm intent\n",
    "    IC_lgtratio = ic_values[ic_values > IC_LGT_STANDARD].shape[0] / ic_values.shape[0]\n",
    "    # IR: mean IC divided by the IC standard deviation\n",
    "    IR = IC_mean / IC_std\n",
    "\n",
    "    # The score is the number of criteria the factor meets (0 to 5)\n",
    "    criteria_met = [\n",
    "        abs(mean_factor_return) >= MEAN_FACTOR_RETURN_STANDARD,\n",
    "        abs(IC_mean) >= IC_MEAN_STANDARD,\n",
    "        IC_std <= IC_STD_STANDARD,\n",
    "        IC_lgtratio >= IC_LGTRATIO_STANDARD,\n",
    "        abs(IR) >= IR_STANDARD,\n",
    "    ]\n",
    "    score = sum(1 for met in criteria_met if met)\n",
    "\n",
    "    return mean_factor_return, IC_mean, IC_std, IC_lgtratio, IR, score\n",
    "\n",
    "\n",
    "# Compute the result for every factor\n",
    "def get_all_result():\n",
    "    '''Evaluate every entry of factor_list and collect the metrics in one DataFrame.\n",
    "\n",
    "    A factor whose IC computation or scoring raises is reported on stdout and skipped.\n",
    "\n",
    "    :return: DataFrame with one row per successfully evaluated factor\n",
    "    '''\n",
    "    all_result_list = []  # one [name, metrics..., score] row per factor\n",
    "    # To evaluate every available factor, iterate over build_factor_list() instead of factor_list.\n",
    "\n",
    "    # Prices do not depend on the factor, so they are fetched once rather than per iteration\n",
    "    prices = prepar_price(start_date, end_date, stock_list)\n",
    "\n",
    "    for factor_str in factor_list:\n",
    "        print(factor_str)\n",
    "        # 1. Prepare the factor\n",
    "        factor = prepar_factor(start_date, end_date, factor_str, stock_list)\n",
    "        try:\n",
    "            # 2. Compute the IC and the factor returns\n",
    "            _, IC, factor_return = get_IC(factor, prices)\n",
    "            # 3. Summarise and score\n",
    "            mean_factor_return, IC_mean, IC_std, IC_lgtratio, IR, score = get_result(IC, factor_return)\n",
    "            all_result_list.append([factor_str, mean_factor_return, IC_mean, IC_std, IC_lgtratio, IR, score])\n",
    "        except Exception as exc:\n",
    "            # Best effort: keep going, but say which factor was dropped and why\n",
    "            print(\"skipped {}: {!r}\".format(factor_str, exc))\n",
    "\n",
    "    # NOTE: \"facotr\" is a misspelling of \"factor\"; kept so existing users of the column keep working\n",
    "    all_result_df = pd.DataFrame(all_result_list, columns=[\"facotr\", \"mean_factor_return\", \"IC_mean\", \"IC_std\", \"IC_lgtratio\", \"IR\", \"score\"])\n",
    "    return all_result_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "eod_derivative_indicator.pe_ratio\n",
      "eod_derivative_indicator.pcf_ratio\n",
      "eod_derivative_indicator.pb_ratio\n",
      "eod_derivative_indicator.market_cap\n",
      "eod_derivative_indicator.ps_ratio\n",
      "financial_indicator.return_on_invested_capital\n",
      "financial_indicator.du_return_on_equity\n",
      "financial_indicator.return_on_asset_net_profit\n",
      "financial_indicator.return_on_equity\n",
      "financial_indicator.return_on_asset\n",
      "financial_indicator.earnings_per_share\n",
      "financial_indicator.net_profit_to_revenue\n",
      "financial_indicator.inc_revenue\n",
      "financial_indicator.inc_total_asset\n",
      "financial_indicator.inc_net_profit\n",
      "financial_indicator.inc_earnings_per_share\n",
      "financial_indicator.inc_operating_revenue\n"
     ]
    }
   ],
   "source": [
    "df = get_all_result()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
| \n", " | facotr | \n", "mean_factor_return | \n", "IC_mean | \n", "IC_std | \n", "IC_lgtratio | \n", "IR | \n", "score | \n", "
|---|---|---|---|---|---|---|---|
| 0 | \n", "eod_derivative_indicator.pe_ratio | \n", "-0.000616 | \n", "-0.023112 | \n", "0.117749 | \n", "0.367521 | \n", "-0.196285 | \n", "2 | \n", "
| 1 | \n", "eod_derivative_indicator.pcf_ratio | \n", "-0.000724 | \n", "-0.015745 | \n", "0.094981 | \n", "0.371795 | \n", "-0.165773 | \n", "3 | \n", "
| 2 | \n", "eod_derivative_indicator.pb_ratio | \n", "0.000125 | \n", "-0.013038 | \n", "0.116275 | \n", "0.363248 | \n", "-0.112128 | \n", "0 | \n", "
| 3 | \n", "eod_derivative_indicator.market_cap | \n", "0.000312 | \n", "0.000623 | \n", "0.216232 | \n", "0.448718 | \n", "0.002880 | \n", "0 | \n", "
| 4 | \n", "eod_derivative_indicator.ps_ratio | \n", "-0.000159 | \n", "-0.014260 | \n", "0.115399 | \n", "0.423077 | \n", "-0.123567 | \n", "0 | \n", "
| 5 | \n", "financial_indicator.return_on_invested_capital | \n", "0.001221 | \n", "0.018168 | \n", "0.134614 | \n", "0.470085 | \n", "0.134966 | \n", "2 | \n", "
| 6 | \n", "financial_indicator.du_return_on_equity | \n", "0.001258 | \n", "0.016976 | \n", "0.135526 | \n", "0.500000 | \n", "0.125257 | \n", "3 | \n", "
| 7 | \n", "financial_indicator.return_on_asset_net_profit | \n", "0.001122 | \n", "0.016129 | \n", "0.132530 | \n", "0.508547 | \n", "0.121704 | \n", "3 | \n", "
| 8 | \n", "financial_indicator.return_on_equity | \n", "0.001258 | \n", "0.016976 | \n", "0.135526 | \n", "0.500000 | \n", "0.125257 | \n", "3 | \n", "
| 9 | \n", "financial_indicator.return_on_asset | \n", "0.001201 | \n", "0.017158 | \n", "0.133128 | \n", "0.517094 | \n", "0.128884 | \n", "3 | \n", "
| 10 | \n", "financial_indicator.earnings_per_share | \n", "0.000923 | \n", "0.021687 | \n", "0.145436 | \n", "0.529915 | \n", "0.149119 | \n", "2 | \n", "
| 11 | \n", "financial_indicator.net_profit_to_revenue | \n", "0.000581 | \n", "0.008453 | \n", "0.103580 | \n", "0.504274 | \n", "0.081605 | \n", "1 | \n", "
| 12 | \n", "financial_indicator.inc_revenue | \n", "0.000054 | \n", "0.005099 | \n", "0.070488 | \n", "0.440171 | \n", "0.072338 | \n", "1 | \n", "
| 13 | \n", "financial_indicator.inc_total_asset | \n", "-0.000085 | \n", "0.016466 | \n", "0.103028 | \n", "0.478632 | \n", "0.159819 | \n", "2 | \n", "
| 14 | \n", "financial_indicator.inc_net_profit | \n", "0.000196 | \n", "0.017035 | \n", "0.079496 | \n", "0.538462 | \n", "0.214287 | \n", "4 | \n", "
| 15 | \n", "financial_indicator.inc_earnings_per_share | \n", "0.000317 | \n", "0.020497 | \n", "0.078697 | \n", "0.487179 | \n", "0.260456 | \n", "3 | \n", "
| 16 | \n", "financial_indicator.inc_operating_revenue | \n", "-0.000089 | \n", "0.005405 | \n", "0.072610 | \n", "0.410256 | \n", "0.074440 | \n", "1 | \n", "