Source code for factorset.Util.finance

# -*- coding:utf-8 -*-
"""
@author:code37
@file:finance.py
@time:2018/4/27 9:46
"""
import pandas as pd
from datetime import datetime

def _ttm_from_window(window, name):
    """Return the TTM value of column ``name`` from a five-period window.

    ``window`` holds five consecutive quarter-end rows sorted ascending by
    ``report_date``; column ``name`` is treated as cumulative year-to-date.
    Returns ``None`` when any of the five periods has no value.
    """
    cumulative = window[name]
    if cumulative.isnull().any():
        # A missing period is not estimated; the caller drops this row.
        return None

    months = window['report_date'].apply(lambda d: d.month)
    # Quarter-over-quarter differences turn cumulative figures into
    # single-quarter figures; the first row has no predecessor (NaN).
    single = cumulative.diff(1)
    if (months == 3).sum() > 1:
        # The window both starts and ends on a first quarter. The oldest row
        # was only needed as the subtrahend for the following quarter.
        cumulative = cumulative.iloc[1:]
        months = months.iloc[1:]
        single = single.iloc[1:]
    # A first-quarter cumulative figure already is the single-quarter figure;
    # differencing it against the previous annual report would be wrong.
    single = single.where(months != 3, cumulative)
    # sum() skips the leading NaN left by diff().
    return single.sum()


def ttmContinues(report_df, label):
    """
    Compute Trailing Twelve Months for multiple indicator.

    computation rules:

    #. ttm indicator is computed on announcement date.
    #. on given release_date, use the latest report_date and the previous
       report year for computation (five consecutive quarter-end periods).
    #. if any of those report periods is missing, no TTM value is produced
       for that release_date (the row is dropped from the result).
    #. if two reports (usually first-quarter and annual) are released
       together, only keep latest

    The first four rows (after sorting by release_date, report_date and
    removing duplicates) are skipped, so at least five reports are needed.

    :param report_df: must have 'report_date', 'release_date', and <label>
        columns; 'report_date' values must expose ``.year`` / ``.month`` and
        fall on a quarter end (month 3, 6, 9 or 12)
    :type report_df: Pandas.DataFrame
    :param label: column name for intended indicator; several names may be
        given separated by commas (spaces are ignored)
    :type label: str.
    :return: columned by [<label>+'_TTM', ..., 'datetime', 'report_date'];
        an empty list when no row was eligible for computation
    :rtype: Pandas.DataFrame
    :raises KeyError: if the latest report month is not 3, 6, 9 or 12

    .. todo::
        if announce_date exist, use announce_date instead of release_date,
        report_date as well; implement a weighted estimate for missing
        report periods
    """
    report_df = report_df.sort_values(by=['release_date', 'report_date'])
    # Remove duplicated records.
    report_df = report_df.drop_duplicates(subset=['release_date', 'report_date'])
    report_df = report_df.reset_index(drop=True)

    month_ends = {3: 31, 6: 30, 9: 30, 12: 31}
    label_list = label.replace(' ', '').split(',')

    ret = []
    for i, row in report_df.iterrows():
        if i < 4:
            # The first four rows are not computed.
            continue

        # Everything published up to and including this release date.
        released_reports = report_df.loc[report_df.release_date <= row['release_date'], :]
        latest_report_date = released_reports.report_date.max()
        year = latest_report_date.year
        season = latest_report_date.month

        # Walk back five quarter ends starting from the latest report.
        five_reports = []
        for _ in range(5):
            period_end = datetime(year=year, month=season, day=month_ends[season])
            period = {'report_date': period_end}
            in_period = released_reports.report_date == period_end
            for name in label_list:
                values = released_reports.loc[in_period, name]
                # Rows are ordered by release date, so the last match is the
                # most recently released figure; None marks a missing period.
                period[name] = values.values[-1] if len(values) else None
            five_reports.append(period)

            season -= 3
            if season <= 0:
                year -= 1
                season = 12

        window = pd.DataFrame(five_reports).sort_values(by='report_date')
        window = window.reset_index(drop=True)

        ttm_value = {name + '_TTM': _ttm_from_window(window, name) for name in label_list}
        ttm_value['datetime'] = row['release_date']
        ttm_value['report_date'] = row['report_date']
        ret.append(ttm_value)

    if len(ret) > 0:
        # Reports released on the same day: keep only the latest one.
        ret = pd.DataFrame(ret).drop_duplicates(subset=['datetime'], keep='last')
        # Rows where any indicator could not be computed are discarded.
        ret = ret.dropna()
    return ret
def ttmDiscrete(report_df, label_str, min_report_num=4):
    """
    Average each indicator over the most recent report periods known at
    every release date.

    For each distinct release_date, the reports released on or before that
    date are reduced to one row per report_date (the row with the latest
    release_date wins), the ``min_report_num`` most recent report periods are
    taken, and the mean of each indicator over them is stored as
    <label>+'_TTM'. When fewer periods are available the mean is taken over
    those that exist. Rows whose release_date is missing are ignored.

    :param report_df: must have 'report_date', 'release_date', and <label>
        columns
    :type report_df: Pandas.DataFrame
    :param label_str: indicator column name, or several names separated by
        commas (spaces are ignored)
    :type label_str: str
    :param min_report_num: number of most recent report periods to average
    :type min_report_num: int
    :return: columned by [<label>+'_TTM', ..., 'datetime', 'report_date'],
        sorted by report_date then datetime; 'report_date' is the most recent
        report_date published on that release date
    :rtype: pd.DataFrame
    """
    label_list = label_str.replace(' ', '').split(',')
    report_df = report_df.sort_values(by=['report_date', 'release_date'],
                                      ascending=[False, False])
    # Remove duplicated records.
    report_df = report_df.drop_duplicates(subset=['release_date', 'report_date'])

    # Rows are ordered newest report first, so first() gives the most recent
    # report_date per release date. Taking the release dates from this same
    # result keeps both columns the same length and in the same order.
    latest_report_by_release = report_df.groupby('release_date')['report_date'].first()
    release_dates = list(latest_report_by_release.index[::-1])
    report_dates = latest_report_by_release.values[::-1]

    rows = []
    for release_dt in release_dates:
        # The window is the same for every label, so build it once.
        window = (report_df[report_df['release_date'] <= release_dt]
                  .drop_duplicates(subset=['report_date'])
                  .head(min_report_num))
        rows.append({name + '_TTM': window[name].mean() for name in label_list})

    ret_df = pd.DataFrame(rows)
    ret_df['datetime'] = release_dates
    ret_df['report_date'] = report_dates
    return ret_df.sort_values(by=['report_date', 'datetime'])