Source code for factorset.data.CSVParser

# -*- coding:utf-8 -*-
"""
@author:code37
@file:CSVParser.py
@time:2018/4/1816:55
"""

import pandas as pd
import os
from factorset.data import FundDict as fd

#####
dir = os.path.abspath('.')
encode = 'gbk'
#####

def forfor(a):
    return [item for sublist in a for item in sublist]

[docs]def all_stock_symbol(dir): """ :param dir: 数据路径 :type dir: string :return: 路径下所有股票tickers """ return [symbol.strip('.csv') for symbol in os.listdir(path='{}/hq'.format(dir))]
[docs]def read_stock(dir, ticker): """ :param dir: 数据路径 :type dir: string :param ticker: 单个股票ticker :return: 单个股票行情, pd.DataFrame """ # 去除字符,保留前六位编号 # ticker = re.sub(r'\D', '', ticker)"" return pd.read_csv('{}/hq/{}.csv'.format(dir, ticker), encoding=encode, parse_dates=True, index_col=0)
[docs]def concat_all_stock(dir): """ 纵向合并目录所有股票行情 :param dir: 数据路径 :type dir: string :return: pd.DataFrame """ return pd.concat([read_stock(dir, s) for s in all_stock_symbol(dir)])
[docs]def hconcat_stock_series(hq, tickers): """ 横向合并股票行情 :param hq: concat_all_stock后的DataFrame :type hq: pd.DataFrame :param tickers: 股票tickers, list :type tickers: list :rtype: pd.DataFrame """ l = [] for ticker in tickers: l.append(hq[hq.code == ticker].close.rename(ticker, inplace=True).fillna(method='ffill')) return pd.concat(l, axis=1)
[docs]def concat_stock(dir, tickers): """ 纵向合并目录指定股票行情 :param dir: 数据路径 :type dir: string :param tickers: 股票tickers, list :rtype: pd.DataFrame """ return pd.concat([read_stock(dir, s) for s in tickers])
[docs]def all_fund_symbol(dir, type): """ 获取储存路径中一种报表的所有tickers :param dir: 数据路径 :type dir: string :param type: BS','IS','CF' :return: tickers :rtype: list """ return [symbol.strip('{}_'.format(type)).strip('.csv') for symbol in os.listdir(path='{}/fund'.format(dir)) if '{}_'.format(type) in symbol]
[docs]def read_fund(dir, type, ticker): """ 读取一个股票的一种报表数据 :param dir: 数据路径,string :type dir: string :param type: BS','IS','CF' :param ticker: 股票ticker, str :rtype: pd.DataFrame """ return pd.read_csv('{}/fund/{}_{}.csv'.format(dir, type, ticker), encoding=encode, parse_dates=True, index_col=0)
[docs]def fund_collist(dir, type): """ 一种报表所有股票的会计项目 :param dir: 数据路径 :type dir: string :param type: BS','IS','CF' :rtype: list """ l = [] for s in all_fund_symbol(dir, type): try: l.append(read_fund(dir, type, s).columns.tolist()) except Exception as e: print(s, e) l = set(forfor(l)) return l
[docs]def concat_fund(dir, tickers, type): """ 纵向合并一种财务报表 :param dir: 数据路径 :type dir: string :param tickers: 股票tickers, list :param type: BS','IS','CF' :rtype: pd.DataFrame """ l = [] Dict_to_call = getattr(fd, '{}_DICT'.format(type)) for s in tickers: df = read_fund(dir, type, s) df.columns = df.columns.to_series().map(Dict_to_call) df.sort_index(axis=1, inplace=True) df['ticker'] = s l.append(df) return pd.concat(l)
def dup(l): seen = set() return [x for x in l if x in seen or seen.add(x)] if __name__ == '__main__': # print(all_symbol('BS')) # print(read_stock(dir, '300593.SZ')) # print(all_stock_symbol(dir)) # print(all_fund_symbol(dir, 'BS')) # print(read_fund(dir, 'BS', '000001.SZ')) # print(read_fund(dir, 'BS', '000001.SZ').columns) # print(read_fund(dir, 'BS', '000002.SZ').columns) # print(concat_fund(dir, ['000001.SZ'], 'BS').columns.values) # print(concat_fund(dir, ['000002.SZ'], 'BS').columns.values) # print(dup(concat_fund(dir, ['000001.SZ'], 'BS').columns.values)) # print(dup(concat_fund(dir, ['000002.SZ'], 'BS').columns.values)) # print(len(concat_fund(dir, ['000001.SZ'], 'BS').columns.values), len(set(concat_fund(dir, ['000001.SZ'], 'BS').columns.values))) # print(len(concat_fund(dir, ['000002.SZ'], 'BS').columns.values), len(set(concat_fund(dir, ['000002.SZ'], 'BS').columns.values))) print(concat_fund(dir, ['000001.SZ', '000002.SZ', '000004.SZ'], 'BS'))