def y_sum_by_time(x_arr, y_arr, top=None):
    """Sum ``y_arr`` per hour, per weekday and per calendar date of ``x_arr``.

    :param x_arr: sequence of epoch timestamps, in seconds
    :param y_arr: sequence of values to add up, aligned with ``x_arr``
    :param top: when a positive number, keep the ``top`` groups with the
        largest sums; otherwise keep only the group(s) whose sum equals the
        maximum
    :returns: dict with the keys ``'Hour'``, ``'Weekday'`` and ``'Date'``;
        each value maps a group label to a one-element list holding that
        group's sum (a NaN sum is reported as the string ``'None'``)
    :rtype: dict
    """
    df = pd.DataFrame({'Timestamp': pd.to_datetime(x_arr, unit='s'), 'Status': y_arr})
    # NOTE(review): the first format argument was missing in the original
    # source; the day of month is assumed (day/month/year).
    df['Date'] = df['Timestamp'].apply(lambda ts: "%d/%d/%d" % (ts.day, ts.month, ts.year))
    df['Hour'] = df['Timestamp'].apply(lambda ts: "%d" % ts.hour)
    # ``Timestamp.weekday_name`` no longer exists in current pandas.
    df['Weekday'] = df['Timestamp'].apply(lambda ts: ts.day_name())

    result = {}
    for groupby in ('Hour', 'Weekday', 'Date'):
        df_group = df.groupby(groupby, as_index=False).agg({'Status': 'sum'})
        if top is not None and top > 0:
            selected = df_group.nlargest(top, 'Status')
        else:
            selected = df_group[df_group['Status'] == df_group['Status'].max()]
        result[groupby] = {
            label: rows['Status'].replace(np.nan, 'None').tolist()
            for label, rows in selected.groupby(groupby)
        }
    return result
def output(self):
    '''
    Generate data wrapper for Mahali data

    @return Mahali data wrapper
    '''
    nav_files, obs_files = self.cacheData()

    def getSiteAndDate(in_filename):
        '''
        Extract the site and the date encoded in a file name

        @param in_filename: File name; characters [-12:-8] are read as the
                            site and characters [-8:-5] as the day of year

        @return (site, date) tuple
        '''
        # NOTE(review): the year is hard-coded to 2015 - confirm before using other data
        date = pd.to_datetime('2015' + in_filename[-8:-5], format='%Y%j')
        return in_filename[-12:-8], date

    data_list = []
    for nav, obs in zip(nav_files, obs_files):
        site, date = getSiteAndDate(nav)
        # Each navigation file must pair with an observation file of the same site and day
        if (site, date) != getSiteAndDate(obs):
            raise RuntimeError('Data mismatch')
        # File names are stored unparsed (readRinexNav / rinexobs parsing is disabled)
        data_list.append([site, date, nav, obs])

    return DataWrapper(data_list)
def __init__(self, ap_paramList = [], start_date = None, end_date = None, cutoff=0.75):
    '''
    Construct a Groundwater Data Fetcher

    @param ap_paramList[LowerLat]: Autoparam Lower latitude
    @param ap_paramList[UpperLat]: Autoparam Upper latitude
    @param ap_paramList[LeftLon]: Autoparam Left longitude
    @param ap_paramList[RightLon]: Autoparam Right longitude
    @param start_date: Starting date (default: None)
    @param end_date: Ending date (default: None)
    @param cutoff: Required amount of data for each station
    '''
    self.start_date = pd.to_datetime(start_date)
    self.end_date = pd.to_datetime(end_date)
    # Store a copy: the default list object is shared between calls, so
    # keeping a reference to it would let one instance leak into the next.
    self.ap_paramList = list(ap_paramList)
    self.cutoff = cutoff
def convert_date(in_date):
    '''
    Converts input string to pandas date time, ignores other types of objects

    Strings are first parsed with the default pandas parser; if that raises
    a ValueError they are parsed as year + day of year ('%Y%j').

    @param in_date: Input date

    @return pandas date time object for strings, the unchanged input otherwise
    '''
    if not isinstance(in_date, str):
        return in_date

    try:
        return pd.to_datetime(in_date)
    except ValueError:
        return pd.to_datetime(in_date, format='%Y%j')
def QA_fetch_get_security_bars(code, _type, lens, ip=best_ip['stock'], port=7709):
    """Fetch the latest ``lens`` bars of ``code`` as a DataFrame.

    Bars are requested in pages of 800 rows, concatenated, decorated with
    ``code``, ``date``, ``date_stamp``, ``time_stamp`` and ``type`` columns
    and indexed by ``datetime`` (the column is kept as well).

    :param code: security code
    :param _type: bar type, passed to ``_select_type`` and stored in ``type``
    :param lens: number of most recent bars to keep
    :param ip: server address
    :param port: server port
    :returns: DataFrame holding the last ``lens`` rows
    """
    api = TdxHq_API()
    with api.connect(ip, port):
        pages = [
            api.to_df(api.get_security_bars(
                _select_type(_type), _select_market_code(code), code, page * 800, 800))
            for page in range(int(lens / 800) + 1)
        ]
        raw = pd.concat(pages, axis=0)
        # The derived columns below are computed from the raw (unparsed)
        # 'datetime' values, exactly as the single chained expression did.
        raw_datetime = raw['datetime']

        bars = raw.assign(datetime=pd.to_datetime(raw_datetime), code=str(code))
        bars = bars.drop(['year', 'month', 'day', 'hour', 'minute'], axis=1, inplace=False)
        bars = bars.assign(date=raw_datetime.apply(lambda x: str(x)[0:10]))
        bars = bars.assign(date_stamp=raw_datetime.apply(lambda x: QA_util_date_stamp(x)))
        bars = bars.assign(time_stamp=raw_datetime.apply(lambda x: QA_util_time_stamp(x)))
        bars = bars.assign(type=_type)
        bars = bars.set_index('datetime', drop=False, inplace=False).tail(lens)

        return bars if bars is not None else None
def QA_fetch_get_stock_xdxr(code, ip=best_ip['stock'], port=7709):
    """Fetch the xdxr records of ``code`` as a DataFrame.

    :param code: stock code
    :param ip: server address
    :param port: server port
    :returns: DataFrame indexed by date, with a ``category_meaning`` column,
        renamed share/liquidity columns and ``date`` as a 'YYYY-MM-DD'
        string; ``None`` when the server returns no rows
    """
    api = TdxHq_API()
    market_code = _select_market_code(code)
    with api.connect(ip, port):
        # NOTE(review): these labels look like text damaged by an encoding
        # problem; they are runtime values and are kept exactly as found.
        category = {
            '1': '????', '2': '?????', '3': '??????', '4': '??????', '5': '????',
            '6': '????', '7': '????', '8': '??????', '9': '?????', '10': '?????',
            '11': '???', '12': '??????', '13': '?????', '14': '?????'}
        raw = api.to_df(api.get_xdxr_info(market_code, code))
        if len(raw) < 1:
            return None

        column_names = {'panhouliutong': 'liquidity_after',
                        'panqianliutong': 'liquidity_before',
                        'houzongguben': 'shares_after',
                        'qianzongguben': 'shares_before'}
        frame = raw.assign(date=pd.to_datetime(raw[['year', 'month', 'day']]))
        frame = frame.drop(['year', 'month', 'day'], axis=1)
        frame = frame.assign(
            category_meaning=raw['category'].apply(lambda x: category[str(x)]))
        frame = frame.rename(index=str, columns=column_names)
        frame = frame.set_index('date', drop=False, inplace=False)
        # The index keeps the timestamps; the column becomes a date string.
        return frame.assign(date=frame['date'].apply(lambda x: str(x)[0:10]))
def QA_data_stock_to_fq(__data, type_='01'):
    """Adjust stock bars using the stored xdxr records of their code.

    :param __data: bar data; the code is read from ``__data['code'][0]``
    :param type_: ``'01'`` / ``'qfq'`` selects ``QA_data_make_qfq``,
        ``'02'`` / ``'hfq'`` selects ``QA_data_make_hfq``; any other value
        is logged and treated as ``'qfq'``
    :returns: whatever the selected ``QA_data_make_*`` function returns
    """

    def __QA_fetch_stock_xdxr(code, format_='pd', collections=QA_Setting.client.quantaxis.stock_xdxr):
        """Load the xdxr rows of ``code``; empty frame when that fails."""
        try:
            data = pd.DataFrame(list(collections.find({'code': code}))).drop(['_id'], axis=1)
            data['date'] = pd.to_datetime(data['date'])
            return data.set_index(['date', 'code'], drop=False)
        # NOTE(review): the handler line was missing in the original source;
        # a catch-all fallback to an empty, correctly-shaped frame is assumed.
        except Exception:
            return pd.DataFrame(columns=['category', 'category_meaning', 'code', 'date', 'fenhong',
                                         'fenshu', 'liquidity_after', 'liquidity_before', 'name', 'peigu', 'peigujia',
                                         'shares_after', 'shares_before', 'songzhuangu', 'suogu', 'xingquanjia'])

    if type_ in ['01', 'qfq']:
        adjust = QA_data_make_qfq
    elif type_ in ['02', 'hfq']:
        adjust = QA_data_make_hfq
    else:
        QA_util_log_info('wrong fq type! Using qfq')
        adjust = QA_data_make_qfq
    return adjust(__data, __QA_fetch_stock_xdxr(__data['code'][0]))
def mfi(df):
    """Plot the close price above the ``mfi`` column with 20 / 80 guide lines.

    The ``date`` column of ``df`` is converted in place.

    :param df: DataFrame with ``date``, ``close`` and ``mfi`` columns
    """
    # NOTE(review): the argument of this call was cut off in the original
    # source; converting the existing 'date' column is assumed.
    df['date'] = pd.to_datetime(df['date'])
    fig = plt.figure(figsize=(16, 9))
    gs = GridSpec(3, 1)  # 3 rows, 1 column
    # The price takes the top two rows, the indicator the bottom row.
    price = fig.add_subplot(gs[:2, 0])
    price.plot(df['date'], df['close'], color='blue')
    indicator = fig.add_subplot(gs[2, 0], sharex=price)
    indicator.plot(df['date'], df['mfi'], c='pink')
    indicator.plot(df['date'], [20.]*len(df['date']), c='green')
    indicator.plot(df['date'], [80.]*len(df['date']), c='orange')
def atr(df):
    """
    Average True Range

    Plot the close price above the ``atr`` column. The ``date`` column of
    ``df`` is converted in place.

    :param df: DataFrame with ``date``, ``close`` and ``atr`` columns
    """
    # NOTE(review): the argument of this call was cut off in the original
    # source; converting the existing 'date' column is assumed.
    df['date'] = pd.to_datetime(df['date'])
    fig = plt.figure(figsize=(16, 9))
    gs = GridSpec(3, 1)  # 3 rows, 1 column
    # The price takes the top two rows, the indicator the bottom row.
    price = fig.add_subplot(gs[:2, 0])
    price.plot(df['date'], df['close'], color='blue')
    indicator = fig.add_subplot(gs[2, 0], sharex=price)
    indicator.plot(df['date'], df['atr'], c='pink')
def rocr(df):
    """
    Rate of change ratio

    Plot the close price above the ``rocr`` column. The ``date`` column of
    ``df`` is converted in place.

    :param df: DataFrame with ``date``, ``close`` and ``rocr`` columns
    """
    # NOTE(review): the argument of this call was cut off in the original
    # source; converting the existing 'date' column is assumed.
    df['date'] = pd.to_datetime(df['date'])
    fig = plt.figure(figsize=(16, 9))
    gs = GridSpec(3, 1)  # 3 rows, 1 column
    # The price takes the top two rows, the indicator the bottom row.
    price = fig.add_subplot(gs[:2, 0])
    price.plot(df['date'], df['close'], color='blue')
    indicator = fig.add_subplot(gs[2, 0], sharex=price)
    indicator.plot(df['date'], df['rocr'], c='pink')
def get_indicator(df, indicator):
    """Merge the requested technical indicators into ``df``.

    :param df: DataFrame with ``date``, ``high``, ``low``, ``close`` and
        ``volume`` columns (only those needed by the requested indicators
        are read)
    :param indicator: container tested with ``in`` for the names 'MACD',
        'MFI', 'ATR' and 'ROCR'
    :returns: the merged DataFrame with ``date`` converted to datetimes
    """

    def as_columns(arrays, names):
        # One column per indicator array, named in the given order.
        return pd.DataFrame(list(arrays)).T.rename(columns=dict(enumerate(names)))

    merged = df
    if 'MACD' in indicator:
        macd_parts = ta.MACD(df.close.values, fastperiod=12, slowperiod=26, signalperiod=9)
        merged = KlineData._merge_dataframe(
            as_columns(macd_parts, ["macddif", "macddem", "macdhist"]), merged)
        crossings = line_intersections(merged, columns=['macddif', 'macddem'])
        merged = KlineData._merge_dataframe(crossings, merged)
    if 'MFI' in indicator:
        money_flow = ta.MFI(df.high.values, df.low.values, df.close.values,
                            df.volume.values, timeperiod=14)
        merged = KlineData._merge_dataframe(as_columns([money_flow], ["mfi"]), merged)
    if 'ATR' in indicator:
        # The 'atr' column is filled from ta.NATR.
        true_range = ta.NATR(df.high.values, df.low.values, df.close.values, timeperiod=14)
        merged = KlineData._merge_dataframe(as_columns([true_range], ["atr"]), merged)
    if 'ROCR' in indicator:
        change_ratio = ta.ROCR(df.close.values, timeperiod=10)
        merged = KlineData._merge_dataframe(as_columns([change_ratio], ["rocr"]), merged)
    merged['date'] = pd.to_datetime(merged['date'], format='%Y-%m-%d')
    return merged
def to_ns(x):
    """Convert input timestamps to nanoseconds (integers)

    Null values map to 0. Anything pandas cannot convert directly is
    converted through its string representation.

    :param x: value to be converted
    :returns: converted value
    :rtype: int
    """
    if pd.isnull(x):
        return 0
    try:
        return pd.to_datetime(x).value
    except Exception:
        # Second attempt through the string form; a failure here propagates.
        if hasattr(x, '__str__'):
            return pd.to_datetime(str(x)).value
    return 0
def get_twitter_sentiment_multilabel_classification_dataset():
    """Load the twitter sentiment data set and split it into train and test.

    The CSV is read from ``tests/twitter_sentiment.csv``; if that fails it
    is fetched from ``dataset_url`` and cached at that path. Only a random
    10% sample of the rows is kept.

    :returns: ``(df_twitter_train, df_twitter_test)``
    """
    file_name = os.path.join('tests', 'twitter_sentiment.csv')

    try:
        # Pass the path so pandas opens and closes the file itself and
        # applies the requested encoding.
        df_twitter = pd.read_csv(file_name, encoding='utf-8', engine='python')
    except Exception:
        # NOTE(review): the download URL is empty in this source, so the
        # fallback cannot succeed until it is filled in again.
        dataset_url = ''
        df_twitter = pd.read_csv(dataset_url)
        # Do not write the index that pandas automatically creates
        df_twitter.to_csv(file_name, index=False)

    # Grab only 10% of the dataset- runs much faster this way
    df_twitter = df_twitter.sample(frac=0.1)

    df_twitter['tweet_created'] = pd.to_datetime(df_twitter.tweet_created)

    df_twitter_train, df_twitter_test = train_test_split(df_twitter, test_size=0.33, random_state=42)
    return df_twitter_train, df_twitter_test
def add_date_features_df(df, date_col):
    """Replace ``date_col`` with features derived from it.

    Adds ``<date_col>_day_of_week``, ``_hour``, ``_minutes_into_day``,
    ``_is_weekend`` and ``_day_part`` columns and drops ``date_col``.

    :param df: DataFrame containing ``date_col``
    :param date_col: name of the column holding the dates
    :returns: new DataFrame with the feature columns and without ``date_col``
    """
    # Work on a copy instead of flagging the frame with ``is_copy``: this
    # avoids the chained-assignment warnings and leaves the input untouched.
    df = df.copy()

    df[date_col] = pd.to_datetime(df[date_col])
    # ``raise_on_error`` is gone from pandas; ``errors='ignore'`` keeps the
    # column as it is when it cannot be cast to int.
    df[date_col + '_day_of_week'] = df[date_col].apply(lambda x: x.weekday()).astype(int, errors='ignore')

    try:
        df[date_col + '_hour'] = df[date_col].apply(lambda x: x.hour).astype(int, errors='ignore')
        df[date_col + '_minutes_into_day'] = df[date_col].apply(lambda x: x.hour * 60 + x.minute)
    except AttributeError:
        # NOTE(review): the handler body was missing in the original source;
        # skipping the time-of-day features is assumed.
        pass

    df[date_col + '_is_weekend'] = df[date_col].apply(lambda x: x.weekday() in (5, 6))
    df[date_col + '_day_part'] = df[date_col + '_minutes_into_day'].apply(minutes_into_day_parts)

    df = df.drop([date_col], axis=1)

    return df
# Same logic as above, except implemented for a single dictionary, which is much faster at prediction time when getting just a single prediction
def get_utc_timestamp(dt):
    """
    Returns the Timestamp/DatetimeIndex
    with either localized or converted to UTC.

    Parameters
    ----------
    dt : Timestamp/DatetimeIndex
        the date(s) to be converted

    Returns
    -------
    same type as input
        date(s) converted to UTC
    """
    dt = pd.to_datetime(dt)
    try:
        # Naive values are labelled as UTC ...
        dt = dt.tz_localize('UTC')
    except TypeError:
        # ... values that already carry a timezone are converted instead.
        dt = dt.tz_convert('UTC')
    return dt
def format_dates(self, data, columns):
    """ This method translates columns values into datetime objects

    Columns that are not present in ``data`` are skipped. ``data`` is
    modified in place and also returned.

    :param data: original Pandas dataframe
    :param columns: list of columns to cast the date to a datetime object
    :type data: pandas.DataFrame
    :type columns: list of strings

    :returns: Pandas dataframe with updated 'columns' with datetime objects
    :rtype: pandas.DataFrame
    """
    for column in columns:
        if column in data.columns:
            data[column] = pandas.to_datetime(data[column])

    return data
def __init__(self, begin=None, end=None):
    """
    Load the holidays, fix the calendar range and build ``self.calendar``.

    Sample of the calendar layout kept from the original docstring (its
    prose was lost to an encoding problem):

                    type  weekday  next_td     tradeday    day_trade  night_trade  midnight_trade
        2016-01-01  2     5        2016-01-04  2016-01-01  True       True         True
        2016-01-02  3     6        2016-01-04  2016-01-04  False      False        True

    :param begin: first day of the calendar; defaults to ``self.yearbegin()``
    :param end: last day of the calendar; defaults to ``self.yearend()``
    """
    # NOTE(review): the original comment suggests a pd.Series indexed by date - confirm
    self.holidays = self.get_holiday_json()

    self.begin = begin or self.yearbegin()
    self.end = end or self.yearend()
    if self.holidays.shape[0]:
        # Move the end into the year after the latest known holiday.
        last_holiday = pd.to_datetime(max(self.holidays.index))
        self.end = self.end.replace(last_holiday.year + 1)

    self.calendar = self.getCalendar()
def ml_regression_build_prediction_test_window(self, req, num_units, rds, dbs):
    """Return the last ``num_units`` rows of the source frame with parsed dates.

    :param req: request dict; only ``req["SourceDF"]`` is read
    :param num_units: number of trailing rows to keep (cast with ``int``)
    :param rds: unused
    :param dbs: unused
    :returns: copy of the trailing rows; the ``Date`` and ``FDate`` columns,
        when present, are parsed with the ``'%Y-%m-%d'`` format
    """
    import pandas as pd

    source_df = req["SourceDF"]
    # Copy the tail so the conversions below never write into a slice of
    # the caller's frame.
    new_df = source_df.iloc[-1 * int(num_units):].copy()

    # Test for the exact column names: a substring test on the printed
    # column list makes "FDate" count as "Date" and then fails on lookup.
    for date_column in ("Date", "FDate"):
        if date_column in source_df.columns:
            new_df[date_column] = pd.to_datetime(new_df[date_column], format='%Y-%m-%d')

    return new_df
# end of ml_regression_build_prediction_test_window
def _to_dataframe(self, rs):
result = {}
if isinstance(rs, list):
return map(self._to_dataframe, rs)
for key, data in rs.items():
name, tags = key
if tags is None:
key = name
key = (name, tuple(sorted(tags.items())))
df = pd.DataFrame(data)
df.time = pd.to_datetime(df.time)
df.set_index('time', inplace=True)
df.index = df.index.tz_localize('UTC') = None
result[key] = df
return result
def test_query_into_dataframe(self):
    """A query result is returned as a DataFrame sorted by time and sequence."""
    data = [
        {
            "name": "foo",
            "columns": ["time", "sequence_number", "column_one"],
            "points": [
                [3600, 16, 2], [3600, 15, 1],
                [0, 14, 2], [0, 13, 1]
            ]
        }
    ]
    # dataframe sorted ascending by time first, then sequence_number
    dataframe = pd.DataFrame(data=[[13, 1], [14, 2], [15, 1], [16, 2]],
                             index=pd.to_datetime([0, 0,
                                                   3600, 3600],
                                                  unit='s', utc=True),
                             columns=['sequence_number', 'column_one'])
    with _mocked_session('get', 200, data):
        cli = DataFrameClient('host', 8086, 'username', 'password', 'db')
        result = cli.query('select column_one from foo;')
        assert_frame_equal(dataframe, result)
def _to_dataframe(self, json_result, time_precision):
    """Build a time-indexed DataFrame from one JSON series.

    :param json_result: dict with a ``'points'`` list of rows; the rows
        must contain a ``time`` column
    :param time_precision: unit of the ``time`` values; ``'m'`` and ``'u'``
        are mapped to the pandas units ``'ms'`` and ``'us'``, any other
        value is passed to pandas unchanged
    :returns: DataFrame sorted by time (then ``sequence_number`` when
        present), indexed by UTC timestamps, without the ``time`` column
    """
    # NOTE(review): the continuation lines of the two multi-line calls below
    # were missing in the original source; ``columns=json_result['columns']``
    # and ``unit=pandas_time_unit, utc=True`` are assumed.
    dataframe = pd.DataFrame(data=json_result['points'],
                             columns=json_result['columns'])
    if 'sequence_number' in dataframe.keys():
        dataframe.sort_values(['time', 'sequence_number'], inplace=True)
    else:
        dataframe.sort_values(['time'], inplace=True)

    pandas_time_unit = {'m': 'ms', 'u': 'us'}.get(time_precision, time_precision)
    dataframe.index = pd.to_datetime(list(dataframe['time']),
                                     unit=pandas_time_unit,
                                     utc=True)
    del dataframe['time']
    return dataframe
def _convert_dataframe_to_json(self, dataframe, name, time_precision='s'):
    """Convert a time-indexed DataFrame into a JSON-style series dict.

    The input frame is left untouched; the conversion works on a copy.

    :param dataframe: DataFrame with a DatetimeIndex or PeriodIndex; a
        naive index is treated as UTC
    :param name: value stored under ``'name'`` in the result
    :param time_precision: passed to ``self._datetime_to_epoch``
    :returns: dict with the keys ``'name'``, ``'columns'`` and ``'points'``
    :raises TypeError: if ``dataframe`` is not a DataFrame or its index is
        neither a DatetimeIndex nor a PeriodIndex
    """
    # NOTE(review): both error messages were cut off in the original source
    # and are completed here.
    if not isinstance(dataframe, pd.DataFrame):
        raise TypeError('Must be DataFrame, but type was: {0}.'
                        .format(type(dataframe)))
    # The top-level classes are used because the ``pd.tseries.period`` and
    # ``pd.tseries.index`` paths do not exist in current pandas.
    if not isinstance(dataframe.index, (pd.PeriodIndex, pd.DatetimeIndex)):
        raise TypeError('Must be DataFrame with DatetimeIndex or PeriodIndex.')

    dataframe = dataframe.copy()
    if isinstance(dataframe.index, pd.PeriodIndex):
        dataframe.index = dataframe.index.to_timestamp()
    else:
        dataframe.index = pd.to_datetime(dataframe.index)
    if dataframe.index.tzinfo is None:
        dataframe.index = dataframe.index.tz_localize('UTC')
    # The epoch column is appended last, so it is the last entry of
    # 'columns' and of every point.
    dataframe['time'] = [self._datetime_to_epoch(dt, time_precision)
                         for dt in dataframe.index]
    return {'name': name,
            'columns': [str(column) for column in dataframe.columns],
            'points': [self._convert_array(x) for x in dataframe.values]}
def parse_header(self, l_data):
    '''
    Format the parsed header row and return it as a dictionary

    :param l_data: list. list with the elements of the parsed row; items
        1 to 4 hold the file name, the initial date, the end date (both
        'YYYY-MM-DD') and the total number of lines
    :return: dict with the keys 'name_of_file', 'initial_date', 'end_date'
        (``datetime.date`` objects) and 'total_of_lines' (int)
    '''
    initial_date = pd.to_datetime(l_data[2], format='%Y-%m-%d').date()
    end_date = pd.to_datetime(l_data[3], format='%Y-%m-%d').date()
    return {
        # Name of file
        'name_of_file': l_data[1],
        # Initial date of file
        'initial_date': initial_date,
        # End date of file
        'end_date': end_date,
        # Total number of lines, as stated by the header
        'total_of_lines': int(l_data[4]),
    }
def setUp(self): = {'id': [chr(97 + c) for c in range(1, 10)],
# Test fixture: builds a column dict (id, x, y, cat, s1, s2, somedate, bool),
# stores a DataFrame in self.df, parses its 'somedate' column with
# pd.to_datetime, stores describe(self.df) in self.results and creates a
# temporary directory in self.test_dir.
# NOTE(review): this block is not valid Python as it stands - several tokens
# are missing: the assignment target on the def line, the end of the 'cat'
# list, the start of the first and of the fifth 'somedate' entries, the
# closing brace of the dict and the argument of pd.DataFrame(. Restore them
# from version control rather than guessing the values.
'x': [50, 50, -10, 0, 0, 5, 15, -3, None],
'y': [0.000001, 654.152, None, 15.984512, 3122, -3.1415926535, 111, 15.9, 13.5],
'cat': ['a', 'long text value', u'Élysée', '', None, 'some <b> B.s </div> </div> HTML stuff', 'c',
's1': np.ones(9),
's2': [u'some constant text $ % value {obj} ' for _ in range(1, 10)],
'somedate': [, 7, 4), datetime.datetime(2022, 1, 1, 13, 57),
datetime.datetime(1990, 12, 9), np.nan,
1990, 12, 9), datetime.datetime(1950, 12, 9),
datetime.datetime(1898, 1, 2), datetime.datetime(1950, 12, 9), datetime.datetime(1950, 12, 9)],
'bool': [True, True, False, True, False, True, True, False, True]
self.df = pd.DataFrame(
self.df['somedate'] = pd.to_datetime(self.df['somedate'])
self.results = describe(self.df)
self.test_dir = tempfile.mkdtemp()
def makeWeekly(data):
    """Aggregate the last column of ``data`` into weekly sums.

    The first column holds the dates, the last column the values, and the
    columns in between are additional grouping keys. The date column of
    ``data`` is converted to datetimes in place.

    :param data: DataFrame with at least three columns
    :returns: DataFrame with a leading ``week`` column (the weekly bin
        label), the grouping columns and the summed value column
    """
    columnList = data.columns.tolist()
    if len(columnList) - 2 < 1:
        sys.exit("you need at least 1 column")

    date_col = columnList[0]
    value_col = columnList[-1]
    group_cols = columnList[1:-1]

    data[date_col] = pd.to_datetime(data[date_col])

    # Aggregate once with the keys in the index and flatten afterwards; the
    # week label then comes straight from the weekly grouper level instead
    # of from a second, identical aggregation.
    weekly = data.groupby([pd.Grouper(key=date_col, freq='W'), *group_cols])[value_col].sum()
    weekly = weekly.reset_index().rename(columns={date_col: 'week'})
    return weekly[['week', *group_cols, value_col]]
#%% Create Ordering Function
def df_maker(stats, logged_user):
    """Collect the display fields of ``stats`` and load its performance frame.

    :param stats: mapping with the symbol, period, system, broker and image
        fields read below
    :param logged_user: passed to ``get_index_portfolio``
    :returns: tuple ``(symbol_slug, period_slug, system_slug, broker_slug,
        direction_slug, broker, period, symbol, system, meta_image,
        portfolio, df, heat_image, yearly_image, mc_image)``
    """
    symbol_slug = stats['symbol__symbol']
    period_slug = stats['period__period']
    system_slug = stats['system__title']
    broker_slug = stats['broker__slug']
    direction_slug = get_direction(stats=stats)

    broker = stats['broker__title']
    period = stats['period__name']
    # The symbol and system display values come from the same keys as their slugs.
    symbol = stats['symbol__symbol']
    system = stats['system__title']

    meta_image = stats['img']
    heat_image = stats['heatmap']
    yearly_image = stats['yearly_ret']
    mc_image = stats['mc']

    portfolio = get_index_portfolio(logged_user=logged_user, stats=stats)

    performance_name = "{0}=={1}=={2}=={3}".format(
        broekr_slug_to_title(broker_slug=broker_slug),
        symbol_slug, period_slug, system_slug)
    in_file = join(settings.DATA_PATH, "performance", performance_name)

    df = nonasy_df_multi_reader(filename=in_file, limit=settings.LIMIT_ENABLED)
    df.index = to_datetime(df.index).to_pydatetime()

    return (
        symbol_slug, period_slug, system_slug, broker_slug, direction_slug,
        broker, period, symbol, system, meta_image, portfolio, df, heat_image,
        yearly_image, mc_image,
    )
async def gen_time_data(df):
    """Collect date parts of the current time and of the last row of ``df``.

    A ``ts`` column holding the index converted to datetimes is added to
    ``df`` in place.

    :param df: DataFrame whose index can be converted to datetimes
    :returns: ``(t, df)`` where ``t`` is a dict with the keys ``ye``,
        ``mo``, ``to_day``, ``dow``, ``prev_day``, ``prev_mo``,
        ``end_prev_day``, ``df_year``, ``df_month``, ``df_day`` and
        ``df_weekday``
    """
    # NOTE(review): the body awaits helpers, so the function has to be a
    # coroutine; the ``async`` keyword is assumed to have been lost from
    # the original source.
    from datetime import datetime

    t = {}
    # NOTE(review): the right-hand sides of ``now`` and ``t["to_day"]`` were
    # missing in the original source; the current local time and its day of
    # month are assumed.
    now = datetime.now()
    t["ye"] = now.year
    t["mo"] = now.month
    t["to_day"] = now.day
    t["dow"] = now.weekday()
    t["prev_day"] = await get_prev_day(d=t["to_day"], mo=t["mo"])
    t["prev_mo"] = await get_prev_mo(mo=t["mo"])
    t["end_prev_day"] = [30, 31]

    df['ts'] = df.index
    df['ts'] = to_datetime(df['ts'])
    # ``.ix`` is gone from pandas; the last row is selected by position.
    last_stamp = df['ts'].iloc[-1].to_pydatetime()
    t["df_year"] = last_stamp.year
    t["df_month"] = last_stamp.month
    t["df_day"] = last_stamp.day
    t["df_weekday"] = last_stamp.weekday()

    return t, df
def make_df(resp) -> Union[pd.DataFrame, Iterable[Tuple[str, pd.DataFrame]]]:
    """Makes list of DataFrames from a response object

    :param resp: dict with a ``'results'`` list; each statement may hold a
        ``'series'`` list whose items have ``'name'``, ``'columns'`` and
        ``'values'`` entries, with ``time`` among the columns
    :returns: the single DataFrame when exactly one series is present,
        otherwise a list of ``(series name, DataFrame)`` tuples
    """

    def maker(series) -> pd.DataFrame:
        """Build one UTC-indexed DataFrame from a series dict."""
        df = pd.DataFrame(series['values'], columns=series['columns'])
        df = df.set_index(pd.to_datetime(df['time'])).drop('time', axis=1)  # type: pd.DataFrame
        # Timestamps parsed with an offset are already timezone-aware;
        # tz_localize would raise on them, so convert those instead.
        if df.index.tz is None:
            df.index = df.index.tz_localize('UTC')
        else:
            df.index = df.index.tz_convert('UTC')
        df.index.name = None
        if 'name' in series:
            df.name = series['name']
        return df

    # Statements without a 'series' entry contribute nothing.
    df_list = [(series['name'], maker(series))
               for statement in resp['results']
               for series in statement.get('series', [])]
    if len(df_list) == 1:
        return df_list[0][1]
    return df_list
def parse_raw(filepath,seconds=1):
# NOTE(review): this definition is incomplete in the visible source. The
# three ':param' / ':return' lines below are the remains of a docstring whose
# quotes and (apparently non-ASCII) text were lost, and the statements that
# define `data`, `date_start` and `newdata` are missing, so the block cannot
# run as it stands. Restore it from version control rather than guessing.
:param filepath: ???????????????
:param seconds: int??????????????
:return: dataframe??index??????columns??????
# Joins the two cells at row 1, columns 0 and 1 of `data` with a space.
time_start=data.iloc[1,0]+' '+data.iloc[1,1]
# Parses "<date_start> <time_start>" into a pandas timestamp.
datetime_start=pd.to_datetime(date_start+' '+time_start)
return newdata
def get_year_start_end(dt, first_day=None, last_day=None):
    """
    The first and last day of the year for the specified date.

    The last day is capped at the start of the current UTC day when it
    lies in the future.

    Parameters
    ----------
    dt: datetime
    first_day: datetime
        used as the start instead of January 1st when given
    last_day: datetime
        used as the end instead of December 31st when given

    Returns
    -------
    datetime, datetime
    """
    year_start = first_day if first_day \
        else pd.to_datetime(date(dt.year, 1, 1), utc=True)

    year_end = last_day if last_day \
        else pd.to_datetime(date(dt.year, 12, 31), utc=True)

    # ``Timestamp.now(tz='UTC')`` replaces the deprecated ``Timestamp.utcnow()``.
    now = pd.Timestamp.now(tz='UTC')
    if year_end > now:
        year_end = now.floor('1D')

    return year_start, year_end