def test_truncate_ndots(self):
    def getndots(s):
        return len(re.match(r'[^\.]*(\.*)', s).groups()[0])

    s = Series([0, 2, 3, 6])
    with option_context("display.max_rows", 2):
        strrepr = repr(s).replace('\n', '')
    self.assertEqual(getndots(strrepr), 2)

    s = Series([0, 100, 200, 400])
    with option_context("display.max_rows", 2):
        strrepr = repr(s).replace('\n', '')
    self.assertEqual(getndots(strrepr), 3)
Python option_context() usage examples (source code)
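Every snippet on this page revolves around pandas.option_context, a context manager that temporarily overrides one or more pandas options inside a with block and restores the previous values on exit, however the block terminates. A minimal, self-contained sketch (not taken from any of the projects below):

import pandas as pd

s = pd.Series(range(100))
with pd.option_context('display.max_rows', 4):
    print(s)   # truncated repr: head and tail rows with a '..' marker between
print(s)       # the previous max_rows setting is back in effect here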
Source: test_format.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_output_significant_digits(self):
    # Issue #9764
    # In case default display precision changes:
    with pd.option_context('display.precision', 6):
        # DataFrame example from issue #9764
        d = pd.DataFrame(
            {'col1': [9.999e-8, 1e-7, 1.0001e-7, 2e-7, 4.999e-7, 5e-7,
                      5.0001e-7, 6e-7, 9.999e-7, 1e-6, 1.0001e-6, 2e-6,
                      4.999e-6, 5e-6, 5.0001e-6, 6e-6]})

        expected_output = {
            (0, 6):
            '           col1\n0  9.999000e-08\n1  1.000000e-07\n2  1.000100e-07\n3  2.000000e-07\n4  4.999000e-07\n5  5.000000e-07',
            (1, 6):
            '           col1\n1  1.000000e-07\n2  1.000100e-07\n3  2.000000e-07\n4  4.999000e-07\n5  5.000000e-07',
            (1, 8):
            '           col1\n1  1.000000e-07\n2  1.000100e-07\n3  2.000000e-07\n4  4.999000e-07\n5  5.000000e-07\n6  5.000100e-07\n7  6.000000e-07',
            (8, 16):
            '            col1\n8   9.999000e-07\n9   1.000000e-06\n10  1.000100e-06\n11  2.000000e-06\n12  4.999000e-06\n13  5.000000e-06\n14  5.000100e-06\n15  6.000000e-06',
            (9, 16):
            '        col1\n9   0.000001\n10  0.000001\n11  0.000002\n12  0.000005\n13  0.000005\n14  0.000005\n15  0.000006'
        }

        for (start, stop), v in expected_output.items():
            self.assertEqual(str(d[start:stop]), v)
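The expected strings above encode display.precision as the number of significant digits in the exponential format. The same effect can be reproduced outside the test harness; a minimal sketch using two of the values from the test:

import pandas as pd

df = pd.DataFrame({'col1': [1.0001e-7, 5e-6]})
with pd.option_context('display.precision', 6):
    print(df)  # renders 1.000100e-07 and 5.000000e-06, six significant digits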
Source: test_format.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_too_long(self):
    # GH 10451
    with pd.option_context('display.precision', 4):
        # need both a number > 1e6 and something that normally formats to
        # having length > display.precision + 6
        df = pd.DataFrame(dict(x=[12345.6789]))
        self.assertEqual(str(df), '            x\n0  12345.6789')
        df = pd.DataFrame(dict(x=[2e6]))
        self.assertEqual(str(df), '           x\n0  2000000.0')
        df = pd.DataFrame(dict(x=[12345.6789, 2e6]))
        self.assertEqual(
            str(df), '            x\n0  1.2346e+04\n1  2.0000e+06')
Source: test_style.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_precision(self):
    with pd.option_context('display.precision', 10):
        s = Styler(self.df)
    self.assertEqual(s.precision, 10)

    s = Styler(self.df, precision=2)
    self.assertEqual(s.precision, 2)

    s2 = s.set_precision(4)
    self.assertTrue(s is s2)
    self.assertEqual(s.precision, 4)
def _main(log_path, show_browser=False):
    print(log_path)
    df, metadata = process_log(log_path)
    del df['Timestamp']
    df['Msg Type'] = df['Msg Type'].apply(escape_html_chars)
    df['Message'] = df['Message'].apply(escape_html_chars)
    # df['Message'] = df['Message'].apply(try_json)
    df['Message'] = df.apply(lambda row: format_error(row['Msg Type'], row['Message']), 1)
    df['Rev ID'] = df['Rev ID'].apply(lambda x: '<a href="https://www.wikidata.org/w/index.php?oldid={}&diff=prev">{}</a>'.format(x, x) if x else x)

    level_counts, info_counts, warning_counts, error_counts = generate_summary(df)

    warnings_df = df.query("Level == 'WARNING'")
    warnings_df.is_copy = False
    del warnings_df['Level']
    if not warnings_df.empty:
        warnings_df = gen_ext_id_links(warnings_df)
        warnings_df = url_qid(warnings_df, "QID")

    errors_df = df.query("Level == 'ERROR'")
    errors_df.is_copy = False
    del errors_df['Level']
    if not errors_df.empty:
        errors_df = gen_ext_id_links(errors_df)
        errors_df = url_qid(errors_df, "QID")
        # errors_df['Message'] = errors_df['Message'].apply(try_format_error)

    info_df = df.query("Level == 'INFO'")
    info_df.is_copy = False
    del info_df['Level']
    if not info_df.empty:
        info_df = gen_ext_id_links(info_df)
        info_df = url_qid(info_df, "QID")
        info_df.Message = info_df.Message.str.replace("SKIP", "No Action")

    with pd.option_context('display.max_colwidth', -1):
        # this class nonsense is an ugly hack: https://stackoverflow.com/questions/15079118/js-datatables-from-pandas/41536906
        level_counts = level_counts.to_frame().to_html(escape=False)
        info_counts = info_counts.to_frame().to_html(escape=False)
        warning_counts = warning_counts.to_frame().to_html(escape=False)
        error_counts = error_counts.to_frame().to_html(escape=False)
        info_df = info_df.to_html(escape=False, classes='df" id = "info_df')
        warnings_df = warnings_df.to_html(escape=False, classes='df" id = "warning_df')
        errors_df = errors_df.to_html(escape=False, classes='df" id = "error_df')

    template = Template(open(os.path.join(sys.path[0], "template.html")).read())
    s = template.render(name=metadata['name'], run_id=metadata['run_id'],
                        level_counts=level_counts,
                        info_counts=info_counts,
                        warning_counts=warning_counts,
                        error_counts=error_counts,
                        warnings_df=warnings_df, errors_df=errors_df, info_df=info_df)

    out_path = log_path.rsplit(".", 1)[0] + ".html"
    with open(out_path, 'w') as f:
        f.write(s)
    if show_browser:
        webbrowser.open(out_path)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('logfiles', type=str, nargs='+')
    parser.add_argument('--fields', type=str, default='ret,avglen,ent,kl,vf_r2,ttotal')
    parser.add_argument('--noplot', action='store_true')
    parser.add_argument('--plotfile', type=str, default=None)
    parser.add_argument('--range_end', type=int, default=None)
    args = parser.parse_args()
    assert len(set(args.logfiles)) == len(args.logfiles), 'Log files must be unique'

    fields = args.fields.split(',')

    # Load logs from all files
    fname2log = {}
    for fname in args.logfiles:
        if ':' in fname:
            os.system('rsync -avrz {} /tmp'.format(fname))
            fname = os.path.join('/tmp', os.path.basename(fname))
        with pd.HDFStore(fname, 'r') as f:
            assert fname not in fname2log
            df = f['log']
            df.set_index('iter', inplace=True)
            fname2log[fname] = df.loc[:args.range_end, fields]

    # Print
    if not args.noplot or args.plotfile is not None:
        import matplotlib
        if args.plotfile is not None:
            matplotlib.use('Agg')
        import matplotlib.pyplot as plt
        plt.style.use('seaborn-colorblind')

    ax = None
    for fname, df in fname2log.items():
        with pd.option_context('display.max_rows', 9999):
            print(fname)
            print(df[-1:])

        if 'vf_r2' in df.keys():
            df['vf_r2'] = np.maximum(0, df['vf_r2'])
        if not args.noplot:
            if ax is None:
                ax = df.plot(subplots=True, title=','.join(args.logfiles))
            else:
                df.plot(subplots=True, title=','.join(args.logfiles), ax=ax, legend=False)

    if args.plotfile is not None:
        plt.savefig(args.plotfile, transparent=True, bbox_inches='tight', dpi=300)
    elif not args.noplot:
        plt.show()
def transform(self, X, y=None):
    # Suppress SettingWithCopyWarning (alternatively: add X = X.copy())
    with pd.option_context('mode.chained_assignment', None):
        # --- Convert Embarked
        mapping = {'S': 0,
                   'C': 1,
                   'Q': 2,
                   }
        X.loc[:, 'Embarked'] = X.loc[:, 'Embarked'].replace(mapping, inplace=False)

        # --- Convert Sex
        mapping = {'female': 0,
                   'male': 1
                   }
        X.loc[:, 'Sex'] = X['Sex'].replace(mapping, inplace=False)

        # --- Convert Name to Title
        X.loc[:, 'Title'] = X['Name'].map(lambda name: name.split(',')[1].split('.')[0].strip())

        # a map of more aggregated titles
        mapping = {
            "Capt": 0,          # Officer
            "Col": 0,           # Officer
            "Major": 0,         # Officer
            "Jonkheer": 1,      # Royalty
            "Don": 1,           # Royalty
            "Sir": 1,           # Royalty
            "Dr": 0,            # Officer
            "Rev": 0,           # Officer
            "the Countess": 1,  # Royalty
            "Dona": 1,          # Royalty
            "Mme": 2,           # "Mrs"
            "Mlle": 3,          # "Miss"
            "Ms": 2,            # "Mrs"
            "Mr": 4,            # "Mr"
            "Mrs": 2,           # "Mrs"
            "Miss": 3,          # "Miss"
            "Master": 5,        # "Master"
            "Lady": 1           # "Royalty"
        }
        X.loc[:, 'Title'] = X['Title'].map(mapping)
        X = X.drop('Name', 1)
    return X
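A hypothetical call sketch for the transformer above. The column names follow the Kaggle Titanic schema the method assumes, and TitanicPreprocessor is a stand-in name for whatever class defines transform():

import pandas as pd

raw = pd.DataFrame({
    'Name': ['Braund, Mr. Owen Harris', 'Cumings, Mrs. John Bradley'],
    'Sex': ['male', 'female'],
    'Embarked': ['S', 'C'],
})
# encoded = TitanicPreprocessor().transform(raw)  # hypothetical host class
# 'Sex' and 'Embarked' become integer codes, 'Title' is extracted from 'Name'
# ('Mr' -> 4, 'Mrs' -> 2), and the 'Name' column itself is dropped.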
Source: test_base.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_representation_to_series(self):
    idx1 = DatetimeIndex([], freq='D')
    idx2 = DatetimeIndex(['2011-01-01'], freq='D')
    idx3 = DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D')
    idx4 = DatetimeIndex(
        ['2011-01-01', '2011-01-02', '2011-01-03'], freq='D')
    idx5 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00',
                          '2011-01-01 11:00'], freq='H', tz='Asia/Tokyo')
    idx6 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT],
                         tz='US/Eastern')
    idx7 = DatetimeIndex(['2011-01-01 09:00', '2011-01-02 10:15'])

    exp1 = """Series([], dtype: datetime64[ns])"""

    exp2 = """0   2011-01-01
dtype: datetime64[ns]"""

    exp3 = """0   2011-01-01
1   2011-01-02
dtype: datetime64[ns]"""

    exp4 = """0   2011-01-01
1   2011-01-02
2   2011-01-03
dtype: datetime64[ns]"""

    exp5 = """0   2011-01-01 09:00:00+09:00
1   2011-01-01 10:00:00+09:00
2   2011-01-01 11:00:00+09:00
dtype: datetime64[ns, Asia/Tokyo]"""

    exp6 = """0   2011-01-01 09:00:00-05:00
1   2011-01-01 10:00:00-05:00
2                         NaT
dtype: datetime64[ns, US/Eastern]"""

    exp7 = """0   2011-01-01 09:00:00
1   2011-01-02 10:15:00
dtype: datetime64[ns]"""

    with pd.option_context('display.width', 300):
        for idx, expected in zip([idx1, idx2, idx3, idx4,
                                  idx5, idx6, idx7],
                                 [exp1, exp2, exp3, exp4,
                                  exp5, exp6, exp7]):
            result = repr(Series(idx))
            self.assertEqual(result, expected)
Source: test_format.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_repr_max_columns_max_rows(self):
    term_width, term_height = get_terminal_size()
    if term_width < 10 or term_height < 10:
        raise nose.SkipTest("terminal size too small, "
                            "{0} x {1}".format(term_width, term_height))

    def mkframe(n):
        index = ['%05d' % i for i in range(n)]
        return DataFrame(0, index, index)

    df6 = mkframe(6)
    df10 = mkframe(10)
    with option_context('mode.sim_interactive', True):
        with option_context('display.width', term_width * 2):
            with option_context('display.max_rows', 5,
                                'display.max_columns', 5):
                self.assertFalse(has_expanded_repr(mkframe(4)))
                self.assertFalse(has_expanded_repr(mkframe(5)))
                self.assertFalse(has_expanded_repr(df6))
                self.assertTrue(has_doubly_truncated_repr(df6))

            with option_context('display.max_rows', 20,
                                'display.max_columns', 10):
                # Out of the max_columns boundary, but no expanding
                # since the width is not exceeded
                self.assertFalse(has_expanded_repr(df6))
                self.assertFalse(has_truncated_repr(df6))

            with option_context('display.max_rows', 9,
                                'display.max_columns', 10):
                # out of vertical bounds cannot result in expanded repr
                self.assertFalse(has_expanded_repr(df10))
                self.assertTrue(has_vertically_truncated_repr(df10))

        # width=None in terminal, auto detection
        with option_context('display.max_columns', 100, 'display.max_rows',
                            term_width * 20, 'display.width', None):
            df = mkframe((term_width // 7) - 2)
            self.assertFalse(has_expanded_repr(df))
            df = mkframe((term_width // 7) + 2)
            com.pprint_thing(df._repr_fits_horizontal_())
            self.assertTrue(has_expanded_repr(df))
Source: clipboard.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def to_clipboard(obj, excel=None, sep=None, **kwargs):  # pragma: no cover
    """
    Attempt to write the text representation of an object to the system
    clipboard. The clipboard can then be pasted into Excel, for example.

    Parameters
    ----------
    obj : the object to write to the clipboard
    excel : boolean, defaults to True
        if True, use the provided separator, writing in a csv
        format for allowing easy pasting into excel.
        if False, write a string representation of the object
        to the clipboard
    sep : optional, defaults to tab
    other keywords are passed to to_csv

    Notes
    -----
    Requirements for your platform
      - Linux: xclip, or xsel (with gtk or PyQt4 modules)
      - Windows:
      - OS X:
    """
    from pandas.util.clipboard import clipboard_set
    if excel is None:
        excel = True
    if excel:
        try:
            if sep is None:
                sep = '\t'
            buf = StringIO()
            obj.to_csv(buf, sep=sep, **kwargs)
            clipboard_set(buf.getvalue())
            return
        except:
            pass

    if isinstance(obj, DataFrame):
        # str(df) has various unhelpful defaults, like truncation
        with option_context('display.max_colwidth', 999999):
            objstr = obj.to_string(**kwargs)
    else:
        objstr = str(obj)
    clipboard_set(objstr)
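A minimal usage sketch for the helper above. The same functionality is exposed publicly as DataFrame.to_clipboard(), which is the supported entry point; a clipboard backend such as xclip or xsel must be available on Linux, per the Notes in the docstring:

import pandas as pd

df = pd.DataFrame({'a': [1, 2], 'b': [3.0, 4.0]})
df.to_clipboard()             # CSV-style, tab-separated: pastes cleanly into Excel
df.to_clipboard(excel=False)  # plain string representation instead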
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('logfiles', type=str, nargs='+')
    parser.add_argument('--fields', type=str, default='trueret,avglen,ent,kl,vf_r2,vf_kl,tdvf_r2,rloss,racc')
    parser.add_argument('--noplot', action='store_true')
    parser.add_argument('--plotfile', type=str, default=None)
    parser.add_argument('--range_end', type=int, default=None)
    args = parser.parse_args()
    assert len(set(args.logfiles)) == len(args.logfiles), 'Log files must be unique'

    fields = args.fields.split(',')

    # Load logs from all files
    fname2log = {}
    for fname in args.logfiles:
        with pd.HDFStore(fname, 'r') as f:
            assert fname not in fname2log
            df = f['log']
            df.set_index('iter', inplace=True)
            fname2log[fname] = df.loc[:args.range_end, fields]

    # Print stuff
    if not args.noplot or args.plotfile is not None:
        import matplotlib
        if args.plotfile is not None:
            matplotlib.use('Agg')
        import matplotlib.pyplot as plt
        plt.style.use('ggplot')

    ax = None
    for fname, df in fname2log.items():
        with pd.option_context('display.max_rows', 9999):
            print(fname)
            print(df[-1:])
        df['vf_r2'] = np.maximum(0, df['vf_r2'])
        if ax is None:
            ax = df.plot(subplots=True, title=fname)
        else:
            df.plot(subplots=True, title=fname, ax=ax, legend=False)

    if not args.noplot:
        plt.show()
    if args.plotfile is not None:
        plt.savefig(args.plotfile, bbox_inches='tight', dpi=200)
def _print_df_scores(df_scores, score_types, indent=''):
    """Pretty print the scores dataframe.

    Parameters
    ----------
    df_scores : pd.DataFrame
        the score dataframe
    score_types : list of score types
        a list of score types to use
    indent : str, default=''
        indentation if needed
    """
    try:
        # try to re-order columns/rows in the printed array
        # we may not have all train, valid, test, so need to select
        index_order = np.array(['train', 'valid', 'test'])
        ordered_index = index_order[np.isin(index_order, df_scores.index)]
        df_scores = df_scores.loc[
            ordered_index, [score_type.name for score_type in score_types]]
    except Exception:
        _print_warning("Couldn't re-order the score matrix..")

    with pd.option_context("display.width", 160):
        df_repr = repr(df_scores)
    df_repr_out = []
    for line, color_key in zip(df_repr.splitlines(),
                               [None, None] +
                               list(df_scores.index.values)):
        if line.strip() == 'step':
            continue
        if color_key is None:
            # table header
            line = stylize(line, fg(fg_colors['title']) + attr('bold'))
        if color_key is not None:
            tokens = line.split()
            tokens_bak = tokens[:]
            if 'official_' + color_key in fg_colors:
                # line label and official score bold & bright
                label_color = fg(fg_colors['official_' + color_key])
                tokens[0] = stylize(tokens[0], label_color + attr('bold'))
                tokens[1] = stylize(tokens[1], label_color + attr('bold'))
            if color_key in fg_colors:
                # other scores pale
                tokens[2:] = [stylize(token, fg(fg_colors[color_key]))
                              for token in tokens[2:]]
            for token_from, token_to in zip(tokens_bak, tokens):
                line = line.replace(token_from, token_to)
        line = indent + line
        df_repr_out.append(line)
    print('\n'.join(df_repr_out))
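A hypothetical invocation sketch. Only the .name attribute of each score type is read here, so a namedtuple can stand in for a real score type; naming the index 'step' reproduces the extra header line that the [None, None] offset in the zip above accounts for. stylize, fg, attr and the fg_colors map (which needs a 'title' entry) are assumed to come from the surrounding module and the colored package:

import collections

import pandas as pd

ScoreType = collections.namedtuple('ScoreType', 'name')
df_scores = pd.DataFrame(
    {'acc': [0.92, 0.87]},
    index=pd.Index(['train', 'valid'], name='step'))
_print_df_scores(df_scores, [ScoreType('acc')], indent='  ')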