Python pandas `option_context()` 的实例源码（用法示例合集）

test_format.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def test_truncate_ndots(self):
        def getndots(s):
            return len(re.match('[^\.]*(\.*)', s).groups()[0])

        s = Series([0, 2, 3, 6])
        with option_context("display.max_rows", 2):
            strrepr = repr(s).replace('\n', '')
        self.assertEqual(getndots(strrepr), 2)

        s = Series([0, 100, 200, 400])
        with option_context("display.max_rows", 2):
            strrepr = repr(s).replace('\n', '')
        self.assertEqual(getndots(strrepr), 3)
test_format.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 37 收藏 0 点赞 0 评论 0
def test_output_significant_digits(self):
        # Issue #9764

        # In case default display precision changes:
        with pd.option_context('display.precision', 6):
            # DataFrame example from issue #9764
            d = pd.DataFrame(
                {'col1': [9.999e-8, 1e-7, 1.0001e-7, 2e-7, 4.999e-7, 5e-7,
                          5.0001e-7, 6e-7, 9.999e-7, 1e-6, 1.0001e-6, 2e-6,
                          4.999e-6, 5e-6, 5.0001e-6, 6e-6]})

            expected_output = {
                (0, 6):
                '           col1\n0  9.999000e-08\n1  1.000000e-07\n2  1.000100e-07\n3  2.000000e-07\n4  4.999000e-07\n5  5.000000e-07',
                (1, 6):
                '           col1\n1  1.000000e-07\n2  1.000100e-07\n3  2.000000e-07\n4  4.999000e-07\n5  5.000000e-07',
                (1, 8):
                '           col1\n1  1.000000e-07\n2  1.000100e-07\n3  2.000000e-07\n4  4.999000e-07\n5  5.000000e-07\n6  5.000100e-07\n7  6.000000e-07',
                (8, 16):
                '            col1\n8   9.999000e-07\n9   1.000000e-06\n10  1.000100e-06\n11  2.000000e-06\n12  4.999000e-06\n13  5.000000e-06\n14  5.000100e-06\n15  6.000000e-06',
                (9, 16):
                '        col1\n9   0.000001\n10  0.000001\n11  0.000002\n12  0.000005\n13  0.000005\n14  0.000005\n15  0.000006'
            }

            for (start, stop), v in expected_output.items():
                self.assertEqual(str(d[start:stop]), v)
test_format.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 42 收藏 0 点赞 0 评论 0
def test_too_long(self):
        # GH 10451
        with pd.option_context('display.precision', 4):
            # need both a number > 1e6 and something that normally formats to
            # having length > display.precision + 6
            df = pd.DataFrame(dict(x=[12345.6789]))
            self.assertEqual(str(df), '            x\n0  12345.6789')
            df = pd.DataFrame(dict(x=[2e6]))
            self.assertEqual(str(df), '           x\n0  2000000.0')
            df = pd.DataFrame(dict(x=[12345.6789, 2e6]))
            self.assertEqual(
                str(df), '            x\n0  1.2346e+04\n1  2.0000e+06')
test_style.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 33 收藏 0 点赞 0 评论 0
def test_precision(self):
        # display.precision is read once, when the Styler is constructed
        with pd.option_context('display.precision', 10):
            styler = Styler(self.df)
        self.assertEqual(styler.precision, 10)

        # an explicit precision argument overrides the option
        styler = Styler(self.df, precision=2)
        self.assertEqual(styler.precision, 2)

        # set_precision mutates in place and returns the same object
        returned = styler.set_precision(4)
        self.assertTrue(styler is returned)
        self.assertEqual(styler.precision, 4)
bot_log_parser.py 文件源码 项目:scheduled-bots 作者: SuLab 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def _prepare_level_df(df, level):
    """Slice out the rows of one log level and prettify them for HTML.

    Drops the now-redundant 'Level' column, links external IDs and QIDs,
    and (for INFO rows) relabels the bot's 'SKIP' action as 'No Action'.
    Returns the possibly-empty slice.
    """
    level_df = df.query("Level == @level")
    level_df.is_copy = False  # silence SettingWithCopyWarning on the slice
    del level_df['Level']
    if not level_df.empty:
        level_df = gen_ext_id_links(level_df)
        level_df = url_qid(level_df, "QID")
        if level == 'INFO':
            level_df.Message = level_df.Message.str.replace("SKIP", "No Action")
    return level_df


def _main(log_path, show_browser=False):
    """Parse a bot log file and render an HTML report next to it.

    Parameters
    ----------
    log_path : str
        path to a log file understood by process_log(); the report is
        written to the same path with the extension replaced by '.html'
    show_browser : bool, default False
        if True, open the generated report in the default web browser
    """
    print(log_path)
    df, metadata = process_log(log_path)
    del df['Timestamp']

    # escape first, then wrap messages in their per-type error markup
    df['Msg Type'] = df['Msg Type'].apply(escape_html_chars)
    df['Message'] = df['Message'].apply(escape_html_chars)
    df['Message'] = df.apply(lambda row: format_error(row['Msg Type'], row['Message']), 1)
    # link every revision ID to its wikidata diff page
    df['Rev ID'] = df['Rev ID'].apply(lambda x: '<a href="https://www.wikidata.org/w/index.php?oldid={}&diff=prev">{}</a>'.format(x,x) if x else x)

    level_counts, info_counts, warning_counts, error_counts = generate_summary(df)

    # one prettified frame per log level (previously triplicated inline)
    warnings_df = _prepare_level_df(df, 'WARNING')
    errors_df = _prepare_level_df(df, 'ERROR')
    info_df = _prepare_level_df(df, 'INFO')

    # max_colwidth -1 disables truncation so full messages reach to_html()
    with pd.option_context('display.max_colwidth', -1):
        # this class nonsense is an ugly hack: https://stackoverflow.com/questions/15079118/js-datatables-from-pandas/41536906
        level_counts = level_counts.to_frame().to_html(escape=False)
        info_counts = info_counts.to_frame().to_html(escape=False)
        warning_counts = warning_counts.to_frame().to_html(escape=False)
        error_counts = error_counts.to_frame().to_html(escape=False)
        info_df = info_df.to_html(escape=False, classes='df" id = "info_df')
        warnings_df = warnings_df.to_html(escape=False, classes='df" id = "warning_df')
        errors_df = errors_df.to_html(escape=False, classes='df" id = "error_df')

    # template.html is expected to sit beside the executing script
    template = Template(open(os.path.join(sys.path[0], "template.html")).read())

    s = template.render(name=metadata['name'], run_id=metadata['run_id'],
                        level_counts=level_counts,
                        info_counts=info_counts,
                        warning_counts=warning_counts,
                        error_counts=error_counts,
                        warnings_df=warnings_df, errors_df=errors_df, info_df=info_df)
    out_path = log_path.rsplit(".", 1)[0] + ".html"
    with open(out_path, 'w') as f:
        f.write(s)

    if show_browser:
        webbrowser.open(out_path)
showlog.py 文件源码 项目:rltools 作者: sisl 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def main():
    """CLI: print the final row of each training log and optionally plot them."""
    parser = argparse.ArgumentParser()
    parser.add_argument('logfiles', type=str, nargs='+')
    parser.add_argument('--fields', type=str, default='ret,avglen,ent,kl,vf_r2,ttotal')
    parser.add_argument('--noplot', action='store_true')
    parser.add_argument('--plotfile', type=str, default=None)
    parser.add_argument('--range_end', type=int, default=None)
    args = parser.parse_args()

    assert len(set(args.logfiles)) == len(args.logfiles), 'Log files must be unique'

    fields = args.fields.split(',')

    # Load every log; remote host:path specs are rsync'd into /tmp first.
    fname2log = {}
    for fname in args.logfiles:
        if ':' in fname:
            os.system('rsync -avrz {} /tmp'.format(fname))
            fname = os.path.join('/tmp', os.path.basename(fname))
        with pd.HDFStore(fname, 'r') as f:
            assert fname not in fname2log
            log_df = f['log']
            log_df.set_index('iter', inplace=True)
            fname2log[fname] = log_df.loc[:args.range_end, fields]

    # Import matplotlib lazily, and only when something will be drawn.
    will_draw = not args.noplot or args.plotfile is not None
    if will_draw:
        import matplotlib
        if args.plotfile is not None:
            matplotlib.use('Agg')
        import matplotlib.pyplot as plt
        plt.style.use('seaborn-colorblind')

    ax = None
    for fname, log_df in fname2log.items():
        with pd.option_context('display.max_rows', 9999):
            print(fname)
            print(log_df[-1:])

        # clip the value-function R^2 at zero when present
        if 'vf_r2' in log_df.keys():
            log_df['vf_r2'] = np.maximum(0, log_df['vf_r2'])

        if not args.noplot:
            title = ','.join(args.logfiles)
            if ax is None:
                ax = log_df.plot(subplots=True, title=title)
            else:
                log_df.plot(subplots=True, title=title, ax=ax, legend=False)
    if args.plotfile is not None:
        plt.savefig(args.plotfile, transparent=True, bbox_inches='tight', dpi=300)
    elif not args.noplot:
        plt.show()
comp_common.py 文件源码 项目:Titanic 作者: GeoffBreemer 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def transform(self, X, y=None):
        """Encode the Titanic categorical features as small integers.

        Maps 'Embarked' (S/C/Q -> 0/1/2) and 'Sex' (female/male -> 0/1),
        derives an aggregated integer 'Title' from the 'Name' column, and
        drops 'Name'.

        Parameters
        ----------
        X : pd.DataFrame
            frame with 'Embarked', 'Sex' and 'Name' columns; written to via
            .loc, so callers sharing the frame see the encoded columns
        y : ignored
            present for scikit-learn transformer API compatibility

        Returns
        -------
        pd.DataFrame
            X without 'Name' and with the encoded columns added
        """
        # Suppress SettingWithCopyWarning (alternatively: add a X = X.copy()
        with pd.option_context('mode.chained_assignment', None):
            # --- Convert Embarked
            mapping = {'S': 0,
                       'C': 1,
                       'Q': 2,
                       }
            X.loc[:, 'Embarked'] = X.loc[:, 'Embarked'].replace(mapping, inplace=False)

            # --- Convert Sex
            mapping = {'female': 0,
                       'male': 1
                       }
            X.loc[:, 'Sex'] = X['Sex'].replace(mapping, inplace=False)

            # --- Convert Name to Title (the honorific between ',' and '.')
            X.loc[:, 'Title'] = X['Name'].map(lambda name: name.split(',')[1].split('.')[0].strip())

            # a map of more aggregated titles
            mapping = {
                "Capt": 0,  # Officer
                "Col": 0,  # Officer
                "Major": 0,  # Officer
                "Jonkheer": 1,  # Royalty
                "Don": 1,  # Royalty
                "Sir": 1,  # Royalty
                "Dr": 0,  # Officer
                "Rev": 0,  # Officer
                "the Countess": 1,  # Royalty
                "Dona": 1,  # Royalty
                "Mme": 2,  # "Mrs"
                "Mlle": 3,  # "Miss"
                "Ms": 2,  # "Mrs"
                "Mr": 4,  # "Mr"
                "Mrs": 2,  # "Mrs"
                "Miss": 3,  # "Miss"
                "Master": 5,  # "Master"
                "Lady": 1  # "Royalty"
            }
            X.loc[:, 'Title'] = X['Title'].map(mapping)

        # BUGFIX: was X.drop('Name', 1) — the positional `axis` argument was
        # removed in pandas 2.0 and now raises TypeError
        X = X.drop('Name', axis=1)
        return X
test_base.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def test_representation_to_series(self):
        idx1 = DatetimeIndex([], freq='D')
        idx2 = DatetimeIndex(['2011-01-01'], freq='D')
        idx3 = DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D')
        idx4 = DatetimeIndex(
            ['2011-01-01', '2011-01-02', '2011-01-03'], freq='D')
        idx5 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00',
                              '2011-01-01 11:00'], freq='H', tz='Asia/Tokyo')
        idx6 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT],
                             tz='US/Eastern')
        idx7 = DatetimeIndex(['2011-01-01 09:00', '2011-01-02 10:15'])

        exp1 = """Series([], dtype: datetime64[ns])"""

        exp2 = """0   2011-01-01
dtype: datetime64[ns]"""

        exp3 = """0   2011-01-01
1   2011-01-02
dtype: datetime64[ns]"""

        exp4 = """0   2011-01-01
1   2011-01-02
2   2011-01-03
dtype: datetime64[ns]"""

        exp5 = """0   2011-01-01 09:00:00+09:00
1   2011-01-01 10:00:00+09:00
2   2011-01-01 11:00:00+09:00
dtype: datetime64[ns, Asia/Tokyo]"""

        exp6 = """0   2011-01-01 09:00:00-05:00
1   2011-01-01 10:00:00-05:00
2                         NaT
dtype: datetime64[ns, US/Eastern]"""

        exp7 = """0   2011-01-01 09:00:00
1   2011-01-02 10:15:00
dtype: datetime64[ns]"""

        with pd.option_context('display.width', 300):
            for idx, expected in zip([idx1, idx2, idx3, idx4,
                                      idx5, idx6, idx7],
                                     [exp1, exp2, exp3, exp4,
                                      exp5, exp6, exp7]):
                result = repr(Series(idx))
                self.assertEqual(result, expected)
test_format.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def test_repr_max_columns_max_rows(self):
        # Whether a frame repr is truncated or expanded depends on terminal
        # geometry, so tiny terminals cannot run this test.
        term_width, term_height = get_terminal_size()
        if term_width < 10 or term_height < 10:
            raise nose.SkipTest("terminal size too small, "
                                "{0} x {1}".format(term_width, term_height))

        def square_frame(n):
            # n x n frame of zeros with zero-padded row/column labels
            labels = ['%05d' % i for i in range(n)]
            return DataFrame(0, labels, labels)

        df6 = square_frame(6)
        df10 = square_frame(10)
        with option_context('mode.sim_interactive', True):
            with option_context('display.width', term_width * 2):
                with option_context('display.max_rows', 5,
                                    'display.max_columns', 5):
                    self.assertFalse(has_expanded_repr(square_frame(4)))
                    self.assertFalse(has_expanded_repr(square_frame(5)))
                    self.assertFalse(has_expanded_repr(df6))
                    self.assertTrue(has_doubly_truncated_repr(df6))

                with option_context('display.max_rows', 20,
                                    'display.max_columns', 10):
                    # past the max_columns boundary, but no expansion since
                    # the width is not exceeded
                    self.assertFalse(has_expanded_repr(df6))
                    self.assertFalse(has_truncated_repr(df6))

                with option_context('display.max_rows', 9,
                                    'display.max_columns', 10):
                    # exceeding the vertical bound alone cannot trigger an
                    # expanded repr
                    self.assertFalse(has_expanded_repr(df10))
                    self.assertTrue(has_vertically_truncated_repr(df10))

            # width=None in terminal, auto detection
            with option_context('display.max_columns', 100, 'display.max_rows',
                                term_width * 20, 'display.width', None):
                narrow = square_frame((term_width // 7) - 2)
                self.assertFalse(has_expanded_repr(narrow))
                wide = square_frame((term_width // 7) + 2)
                com.pprint_thing(wide._repr_fits_horizontal_())
                self.assertTrue(has_expanded_repr(wide))
clipboard.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def to_clipboard(obj, excel=None, sep=None, **kwargs):  # pragma: no cover
    """
    Attempt to write text representation of object to the system clipboard
    The clipboard can be then pasted into Excel for example.

    Parameters
    ----------
    obj : the object to write to the clipboard
    excel : boolean, defaults to True
            if True, use the provided separator, writing in a csv
            format for allowing easy pasting into excel.
            if False, write a string representation of the object
            to the clipboard
    sep : optional, defaults to tab
    other keywords are passed to to_csv

    Notes
    -----
    Requirements for your platform
      - Linux: xclip, or xsel (with gtk or PyQt4 modules)
      - Windows:
      - OS X:
    """
    from pandas.util.clipboard import clipboard_set
    if excel is None:
        excel = True

    if excel:
        try:
            if sep is None:
                sep = '\t'
            buf = StringIO()
            obj.to_csv(buf, sep=sep, **kwargs)
            clipboard_set(buf.getvalue())
            return
        # BUGFIX: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; fall back to the plain-text path only on
        # ordinary failures from to_csv / the clipboard backend
        except Exception:
            pass

    if isinstance(obj, DataFrame):
        # str(df) has various unhelpful defaults, like truncation
        with option_context('display.max_colwidth', 999999):
            objstr = obj.to_string(**kwargs)
    else:
        objstr = str(obj)
    clipboard_set(objstr)
showlog.py 文件源码 项目:anirban-imitation 作者: Santara 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def main():
    """CLI: print the final row of each imitation-learning log and plot curves."""
    parser = argparse.ArgumentParser()
    parser.add_argument('logfiles', type=str, nargs='+')
    parser.add_argument('--fields', type=str, default='trueret,avglen,ent,kl,vf_r2,vf_kl,tdvf_r2,rloss,racc')
    parser.add_argument('--noplot', action='store_true')
    parser.add_argument('--plotfile', type=str, default=None)
    parser.add_argument('--range_end', type=int, default=None)
    args = parser.parse_args()

    assert len(set(args.logfiles)) == len(args.logfiles), 'Log files must be unique'

    fields = args.fields.split(',')

    # Load logs from all files
    fname2log = {}
    for fname in args.logfiles:
        with pd.HDFStore(fname, 'r') as f:
            assert fname not in fname2log
            df = f['log']
            df.set_index('iter', inplace=True)
            fname2log[fname] = df.loc[:args.range_end, fields]

    # Print and plot (matplotlib imported lazily, Agg for headless saving)
    if not args.noplot or args.plotfile is not None:
        import matplotlib
        if args.plotfile is not None:
            matplotlib.use('Agg')
        import matplotlib.pyplot as plt
        plt.style.use('ggplot')

        ax = None
        for fname, df in fname2log.items():
            with pd.option_context('display.max_rows', 9999):
                # BUGFIX: these were Python-2 print statements; the rest of
                # the surrounding code targets Python 3
                print(fname)
                print(df[-1:])

            # BUGFIX: guard the column like the sibling showlog script does —
            # 'vf_r2' is only present when requested via --fields
            if 'vf_r2' in df.keys():
                df['vf_r2'] = np.maximum(0, df['vf_r2'])

            if ax is None:
                ax = df.plot(subplots=True, title=fname)
            else:
                df.plot(subplots=True, title=fname, ax=ax, legend=False)

        # BUGFIX: save before show() — closing the interactive window
        # destroys the figures, which previously left --plotfile blank when
        # both plotting and saving were requested
        if args.plotfile is not None:
            plt.savefig(args.plotfile, bbox_inches='tight', dpi=200)
        if not args.noplot:
            plt.show()
testing.py 文件源码 项目:ramp-workflow 作者: paris-saclay-cds 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def _print_df_scores(df_scores, score_types, indent=''):
    """Pretty print the scores dataframe.

    Renders ``repr(df_scores)`` line by line, colorizing the header and each
    data row via the ``stylize``/``fg``/``attr`` palette keyed by
    ``fg_colors``, and prefixing every output line with ``indent``.

    Parameters
    ----------
    df_scores : pd.DataFrame
        the score dataframe
    score_types : list of score types
        a list of score types to use
    indent : str, default=''
        indentation if needed
    """
    try:
        # try to re-order columns/rows in the printed array
        # we may not have all train, valid, test, so need to select
        index_order = np.array(['train', 'valid', 'test'])
        ordered_index = index_order[np.isin(index_order, df_scores.index)]
        df_scores = df_scores.loc[
            ordered_index, [score_type.name for score_type in score_types]]
    except Exception:
        _print_warning("Couldn't re-order the score matrix..")
    # wide display so the repr does not wrap mid-table
    with pd.option_context("display.width", 160):
        df_repr = repr(df_scores)
    df_repr_out = []
    # pair each repr line with a color key: two header lines get None,
    # then one key per data row (the row's index label)
    for line, color_key in zip(df_repr.splitlines(),
                               [None, None] +
                               list(df_scores.index.values)):
        if line.strip() == 'step':
            # skip the lone 'step' line — presumably the index-name row
            # pandas prints under the header; verify against real output
            continue
        if color_key is None:
            # table header
            line = stylize(line, fg(fg_colors['title']) + attr('bold'))
        if color_key is not None:
            tokens = line.split()
            tokens_bak = tokens[:]
            if 'official_' + color_key in fg_colors:
                # line label and official score bold & bright
                label_color = fg(fg_colors['official_' + color_key])
                tokens[0] = stylize(tokens[0], label_color + attr('bold'))
                tokens[1] = stylize(tokens[1], label_color + attr('bold'))
            if color_key in fg_colors:
                # other scores pale
                tokens[2:] = [stylize(token, fg(fg_colors[color_key]))
                              for token in tokens[2:]]
            # splice the styled tokens back via textual replacement so the
            # original column spacing survives (order of replaces matters)
            for token_from, token_to in zip(tokens_bak, tokens):
                line = line.replace(token_from, token_to)
        line = indent + line
        df_repr_out.append(line)
    print('\n'.join(df_repr_out))


问题


面经


文章

微信
公众号

扫码关注公众号