Python read_json() usage examples

utils.py (project: bigquery-bokeh-dashboard, author: GoogleCloudPlatform)
def run_query(query, cache_key, expire=3600, dialect='legacy'):
    memcached_client = memcached_discovery.get_client()
    if memcached_client is None:
        return _run(query, dialect=dialect)
    else:
        json = memcached_client.get(cache_key)
        if json is not None:
            df = pd.read_json(json, orient='records')
        else:
            df = _run(query, dialect=dialect)
            memcached_client.set(cache_key, df.to_json(orient='records'), expire=expire)
        return df
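
The cache branch above relies on to_json(orient='records') and read_json(orient='records') being symmetric. A minimal standalone sketch of that round trip, using a made-up DataFrame:

import pandas as pd

df = pd.DataFrame({'name': ['a', 'b'], 'value': [1, 2]})
payload = df.to_json(orient='records')   # '[{"name":"a","value":1},...]'
restored = pd.read_json(payload, orient='records')
# orient='records' drops the index, so the round trip is only exact
# when the original index is a default RangeIndex, as it is here.
assert restored.equals(df)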
build_model.py (project: Guess-Genre-By-Lyrics, author: ormatt)
def main():
    start_time = time.time()
    args = parse_args()
    logger.setLevel(getattr(logging, args.verbosity.upper()))
    logger.info("Started")

    build_constants()

    df = pd.read_json(path_or_buf=DATA_PATH, orient='records', encoding="UTF8")
    logger.debug("Loaded {} rows into df".format(len(df)))

    df = utils.get_data_subset.crop(df, None, None)
    df = utils.get_data_subset.filter_rows_by_string(df,
                                                     [TARGET_COL],
                                                     ['Rock',
                                                      'Hip Hop'])
    df = utils.clean_data.execute_cleaners(df)
    df = utils.normalize_data.normalize_genres(df, TARGET_COL)
    X, y = utils.get_data_subset.get_x_y(df, SAMPLE_COL, TARGET_COL)

    clf = model_pipeline.get_pipeline(SAMPLE_COL)

    utils.persistence.dump(DF_DUMP_NAME, df)
    utils.persistence.dump(CLF_DUMP_NAME, clf)

    if args.train:
        train_and_test.train_and_dump(X, y, clf)
    elif args.test:
        train_and_test.test_using_kfold(X, y, clf)

    logger.info("Finished in {0:.2f} seconds".format(time.time() - start_time))
enigma.py (project: cjworkbench, author: CJWorkbench)
def handle_dotio_url(wf_module, url, split_url, num_rows):
    """
    Process a response from a request to enigma.io. We assume the API key is
    provided because, at least at first glance, there doesn't seem to be any
    provision for accessing dataset endpoints without one.
    """

    if num_rows > 500:
        wf_module.set_error("You can request a maximum of 500 rows.")
        return

    if "/limit/" not in url:
        if url.endswith('/'):
            url += "limit/{}".format(num_rows)
        else:
            url += "/limit/{}".format(num_rows)

    response = requests.get(url)
    if response.status_code != 200:
        error = json.loads(response.text)
        if "message" in error:
            message = error["message"]
        else:
            message = error["info"]["message"]
            if "additional" in error["info"]:
                message += ": " + error["info"]["additional"]["message"]
        wf_module.set_error("Unable to retrieve data from Enigma. Received {} status, with message {}"
            .format(response.status_code, message))
        return
    try:
        json_text = json.loads(response.text)
        table = pd.read_json(json.dumps(json_text['result']))
        return table
    except Exception as ex:  # a generic catch is broad, but it is the most pragmatic, all-encompassing option here
        wf_module.set_error("Unable to process request: {}".format(str(ex)))
        return
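
Serialising the already-parsed sub-object back to a string just to hand it to read_json, as above, works but does a redundant parse. Since json_text['result'] is already a list of dicts, pd.DataFrame builds the same table directly; a sketch with a made-up payload:

import json
import pandas as pd

raw = '{"result": [{"id": 1, "name": "a"}, {"id": 2, "name": "b"}]}'
parsed = json.loads(raw)

# Equivalent to pd.read_json(json.dumps(parsed['result'])), minus the
# extra serialise/parse cycle.
table = pd.DataFrame(parsed['result'])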
__init__.py (project: jupyter-handsontables, author: techmuch)
def _from_json(self, value, obj=None):
        if value is not None:
            df = pd.read_json(json.dumps(value), orient="split")
        else:
            df = pd.DataFrame()
        return df
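
orient='split' stores index, columns and values separately, which is why the widget can round-trip a frame without losing either axis. A self-contained sketch:

import pandas as pd

df = pd.DataFrame([[1, 2], [3, 4]], index=['r1', 'r2'], columns=['c1', 'c2'])
payload = df.to_json(orient='split')
# '{"columns":["c1","c2"],"index":["r1","r2"],"data":[[1,2],[3,4]]}'
restored = pd.read_json(payload, orient='split')
assert restored.equals(df)   # both axes survive, unlike orient='records'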
pycore.py (project: datanode, author: jay-johnson)
def pd_json_to_df(self, data_json, sorted_by_key="Date", in_ascending=True):
        import pandas as pd
        new_df = pd.read_json(data_json).sort_values(by=sorted_by_key, ascending=in_ascending)
        return new_df
    # end of pd_json_to_df
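
A standalone sketch of the same parse-then-sort pattern, with hypothetical column-oriented JSON (the default orient treats a dict of dicts as columns):

import pandas as pd

data_json = '{"Date": {"0": "2017-01-03", "1": "2017-01-01"}, "Close": {"0": 11.0, "1": 10.0}}'
df = pd.read_json(data_json).sort_values(by='Date', ascending=True)
# Rows come back oldest-first; pass ascending=False for newest-first.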
medium_posts_data_reader.py (project: Medium-crawler-with-data-analyzer, author: lifei96)
def read_posts():
    posts = list()
    file_in = open('./post_list.txt', 'r')
    post_list = str(file_in.read()).split(' ')
    file_in.close()
    num = 0
    for post_id in post_list:
        if not post_id:
            continue
        if not os.path.exists('./data/Posts/%s.json' % post_id):
            continue
        try:
            file_in = open('./data/Posts/%s.json' % post_id, 'r')
            raw_data = json.loads(str(file_in.read()))
            file_in.close()
            post = dict()
            post['post_id'] = post_id
            post['published_date'] = raw_data['published_date']
            post['recommends'] = raw_data['recommends']
            post['responses'] = raw_data['responses']
            posts.append(post)
        except Exception:  # skip posts whose JSON is unreadable or missing fields
            continue
        num += 1
        print(post_id)
        print(num)
    return pd.read_json(json.dumps(posts))
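
pd.read_json(json.dumps(posts)) serialises the accumulated list of dicts only to parse it straight back. pd.DataFrame(posts) builds effectively the same frame in one step; a sketch with made-up rows in the shape the loop produces:

import pandas as pd

posts = [
    {'post_id': 'abc', 'published_date': '2017-01-01', 'recommends': 3, 'responses': 1},
    {'post_id': 'def', 'published_date': '2017-01-02', 'recommends': 5, 'responses': 0},
]
df = pd.DataFrame(posts)   # no JSON round trip needed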
medium_tags_data_reader.py (project: Medium-crawler-with-data-analyzer, author: lifei96)
def read_posts():
    posts = list()
    file_in = open('./post_list.txt', 'r')
    post_list = str(file_in.read()).split(' ')
    file_in.close()
    num = 0
    for post_id in post_list:
        if not post_id:
            continue
        if not os.path.exists('./data/Posts/%s.json' % post_id):
            continue
        try:
            file_in = open('./data/Posts/%s.json' % post_id, 'r')
            raw_data = json.loads(str(file_in.read()))
            file_in.close()
            for tag in raw_data['tags']:
                post = dict()
                post['post_id'] = post_id
                post['published_date'] = raw_data['published_date']
                post['recommends'] = raw_data['recommends']
                post['responses'] = raw_data['responses']
                post['tag'] = tag['name']
                posts.append(post)
                print(post)
        except Exception:  # skip posts whose JSON is unreadable or missing fields
            continue
        num += 1
        print(post_id)
        print(num)
    return pd.read_json(json.dumps(posts))
medium_users_data_reader.py (project: Medium-crawler-with-data-analyzer, author: lifei96)
def read_users():
    users = list()
    file_in = open('./username_list.txt', 'r')
    username_list = str(file_in.read()).split(' ')
    file_in.close()
    num = 0
    for username in username_list:
        if not username:
            continue
        if not os.path.exists('./data/Users/%s.json' % username):
            continue
        try:
            file_in = open('./data/Users/%s.json' % username, 'r')
            raw_data = json.loads(str(file_in.read()))
            file_in.close()
            user = dict()
            user['username'] = username
            user['reg_date'] = datetime.date.fromtimestamp(raw_data['profile']['user']['createdAt']/1000.0).isoformat()
            if not raw_data['profile']['user']['lastPostCreatedAt']:
                raw_data['profile']['user']['lastPostCreatedAt'] = raw_data['profile']['user']['createdAt']
            user['last_post_date'] = datetime.date.fromtimestamp(raw_data['profile']['user']['lastPostCreatedAt']/1000.0).isoformat()
            user['posts_count'] = raw_data['profile']['numberOfPostsPublished']
            user['following_count'] = raw_data['profile']['user']['socialStats']['usersFollowedCount']
            user['followers_count'] = raw_data['profile']['user']['socialStats']['usersFollowedByCount']
            users.append(user)
        except Exception:  # skip users whose JSON is unreadable or missing fields
            continue
        num += 1
        print(username)
        print(num)
    return pd.read_json(json.dumps(users))
realtimeProCon.py (project: PythonTrading, author: F2011B)
def data_received(self, data):
        updateOZ_event.data = pd.read_json(data.decode())
        updateOZ_event.set()
realtimeProCon.py (project: PythonTrading, author: F2011B)
def handle_OZServer(loop):
    reader, writer = yield from asyncio.open_connection('127.0.0.1', 2222, loop=loop)
    symbolList = list()
    while True:
        if updateOZ_event.is_set():
            print('In Server send')
            updateOZ_event.clear()
            for element in updateOZ_event.data:
                writer.write(('Add_'+ element+'_End').encode())
            writer.write('Send'.encode())

            outputbuffer = StringIO()
            condition = True
            while condition:
                data = yield from reader.read(1024)
                message = data.decode()
                if message.find('!ENDMSG!') != -1:
                    message = message.replace('!ENDMSG!', '')
                    condition = False
                    print('End found')

                outputbuffer.write(message)

            outputbuffer.seek(0)
            DF = pd.read_json(outputbuffer)
            #print(DF)
            yield from updateOZ_queue.put(DF)
        yield None

    # Unreachable: the while True loop above never breaks. Note also that
    # asyncio's StreamReader has no close(); only the writer is closed.
    writer.close()
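
read_json accepts any file-like object, which is what makes the StringIO accumulation above work: chunks are written as they arrive, the buffer is rewound with seek(0), and the whole stream is parsed once. A minimal sketch with hypothetical chunks:

import pandas as pd
from io import StringIO

buf = StringIO()
for chunk in ('[{"sym": "EURUSD", ', '"bid": 1.07}]'):   # hypothetical stream chunks
    buf.write(chunk)
buf.seek(0)                      # rewind before handing the buffer to read_json
df = pd.read_json(buf)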
helpers.py (project: spotlight, author: maciejkula)
def _load_data(filename, columns=None):

    data = pd.read_json(filename, lines=True)
    data = data.sort_values('validation_mrr', ascending=False)

    mrr_cols = ['validation_mrr', 'test_mrr']

    if columns is None:
        columns = [x for x in data.columns if
                   (x not in mrr_cols and x != 'hash')]

    cols = mrr_cols + columns

    return data[cols]
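
lines=True parses JSON Lines input (one object per line), the append-friendly format experiment loggers typically emit. A self-contained sketch with made-up results:

import pandas as pd

jsonl = ('{"validation_mrr": 0.31, "test_mrr": 0.29, "lr": 0.01, "hash": "a1"}\n'
         '{"validation_mrr": 0.35, "test_mrr": 0.33, "lr": 0.05, "hash": "b2"}\n')
data = pd.read_json(jsonl, lines=True)
best = data.sort_values('validation_mrr', ascending=False).iloc[0]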
test_pandas.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_frame_double_encoded_labels(self):
        df = DataFrame([['a', 'b'], ['c', 'd']],
                       index=['index " 1', 'index / 2'],
                       columns=['a \\ b', 'y / z'])

        assert_frame_equal(df, read_json(df.to_json(orient='split'),
                                         orient='split'))
        assert_frame_equal(df, read_json(df.to_json(orient='columns'),
                                         orient='columns'))
        assert_frame_equal(df, read_json(df.to_json(orient='index'),
                                         orient='index'))
        df_unser = read_json(df.to_json(orient='records'), orient='records')
        assert_index_equal(df.columns, df_unser.columns)
        np.testing.assert_equal(df.values, df_unser.values)
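
The records case above cannot use assert_frame_equal because orient='records' discards the index; only columns and values survive, hence the two weaker assertions. A quick illustration of the loss:

import pandas as pd

df = pd.DataFrame([[1], [2]], index=['first', 'second'], columns=['x'])
restored = pd.read_json(df.to_json(orient='records'), orient='records')
print(restored.index)   # RangeIndex(start=0, stop=2, step=1): the labels are gone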
test_pandas.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_frame_non_unique_index(self):
        df = DataFrame([['a', 'b'], ['c', 'd']], index=[1, 1],
                       columns=['x', 'y'])

        self.assertRaises(ValueError, df.to_json, orient='index')
        self.assertRaises(ValueError, df.to_json, orient='columns')

        assert_frame_equal(df, read_json(df.to_json(orient='split'),
                                         orient='split'))
        unser = read_json(df.to_json(orient='records'), orient='records')
        self.assertTrue(df.columns.equals(unser.columns))
        np.testing.assert_equal(df.values, unser.values)
        unser = read_json(df.to_json(orient='values'), orient='values')
        np.testing.assert_equal(df.values, unser.values)
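
to_json refuses orient='index' and orient='columns' when the index is not unique, because serialising would collapse duplicate keys into a single JSON object key; orient='split' keeps the index as a plain list, so it still round-trips, and records/values drop the index entirely. A quick sketch:

import pandas as pd

df = pd.DataFrame([['a', 'b'], ['c', 'd']], index=[1, 1], columns=['x', 'y'])
try:
    df.to_json(orient='index')
except ValueError as err:
    print(err)               # duplicate index values are rejected for this orient
df.to_json(orient='split')   # fine: the index is stored as a separate list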
test_pandas.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_frame_non_unique_columns(self):
        df = DataFrame([['a', 'b'], ['c', 'd']], index=[1, 2],
                       columns=['x', 'x'])

        self.assertRaises(ValueError, df.to_json, orient='index')
        self.assertRaises(ValueError, df.to_json, orient='columns')
        self.assertRaises(ValueError, df.to_json, orient='records')

        assert_frame_equal(df, read_json(df.to_json(orient='split'),
                                         orient='split', dtype=False))
        unser = read_json(df.to_json(orient='values'), orient='values')
        np.testing.assert_equal(df.values, unser.values)

        # GH4377; duplicate columns not processing correctly
        df = DataFrame([['a', 'b'], ['c', 'd']], index=[
                       1, 2], columns=['x', 'y'])
        result = read_json(df.to_json(orient='split'), orient='split')
        assert_frame_equal(result, df)

        def _check(df):
            result = read_json(df.to_json(orient='split'), orient='split',
                               convert_dates=['x'])
            assert_frame_equal(result, df)

        for o in [[['a', 'b'], ['c', 'd']],
                  [[1.5, 2.5], [3.5, 4.5]],
                  [[1, 2.5], [3, 4.5]],
                  [[Timestamp('20130101'), 3.5],
                   [Timestamp('20130102'), 4.5]]]:
            _check(DataFrame(o, index=[1, 2], columns=['x', 'x']))
test_pandas.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_frame_from_json_nones(self):
        df = DataFrame([[1, 2], [4, 5, 6]])
        unser = read_json(df.to_json())
        self.assertTrue(np.isnan(unser[2][0]))

        df = DataFrame([['1', '2'], ['4', '5', '6']])
        unser = read_json(df.to_json())
        self.assertTrue(np.isnan(unser[2][0]))
        unser = read_json(df.to_json(), dtype=False)
        self.assertTrue(unser[2][0] is None)
        unser = read_json(df.to_json(), convert_axes=False, dtype=False)
        self.assertTrue(unser['2']['0'] is None)

        unser = read_json(df.to_json(), numpy=False)
        self.assertTrue(np.isnan(unser[2][0]))
        unser = read_json(df.to_json(), numpy=False, dtype=False)
        self.assertTrue(unser[2][0] is None)
        unser = read_json(df.to_json(), numpy=False,
                          convert_axes=False, dtype=False)
        self.assertTrue(unser['2']['0'] is None)

        # infinities get mapped to nulls which get mapped to NaNs during
        # deserialisation
        df = DataFrame([[1, 2], [4, 5, 6]])
        df.loc[0, 2] = np.inf
        unser = read_json(df.to_json())
        self.assertTrue(np.isnan(unser[2][0]))
        unser = read_json(df.to_json(), dtype=False)
        self.assertTrue(np.isnan(unser[2][0]))

        df.loc[0, 2] = np.NINF
        unser = read_json(df.to_json())
        self.assertTrue(np.isnan(unser[2][0]))
        unser = read_json(df.to_json(), dtype=False)
        self.assertTrue(np.isnan(unser[2][0]))
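
By default read_json infers dtypes, so JSON nulls come back as NaN in numeric columns; dtype=False switches inference off and nulls stay None, which is the contrast the test above walks through. A compact sketch:

import pandas as pd

payload = '{"a": {"0": "1", "1": null}}'
print(pd.read_json(payload)['a'][1])                # nan (dtype inference on)
print(pd.read_json(payload, dtype=False)['a'][1])   # None (raw values kept)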
test_pandas.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_frame_empty_mixedtype(self):
        # mixed type
        df = DataFrame(columns=['jim', 'joe'])
        df['joe'] = df['joe'].astype('i8')
        self.assertTrue(df._is_mixed_type)
        assert_frame_equal(read_json(df.to_json(), dtype=dict(df.dtypes)), df,
                           check_index_type=False)
test_pandas.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_frame_mixedtype_orient(self):  # GH10289
        vals = [[10, 1, 'foo', .1, .01],
                [20, 2, 'bar', .2, .02],
                [30, 3, 'baz', .3, .03],
                [40, 4, 'qux', .4, .04]]

        df = DataFrame(vals, index=list('abcd'),
                       columns=['1st', '2nd', '3rd', '4th', '5th'])

        self.assertTrue(df._is_mixed_type)
        right = df.copy()

        for orient in ['split', 'index', 'columns']:
            inp = df.to_json(orient=orient)
            left = read_json(inp, orient=orient, convert_axes=False)
            assert_frame_equal(left, right)

        right.index = np.arange(len(df))
        inp = df.to_json(orient='records')
        left = read_json(inp, orient='records', convert_axes=False)
        assert_frame_equal(left, right)

        right.columns = np.arange(df.shape[1])
        inp = df.to_json(orient='values')
        left = read_json(inp, orient='values', convert_axes=False)
        assert_frame_equal(left, right)
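
convert_axes=False, used throughout the test above, keeps the deserialised axis labels exactly as serialised instead of coercing them back to their original types; separately, the records and values orients drop the axes, which is why the test rebuilds the expected index and columns with np.arange. A quick sketch of the convert_axes difference:

import pandas as pd

df = pd.DataFrame({'a': [1, 2]})             # default integer index 0, 1
payload = df.to_json(orient='index')
print(pd.read_json(payload, orient='index').index)
# integer labels restored: [0, 1]
print(pd.read_json(payload, orient='index', convert_axes=False).index)
# labels kept as the strings JSON stored: ['0', '1']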

