def run_query(query, cache_key, expire=3600, dialect='legacy'):
    memcached_client = memcached_discovery.get_client()
    if memcached_client is None:
        return _run(query, dialect=dialect)
    else:
        cached = memcached_client.get(cache_key)  # renamed from `json` to avoid shadowing the json module
        if cached is not None:
            df = pd.read_json(cached, orient='records')
        else:
            df = _run(query, dialect=dialect)
            memcached_client.set(cache_key, df.to_json(orient='records'), expire=expire)
        return df
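# A minimal, standalone sketch (no memcached) of the round trip run_query relies on:
# the cache stores df.to_json(orient='records'), which pd.read_json() restores.
# The sample frame below is invented for illustration.
import pandas as pd
from io import StringIO

df = pd.DataFrame({"id": [1, 2], "name": ["a", "b"]})
payload = df.to_json(orient='records')                        # what would be stored in memcached
restored = pd.read_json(StringIO(payload), orient='records')
print(restored)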
def main():
    start_time = time.time()
    args = parse_args()
    logger.setLevel(getattr(logging, args.verbosity.upper()))
    logger.info("Started")
    build_constants()
    df = pd.read_json(path_or_buf=DATA_PATH, orient='records', encoding="UTF8")
    logger.debug("Loaded {} rows into df".format(len(df)))
    df = utils.get_data_subset.crop(df, None, None)
    df = utils.get_data_subset.filter_rows_by_string(df,
                                                     [TARGET_COL],
                                                     ['Rock', 'Hip Hop'])
    df = utils.clean_data.execute_cleaners(df)
    df = utils.normalize_data.normalize_genres(df, TARGET_COL)
    X, y = utils.get_data_subset.get_x_y(df, SAMPLE_COL, TARGET_COL)
    clf = model_pipeline.get_pipeline(SAMPLE_COL)
    utils.persistence.dump(DF_DUMP_NAME, df)
    utils.persistence.dump(CLF_DUMP_NAME, clf)
    if args.train:
        train_and_test.train_and_dump(X, y, clf)
    elif args.test:
        train_and_test.test_using_kfold(X, y, clf)
    logger.info("Finished in {0:.2f} seconds".format(time.time() - start_time))
def handle_dotio_url(wf_module, url, split_url, num_rows):
    """
    Process a response from enigma.io. An API key is assumed to be embedded in the URL,
    since the dataset endpoints do not appear to be accessible without one.
    """
    if num_rows > 500:
        wf_module.set_error("You can request a maximum of 500 rows.")
        return
    if "/limit/" not in url:
        if url.endswith('/'):
            url += "limit/{}".format(num_rows)
        else:
            url += "/limit/{}".format(num_rows)
    response = requests.get(url)
    if response.status_code != 200:
        error = json.loads(response.text)
        if "message" in error:
            message = error["message"]
        else:
            message = error["info"]["message"]
            if "additional" in error["info"]:
                message += ": " + error["info"]["additional"]["message"]
        wf_module.set_error("Unable to retrieve data from Enigma. Received {} status, with message {}"
                            .format(response.status_code, message))
        return
    try:
        json_text = json.loads(response.text)
        table = pd.read_json(json.dumps(json_text['result']))
        return table
    except Exception as ex:  # broad, but any parsing failure should surface as a module error
        wf_module.set_error("Unable to process request: {}".format(str(ex)))
        return
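# Hedged sketch: the 'result' field of an Enigma-style payload is assumed here to be a
# flat list of records, so it can also be turned into a DataFrame directly, skipping the
# json.dumps()/pd.read_json() round trip used above. Field names are illustrative.
import pandas as pd

payload = {"result": [{"state": "NY", "value": 10}, {"state": "CA", "value": 12}]}
table = pd.DataFrame(payload["result"])   # broadly equivalent for flat records
print(table)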
def _from_json(self, value, obj=None):
    if value is not None:
        df = pd.read_json(json.dumps(value), orient="split")
    else:
        df = pd.DataFrame()
    return df
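# Minimal sketch of the orient='split' round trip _from_json expects: the 'split'
# layout serialises index, columns and data separately, so the index survives.
# The widget-style value below is invented.
import json
import pandas as pd
from io import StringIO

df = pd.DataFrame([[1, 2], [3, 4]], index=['r1', 'r2'], columns=['a', 'b'])
value = json.loads(df.to_json(orient='split'))               # dict, as a widget might hand back
restored = pd.read_json(StringIO(json.dumps(value)), orient='split')
print(restored.index.tolist())                               # ['r1', 'r2']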
def pd_json_to_df(self, data_json, sorted_by_key="Date", in_ascending=True):
    import pandas as pd
    new_df = pd.read_json(data_json).sort_values(by=sorted_by_key, ascending=in_ascending)
    return new_df
# end of pd_json_to_df
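# Hedged usage sketch for pd_json_to_df: records-style JSON with a "Date" field.
# Because the column is named 'Date', pd.read_json() parses it as datetimes by default,
# so the sort is chronological. The data below is made up for illustration.
import pandas as pd
from io import StringIO

data_json = '[{"Date": "2021-01-03", "Close": 10.5}, {"Date": "2021-01-01", "Close": 9.8}]'
df = pd.read_json(StringIO(data_json)).sort_values(by="Date", ascending=True)
print(df)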
# Source: medium_posts_data_reader.py (project: Medium-crawler-with-data-analyzer, author: lifei96)
def read_posts():
    posts = list()
    file_in = open('./post_list.txt', 'r')
    post_list = str(file_in.read()).split(' ')
    file_in.close()
    num = 0
    for post_id in post_list:
        if not post_id:
            continue
        if not os.path.exists('./data/Posts/%s.json' % post_id):
            continue
        try:
            file_in = open('./data/Posts/%s.json' % post_id, 'r')
            raw_data = json.loads(str(file_in.read()))
            file_in.close()
            post = dict()
            post['post_id'] = post_id
            post['published_date'] = raw_data['published_date']
            post['recommends'] = raw_data['recommends']
            post['responses'] = raw_data['responses']
            posts.append(post)
        except:
            continue
        num += 1
        print(post_id)
        print(num)
    return pd.read_json(json.dumps(posts))
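# Hedged sketch: for a flat list of dicts like `posts`, pd.DataFrame(posts) builds
# essentially the same frame as the json.dumps()/pd.read_json() round trip above
# (read_json additionally attempts numeric coercion of string columns). The sample
# records below are invented.
import json
import pandas as pd
from io import StringIO

posts = [
    {"post_id": "abc123", "published_date": "2016-05-01", "recommends": 12, "responses": 3},
    {"post_id": "def456", "published_date": "2016-06-10", "recommends": 7, "responses": 1},
]
round_tripped = pd.read_json(StringIO(json.dumps(posts)))
direct = pd.DataFrame(posts)
print(round_tripped.dtypes)
print(direct.dtypes)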
# Source: medium_tags_data_reader.py (project: Medium-crawler-with-data-analyzer, author: lifei96)
def read_posts():
    posts = list()
    file_in = open('./post_list.txt', 'r')
    post_list = str(file_in.read()).split(' ')
    file_in.close()
    num = 0
    for post_id in post_list:
        if not post_id:
            continue
        if not os.path.exists('./data/Posts/%s.json' % post_id):
            continue
        try:
            file_in = open('./data/Posts/%s.json' % post_id, 'r')
            raw_data = json.loads(str(file_in.read()))
            file_in.close()
            for tag in raw_data['tags']:
                post = dict()
                post['post_id'] = post_id
                post['published_date'] = raw_data['published_date']
                post['recommends'] = raw_data['recommends']
                post['responses'] = raw_data['responses']
                post['tag'] = tag['name']
                posts.append(post)
                print(post)
        except:
            continue
        num += 1
        print(post_id)
        print(num)
    return pd.read_json(json.dumps(posts))
# Source: medium_users_data_reader.py (project: Medium-crawler-with-data-analyzer, author: lifei96)
def read_users():
    users = list()
    file_in = open('./username_list.txt', 'r')
    username_list = str(file_in.read()).split(' ')
    file_in.close()
    num = 0
    for username in username_list:
        if not username:
            continue
        if not os.path.exists('./data/Users/%s.json' % username):
            continue
        try:
            file_in = open('./data/Users/%s.json' % username, 'r')
            raw_data = json.loads(str(file_in.read()))
            file_in.close()
            user = dict()
            user['username'] = username
            user['reg_date'] = datetime.date.fromtimestamp(raw_data['profile']['user']['createdAt'] / 1000.0).isoformat()
            if not raw_data['profile']['user']['lastPostCreatedAt']:
                raw_data['profile']['user']['lastPostCreatedAt'] = raw_data['profile']['user']['createdAt']
            user['last_post_date'] = datetime.date.fromtimestamp(raw_data['profile']['user']['lastPostCreatedAt'] / 1000.0).isoformat()
            user['posts_count'] = raw_data['profile']['numberOfPostsPublished']
            user['following_count'] = raw_data['profile']['user']['socialStats']['usersFollowedCount']
            user['followers_count'] = raw_data['profile']['user']['socialStats']['usersFollowedByCount']
            users.append(user)
        except:
            continue
        num += 1
        print(username)
        print(num)
    return pd.read_json(json.dumps(users))
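# Minimal sketch of the timestamp handling above: Medium's createdAt values are in
# milliseconds, hence the division by 1000 before datetime.date.fromtimestamp().
# Note that fromtimestamp() uses the local timezone. The value below is illustrative.
import datetime

created_at_ms = 1462060800000   # 2016-05-01T00:00:00Z expressed in milliseconds
reg_date = datetime.date.fromtimestamp(created_at_ms / 1000.0).isoformat()
print(reg_date)                 # e.g. '2016-05-01' (may shift a day depending on timezone)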
def data_received(self, data):
    updateOZ_event.data = pd.read_json(data.decode())
    updateOZ_event.set()
def handle_OZServer(loop):
    reader, writer = yield from asyncio.open_connection('127.0.0.1', 2222, loop=loop)
    symbolList = list()
    while True:
        if updateOZ_event.is_set():
            print('In Server send')
            updateOZ_event.clear()
            for element in updateOZ_event.data:
                writer.write(('Add_' + element + '_End').encode())
            writer.write('Send'.encode())
            outputbuffer = StringIO()
            condition = True
            while condition:
                data = yield from reader.read(1024)
                message = data.decode()
                if message.find('!ENDMSG!') != -1:
                    message = message.replace('!ENDMSG!', '')
                    condition = False
                    print('End found')
                outputbuffer.write(message)
            outputbuffer.seek(0)
            DF = pd.read_json(outputbuffer)
            # print(DF)
            yield from updateOZ_queue.put(DF)
        yield None
    writer.close()  # StreamReader has no close() method; closing the writer is sufficient
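# Standalone sketch of the buffer handling above: response chunks are accumulated in a
# StringIO, the '!ENDMSG!' marker is stripped, the buffer is rewound, and pd.read_json()
# consumes it. The payload and chunk split below are invented.
import pandas as pd
from io import StringIO

chunks = ['[{"symbol": "AAPL", "px": 17', '2.5}, {"symbol": "MSFT", "px": 331.0}]!ENDMSG!']
outputbuffer = StringIO()
for chunk in chunks:
    outputbuffer.write(chunk.replace('!ENDMSG!', ''))
outputbuffer.seek(0)
print(pd.read_json(outputbuffer))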
def _load_data(filename, columns=None):
    data = pd.read_json(filename, lines=True)
    data = data.sort_values('validation_mrr', ascending=False)
    mrr_cols = ['validation_mrr', 'test_mrr']
    if columns is None:
        columns = [x for x in data.columns if
                   (x not in mrr_cols and x != 'hash')]
    cols = mrr_cols + columns
    return data[cols]
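# Hedged sketch of the input _load_data expects: a JSON Lines file (one JSON object per
# line, hence lines=True) with 'validation_mrr'/'test_mrr' scores plus hyperparameters.
# The records below are invented.
import pandas as pd
from io import StringIO

jsonl = (
    '{"validation_mrr": 0.41, "test_mrr": 0.39, "lr": 0.01, "hash": "a1"}\n'
    '{"validation_mrr": 0.45, "test_mrr": 0.44, "lr": 0.05, "hash": "b2"}\n'
)
data = pd.read_json(StringIO(jsonl), lines=True)
print(data.sort_values('validation_mrr', ascending=False))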
# Source: test_pandas.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_frame_double_encoded_labels(self):
    df = DataFrame([['a', 'b'], ['c', 'd']],
                   index=['index " 1', 'index / 2'],
                   columns=['a \\ b', 'y / z'])
    assert_frame_equal(df, read_json(df.to_json(orient='split'),
                                     orient='split'))
    assert_frame_equal(df, read_json(df.to_json(orient='columns'),
                                     orient='columns'))
    assert_frame_equal(df, read_json(df.to_json(orient='index'),
                                     orient='index'))
    df_unser = read_json(df.to_json(orient='records'), orient='records')
    assert_index_equal(df.columns, df_unser.columns)
    np.testing.assert_equal(df.values, df_unser.values)
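# Quick illustration of why the 'records' case above only compares columns and values:
# orient='records' does not serialise the index, so it cannot round-trip it.
import pandas as pd
from io import StringIO

df = pd.DataFrame([['a', 'b']], index=['index " 1'], columns=['a \\ b', 'y / z'])
records = pd.read_json(StringIO(df.to_json(orient='records')), orient='records')
print(records.index.tolist())   # [0] -- the original string index is gone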
def test_frame_non_unique_index(self):
    df = DataFrame([['a', 'b'], ['c', 'd']], index=[1, 1],
                   columns=['x', 'y'])
    self.assertRaises(ValueError, df.to_json, orient='index')
    self.assertRaises(ValueError, df.to_json, orient='columns')
    assert_frame_equal(df, read_json(df.to_json(orient='split'),
                                     orient='split'))
    unser = read_json(df.to_json(orient='records'), orient='records')
    self.assertTrue(df.columns.equals(unser.columns))
    np.testing.assert_equal(df.values, unser.values)
    unser = read_json(df.to_json(orient='values'), orient='values')
    np.testing.assert_equal(df.values, unser.values)
def test_frame_non_unique_columns(self):
    df = DataFrame([['a', 'b'], ['c', 'd']], index=[1, 2],
                   columns=['x', 'x'])
    self.assertRaises(ValueError, df.to_json, orient='index')
    self.assertRaises(ValueError, df.to_json, orient='columns')
    self.assertRaises(ValueError, df.to_json, orient='records')
    assert_frame_equal(df, read_json(df.to_json(orient='split'),
                                     orient='split', dtype=False))
    unser = read_json(df.to_json(orient='values'), orient='values')
    np.testing.assert_equal(df.values, unser.values)

    # GH4377; duplicate columns not processing correctly
    df = DataFrame([['a', 'b'], ['c', 'd']], index=[1, 2],
                   columns=['x', 'y'])
    result = read_json(df.to_json(orient='split'), orient='split')
    assert_frame_equal(result, df)

    def _check(df):
        result = read_json(df.to_json(orient='split'), orient='split',
                           convert_dates=['x'])
        assert_frame_equal(result, df)

    for o in [[['a', 'b'], ['c', 'd']],
              [[1.5, 2.5], [3.5, 4.5]],
              [[1, 2.5], [3, 4.5]],
              [[Timestamp('20130101'), 3.5],
               [Timestamp('20130102'), 4.5]]]:
        _check(DataFrame(o, index=[1, 2], columns=['x', 'x']))
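# Hedged sketch of the convert_dates=['x'] path exercised by _check above (simplified
# to unique column names): to_json() writes Timestamps as epoch milliseconds by default,
# and naming the column in convert_dates turns them back into datetimes on read.
import pandas as pd
from io import StringIO
from pandas import DataFrame, Timestamp

df = DataFrame([[Timestamp('20130101'), 3.5], [Timestamp('20130102'), 4.5]],
               index=[1, 2], columns=['x', 'y'])
restored = pd.read_json(StringIO(df.to_json(orient='split')), orient='split',
                        convert_dates=['x'])
print(restored['x'].dtype)   # datetime64[ns]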
def test_frame_from_json_nones(self):
    df = DataFrame([[1, 2], [4, 5, 6]])
    unser = read_json(df.to_json())
    self.assertTrue(np.isnan(unser[2][0]))

    df = DataFrame([['1', '2'], ['4', '5', '6']])
    unser = read_json(df.to_json())
    self.assertTrue(np.isnan(unser[2][0]))
    unser = read_json(df.to_json(), dtype=False)
    self.assertTrue(unser[2][0] is None)
    unser = read_json(df.to_json(), convert_axes=False, dtype=False)
    self.assertTrue(unser['2']['0'] is None)

    unser = read_json(df.to_json(), numpy=False)
    self.assertTrue(np.isnan(unser[2][0]))
    unser = read_json(df.to_json(), numpy=False, dtype=False)
    self.assertTrue(unser[2][0] is None)
    unser = read_json(df.to_json(), numpy=False,
                      convert_axes=False, dtype=False)
    self.assertTrue(unser['2']['0'] is None)

    # infinities get mapped to nulls which get mapped to NaNs during
    # deserialisation
    df = DataFrame([[1, 2], [4, 5, 6]])
    df.loc[0, 2] = np.inf
    unser = read_json(df.to_json())
    self.assertTrue(np.isnan(unser[2][0]))
    unser = read_json(df.to_json(), dtype=False)
    self.assertTrue(np.isnan(unser[2][0]))

    df.loc[0, 2] = np.NINF
    unser = read_json(df.to_json())
    self.assertTrue(np.isnan(unser[2][0]))
    unser = read_json(df.to_json(), dtype=False)
    self.assertTrue(np.isnan(unser[2][0]))
def test_frame_empty_mixedtype(self):
    # mixed type
    df = DataFrame(columns=['jim', 'joe'])
    df['joe'] = df['joe'].astype('i8')
    self.assertTrue(df._is_mixed_type)
    assert_frame_equal(read_json(df.to_json(), dtype=dict(df.dtypes)), df,
                       check_index_type=False)
def test_frame_mixedtype_orient(self):  # GH10289
    vals = [[10, 1, 'foo', .1, .01],
            [20, 2, 'bar', .2, .02],
            [30, 3, 'baz', .3, .03],
            [40, 4, 'qux', .4, .04]]
    df = DataFrame(vals, index=list('abcd'),
                   columns=['1st', '2nd', '3rd', '4th', '5th'])
    self.assertTrue(df._is_mixed_type)
    right = df.copy()

    for orient in ['split', 'index', 'columns']:
        inp = df.to_json(orient=orient)
        left = read_json(inp, orient=orient, convert_axes=False)
        assert_frame_equal(left, right)

    right.index = np.arange(len(df))
    inp = df.to_json(orient='records')
    left = read_json(inp, orient='records', convert_axes=False)
    assert_frame_equal(left, right)

    right.columns = np.arange(df.shape[1])
    inp = df.to_json(orient='values')
    left = read_json(inp, orient='values', convert_axes=False)
    assert_frame_equal(left, right)