Python read_json() usage examples (source code)

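The snippets below are collected from open-source projects. For orientation, here is a minimal sketch of the pandas.read_json call they all build on (the JSON strings and file names are made up for illustration):

import pandas as pd

# a JSON array of records becomes a DataFrame, one row per object
df = pd.read_json('[{"a": 1, "b": 2}, {"a": 3, "b": 4}]', orient='records')

# typ='series' returns a Series instead; the object keys become the index
s = pd.read_json('{"x": 1, "y": 2}', typ='series')

# read_json also accepts file paths and URLs, e.g. pd.read_json('data.json')
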
03-evaluate.py (project: crema, author: bmcfee)
def evaluate(input_path, n_jobs):

    aud, ann = zip(*crema.utils.get_ann_audio(input_path))

    test_idx = set(pd.read_json('index_test.json')['id'])

    # drop anything not in the test set
    ann = [ann_i for ann_i in ann if crema.utils.base(ann_i) in test_idx]
    aud = [aud_i for aud_i in aud if crema.utils.base(aud_i) in test_idx]

    stream = tqdm(zip(ann, aud), desc='Evaluating test set', total=len(ann))

    results = Parallel(n_jobs=n_jobs)(delayed(track_eval)(ann_i, aud_i)
                                      for ann_i, aud_i in stream)
    df = pd.DataFrame.from_dict(dict(results), orient='index')

    print('Results')
    print('-------')
    print(df.describe())

    df.to_json(os.path.join(OUTPUT_PATH, 'test_scores.json'))
fastq_stats.py (project: sequana, author: sequana)
def get_stats(self):
        import pandas as pd
        filenames, mode = self._get_files("*.json")
        if mode == "pe":
            df1 = pd.read_json(filenames[0])
            df2 = pd.read_json(filenames[1])
            df  = pd.concat([df1, df2])
            # Should have been sorted !
            df.index = ['R1', 'R2']
        else:
            df = pd.read_json(filenames[0])
            df.index = ['R1']
        df = df[["A", "C", "G", "T", "N", "n_reads", "mean quality", "GC content",
                "average read length", "total bases"]]
        for this in "ACGTN":
            df[this] /= df["total bases"] 
            df[this] *= 100
        return df
poloniex.py (project: catalyst, author: enigmampc)
def fetch_raw_metadata_frame(self, api_key, page_number):
        if page_number > 1:
            return pd.DataFrame([])

        raw = pd.read_json(
            self._format_metadata_url(
              api_key,
              page_number,
            ),
            orient='index',
        )

        raw = raw.sort_index().reset_index()
        raw.rename(
            columns={'index': 'symbol'},
            inplace=True,
        )

        raw = raw[raw['isFrozen'] == 0]
        return raw
medium_topstories_analyzer.py (project: Medium-crawler-with-data-analyzer, author: lifei96)
def read_stories_without_tags():
    stories = list()
    current_date = START_DATE
    while current_date <= END_DATE:
        file_in = open("./TopStories/%s.json" % current_date.isoformat(), 'r')
        raw_data = json.loads(str(file_in.read()))
        file_in.close()
        for raw_story in raw_data['stories']:
            story = dict()
            story['top_date'] = current_date.isoformat()
            story['story_id'] = raw_story['story_id']
            story['author'] = raw_story['author']
            story['published_date'] = raw_story['published_date']
            story['recommends'] = raw_story['recommends']
            story['responses'] = raw_story['responses']
            story['tags_count'] = len(raw_story['tags'])
            stories.append(story)
        print(current_date.isoformat())
        current_date = current_date + datetime.timedelta(days=1)
    return pd.read_json(json.dumps(stories))
medium_topstories_analyzer.py (project: Medium-crawler-with-data-analyzer, author: lifei96)
def read_stories_by_tags():
    tags = list()
    current_date = START_DATE
    while current_date <= END_DATE:
        file_in = open("./TopStories/%s.json" % current_date.isoformat(), 'r')
        raw_data = json.loads(str(file_in.read()))
        file_in.close()
        for raw_story in raw_data['stories']:
            for raw_tag in raw_story['tags']:
                tag = dict()
                tag['top_date'] = current_date.isoformat()
                tag['story_id'] = raw_story['story_id']
                tag['author'] = raw_story['author']
                tag['published_date'] = raw_story['published_date']
                tag['recommends'] = raw_story['recommends']
                tag['responses'] = raw_story['responses']
                tag['name'] = raw_tag['name']
                tag['post_count'] = raw_tag['postCount']
                tag['follower_count'] = raw_tag['metadata']['followerCount']
                tags.append(tag)
        print(current_date.isoformat())
        current_date = current_date + datetime.timedelta(days=1)
    return pd.read_json(json.dumps(tags))
test_pandas.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_frame_from_json_bad_data(self):
        self.assertRaises(ValueError, read_json, StringIO('{"key":b:a:d}'))

        # too few indices
        json = StringIO('{"columns":["A","B"],'
                        '"index":["2","3"],'
                        '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}')
        self.assertRaises(ValueError, read_json, json,
                          orient="split")

        # too many columns
        json = StringIO('{"columns":["A","B","C"],'
                        '"index":["1","2","3"],'
                        '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}')
        self.assertRaises(AssertionError, read_json, json,
                          orient="split")

        # bad key
        json = StringIO('{"badkey":["A","B"],'
                        '"index":["2","3"],'
                        '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}')
        with tm.assertRaisesRegexp(ValueError, r"unexpected key\(s\): badkey"):
            read_json(json, orient="split")
test_pandas.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_v12_compat(self):
        df = DataFrame(
            [[1.56808523, 0.65727391, 1.81021139, -0.17251653],
             [-0.2550111, -0.08072427, -0.03202878, -0.17581665],
             [1.51493992, 0.11805825, 1.629455, -1.31506612],
             [-0.02765498, 0.44679743, 0.33192641, -0.27885413],
             [0.05951614, -2.69652057, 1.28163262, 0.34703478]],
            columns=['A', 'B', 'C', 'D'],
            index=pd.date_range('2000-01-03', '2000-01-07'))
        df['date'] = pd.Timestamp('19920106 18:21:32.12')
        df.ix[3, 'date'] = pd.Timestamp('20130101')
        df['modified'] = df['date']
        df.ix[1, 'modified'] = pd.NaT

        v12_json = os.path.join(self.dirpath, 'tsframe_v012.json')
        df_unser = pd.read_json(v12_json)
        assert_frame_equal(df, df_unser)

        df_iso = df.drop(['modified'], axis=1)
        v12_iso_json = os.path.join(self.dirpath, 'tsframe_iso_v012.json')
        df_unser_iso = pd.read_json(v12_iso_json)
        assert_frame_equal(df_iso, df_unser_iso)
test_pandas.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_date_format_frame(self):
        df = self.tsframe.copy()

        def test_w_date(date, date_unit=None):
            df['date'] = Timestamp(date)
            df.ix[1, 'date'] = pd.NaT
            df.ix[5, 'date'] = pd.NaT
            if date_unit:
                json = df.to_json(date_format='iso', date_unit=date_unit)
            else:
                json = df.to_json(date_format='iso')
            result = read_json(json)
            assert_frame_equal(result, df)

        test_w_date('20130101 20:43:42.123')
        test_w_date('20130101 20:43:42', date_unit='s')
        test_w_date('20130101 20:43:42.123', date_unit='ms')
        test_w_date('20130101 20:43:42.123456', date_unit='us')
        test_w_date('20130101 20:43:42.123456789', date_unit='ns')

        self.assertRaises(ValueError, df.to_json, date_format='iso',
                          date_unit='foo')
test_pandas.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_date_format_series(self):
        def test_w_date(date, date_unit=None):
            ts = Series(Timestamp(date), index=self.ts.index)
            ts.ix[1] = pd.NaT
            ts.ix[5] = pd.NaT
            if date_unit:
                json = ts.to_json(date_format='iso', date_unit=date_unit)
            else:
                json = ts.to_json(date_format='iso')
            result = read_json(json, typ='series')
            assert_series_equal(result, ts)

        test_w_date('20130101 20:43:42.123')
        test_w_date('20130101 20:43:42', date_unit='s')
        test_w_date('20130101 20:43:42.123', date_unit='ms')
        test_w_date('20130101 20:43:42.123456', date_unit='us')
        test_w_date('20130101 20:43:42.123456789', date_unit='ns')

        ts = Series(Timestamp('20130101 20:43:42.123'), index=self.ts.index)
        self.assertRaises(ValueError, ts.to_json, date_format='iso',
                          date_unit='foo')
test_pandas.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_date_unit(self):
        df = self.tsframe.copy()
        df['date'] = Timestamp('20130101 20:43:42')
        df.ix[1, 'date'] = Timestamp('19710101 20:43:42')
        df.ix[2, 'date'] = Timestamp('21460101 20:43:42')
        df.ix[4, 'date'] = pd.NaT

        for unit in ('s', 'ms', 'us', 'ns'):
            json = df.to_json(date_format='epoch', date_unit=unit)

            # force date unit
            result = read_json(json, date_unit=unit)
            assert_frame_equal(result, df)

            # detect date unit
            result = read_json(json, date_unit=None)
            assert_frame_equal(result, df)
test_pandas.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_weird_nested_json(self):
        # this used to core dump the parser
        s = r'''{
        "status": "success",
        "data": {
        "posts": [
            {
            "id": 1,
            "title": "A blog post",
            "body": "Some useful content"
            },
            {
            "id": 2,
            "title": "Another blog post",
            "body": "More content"
            }
           ]
          }
        }'''

        read_json(s)
test_pandas.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_misc_example(self):

        # parsing unordered input fails
        result = read_json('[{"a": 1, "b": 2}, {"b":2, "a" :1}]', numpy=True)
        expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])

        error_msg = """DataFrame\\.index are different

DataFrame\\.index values are different \\(100\\.0 %\\)
\\[left\\]:  Index\\(\\[u?'a', u?'b'\\], dtype='object'\\)
\\[right\\]: RangeIndex\\(start=0, stop=2, step=1\\)"""
        with tm.assertRaisesRegexp(AssertionError, error_msg):
            assert_frame_equal(result, expected, check_index_type=False)

        result = read_json('[{"a": 1, "b": 2}, {"b":2, "a" :1}]')
        expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
        assert_frame_equal(result, expected)
backend_live_runner.py (project: autoxd, author: nessessary)
def _getUserStrategy(self, downloadStrategyInterval=60):
    """??????????????
    downloadStrategyInterval: int default=60 ?
    return: df"""
    k = "SignForWebUser_preLoadTime"
    preLoadTime = myredis.get_obj(k)
    if preLoadTime is None:
        preLoadTime = datetime.datetime(2015, 10, 19, 15, 33, 47, 53000)    # fallback time used when nothing has been cached yet
    # re-download the strategy list if the cached copy is older than the interval
    if (agl.curTime() - preLoadTime).total_seconds() > downloadStrategyInterval:
        url = "http://stocksign.sinaapp.com/query?cmd=query_strategy"
        result = Http().get(url)
        df_source = pd.read_json(result)
        df_source.columns = ['id', 'user_id', 'title', 'code']
        preLoadTime = agl.curTime()
        myredis.set_obj(k, preLoadTime)
        myredis.set_obj('mysource', df_source)
    else:
        df_source = myredis.get_obj('mysource')
        if df_source is None:
            df_source = pd.DataFrame([])
    return df_source
processingData.py (project: market-predictor, author: bsmitty5000)
def read_scraped_jason(filename):
    df = pd.read_json(filename)

    for column in df.columns:
        df[column] = df[column].apply(unlist)
    # gets only first 10 characters of date: year/month/day
    df['date'] = df['date'].apply(lambda x: x[:10])
    df['date'] = pd.to_datetime(df['date'])

    # remove duplicate posts, if any
    df = df.drop_duplicates(subset = ['keywords'])
    # sorts dataframe by post date
    df = df.sort_values(by='date')

    df = df.drop('body', 1)
    df = df.drop('title', 1)

    df['keywords'].replace('', np.nan, inplace=True)
    df = df.dropna()

    return df
indicator_acquire.py (project: StockPredictor, author: wallsbreaker)
def extract_features_from_json():
    input_path = '../../data/20_5_from_2008/'
    df_list = []
    for json_file in os.listdir(input_path):
        train_data = pd.read_json(os.path.join(input_path, json_file), orient='columns')
        train_data.dropna(inplace=True)
        train_data.sort_index(ascending=False, inplace=True)
        train_data.index = range(len(train_data))
        if len(train_data) > 0:
            data_norm(train_data)

        values = train_data['real_up_after_240'].tolist()
        codes = train_data['code'].tolist()
        train_data.drop(['datetime', 'code', 'real_up_after_240'], axis=1, inplace=True)
        features = train_data.values.tolist()

        with open('../../data/20_5_from_2008/data', 'a') as f:
            for ix in xrange(len(codes)):
                if np.inf not in features[ix] and -np.inf not in features[ix]:
                    f.write('%s;0 %s;1 %f\n' % (codes[ix][2:], ' '.join([str(x) for x in features[ix]]), values[ix]))
utils.py (project: fitbit-analyzer, author: 5agado)
def loadStepsData(dumpDir):
    """
    Load steps data from dumping done using the official Fitbit API.
    Check README file for further info on the scraping process and saved format
    :param dumpDir: the folder where the date has been dumped
    :return: a list of dataframes, one for each day, containing the intraday steps data
    """
    def loadFun(jsonData):
        intradayData = jsonData['activities-steps-intraday']['dataset']
        date = jsonData['activities-steps'][0]['dateTime']
        if not intradayData:
            return None
        df = pd.read_json(json.dumps(intradayData))
        df['datetime'] = pd.to_datetime(date + ' ' + df['time'])
        df.drop('time', inplace=True, axis=1)
        return df

    return _loadData(dumpDir, 'steps', loadFun)
bitx.py (project: bitrader, author: jr-minnaar)
def get_orders_frame(self, state=None, kind='auth'):
        q = self.get_orders(state, kind)
        tj = json.dumps(q['orders'])
        df = pd.read_json(tj, convert_dates=['creation_timestamp', 'expiration_timestamp'])
        df.index = df.creation_timestamp
        return df
backend.py (project: berlin-devfest-2016-backend, author: giansegato)
def processData(data):
    df = pd.DataFrame.transpose(pd.read_json(json.dumps(data)))
    df = df.dropna(subset = [key for key in df.keys() if "x_" in key])
    df = df[pd.notnull(df['y_observed'])]

    X = df[[key for key in df.keys() if "x_" in key]].values
    y = df["y_observed"].values

    return X, y

# 5th: initial model
s3_data_store.py (project: fabric8-analytics-license-analysis, author: fabric8-analytics)
def read_json_file_into_pandas_df(self, filename, index_col=False):
        json_string = self.read_json_file(filename=filename)
        return pd.read_json(json_string)
bgdata.py (project: OpenAPS, author: medicinexlab)
def get_bg_dataframe(id_str):
    """
    Function to convert the json file to a pandas dataframe.
    It takes in the string of the id and looks for the devicestatus.json file.
    All data should be stored such that in the directory where main.py lies,
    there is a directory called "data". Inside this directory,
    there is another directory with just the ID Number. Inside this data folder lies the
    devicestatus.json file, which contains the data. If the file is not in the path given,
    it raises an IOError. The path should look like the following example:

    ./data/12345678/devicestatus.json

    Input:      id_str                          ID number as a string
    Output:     bg_df                           Pandas dataframe of all of the data from ./data/[id_str]/devicestatus.json
    Usage:      bg_df = get_bg_dataframe("12345678")
    """

    try:
        file_location = "./data/" + id_str + "/devicestatus.json"
        bg_df = pd.read_json(file_location) #Opens the data file and reads in the data into a dataFrame
    except:
        raise IOError(file_location + " is not a valid file.")

    print("")
    print("{} total entries.".format(len(bg_df)))

    return bg_df


#Function to find the indices for the given start and end date strings
find_files.py (project: geekbook, author: mmagnus)
def file_search(filename, verbose):
    """Search for filename. Returns dirname of the filename's path, and the full path.

    170107 add cache. If the db is not found, create an empty pandas df 
    and populate this df with append later. If the filename is not in the db
    run g/locate. Then, save the found path to the db (using pandas, via df, to json)"""

    # cache
    if os.path.isfile(JSON_DB):
        df = pd.read_json(JSON_DB, orient='records')
        #filename = 'x.pse'
        pathdf = df[df['fn'] == filename]['path']
        if not pathdf.empty:
            path = pathdf.to_string(index=False)
            logger.info('find file [from the db]:' + filename)
            return os.path.dirname(path), path
    else:
        df = pd.DataFrame()

    # if filename is not found in the db
    logger.info('find file:' + filename)

    if platform.system() == "Linux":
        out = commands.getoutput('locate ' + filename)
    if platform.system() == "Darwin":
        out = commands.getoutput('glocate ' + filename)
    first_hit = out.split('\n')[0]
    logger.info('# of hits ' + str(len(out.split('\n'))) + " " + out.replace('\n',', '))
    if not first_hit:
        logger.info('not found')
    else:
        logger.info('hit ' + first_hit)

    # update cache
    dffile = pd.DataFrame([[filename, first_hit],], columns=['fn', 'path'])
    df = df.append(dffile, ignore_index=True)
    # save to json
    df.to_json(JSON_DB, orient='records')
    ##
    return os.path.dirname(first_hit), first_hit
future.py (project: slaveo, author: lamter)
def get_holiday_json(self):
        """
        Load the holiday calendar from holiday.json.
        :return: holiday data as a pandas Series, sorted by index
        """
        path = os.path.join(pwd, 'holiday.json')
        return pd.read_json(path, typ="series").sort_index()
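A side note on the snippet above: typ="series" makes read_json return a pandas Series rather than a DataFrame, which suits a simple key-to-value file such as this holiday list. A minimal sketch with made-up file contents:

import pandas as pd

# holiday.json is assumed to look like the JSON object below
holidays = pd.read_json('{"2015-01-01": "New Year", "2015-10-01": "National Day"}', typ="series")
print(holidays.sort_index())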
pycore.py (project: sci-pype, author: jay-johnson)
def pd_json_to_df(self, data_json, sorted_by_key="Date", in_ascending=True):
        import pandas as pd
        new_df  = pd.read_json(data_json).sort_values(by=sorted_by_key, ascending=in_ascending)
        return new_df
    # end of pd_json_to_df
preprocessing.py (project: visualizations, author: ContentMine)
def get_raw(filename):
    with open(filename) as infile:
        raw = infile.read()
        # the next line needs rewriting as soon as the zenodo-dump conforms to 'records'-format
        # [{k:v}, {k:v},...]
        rawfacts = pd.read_json('[%s]' % ','.join(raw.splitlines()), orient='records')
    return rawfacts
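A note on get_raw above: the manual bracket-wrapping is needed because the dump is newline-delimited JSON. Newer pandas versions can read that format directly with lines=True (the same flag the quickdraw example further down uses); a rough equivalent, assuming filename points at the same one-object-per-line dump:

# each line of the dump holds one JSON record
rawfacts = pd.read_json(filename, orient='records', lines=True)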


### functions for ingesting from CProject



### functions for preprocessing
feature_engineering_func.py (project: quickdraw_prediction_model, author: keisukeirie)
def load_json(filename):
    '''
    Function:
        - opens json file and store information in a pandas dataframe
        - also prints out aggregated df with counts of picture by countrycode
    Input:
        1. filename/path ex: ./data/filename.json
    Output:
        1. new dataframe containing json info
    '''
    df = pd.read_json(filename, lines=True)
    test = df.groupby(df['countrycode']).count()
    print test.sort(columns='drawing',ascending=False).head(15)
    return df
utils.py (project: kaggle-cooking, author: fpoli)
def read_data(project_path):
    print "Reading data..."
    train = pd.read_json(project_path + "/data/train.json")
    test = pd.read_json(project_path + "/data/test.json")

    print "Train size:", len(train.id)
    print "Test size:", len(test.id)

    return train, test
local_filesystem.py (project: fabric8-analytics-stack-analysis, author: fabric8-analytics)
def read_json_file_into_pandas_df(self, filename):
        return pd.read_json(os.path.join(self.src_dir, filename), dtype=np.int8)
s3_data_store.py (project: fabric8-analytics-stack-analysis, author: fabric8-analytics)
def read_json_file_into_pandas_df(self, filename):
        json_string = self.read_json_file(filename=filename)
        return pd.read_json(json_string, dtype=np.int8)
crimeHeatMap.py (project: IntroPython2016, author: UWPCE-PythonCert)
def apiResults(locationInfo):
    query = ("https://data.seattle.gov/resource/pu5n-trf4.json?$limit={}&$where=within_circle(incident_location,{},{},{})"
        .format(locationInfo['limit'],
                locationInfo['latitude'],
                locationInfo['longitude'],
                locationInfo['radius']))
    return pd.read_json(query)
poloniex.py (project: catalyst, author: enigmampc)
def fetch_raw_symbol_frame(self,
                               api_key,
                               symbol,
                               calendar,
                               start_date,
                               end_date,
                               frequency):

        # TODO: replace this with direct exchange call
        # The end date and frequency should be used to
        # calculate the number of bars
        if(frequency == 'minute'):
            pc = PoloniexCurator()
            raw = pc.onemin_to_dataframe(symbol, start_date, end_date)

        else:
            raw = pd.read_json(
                self._format_data_url(
                    api_key,
                    symbol,
                    start_date,
                    end_date,
                    frequency,
                ),
                orient='records',
            )
            raw.set_index('date', inplace=True)

        # BcolzDailyBarReader introduces a 1/1000 factor in the way
        # pricing is stored on disk, which we compensate here to get
        # the right pricing amounts
        # ref: data/us_equity_pricing.py
        scale = 1
        raw.loc[:, 'open'] /= scale
        raw.loc[:, 'high'] /= scale
        raw.loc[:, 'low'] /= scale
        raw.loc[:, 'close'] /= scale
        raw.loc[:, 'volume'] *= scale

        return raw

