Python DataFrame() example source code

history_container.py (project: zipline-chinese, author: zhanghan1990)
def aggregate_ohlcv_panel(self,
                              fields,
                              ohlcv_panel,
                              items=None,
                              minor_axis=None):
        """
        Convert an OHLCV Panel into a DataFrame by aggregating each field's
        frame into a Series.
        """
        vals = ohlcv_panel
        if isinstance(ohlcv_panel, pd.Panel):
            vals = ohlcv_panel.values
            items = ohlcv_panel.items
            minor_axis = ohlcv_panel.minor_axis

        data = [
            self.frame_to_series(
                field,
                vals[items.get_loc(field)],
                minor_axis
            )
            for field in fields
        ]
        return np.array(data)
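Note: pd.Panel was removed in pandas 1.0, so the isinstance branch above only fires on older pandas. A minimal sketch of the decomposition the method relies on, assuming pandas < 1.0 (the field and sid names here are illustrative):

import numpy as np
import pandas as pd

panel = pd.Panel(np.random.randn(5, 10, 3),
                 items=['open', 'high', 'low', 'close', 'volume'],
                 minor_axis=['sid_1', 'sid_2', 'sid_3'])
vals = panel.values                               # shape: (items, major_axis, minor_axis)
close_frame = vals[panel.items.get_loc('close')]  # the 2-D frame for one field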
tasks.py (project: sensu_drive, author: ilavender)
def y_sum_by_time(x_arr, y_arr, top=None):
    df = pd.DataFrame({'Timestamp': pd.to_datetime(x_arr, unit='s'), 'Status': y_arr})
    df['Date'] = df['Timestamp'].apply(lambda x: "%d/%d/%d" % (x.day, x.month, x.year))
    df['Hour'] = df['Timestamp'].apply(lambda x: "%d" % (x.hour))
    df['Weekday'] = df['Timestamp'].apply(lambda x: x.day_name())  # weekday_name was removed in pandas 1.0

    times = ['Hour', 'Weekday', 'Date']

    result = {}

    for groupby in times:

        df_group = df.groupby(groupby, as_index=False).agg({'Status': np.sum})

        if top is not None and top > 0:
            # keep the rows with the `top` largest status sums
            idx = df_group.index.isin(df_group.nlargest(top, 'Status').index)
        else:
            idx = df_group['Status'] == df_group['Status'].max()

        result[groupby] = {k: g['Status'].replace(np.nan, 'None').tolist() for k,g in df_group[idx].groupby(groupby)}

    return result
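A hypothetical call, with epoch-second timestamps and per-event status values (the numbers are made up):

timestamps = [1500000000, 1500003600, 1500007200]
statuses = [1, 0, 2]
summary = y_sum_by_time(timestamps, statuses, top=2)
print(summary['Hour'])   # status sums for the two largest hours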
manubot.py (project: manubot, author: greenelab)
def get_citation_df(args, text):
    """
    Generate citation_df and save it to 'citations.tsv'.
    """
    citation_df = pandas.DataFrame(
        {'string': get_citation_strings(text)}
    )
    if args.citation_tags_path.is_file():
        tag_df = pandas.read_table(args.citation_tags_path)
        tag_df['string'] = '@tag:' + tag_df.tag
        for citation in tag_df.citation:
            is_valid_citation_string('@' + citation)
        citation_df = citation_df.merge(tag_df[['string', 'citation']], how='left')
    else:
        citation_df['citation'] = None
        logging.info(f'missing {args.citation_tags_path} file: no citation tags set')
    citation_df.citation.fillna(citation_df.string.astype(str).str.lstrip('@'), inplace=True)
    citation_df['standard_citation'] = citation_df.citation.map(standardize_citation)
    citation_df['citation_id'] = citation_df.standard_citation.map(get_citation_id)
    citation_df = citation_df.sort_values(['standard_citation', 'citation'])
    citation_df.to_csv(args.citations_path, sep='\t', index=False)
    check_collisions(citation_df)
    check_multiple_citation_strings(citation_df)
    return citation_df
run_mpi.py (project: pylspm, author: lseman)
def do_work_pso(item, nclusters, data, LVcsv, Mcsv, scheme, reg, h, maximo, population):
    output = pd.DataFrame(population[item].position)
    output.columns = ['Split']
    dataSplit = pd.concat([data, output], axis=1)
    f1 = []
    results = []
    for i in range(nclusters):
        dataSplited = (dataSplit.loc[dataSplit['Split']
                                     == i]).drop('Split', axis=1)
        dataSplited.index = range(len(dataSplited))

        try:
            results.append(PyLSpm(dataSplited, LVcsv, Mcsv, scheme,
                                  reg, 0, 50, HOC='true'))

            resid = results[i].residuals()[3]
            f1.append(resid)
        except Exception:
            f1.append(10000)
#    print((1 / np.sum(f1)))
    return (1 / np.sum(f1))
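The pylspm worker functions on this page all share the same fitness convention: sum the per-cluster residuals and return the inverse, so better fits score higher and a failed fit (penalized with 10000) drags the score toward zero. A toy illustration with made-up residuals:

residuals = [0.8, 1.2, 10000.0]   # hypothetical: the third cluster failed to fit
fitness = 1 / sum(residuals)      # larger is better; a single failure dominates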
run_mpi.py (project: pylspm, author: lseman)
def do_work_ga(item, nclusters, data, LVcsv, Mcsv, scheme, reg, h, maximo, population):
    output = pd.DataFrame(population[item].genes)
    output.columns = ['Split']
    dataSplit = pd.concat([data, output], axis=1)
    f1 = []
    results = []
    for i in range(nclusters):
        dataSplited = (dataSplit.loc[dataSplit['Split']
                                     == i]).drop('Split', axis=1)
        dataSplited.index = range(len(dataSplited))

        try:
            results.append(PyLSpm(dataSplited, LVcsv, Mcsv, scheme,
                                  reg, 0, 50, HOC='true'))

            resid = results[i].residuals()[3]
            f1.append(resid)
        except Exception:
            f1.append(10000)
    return (1 / np.sum(f1))

# Main
pylspm.py (project: pylspm, author: lseman)
def rhoA(self):
        # rhoA
        rhoA = pd.DataFrame(0, index=np.arange(1), columns=self.latent)

        for i in range(self.lenlatent):
            weights = pd.DataFrame(self.outer_weights[self.latent[i]])
            weights = weights[(weights.T != 0).any()]
            result = pd.DataFrame.dot(weights.T, weights)
            result_ = pd.DataFrame.dot(weights, weights.T)

            S = self.data_[self.Variables['measurement'][
                self.Variables['latent'] == self.latent[i]]]
            S = pd.DataFrame.dot(S.T, S) / S.shape[0]
            numerador = (
                np.dot(np.dot(weights.T, (S - np.diag(np.diag(S)))), weights))
            denominador = (
                (np.dot(np.dot(weights.T, (result_ - np.diag(np.diag(result_)))), weights)))
            rhoA_ = ((result)**2) * (numerador / denominador)
            if(np.isnan(rhoA_.values)):
                rhoA[self.latent[i]] = 1
            else:
                rhoA[self.latent[i]] = rhoA_.values

        return rhoA.T
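For reference, the loop above appears to implement Dijkstra-Henseler's reliability rho_A. In the code's notation, with w the nonzero outer weights of a block and S the block's empirical covariance matrix:

$$\hat{\rho}_A = (w^\top w)^2 \cdot \frac{w^\top\left(S - \operatorname{diag}(S)\right)w}{w^\top\left(w w^\top - \operatorname{diag}(w w^\top)\right)w}$$

Here result is the scalar w'w, numerador the numerator, and denominador the denominator of the fraction.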
pylspm.py (project: pylspm, author: lseman)
def xloads(self):
        # Xloadings
        A = self.data_.transpose().values
        B = self.fscores.transpose().values
        A_mA = A - A.mean(1)[:, None]
        B_mB = B - B.mean(1)[:, None]

        ssA = (A_mA**2).sum(1)
        ssB = (B_mB**2).sum(1)

        xloads_ = (np.dot(A_mA, B_mB.T) /
                   np.sqrt(np.dot(ssA[:, None], ssB[None])))
        xloads = pd.DataFrame(
            xloads_, index=self.manifests, columns=self.latent)

        return xloads
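The block above is a vectorized Pearson correlation between every manifest variable and every latent score. A small self-contained check of that identity, with hypothetical random frames:

import numpy as np
import pandas as pd

data = pd.DataFrame(np.random.randn(50, 3), columns=['m1', 'm2', 'm3'])
scores = pd.DataFrame(np.random.randn(50, 2), columns=['LV1', 'LV2'])
A = data.T.values; B = scores.T.values
A_m = A - A.mean(1)[:, None]; B_m = B - B.mean(1)[:, None]
xl = A_m.dot(B_m.T) / np.sqrt((A_m**2).sum(1)[:, None] * (B_m**2).sum(1)[None])
assert np.allclose(xl[0, 0], np.corrcoef(data['m1'], scores['LV1'])[0, 1])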
pylspm.py (project: pylspm, author: lseman)
def alpha(self):
        # Cronbach Alpha
        alpha = pd.DataFrame(0, index=np.arange(1), columns=self.latent)

        for i in range(self.lenlatent):
            block = self.data_[self.Variables['measurement']
                               [self.Variables['latent'] == self.latent[i]]]
            p = len(block.columns)

            if(p != 1):
                p_ = len(block)
                correction = np.sqrt((p_ - 1) / p_)
                soma = np.var(np.sum(block, axis=1))
                cor_ = pd.DataFrame.corr(block)

                denominador = soma * correction**2
                numerador = 2 * np.sum(np.tril(cor_) - np.diag(np.diag(cor_)))

                alpha_ = (numerador / denominador) * (p / (p - 1))
                alpha[self.latent[i]] = alpha_
            else:
                alpha[self.latent[i]] = 1

        return alpha.T
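For comparison, the textbook form of Cronbach's alpha for a block of p items with item variances sigma_i^2 and total-score variance sigma_X^2 is:

$$\alpha = \frac{p}{p-1}\left(1 - \frac{\sum_{i=1}^{p}\sigma_i^2}{\sigma_X^2}\right)$$

Since sigma_X^2 = sum of the item variances plus twice the sum of the pairwise covariances, the parenthesized term equals 2 * sum_{i<j} cov(x_i, x_j) / sigma_X^2; with standardized indicators those covariances are the off-diagonal correlations summed in numerador above, which is how the code arrives at an equivalent value (with a small-sample correction).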
boot_mpi.py (project: pylspm, author: lseman)
def do_work_pso(data, LVcsv, Mcsv, scheme, reg, h, maximo):
    # NOTE: `population`, `item` and `nclusters` are not defined in this scope;
    # the worker expects them as module-level globals (e.g. broadcast over MPI).
    output = pd.DataFrame(population[item].position)
    output.columns = ['Split']
    dataSplit = pd.concat([data, output], axis=1)
    f1 = []
    results = []
    for i in range(nclusters):
        dataSplited = (dataSplit.loc[dataSplit['Split']
                                     == i]).drop('Split', axis=1)
        dataSplited.index = range(len(dataSplited))

        try:
            results.append(PyLSpm(dataSplited, LVcsv, Mcsv, scheme,
                                  reg, 0, 50, HOC='true'))

            resid = results[i].residuals()[3]
            f1.append(resid)
        except Exception:
            f1.append(10000)
    print((1 / np.sum(f1)))
    return (1 / np.sum(f1))
boot.py (project: pylspm, author: lseman)
def do_work_ga(self, item):
        output = pd.DataFrame(self.population[item].genes)
        output.columns = ['Split']
        dataSplit = pd.concat([self.data, output], axis=1)
        f1 = []
        results = []
        for i in range(self.nclusters):
            dataSplited = (dataSplit.loc[dataSplit['Split']
                                         == i]).drop('Split', axis=1)
            dataSplited.index = range(len(dataSplited))

            try:
                results.append(PyLSpm(dataSplited, self.LVcsv, self.Mcsv, self.scheme,
                                      self.reg, 0, 50, HOC='true'))

                resid = results[i].residuals()[3]
                f1.append(resid)
            except Exception:
                f1.append(10000)
        print((1 / np.sum(f1)))
        return (1 / np.sum(f1))
boot.py (project: pylspm, author: lseman)
def do_work_pso(self, item):
        output = pd.DataFrame(self.population[item].position)
        output.columns = ['Split']
        dataSplit = pd.concat([self.data, output], axis=1)
        f1 = []
        results = []
        for i in range(self.nclusters):
            dataSplited = (dataSplit.loc[dataSplit['Split']
                                         == i]).drop('Split', axis=1)
            dataSplited.index = range(len(dataSplited))

            try:
                results.append(PyLSpm(dataSplited, self.LVcsv, self.Mcsv, self.scheme,
                                      self.reg, 0, 50, HOC='true'))

                resid = results[i].residuals()[3]
                f1.append(resid)
            except Exception:
                f1.append(10000)
        print((1 / np.sum(f1)))
        return (1 / np.sum(f1))
boot.py (project: pylspm, author: lseman)
def do_work_tabu(self, item):
        output = pd.DataFrame(self.population[item])
        output.columns = ['Split']
        dataSplit = pd.concat([self.data, output], axis=1)
        f1 = []
        results = []
        for i in range(self.nclusters):
            dataSplited = (dataSplit.loc[dataSplit['Split']
                                         == i]).drop('Split', axis=1)
            dataSplited.index = range(len(dataSplited))

            try:
                results.append(PyLSpm(dataSplited, self.LVcsv, self.Mcsv, self.scheme,
                                      self.reg, 0, 50, HOC='true'))

                resid = results[i].residuals()[3]
                f1.append(resid)
            except Exception:
                f1.append(10000)

        cost = (np.sum(f1))
        print(1 / cost)
        return [self.population[item], cost]
preprocess.py (project: rca-evaluation, author: sieve-microservices)
def apply(path):
    data = metadata.load(path)
    for service in data["services"]:
        filename = os.path.join(path, service["filename"])
        df = load_timeseries(filename, service)
        print(service)
        df2 = interpolate_missing(df[service["fields"]])
        classes = classify_series(df2)
        preprocessed_series = {}
        for k in classes["other_fields"]:
            # drop the first value so these series stay aligned with the
            # diff'ed series below (diff() loses its first element)
            preprocessed_series[k] = df2[k][1:]
        for k in classes["monotonic_fields"]:
            preprocessed_series[k + "-diff"] = df2[k].diff()[1:]
        newname = service["name"] + "-preprocessed.tsv.gz"
        df3 = pd.DataFrame(preprocessed_series)
        df3.to_csv(os.path.join(path, newname), sep="\t", compression='gzip')
        service["preprocessed_filename"] = newname
        service["preprocessed_fields"] = list(df3.columns)
        service.update(classes)
    metadata.save(path, data)
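The [1:] slicing above keeps every series the same length: .diff() produces a NaN in the first position, so both the diff'ed and the untouched columns drop their first value. A two-line illustration:

import pandas as pd

s = pd.Series([10, 12, 15])
print(s.diff())       # [NaN, 2.0, 3.0]: the first element is always NaN
print(s.diff()[1:])   # aligned with s[1:]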
lag.py (project: rca-evaluation, author: sieve-microservices)
def centroids(path):
    metadata = load_metadata(path)
    d = {}
    for srv in metadata["services"]:
        name = "%s/%s-cluster-1_1.tsv" % (path, srv["name"])
        df = pd.read_csv(name, sep="\t", index_col='time', parse_dates=True)
        d[srv["name"]] = df.centroid
    df2 = pd.DataFrame(d)
    df2 = df2.fillna(method="bfill", limit=1e9)
    df2 = df2.fillna(method="ffill", limit=1e9)
    fig = df2.plot()
    handles, labels = fig.get_legend_handles_labels()
    fig.grid('on')
    lgd = fig.legend(handles, labels, loc='upper center', bbox_to_anchor=(0.5,-0.1))
    plt.savefig("graph.png", bbox_extra_artists=(lgd,), bbox_inches='tight')
    plt.close("all")
format_test.py (project: treecat, author: posterior)
def test_pd_outer_join():
    dfs = [
        pd.DataFrame({
            'id': [0, 1, 2, 3],
            'a': ['foo', 'bar', 'baz', np.nan],
            'b': ['panda', 'zebra', np.nan, np.nan],
        }),
        pd.DataFrame({
            'id': [1, 2, 3, 4],
            'b': ['mouse', np.nan, 'tiger', 'egret'],
            'c': ['toe', 'finger', 'nose', np.nan],
        }),
    ]
    expected = pd.DataFrame({
        'id': [0, 1, 2, 3, 4],
        'a': ['foo', 'bar', 'baz', np.nan, np.nan],
        'b': ['panda', 'zebra', np.nan, 'tiger', 'egret'],
        'c': [np.nan, 'toe', 'finger', 'nose', np.nan],
    }).set_index('id')
    actual = pd_outer_join(dfs, on='id')
    print(expected)
    print(actual)
    assert expected.equals(actual)
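pd_outer_join itself is not shown on this page. A minimal implementation that satisfies the expected frame above (values from earlier frames win on overlapping columns), offered as a sketch rather than treecat's actual code:

from functools import reduce
import pandas as pd

def pd_outer_join(dfs, on):
    # Outer-join on the key column; earlier frames take precedence.
    return reduce(lambda left, right: left.combine_first(right),
                  (df.set_index(on) for df in dfs))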
utils.py (project: GeoInfoMiner, author: jingge326)
def read_image(imagery_path):
    # Read image
    dataset = gdal.Open(imagery_path)
    dsmatrix = dataset.ReadAsArray(xoff=0, yoff=0, xsize=dataset.RasterXSize, ysize=dataset.RasterYSize)

    # Get Geographic meta data
    geo_trans_list = dataset.GetGeoTransform()
    proj_str = dataset.GetProjection()
    num_bands = dataset.RasterCount

    # Handle single-band and multi-band rasters
    if num_bands > 1:
        # Unfold array into pandas DataFrame
        rows = dsmatrix.shape[1]
        cols = dsmatrix.shape[2]
        data_array = dsmatrix[:,0,:]
        for irow in range(1,rows):
            tempmatrix = dsmatrix[:,irow,:]
            data_array = np.hstack((data_array,tempmatrix))
    else:
        # Unfold array into pandas DataFrame
        rows = dsmatrix.shape[0]
        cols = dsmatrix.shape[1]
        data_array = dsmatrix[0,:]
        for irow in range(1,rows):
            tempmatrix = dsmatrix[irow,:]
            data_array = np.hstack((data_array,tempmatrix))

    data_frame = pd.DataFrame(data_array.T)

    return data_frame, rows, cols, geo_trans_list, proj_str, num_bands
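A hypothetical call, assuming the GDAL Python bindings are installed and the path points at a real raster file:

from osgeo import gdal   # required by read_image

df, rows, cols, geo_trans, proj, nbands = read_image('example.tif')  # made-up path
print(df.shape)   # one row per pixel, one column per band (multi-band case)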
data_container.py (project: xpandas, author: alan-turing-institute)
def __init__(self, *args, **kwargs):
        '''
        Accepts the same arguments as pandas.DataFrame
        https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html

        The data argument should be a list of XSeries objects or a dict of
        XSeries objects. If a dict is passed, each key must be a string and
        names the corresponding column. For example, data should look like
        data = {'col_1': s_1, 'col_2': s_2, ..., 'col_n': s_n} where each
        s_i is an XSeries.
        '''
        data = kwargs.get('data')
        if data is None and args:
            data = args[0]

        data_to_check = []
        if isinstance(data, list):
            data_to_check = data
        elif isinstance(data, dict):
            data_to_check = data.values()

        for d in data_to_check:
            if not isinstance(d, XSeries):
                raise ValueError('All data must be XSeries instances')
        super(XDataFrame, self).__init__(*args, **kwargs)
data_container.py (project: xpandas, author: alan-turing-institute)
def to_pandas_dataframe(self):
        '''
        Convert self to a plain pandas.DataFrame if all columns hold
        primitive types. See XSeries.to_pandas_series for details.
        :return: self, with its class changed to pandas.DataFrame
        '''
        data_types = self.get_data_types()
        is_all_columns_are_primitive = all(
            _is_class_a_primitive(dt)
            for dt in data_types
        )
        if is_all_columns_are_primitive:
            self.__class__ = pd.DataFrame
        else:
            raise ValueError('Unable to cast to pd.DataFrame. {} is not all primitives.'.format(data_types))
        return self
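A hypothetical round trip, assuming XSeries wraps plain lists the way the xpandas docs suggest:

s_1 = XSeries([1, 2, 3])
s_2 = XSeries(['a', 'b', 'c'])
xdf = XDataFrame({'col_1': s_1, 'col_2': s_2})
pdf = xdf.to_pandas_dataframe()   # succeeds: int and str are primitive types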
algorithm.py (project: zipline-chinese, author: zhanghan1990)
def _create_daily_stats(self, perfs):
        # create daily and cumulative stats dataframe
        daily_perfs = []
        # TODO: the loop here could overwrite expected properties
        # of daily_perf. Could potentially raise or log a
        # warning.
        for perf in perfs:
            if 'daily_perf' in perf:

                perf['daily_perf'].update(
                    perf['daily_perf'].pop('recorded_vars')
                )
                perf['daily_perf'].update(perf['cumulative_risk_metrics'])
                daily_perfs.append(perf['daily_perf'])
            else:
                self.risk_report = perf

        daily_dts = [np.datetime64(perf['period_close'], utc=True)
                     for perf in daily_perfs]
        daily_stats = pd.DataFrame(daily_perfs, index=daily_dts)

        return daily_stats
algorithm.py (project: zipline-chinese, author: zhanghan1990)
def _pipeline_output(self, pipeline, chunks):
        """
        Internal implementation of `pipeline_output`.
        """
        today = normalize_date(self.get_datetime())
        try:
            data = self._pipeline_cache.unwrap(today)
        except Expired:
            data, valid_until = self._run_pipeline(
                pipeline, today, next(chunks),
            )
            self._pipeline_cache = CachedObject(data, valid_until)

        # Now that we have a cached result, try to return the data for today.
        try:
            return data.loc[today]
        except KeyError:
            # This happens if no assets passed the pipeline screen on a given
            # day.
            return pd.DataFrame(index=[], columns=data.columns)
history_container.py (project: zipline-chinese, author: zhanghan1990)
def frame_from_bardata(self, data, algo_dt):
        """
        Create a DataFrame from the given BarData and algo dt.
        """
        data = data._data
        frame_data = np.empty((len(self.fields), len(self.sids))) * np.nan

        for j, sid in enumerate(self.sids):
            sid_data = data.get(sid)
            if not sid_data:
                continue
            if algo_dt != sid_data['dt']:
                continue
            for i, field in enumerate(self.fields):
                frame_data[i, j] = sid_data.get(field, np.nan)

        return pd.DataFrame(
            frame_data,
            index=self.fields.copy(),
            columns=self.sids.copy(),
        )
tracker.py (project: zipline-chinese, author: zhanghan1990)
def update_dividends(self, new_dividends):
        """
        Update our dividend frame with new dividends.  @new_dividends should be
        a DataFrame with columns containing at least the entries in
        zipline.protocol.DIVIDEND_FIELDS.
        """

        # Mark each new dividend with a unique integer id.  This ensures that
        # we can differentiate dividends whose date/sid fields are otherwise
        # identical.
        new_dividends['id'] = np.arange(
            self._dividend_count,
            self._dividend_count + len(new_dividends),
        )
        self._dividend_count += len(new_dividends)

        self.dividend_frame = sort_values(pd.concat(
            [self.dividend_frame, new_dividends]
        ), ['pay_date', 'ex_date']).set_index('id', drop=False)
factory.py (project: zipline-chinese, author: zhanghan1990)
def create_test_panel_ohlc_source(sim_params, env):
    start = sim_params.first_open \
        if sim_params else pd.datetime(1990, 1, 3, 0, 0, 0, 0, pytz.utc)

    end = sim_params.last_close \
        if sim_params else pd.datetime(1990, 1, 8, 0, 0, 0, 0, pytz.utc)

    index = env.days_in_range(start, end)
    price = np.arange(0, len(index)) + 100
    high = price * 1.05
    low = price * 0.95
    open_ = price + .1 * (price % 2 - .5)
    volume = np.ones(len(index)) * 1000
    arbitrary = np.ones(len(index))

    df = pd.DataFrame({'price': price,
                       'high': high,
                       'low': low,
                       'open': open_,
                       'volume': volume,
                       'arbitrary': arbitrary},
                      index=index)
    panel = pd.Panel.from_dict({0: df})

    return DataPanelSource(panel), panel
data.py (project: zipline-chinese, author: zhanghan1990)
def add_frame(self, tick, frame, minor_axis=None, items=None):
        """
        """
        if self._pos == self.cap:
            self._roll_data()

        if isinstance(frame, pd.DataFrame):
            minor_axis = frame.columns
            items = frame.index

        if set(minor_axis).difference(set(self.minor_axis)) or \
                set(items).difference(set(self.items)):
            self._update_buffer(frame)

        vals = frame.T.astype(self.dtype)
        self.buffer.loc[:, self._pos, :] = vals
        self.date_buf[self._pos] = tick

        self._pos += 1
mongodb.py (project: zipline-chinese, author: zhanghan1990)
def getindexdaily(self,code,start,end):
        total=[]
        startdate = datetime.datetime.strptime(start, "%Y-%m-%d")
        enddate=datetime.datetime.strptime(end, "%Y-%m-%d")
        series={"date":[],"open":[],"close":[],"high":[],"low":[],"volume":[]}

        for stockdaily in self.index[code].find({"date": {"$gte": startdate,"$lt":enddate}}).sort("date"):
            series["date"].append(stockdaily["date"])
            series["open"].append(stockdaily["open"])
            series["close"].append(stockdaily["close"])
            series["high"].append(stockdaily["high"])
            series["low"].append(stockdaily["low"])
            series["volume"].append(stockdaily["volume"])

        totaldata=zip(series['date'],series['open'],series['close'],series['high'],series['low'],series['volume'])
        df = pd.DataFrame(list(totaldata), columns=['date', 'open', 'close', 'high', 'low', 'volume'])
        df.index = df.date
        return df
mongodb.py (project: zipline-chinese, author: zhanghan1990)
def read_treasure_from_mongodb(self,start,end):

        startdate=start
        enddate=end
        series={"Time Period":[],"1month":[],"3month":[],"6month":[],"1year":[],"2year":[],"3year":[],"5year":[],"7year":[],"10year":[],"20year":[],"30year":[]}
        if type(start) is types.StringType:
            startdate = datetime.datetime.strptime(start, "%Y-%m-%d")
        if type(end) is types.StringType:
            enddate=datetime.datetime.strptime(end, "%Y-%m-%d")
        for treasuredaily in self.treasure['treasure'].find({"Time Period": {"$gte": startdate,"$lt":enddate}}).sort("Time Period"):
            series["Time Period"].append(treasuredaily["Time Period"])
            series["1month"].append(treasuredaily["1month"])
            series["3month"].append(treasuredaily["3month"])
            series["6month"].append(treasuredaily["6month"])
            series["1year"].append(treasuredaily["1year"])
            series["2year"].append(treasuredaily["2year"])
            series["3year"].append(treasuredaily["3year"])
            series["5year"].append(treasuredaily["5year"])
            series["7year"].append(treasuredaily["7year"])
            series["10year"].append(treasuredaily["10year"])
            series["20year"].append(treasuredaily["20year"])
            series["30year"].append(treasuredaily["30year"])
        totaldata = zip(series["1month"], series["3month"], series["6month"],
                        series["1year"], series["2year"], series["3year"],
                        series["5year"], series["7year"], series["10year"],
                        series["20year"], series["30year"])
        df = pd.DataFrame(data=list(totaldata), index=series["Time Period"],
                          columns=['1month', '3month', '6month', '1year',
                                   '2year', '3year', '5year', '7year',
                                   '10year', '20year', '30year'])
        return df.sort_index().tz_localize('UTC')
synthetic.py (project: zipline-chinese, author: zhanghan1990)
def __init__(self, constants, dates, sids):
        loaders = {}
        for column, const in iteritems(constants):
            frame = DataFrame(
                const,
                index=dates,
                columns=sids,
                dtype=column.dtype,
            )
            loaders[column] = DataFrameLoader(
                column=column,
                baseline=frame,
                adjustments=None,
            )

        self._loaders = loaders
test_assets.py (project: zipline-chinese, author: zhanghan1990)
def test_consume_metadata(self):

        # Test dict consumption
        dict_to_consume = {0: {'symbol': 'PLAY'},
                           1: {'symbol': 'MSFT'}}
        self.env.write_data(equities_data=dict_to_consume)
        finder = self.asset_finder_type(self.env.engine)

        equity = finder.retrieve_asset(0)
        self.assertIsInstance(equity, Equity)
        self.assertEqual('PLAY', equity.symbol)

        # Test dataframe consumption
        df = pd.DataFrame(columns=['asset_name', 'exchange'], index=[0, 1])
        df['asset_name'][0] = "Dave'N'Busters"
        df['exchange'][0] = "NASDAQ"
        df['asset_name'][1] = "Microsoft"
        df['exchange'][1] = "NYSE"
        self.env = TradingEnvironment(load=noop_load)
        self.env.write_data(equities_df=df)
        finder = self.asset_finder_type(self.env.engine)
        self.assertEqual('NASDAQ', finder.retrieve_asset(0).exchange)
        self.assertEqual('Microsoft', finder.retrieve_asset(1).asset_name)
test_algorithm.py (project: zipline-chinese, author: zhanghan1990)
def setUp(self):
        self.env = TradingEnvironment()
        self.days = self.env.trading_days[:5]
        self.panel = pd.Panel({1: pd.DataFrame({
            'price': [1, 1, 2, 4, 8], 'volume': [1e9, 1e9, 1e9, 1e9, 0],
            'type': [DATASOURCE_TYPE.TRADE,
                     DATASOURCE_TYPE.TRADE,
                     DATASOURCE_TYPE.TRADE,
                     DATASOURCE_TYPE.TRADE,
                     DATASOURCE_TYPE.CLOSE_POSITION]},
            index=self.days)
        })
        self.no_close_panel = pd.Panel({1: pd.DataFrame({
            'price': [1, 1, 2, 4, 8], 'volume': [1e9, 1e9, 1e9, 1e9, 1e9],
            'type': [DATASOURCE_TYPE.TRADE,
                     DATASOURCE_TYPE.TRADE,
                     DATASOURCE_TYPE.TRADE,
                     DATASOURCE_TYPE.TRADE,
                     DATASOURCE_TYPE.TRADE]},
            index=self.days)
        })
test_munge.py (project: zipline-chinese, author: zhanghan1990)
def test_bfill(self):
        # test ndim=1
        N = 100
        s = pd.Series(np.random.randn(N))
        mask = random.sample(range(N), 10)
        s.iloc[mask] = np.nan

        correct = s.bfill().values
        test = bfill(s.values)
        assert_almost_equal(correct, test)

        # test ndim=2
        df = pd.DataFrame(np.random.randn(N, N))
        df.iloc[mask] = np.nan
        correct = df.bfill().values
        test = bfill(df.values)
        assert_almost_equal(correct, test)
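The bfill function under test is not shown here. One possible NumPy implementation consistent with the test (backfill along axis 0, leaving trailing NaNs untouched), written as a sketch rather than zipline's actual code:

import numpy as np

def bfill(values):
    # Backward-fill NaNs down axis 0 by reversing, forward-filling, reversing back.
    arr = np.asarray(values, dtype=float)
    squeeze = arr.ndim == 1
    if squeeze:
        arr = arr[:, None]
    rev = arr[::-1]
    valid = ~np.isnan(rev)
    # index of the most recent valid row at or above each position (post-reversal)
    idx = np.where(valid, np.arange(rev.shape[0])[:, None], 0)
    np.maximum.accumulate(idx, axis=0, out=idx)
    filled = rev[idx, np.arange(rev.shape[1])[None, :]][::-1]
    return filled[:, 0] if squeeze else filled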

