def __init__(self, code, sql=False):
    # Shenzhen codes start with 0 or 3, Shanghai codes start with 6
    if int(code[0]) in (0, 3):
        name = "sz" + code
    elif int(code[0]) == 6:
        name = "sh" + code
    else:
        raise ValueError("unrecognised stock code: " + code)
    if sql:
        self.sharedf = ShareClass().GetDayData(code)
    else:
        conn = create_engine(
            'mysql://' + config.user + ':' + config.password + '@' + config.ip + '/daydata?charset=utf8')
        query = 'select * from ' + name + '_tencent;'  # one table per stock
        self.sharedf = pandas.read_sql(query, con=conn)
    self.name = name
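This snippet, like several below, splices values into SQL by string concatenation. A minimal sketch of the parameterized alternative, assuming a SQLAlchemy engine (connection details and stock code are made up; table names cannot be bound as parameters, so the sz/sh prefix check above still matters):

import pandas
from sqlalchemy import create_engine, text

engine = create_engine('mysql://user:password@localhost/daydata?charset=utf8')
name = 'sz000001'  # hypothetical, pre-validated table prefix
df = pandas.read_sql(
    text('select * from ' + name + '_tencent where datetime >= :start'),
    con=engine, params={'start': '2016-01-01'})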
def loadData(self):
# Load the toilet collection data to pandas
collects = pd.read_sql('SELECT * FROM premodeling.toiletcollection', self.conn, coerce_float=True, params=None)
pprint.pprint(collects.keys())
collects = collects[['ToiletID','ToiletExID','Collection_Date','Area','Feces_kg_day','year','month']]
pprint.pprint(collects.keys())
# Load the density data to pandas
density = pd.read_sql('SELECT * FROM premodeling.toiletdensity', self.conn, coerce_float=True, params=None)
pprint.pprint(density.keys())
# Return the data
self.collects = collects
self.density = density
return(collects, density)
def as_dataframe(self, model_group_ids):
"""Return model-group-id subset of table as dataframe
Args:
model_group_ids (list) the desired model group ids
Returns: (pandas.DataFrame) The data from the table corresponding
to those model group ids
"""
return pd.read_sql(
'select * from {} where model_group_id in ({})'.format(
self.distance_table,
str_in_sql(model_group_ids)
),
self.db_engine
)
def simple_moving_average(self, period=50, column='adj_close'):
    # TODO: read a cached SMA from the 'sma_test' table instead of recomputing
    # (see the parameterized sketch below)
    sma_ts = pd.Series(
        self.ohlcv[column].rolling(center=False, window=period,
                                   min_periods=period - 1).mean()).dropna()
    return sma_ts
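A sketch of what the cached lookup in the TODO could look like, assuming a SQLAlchemy connection and an sma_test table with asset_id and date columns (all names hypothetical):

import pandas as pd
from sqlalchemy import text

def load_cached_sma(conn, asset_id):
    # a bound parameter replaces string interpolation; parse_dates covers the TODO
    stmt = text('SELECT * FROM sma_test WHERE asset_id = :asset_id')
    return pd.read_sql(stmt, con=conn, params={'asset_id': asset_id},
                       parse_dates=['date'])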
def standardize_variable_names(table, RULES):
    """
    Standardize the variable names in a table.
    PARAM DataFrame table: A table returned from pd.read_sql
    PARAM list[tuples] RULES: A list of (pattern, replacement) tuples applied with re.sub
    RET table
    """
    variableNames = list(table.columns.values)
    standardizedNames = {}  # Pandas renames columns with a dictionary object
    for v in variableNames:
        f = v
        for r in RULES:
            f = re.sub(r[0], r[1], f)
        print('%s to %s' % (v, f))
        standardizedNames[v] = f
    table = table.rename(columns=standardizedNames)
    return table
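Usage sketch with a made-up rule set and table; each (pattern, replacement) rule is applied in order to every column name:

import re
import pandas as pd

RULES = [(r'\s+', '_'), (r'^Toilet', 'toilet_')]
table = pd.DataFrame(columns=['ToiletID', 'Collection Date'])
table = standardize_variable_names(table, RULES)
print(list(table.columns))  # ['toilet_ID', 'Collection_Date']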
def export_data_unresolved():
db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
db_work_view = db.get_work_view()
connection = db_work_view._db_connection
df_clickstream = pn.read_csv('/home/ddimitrov/data/enwiki201608_unresolved_redirects/2016_08_clickstream_unresolved.tsv', sep='\t', error_bad_lines=False)
df_clickstream['prev']=df_clickstream['prev'].str.replace('_', ' ')
df_clickstream['curr']=df_clickstream['curr'].str.replace('_', ' ')
df_clickstream['curr_unresolved']=df_clickstream['curr_unresolved'].str.replace('_', ' ')
df_redirects_candidates = pn.read_sql('select * from redirects_candidates_sample', connection)
    sample_unresolved = pn.merge(df_redirects_candidates, df_clickstream, how='left', left_on=['source_article_name', 'target_article_name'], right_on=['prev', 'curr_unresolved'])
    sample_unresolved['n'].fillna(0, inplace=True)
    sample_unresolved.to_csv('/home/ddimitrov/data/enwiki201608_unresolved_redirects/data_unresolved.tsv', sep='\t', encoding="utf-8")
def pickle_correlations_zeros_january():
db = MySQLDatabase(DATABASE_HOST, DATABASE_USER, DATABASE_PASSWORD, DATABASE_NAME)
conn = db._create_connection()
    print('read')
    df = pd.read_sql('select source_article_id, target_article_id from link_features', conn)
    print('loaded links')
    df2 = pd.read_sql('select prev_id, curr_id, counts from clickstream_derived_en_201501 where link_type_derived= "internal-link";', conn)
    print('loaded counts')
    result = pd.merge(df, df2, how='left', left_on=['source_article_id', 'target_article_id'], right_on=['prev_id', 'curr_id'])
    print('merged counts')
    print(result)
    article_counts = result.groupby(by=["target_article_id"])['counts'].sum().reset_index()
    article_counts['counts'].fillna(0.0, inplace=True)
    print(article_counts)
    print('write to file')
    article_counts[["target_article_id", "counts"]].to_csv(TMP + 'january_article_counts.tsv', sep='\t', index=False)
def _limit_and_df(self, query, limit, as_df=False):
    """Adds a limit (limit==None means no limit) to any query and allows
    returning the result as a pandas.DataFrame
    :param bool as_df: if set to True, results are returned as a pandas.DataFrame
    :param `sqlalchemy.orm.query.Query` query: SQLAlchemy query
    :param int or tuple[int] limit: maximum number of results, or a
        (page, page_size) tuple for pagination
    :return: query result of pyuniprot.manager.models.XY objects
    """
    if limit:
        if isinstance(limit, int):
            query = query.limit(limit)
        elif (isinstance(limit, Iterable) and len(limit) == 2
                and all(isinstance(x, int) for x in limit)):
            page, page_size = limit
            query = query.limit(page_size)
            query = query.offset(page * page_size)
    if as_df:
        results = read_sql(query.statement, self.engine)
    else:
        results = query.all()
    return results
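Usage sketch (manager and query stand in for a pyuniprot manager instance and one of its queries): an int caps the number of results, while a (page, page_size) tuple paginates.

entries = manager._limit_and_df(query, limit=10)              # first 10 ORM objects
df = manager._limit_and_df(query, limit=(2, 50), as_df=True)  # third page of 50, as a DataFrame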
def get_metrics(self):
"""Pull requested evaluation metrics for each model meeting the criteria
specified in __init__.
:returns: evaluation metrics for all models in the batches
:rtype: pandas.DataFrame
"""
metrics_query = """
SELECT DISTINCT
metrics.metric || '_' || metrics.parameter as p_metric,
metrics.value,
metrics.unique_timestamp
FROM
output.metrics as metrics
WHERE
metric || '_' || parameter in {0} AND
unique_timestamp in {1};
""".format(tuple(self.most_important_metrics),
tuple(self.models.index.astype(str).values))
return(pd.read_sql(metrics_query, self.engine))
def get_features(self):
""" Pull features from the database.
:returns: features
:rtype: pandas DataFrame
"""
query = """
SELECT
*
FROM
{0}
WHERE
{1} is not null;
""".format(self.model['test_table_name'][0],
self.model['labelling'].astype(str)[0])
return(pd.read_sql(query, self.engine))
def get_mh_appt_dates(self, people):
""" Get the dates of mental health appointments for people labeled
correctly by the model.
:param people: list of ids to search for
:type people: tuple
:returns: mental health appointment data
:rtype: pandas DataFrame
"""
mh_services_query = """
select
dedupe_id,
svc_date
from
clean.jocomentalhealthservices
where
svc_date < '{0}' and
dedupe_id in {1}
""".format(self.features['labeling_end_date'].unique()[0], people)
return(pd.read_sql(mh_services_query, self.engine).set_index('dedupe_id'))
def get_y_values(self):
    """ Return a dataframe containing the true classes and predicted
    probabilities for each case, sorted by descending probability.
    :returns: dataframe of true classes and predicted probabilities
    :rtype: pandas DataFrame
    """
    y_query = """
        SELECT
            label as y_true,
            prediction_prob as scores
        FROM
            output.predictions
        WHERE
            unique_timestamp = '{0}';
    """.format(self.model_id)
    y_values = pd.read_sql(y_query, self.engine).sort_values(by='scores',
                                                             ascending=False)
    return y_values
def tables_exist(fake_today,prediction_window,feature_timestamp):
table_list = pd.read_sql('''
SELECT
table_name
FROM
information_schema.tables
WHERE
table_schema = 'feature_tables' AND
table_name LIKE 'features_train_{}_{}_at_{}'
ORDER BY
table_name desc
LIMIT
1;
'''.format(fake_today, prediction_window, feature_timestamp), con)
    return len(table_list) > 0
def load_table(self, train_or_test, feature_timestamp):
# get feature table name
if feature_timestamp == '%':
feature_timestamp = pd.read_sql('''
SELECT
split_part(table_name, '_at_', 2)
FROM
information_schema.tables
WHERE
table_schema = 'feature_tables'
ORDER BY
1 desc
LIMIT
1;
''', con).iat[0,0]
feature_table_name = ('{}."features_{}_{}_{}_at_{}"').format(config_db['feature_schema'],
train_or_test, self.fake_today, self.prediction_window,
feature_timestamp)
# load table
    print('loading {}'.format(feature_table_name))
query = ('SELECT * FROM {}').format(feature_table_name)
full_feature_table = pd.read_sql(query, con = con)
return full_feature_table, feature_table_name
def by_scenario(self, name):
"""
"""
ormclass = self._mapped[name]
query = self.session.query(ormclass).filter(
ormclass.scn_name == self.scn_name)
if self.version:
query = query.filter(ormclass.version == self.version)
# TODO: Better handled in db
if name == 'Transformer':
name = 'Trafo'
df = pd.read_sql(query.statement,
self.session.bind,
index_col=name.lower() + '_id')
if 'source' in df:
df.source = df.source.map(self.id_to_source())
return df
def is_part_of_speech(self, pos):
if hasattr(self.resource, QUERY_ITEM_POS):
current_token = tokens.COCAToken(pos, replace=False)
rc_feature = getattr(self.resource, QUERY_ITEM_POS)
_, table, _ = self.resource.split_resource_feature(rc_feature)
S = "SELECT {} FROM {} WHERE {} {} '{}' LIMIT 1".format(
getattr(self.resource, "{}_id".format(table)),
getattr(self.resource, "{}_table".format(table)),
getattr(self.resource, rc_feature),
self.resource.get_operator(current_token),
pos)
engine = self.resource.get_engine()
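            # literal % must be doubled so the MySQL driver does not read it
            # as a format placeholder (e.g. in LIKE patterns)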
df = pd.read_sql(S.replace("%", "%%"), engine)
engine.dispose()
return len(df.index) > 0
else:
return False
def get_unique(self):
if not self.db_name:
return
sql = sqlhelper.sql_url(options.cfg.current_server, self.db_name)
if self._uniques:
S = "SELECT DISTINCT {} FROM {}".format(self.column, self.table)
self.df = pd.read_sql(S, sqlalchemy.create_engine(sql))
self.df = self.df.sort_values(self.column, ascending=True)
else:
S = "SELECT {} FROM {}".format(self.column, self.table)
self.df = pd.read_sql(S, sqlalchemy.create_engine(sql))
items = (self.df[self.column].apply(utf8)
.apply(QtWidgets.QTableWidgetItem))
self.ui.tableWidget.setRowCount(len(items))
self.ui.tableWidget.setColumnCount(1)
for row, item in enumerate(items):
self.ui.tableWidget.setItem(row, 0, item)
def read_sql(self,sql,chunksize=None,return_generator=True):
    '''
    Read a SQL query into a DataFrame.
    :param sql: SQL query string
    :param chunksize: int, number of rows per chunk; if None, read everything at once
    :param return_generator: bool, if False and chunksize is an int, the chunks
        are concatenated into a single DataFrame before returning
    :return: a DataFrame, or a generator of DataFrames when chunksize is set
        and return_generator is True
    '''
if chunksize is not None and chunksize<=0:
chunksize=None
result=pd.read_sql(sql,self.engine,chunksize=chunksize)
if return_generator:
return result
else:
if chunksize is None:
return result
else:
result=list(result)
if len(result)==0:
return pd.DataFrame()
else:
result=pd.concat(result,axis=0)
return result
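Usage sketch of this wrapper (db stands in for an instance of the surrounding class; the table name is illustrative):

# stream the table in 50,000-row chunks instead of loading it whole
for chunk in db.read_sql('select * from trades', chunksize=50000):
    print(len(chunk))

# the same query collected into a single DataFrame
df = db.read_sql('select * from trades', chunksize=50000, return_generator=False)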
def getData(con):
query = 'SELECT {} FROM {}'.format(
','.join(['"{}"'.format(x) for x in DOWNLOAD_COLS]),
DOWNLOAD_TABLE)
if WHERE:
query += ' WHERE {}'.format(WHERE)
if LIMIT:
query += ' LIMIT {}'.format(LIMIT)
print(query)
    df = pd.read_sql(sql=query, con=con)
return df
def getData(con, lo):
query = DOWNLOAD_QUERY.format(
','.join(['"{}"'.format(x) for x in DOWNLOAD_COLS]),
DOWNLOAD_TABLE,
LELIMIT,
lo)
print(query)
    df = pd.read_sql(sql=query, con=con)
df.columns = ['canonical_smiles']
# fingerDf = computeDescriptors(df)
fingerDf = computeMACCS(df)
mergedDf = pd.concat([df, fingerDf], axis=1)
return mergedDf
def retrieve_data(self, date, world_or_region=WORLD, table_name='trending_places'):
"""
Fetched records saved in a Database
Parameters
----------
date
world_or_region
table_name
Returns
-------
pandas data frame
"""
with self.con:
return pd.read_sql(FETCH_QUERY % (table_name, date, world_or_region),
self.con, parse_dates=['last_day'])
def _update(stock, conn):
    try:
        print("update ----- :", stock)
        query = "select * from '%s' order by date" % stock
        df = pd.read_sql(query, conn)
        df = df.set_index('date')
        print("sql saved:", df.tail(1), df.iloc[-1], df.iloc[-1].name)
        # roll weekend dates back to the most recent weekday
        if dt.now().weekday() == 5:
            today = str(pd.Timestamp(dt.now()) - pd.Timedelta(days=1))[:10]
        elif dt.now().weekday() == 6:
            today = str(pd.Timestamp(dt.now()) - pd.Timedelta(days=2))[:10]
        else:
            today = str(pd.Timestamp(dt.now()))[:10]
        print("today:", today)
        if today != df.iloc[-1].name[:10]:
            df = ts.get_h_data(stock, start=df.iloc[-1].name[:10], retry_count=5, pause=1)
            print("read from tu:", df.head(1))
            df[['open', 'high', 'close', 'low', 'volume']].to_sql(stock, conn, if_exists='append')
            import time
            time.sleep(10)
    except Exception as arg:
        print("exceptionu:", stock, arg)
        errorlist.append(stock)
def _clean(stock, conn):
    try:
        print("clean ------ :", stock)
        query = "select * from '%s' order by date" % stock
        df = pd.read_sql(query, conn)
        print("before", df.tail(5))
        # drop duplicate rows, keeping the latest rowid per date
        cur = conn.cursor()
        query = "delete from '%s' where rowid not in (select max(rowid) from '%s' group by date)" % (stock, stock)
        cur.execute(query)
        conn.commit()
        query = "select * from '%s' order by date" % stock
        df = pd.read_sql(query, conn)
        print("after", df.tail(5))
    except Exception as arg:
        print("exceptionc:", stock, arg)
        raise SystemExit(-1)
def RNNPredict(mask, trigger_date=None, source='History.db'):
    class RNNPredict(CustomFactor):
        inputs = []
        window_length = 1

        def compute(self, today, assets, out, *inputs):
            # the factor is evaluated on every backtest day; only do real work
            # on the trigger date and return immediately otherwise
            if trigger_date is not None and today != pd.Timestamp(trigger_date, tz='UTC'):
                return
            if os.path.splitext(source)[1] == '.db':
                # check_same_thread=False lets the connection be used from other threads
                conn = sqlite3.connect(source, check_same_thread=False)
                query = "select * from predict where date >= '%s' order by date limit 1 " % str(today)[:19]
                df = pd.read_sql(query, conn)
                df = df.set_index('date')
                conn.close()
            elif os.path.splitext(source)[1] == '.csv':
                df = pd.read_csv("predict.csv", index_col=0, parse_dates=True)
                df = df[df.index >= pd.Timestamp(str(today))]
                print(today, df)
            else:
                raise ValueError
            new_index = [sid(asset).symbol + "_return" for asset in assets]
            df = df.reindex(columns=new_index)
            out[:] = df.iloc[0].values
            print("RNNpredict:", today, out)
    return RNNPredict(mask=mask)
def mysql_into_df(self):
# first Extract all the table names
# connect = self.db_connection
df_tables = pd.read_sql('show tables;', self.db_connection)
# store the tables in a dictionary
d = {}
col_name = df_tables.columns[0]
# load individual table into the dictionary
for table in df_tables[col_name]:
key = table
value = pd.read_sql('SELECT * FROM ' + table +
';', self.db_connection)
d[key] = value
self.dictTables = d
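Once mysql_into_df has run, every table is reachable by name (loader stands in for an instance of the surrounding class):

loader.mysql_into_df()
for table_name, frame in loader.dictTables.items():
    print(table_name, frame.shape)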
    # Load the qualification table separately to infer the timestamp
def get_lowest(self, code, date, current_date):
    '''
    Get the lowest price of a stock between the start of the given year
    and current_date.
    :param code: stock code
    :param date: year to start from, e.g. '2016'
    :return: (datetime, lowest price), or (None, None) on failure
    '''
    date = date + '-01-01'
    cmd = 'select * from `{}` where datetime > \'{}\' and datetime <\'{}\''.format(code, date, current_date)
    try:
        df = pd.read_sql(cmd, history_engine, index_col='index')
    except Exception as e:
        print(e)
        return None, None
    # no rows found in this period
    if len(df) < 1:
        return None, None
    df['low'] = df['low'].astype('float64')
    idx = df['low'].idxmin()
    min_date = df.loc[idx]
    return min_date['datetime'], min_date['low']
def standard_sessions(start_date, end_date):
"""Return the datetimes corresponding to the trading sessions in a specified
time period during which the stock market was officially open.
Parameters
----------
start_date: Datetime object.
The datetime indicating the beginning of the trading time period.
    end_date: Datetime object.
The datetime indicating the ending of the trading time period.
"""
qry = """
SELECT p.datetime FROM prices AS p JOIN symbols as s ON p.symbol_id = s.id
WHERE s.symbol='^GSPC' AND p.datetime >= '{}' AND p.datetime <= '{}'
""".format(start_date, end_date)
return pd.read_sql(qry, conn)
make_dataset.py, from the UK_Imbalance_Price_Forecasting project by ADGEfficiency:
def query_sql(db_path, table_name):
"""
Uses pandas to pull data from our sqlite database
args
db_path (str) : location of the database
table_name (str) : name of the table we want
returns
data (pd.DataFrame) :
"""
    print('Pulling data for table {} from {}'.format(table_name, db_path))
# connect to our database
conn = sqlite3.connect(db_path)
# pull data by selecting the entire table
data = pd.read_sql(sql='SELECT * from '+str(table_name), con=conn)
data.set_index('index', drop=True, inplace=True)
# close the connection
conn.close()
return data
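For example (the path and table name are illustrative):

df = query_sql(db_path='data/database.sqlite', table_name='imbalance_price')
print(df.head())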
def matches():
singles = pd.read_sql('select * from game where deleted = 0', con=engine)
doubles = pd.read_sql('select * from doubles_game where deleted = 0', con=engine)
tz = pytz.timezone('America/New_York')
for frame in [singles, doubles]:
        frame['timestamp'] = frame['timestamp'].apply(datetime.utcfromtimestamp)
        # convert the naive UTC datetimes to Eastern time rather than just relabeling them
        frame['timestamp'] = frame['timestamp'].apply(
            lambda ts: pytz.utc.localize(ts).astimezone(tz))
frame['timestamp'] = frame['timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S %Z')
singles = singles.to_dict('records')
doubles = doubles.to_dict('records')
return render_template('gamelog.html', singles_games=singles, doubles_games=doubles)
def read(self, *fields, **conditionals):
"""
Pull raw data into a dataframe. If no conditions are passed, pull the whole table.
        Otherwise, filter based on the conditions specified (currently only equality).
"""
# Parse the list of fields to return
field_query = self._parse_fields(*fields)
# Parse the conditions
conditionals_query = self._parse_conditionals(**conditionals)
query = "SELECT {} FROM {} {}".format(field_query, self.table, conditionals_query)
if "DISTINCT" not in query:
query += " ORDER BY {}".format(self.timestamp_field)
return pd.read_sql(query, self.db)
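Usage sketch (store stands in for an instance of the surrounding class; field and condition names are illustrative): positional arguments select columns, keyword arguments become equality filters.

# SELECT price, volume FROM <table> WHERE symbol = 'AAPL' ORDER BY <timestamp field>
df = store.read('price', 'volume', symbol='AAPL')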