def tweets_between(self, start, end):
"""
    Retrieve tweets between the start and end datetimes. Returns a
    ``TweetBin`` holding the matching rows.
:param start: The start of the search range.
:type start: datetime
:param end: The end of the search range.
:type end: datetime
"""
conn = sqlite3.connect(self._db, detect_types=sqlite3.PARSE_DECLTYPES)
df = pd.read_sql_query(
'SELECT * FROM tweets WHERE created_at > ? AND created_at <= ?',
conn, params=(start, end), parse_dates=['created_at']
)
return TweetBin(df, start, end)
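# A minimal, self-contained sketch (not from the original project) of the technique
# used above: parameterised datetime-range filtering with pandas.read_sql_query
# against SQLite. The table name, columns, and sample row are illustrative only.
def example_tweets_between():
    import sqlite3
    from datetime import datetime
    import pandas as pd

    conn = sqlite3.connect(':memory:', detect_types=sqlite3.PARSE_DECLTYPES)
    conn.execute('CREATE TABLE tweets (id INTEGER PRIMARY KEY, text TEXT, created_at TIMESTAMP)')
    conn.execute('INSERT INTO tweets (text, created_at) VALUES (?, ?)', ('hello', datetime(2021, 1, 15)))
    conn.commit()
    # Bind the range limits as query parameters and parse the column back into datetimes
    df = pd.read_sql_query(
        'SELECT * FROM tweets WHERE created_at > ? AND created_at <= ?',
        conn, params=(datetime(2021, 1, 1), datetime(2021, 2, 1)),
        parse_dates=['created_at'],
    )
    return df  # one matching row, with created_at as a datetime64 column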
def get_related_id(session, engine, parameters):
category = parameters.split(":",1)[0]
sql_query=session.query(ALLOWED_CLASSES[category])
    for field_value in parameters.split(":",1)[1].split("&&"):
        field, value = field_value.split(".",1)
        if ":" in value:
            values = get_related_id(session, engine, value)
            for value in values:
                value = int(value) # the value is returned as a numpy object
                if field[-4:] == "date": # support for date entry matching (the values have to be passed as strings but matched as datetimes)
                    value = datetime.datetime(*[int(i) for i in value.split(",")])
                sql_query = sql_query.filter(getattr(ALLOWED_CLASSES[category], field)==value)
        else:
            if field[-4:] == "date": # support for date entry matching (the values have to be passed as strings but matched as datetimes)
                value = datetime.datetime(*[int(i) for i in value.split(",")])
            sql_query = sql_query.filter(getattr(ALLOWED_CLASSES[category], field)==value)
mystring = sql_query.statement
mydf = pd.read_sql_query(mystring,engine)
mydf = mydf.T.groupby(level=0).first().T #awkward hack to deal with polymorphic tables returning multiple IDs
related_table_ids = mydf["id"]
input_values = list(related_table_ids)
if input_values == []:
raise BaseException("No entry was found with a value of \""+str(value)+"\" on the \""+field+"\" column of the \""+category+"\" CATEGORY, in the database.")
session.close()
engine.dispose()
return input_values
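# Hedged illustration (not from the original project) of the parameter grammar that
# get_related_id accepts. The category and field names are made up; only the
# "Category:field.value" form, the "&&" separator, the recursive ":" values, and the
# comma-separated date convention are taken from the code above.
def example_get_related_id_parameters():
    example = "Animal:cage_id.Cage:id.42&&birth_date.2016,11,24"
    category, rest = example.split(":", 1)
    for field_value in rest.split("&&"):
        field, value = field_value.split(".", 1)
        print(category, field, value)
    # -> Animal cage_id Cage:id.42     (":" in the value triggers a recursive lookup)
    # -> Animal birth_date 2016,11,24  (a "*date" field is parsed into datetime(2016, 11, 24))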
def get_for_protocolize(db_path, class_name, code):
"""Return a dataframe containing a specific entry from a given class name, joined with its related tables up to three levels down.
"""
session, engine = load_session(db_path)
cols = []
joins = []
classobject = ALLOWED_CLASSES[class_name]
insp = sqlalchemy.inspection.inspect(classobject)
for name, col in insp.columns.items():
cols.append(col.label(name))
for name, rel in insp.relationships.items():
alias = aliased(rel.mapper.class_, name=name)
joins.append((alias, rel.class_attribute))
for col_name, col in sqlalchemy.inspection.inspect(rel.mapper).columns.items():
#the id column causes double entries, as it is mapped once on the parent table (related_table_id) and once on the child table (table_id)
if col.key != "id":
aliased_col = getattr(alias, col.key)
cols.append(aliased_col.label("{}_{}".format(name, col_name)))
sub_insp = sqlalchemy.inspection.inspect(rel.mapper.class_)
for sub_name, sub_rel in sub_insp.relationships.items():
if "contains" not in sub_name:
sub_alias = aliased(sub_rel.mapper.class_, name=name+"_"+sub_name)
joins.append((sub_alias, sub_rel.class_attribute))
for sub_col_name, sub_col in sqlalchemy.inspection.inspect(sub_rel.mapper).columns.items():
#the id column causes double entries, as it is mapped once on the parent table (related_table_id) and once on the child table (table_id)
if sub_col.key != "id":
sub_aliased_col = getattr(sub_alias, sub_col.key)
cols.append(sub_aliased_col.label("{}_{}_{}".format(name, sub_name, sub_col_name)))
sql_query = session.query(*cols).select_from(classobject)
for join in joins:
sql_query = sql_query.outerjoin(*join)
sql_query = sql_query.filter(classobject.code == code)
mystring = sql_query.statement
mydf = pd.read_sql_query(mystring,engine)
session.close()
engine.dispose()
return mydf
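# Self-contained sketch (not the project's schema) of the labelling and outer-join
# pattern used in get_for_protocolize: columns of a mapped class and of its related
# classes are labelled with prefixed names, joined through aliases, and the compiled
# statement is read into a DataFrame. The Cage/Animal classes below are hypothetical.
def example_labelled_relationship_query():
    import pandas as pd
    from sqlalchemy import Column, ForeignKey, Integer, String, create_engine, inspect
    from sqlalchemy.orm import Session, aliased, declarative_base, relationship

    Base = declarative_base()

    class Cage(Base):
        __tablename__ = 'cages'
        id = Column(Integer, primary_key=True)
        location = Column(String)

    class Animal(Base):
        __tablename__ = 'animals'
        id = Column(Integer, primary_key=True)
        code = Column(String)
        cage_id = Column(Integer, ForeignKey('cages.id'))
        cage = relationship(Cage)

    engine = create_engine('sqlite://')
    Base.metadata.create_all(engine)
    session = Session(engine)
    session.add(Animal(code='a1', cage=Cage(location='rack 3')))
    session.commit()

    insp = inspect(Animal)
    cols = [col.label(name) for name, col in insp.columns.items()]
    joins = []
    for name, rel in insp.relationships.items():
        alias = aliased(rel.mapper.class_, name=name)
        joins.append((alias, rel.class_attribute))
        for col_name, col in inspect(rel.mapper).columns.items():
            if col.key != 'id':  # skip the related id column to avoid duplicate entries
                cols.append(getattr(alias, col.key).label('{}_{}'.format(name, col_name)))

    sql_query = session.query(*cols).select_from(Animal)
    for join in joins:
        sql_query = sql_query.outerjoin(*join)
    sql_query = sql_query.filter(Animal.code == 'a1')
    df = pd.read_sql_query(sql_query.statement, engine)
    session.close()
    engine.dispose()
    return df  # columns: id, code, cage_id, cage_location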
def DataSetAnalytes(self, data_set, TableProcessingToReturn='Both_PeakFinding_TargetAnalyteFinding'):
    # Query all foreign key columns in the Sample table and return a list of all
    # analytes that are found in a given data set.
column_string = self.createQueryColumnsStr(TableProcessingToReturn)
# Build SQL statement & Query sample table for all foreign key columns of a given data set
sql_statement = "SELECT %s FROM Sample WHERE Sample.DataSetName = '%s';" % (column_string, data_set)
df = pd.read_sql_query(sql_statement, self.conn)
return self.GetFoundAnalytesLst(df)
def Get_100fgArea(self, table, data_set):
sql_statement = "SELECT %s.Area AS Area_100fg \
FROM \
Sample \
Inner Join %s ON \
%s.id = Sample.%s_foreignkey \
WHERE \
Sample.DataSetName = '%s' AND \
%s.Concentration_pg = 0.1;" % (table, table, table, table, data_set, table)
return pd.read_sql_query(sql_statement, self.conn)
def GetAveSimilarities(self, table, data_set):
sql_statement = "SELECT AVG(%s.Similarity) AS Ave_Similarity, %s.Concentration_pg AS Conc_pg \
FROM \
Sample \
Inner Join %s ON \
%s.id = Sample.%s_foreignkey \
WHERE \
Sample.DataSetName = '%s' \
GROUP BY Conc_pg;" % (table, table, table, table, table, data_set)
return pd.read_sql_query(sql_statement, self.conn)
def Similarities(self, table, data_sets):
# This query provides all the data to create the Concentration vs Similarity plot and tables
#condition = "DataSet = '" + "' OR DataSet = '".join(data_sets) + "' "
condition = self.CreateConditionClause_OrSeriesStr(data_sets)
sql_statement = "SELECT %s.Similarity AS Similarity, %s.Concentration_pg AS Conc_pg, \
Sample.Instrument AS SerNo, Sample.DataSetName AS DataSet \
FROM \
Sample \
Inner Join %s ON \
%s.id = Sample.%s_foreignkey \
WHERE \
%s \
ORDER BY SerNo, Conc_pg ASC;" % (table, table, table, table, table, condition)
return pd.read_sql_query(sql_statement, self.conn)
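# For orientation only: assuming CreateConditionClause_OrSeriesStr ORs the data-set
# names together (as the commented-out line above suggests), a call such as
# db.Similarities('Benzophenone', ['DS1', 'DS2']) -- the analyte table and data-set
# names are made up -- would render roughly:
#
#   SELECT Benzophenone.Similarity AS Similarity, Benzophenone.Concentration_pg AS Conc_pg,
#          Sample.Instrument AS SerNo, Sample.DataSetName AS DataSet
#   FROM Sample
#   Inner Join Benzophenone ON Benzophenone.id = Sample.Benzophenone_foreignkey
#   WHERE DataSet = 'DS1' OR DataSet = 'DS2'
#   ORDER BY SerNo, Conc_pg ASC;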
def ClearDataSetData(self, data_sets, analyte_table_lst):
    # Clears all the data from the database for the defined data sets
# append the DataSetConcentrations table so it too will be included in the data clearing
analyte_table_lst.append('DataSetConcentrations')
    # Create a single string containing all the foreign-key columns in the Sample table
    # that have data to be removed (comma-space delimited)
ForeignKeyColumn_lst = [col + '_foreignkey' for col in analyte_table_lst]
ForeignKeyColumn_Columns = ', '.join(ForeignKeyColumn_lst)
# Get condition string from data sets
data_set_condition = self.CreateConditionClause_OrSeriesStr(data_sets, "DataSetName")
# Resulting df: columns correspond to tables that contain data to be removed, the values in each column
# are the primary keys of records within that table that need to be deleted.
sql_statement = 'SELECT %s FROM Sample WHERE %s;' % (ForeignKeyColumn_Columns, data_set_condition)
df = pd.read_sql_query(sql_statement, self.conn)
df.columns = analyte_table_lst
# Iterate through the dataframe by column to delete each record from the db
# note: column = table name
for column in df:
condition = self.CreateConditionClause_OrSeriesStr(set(df[column]), "id")
self.conn.execute('DELETE FROM %s WHERE %s' % (column, condition))
# Finally remove the defined records from the sample table as well
self.conn.execute('DELETE FROM Sample WHERE %s' % data_set_condition)
self.CommitDB()
def write_locations(field, query_template, plate_name, row, conn, config):
# Read cells file for each image
query = query_template.replace("@@@",field).format(
plate_name,
row["Metadata_Well"],
row["Metadata_Site"]
)
locations = pd.read_sql_query(query, conn)
# Keep center coordinates only, remove NaNs, and transform to integers
locations = locations.dropna(axis=0, how="any")
locations[field+"_Location_Center_X"] = locations[field+"_Location_Center_X"]*config["compression"]["scaling_factor"]
locations[field+"_Location_Center_Y"] = locations[field+"_Location_Center_Y"]*config["compression"]["scaling_factor"]
locations[field+"_Location_Center_X"] = locations[field+"_Location_Center_X"].astype(int)
locations[field+"_Location_Center_Y"] = locations[field+"_Location_Center_Y"].astype(int)
    # Save the resulting data frame in the output directory
loc_file = "{}/{}/locations/{}-{}-{}.csv".format(
config["compression"]["output_dir"],
row["Metadata_Plate"],
row["Metadata_Well"],
row["Metadata_Site"],
field
)
dataset.utils.check_path(loc_file)
locations.to_csv(loc_file, index=False)
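# Minimal sketch of the coordinate clean-up applied above. The "Nuclei" field name,
# the scaling factor, and the values are made up; only the dropna/scale/astype(int)
# sequence mirrors the code.
def example_scale_locations():
    import pandas as pd
    locations = pd.DataFrame({
        'Nuclei_Location_Center_X': [10.4, None, 99.9],
        'Nuclei_Location_Center_Y': [20.1, 5.0, 42.0],
    })
    scaling_factor = 0.5
    # Drop rows with missing centers, then scale and convert to integer pixel coordinates
    locations = locations.dropna(axis=0, how="any")
    for axis in ('X', 'Y'):
        col = 'Nuclei_Location_Center_' + axis
        locations[col] = (locations[col] * scaling_factor).astype(int)
    return locations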
def run(self):
self.initialize()
tags = pd.read_sql_query("SELECT * FROM TAGS", self.con)
self.collect_changes(tags)
self.apply_changes_if_not_dry()
self.close()
def df_query(self, query, with_labels=False):
"""
Run a :mod:`sqlalchemy` query and return result as a :class:`pandas.DataFrame`
Args:
query (sqlalchemy.orm.query.Query): query object, usually generated by :func:`session.query()` in
an :class:`sqlalchemy.orm.session.Session`
with_labels (bool): A query for fields with the same name from different tables will cause problems
when converting it to a :class:`pandas.DataFrame`, because there will be duplicate column names.
When setting `with_labels=True`, disambiguation labels are assigned to all (!)
            fields in the query - each column name is prefixed with its table name. This enables
            querying fields with identical names from multiple tables while still getting unique column names in the output.
:return: query result as :class:`pandas.DataFrame`
"""
import pandas as pd
if with_labels:
query = query.with_labels()
# compile sql statement, including arguments
statement = query.statement.compile(self.engine)
# run query
return pd.read_sql_query(sql=statement, con=self.engine)
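# Hedged usage sketch for df_query (the `db` object, the Star/Planet mapped classes,
# and their shared `name` column are all illustrative, not part of the original source):
#
#   q = db.session.query(Star, Planet).join(Planet)
#   df = db.df_query(q, with_labels=True)
#   # columns come back prefixed with the table name, e.g. star_name and planet_name,
#   # instead of two clashing 'name' columns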
def get_pandas_df(self, sql):
"""
    We need to override this method in order to use connections other than the default_conn_name
:param sql: A query input via the web UI at /admin/queryview/
    :return: a pandas DataFrame
"""
conn = self.get_conn(self.snowflake_conn_id)
df = pd.read_sql_query(sql, conn)
return df
def get_test_2(self):
return pd.read_sql_query('select date,open,close,high,low,vol,code '
'from (select * from t_stick_data_m_test order by date desc limit 15) a '
'order by date asc;',self.sqlconn)
def build_model(all_features, categorical_features, target, connection_string, filename):
engine = create_engine(connection_string)
query = """SELECT * FROM connection_features"""
df = pd.read_sql_query(query, con=engine, index_col=['departurestop', 'departuredate', 'route'])
    df.index.names = ['stationfrom', 'date', 'vehicle']
df = df.reset_index()
model = build_model_random_forest(df, all_features, categorical_features, target)
joblib.dump(model, filename)
def predict(all_features, categorical_features, connection_string, filename):
engine = create_engine(connection_string)
model = joblib.load(filename)
    print(model)
query = """SELECT * FROM connection_features"""
df = pd.read_sql_query(query, con=engine, index_col=['departurestop', 'departuredate', 'route'])
    df.index.names = ['stationfrom', 'date', 'vehicle']
df = df.reset_index()
predicted = model.predict(df[all_features])
    print(predicted)
def pivot_stations(df, engine):
query = """
SELECT
d.*,
s.name AS arrivalname
FROM distance d INNER JOIN station s ON d.stationto = s.id
"""
distances = pd.read_sql_query(query, con=engine)
stations = distances['arrivalname'].unique().tolist()
dist_pivot = pd.pivot_table(distances, values='distance', index=['stationfrom', 'date', 'trip'],
columns=['arrivalname'], aggfunc=np.mean)
dist_pivot = dist_pivot.reindex(df.index.rename(['stationfrom', 'date', 'vehicle']))
df = df.join(dist_pivot, how='outer')
return df, stations
def select_fromindexs(dbname,field,indexs,**kwargs):
normal_db = False
tablename = False
    # handle the case where the indexes aren't already strings
    if isinstance(indexs[0], int):
        indexs = [str(row) for row in indexs]
    for key, value in kwargs.items():
if key == 'size':
size = value
if key == 'normal_db':
normal_db = value
if key == 'tablename':
tablename = value
a,engine = make_query(dbname,tablename=tablename,normal_db=normal_db)
stringindexs = ','.join(indexs)
if not tablename == False:
dbname = tablename
    # now build the query
query = '''SELECT * FROM %s WHERE %s IN (%s);''' % (dbname,field,stringindexs)
return pd.read_sql_query(query,engine)
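# Hedged usage sketch (the database, table, and field names are illustrative; make_query
# is the project helper called above):
#
#   df = select_fromindexs('routes', 'gid', [12, 48, 377], tablename='segments')
#   # -> SELECT * FROM segments WHERE gid IN (12,48,377);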
def runCode(self, code):
errors = None
output = undefined
try:
if code.lstrip().upper().startswith('SELECT '):
output = pandas.read_sql_query(code, self._connection)
else:
self._connection.execute(code)
except Exception as exc:
errors = [{
'line': 0,
'column': 0,
'message': str(exc)
}]
return {
'errors': errors,
'output': None if output is undefined else pack(output)
}
def query_object(self, starnames, key='*'):
"""
Get information about the given star.
Parameters:
===========
starnames: string, or iterable of strings
The name(s) of the star.
key: string, default='*' (return everything)
        The column(s) to retrieve; can be anything SQL will accept in a SELECT clause.
Returns:
========
A pandas DataFrame with the given information for each star
"""
if isinstance(starnames, str):
starnames = [starnames,]
starnames = ["'{}'".format(n) for n in starnames]
name_list = '(' + ', '.join(starnames) + ')'
sql_query = "SELECT {} FROM star WHERE name IN {}".format(key, name_list)
print(sql_query)
df = pd.read_sql_query(sql_query, self.db_con)
return df
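# Hedged usage sketch for query_object (the `db` wrapper object, the star names, and
# the column names are illustrative):
#
#   df = db.query_object(['HD 1234', 'HD 5678'], key='name, vsini')
#   # prints: SELECT name, vsini FROM star WHERE name IN ('HD 1234', 'HD 5678')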