def parse_psqs(psqs_results_file):
"""Parse a PSQS result file and returns a Pandas DataFrame of the results
Args:
psqs_results_file: Path to psqs results file
Returns:
Pandas DataFrame: Summary of PSQS results
"""
# TODO: generalize column names for all results, save as dict instead
psqs_results = pd.read_csv(psqs_results_file, sep='\t', header=None)
psqs_results['pdb_file'] = psqs_results[0].apply(lambda x: str(x).strip('./').strip('.pdb'))
psqs_results = psqs_results.rename(columns = {1:'psqs_local', 2:'psqs_burial', 3:'psqs_contact', 4:'psqs_total'}).drop(0, axis=1)
psqs_results['u_pdb'] = psqs_results['pdb_file'].apply(lambda x: x.upper() if len(x)==4 else np.nan)
psqs_results['i_entry_name'] = psqs_results['pdb_file'].apply(lambda x: x.split('_model1')[0] if len(x)>4 else np.nan)
psqs_results = psqs_results[pd.notnull(psqs_results.psqs_total)]
return psqs_results
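# Usage sketch (not from the original source): a small in-memory file stands in for
# PSQS output; the tab-separated layout (path followed by the four scores) is an
# assumption based on how the parser reads it.
import io
import numpy as np
import pandas as pd

fake_psqs = io.StringIO(
    "./1abc.pdb\t-0.10\t-0.20\t-0.30\t-0.60\n"
    "./P12345_model1.pdb\t-0.15\t-0.25\t-0.35\t-0.75\n")
psqs_df = parse_psqs(fake_psqs)
print(psqs_df[['pdb_file', 'u_pdb', 'i_entry_name', 'psqs_total']])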
def trace_serializer(trace):
data = OrderedDict([
("type", "ARBITRARY_START"),
("interpretation", trace.interpretation),
("unit", trace.unit),
("trace_id", trace.trace_id),
("interval", trace.interval),
("records", [
OrderedDict([
("start", start.isoformat()),
("value", record.value if pd.notnull(record.value) else None),
("estimated", bool(record.estimated)),
])
for start, record in trace.data.iterrows()
]),
])
return data
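# Usage sketch (not from the original source): a stand-in trace built with
# SimpleNamespace; the real trace type is assumed to expose these attributes plus a
# time-indexed `data` DataFrame with 'value' and 'estimated' columns.
import json
from types import SimpleNamespace
import numpy as np
import pandas as pd

toy_trace = SimpleNamespace(
    interpretation="ELECTRICITY_CONSUMPTION_SUPPLIED",
    unit="KWH",
    trace_id="trace-1",
    interval="daily",
    data=pd.DataFrame({"value": [1.2, np.nan], "estimated": [False, False]},
                      index=pd.date_range("2017-01-01", periods=2, freq="D")))
print(json.dumps(trace_serializer(toy_trace), indent=2))  # the NaN value serializes as null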
def clean_and_write_dataframe_to_csv(data, filename):
"""
Cleans a dataframe of np.NaNs and saves to file via pandas.to_csv
:param data: data to write to CSV
:type data: :class:`pandas.DataFrame`
:param filename: Path to file to write CSV to. If None, the string of data
will be returned
:type filename: str | None
:return: If the filename is None, returns the string of data. Otherwise
returns None.
:rtype: str | None
"""
# cleans np.NaN values
data = data.where((pd.notnull(data)), None)
# If filename=None, to_csv will return a string
result = data.to_csv(path_or_buf=filename, encoding='utf-8', dtype=str, index=False, na_rep=None,
skipinitialspace=True, quoting=csv.QUOTE_ALL)
logging.info("Dataframe of shape %s has been stored." % str(data.shape))
return result
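# Minimal sketch of the NaN -> None cleaning idiom used above (the part that relies
# on pd.notnull); the to_csv call itself is omitted here.
import numpy as np
import pandas as pd

raw = pd.DataFrame({'a': [1.0, np.nan], 'b': ['x', None]})
cleaned = raw.where(pd.notnull(raw), None)
print(cleaned)  # NaNs in object columns become None; float columns may keep NaN depending on the pandas version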
def element_to_bdsim(e):
"""Convert a pandas.Series representation onto a BDSim sequence element."""
bdsim = ""
if e.KEYWORD in ['MARKER', 'INSTRUMENT']:
bdsim = "{}: {};".format(e.name.replace('$', ''), "marker")
if e.KEYWORD in ['DRIFT', 'QUADRUPOLE', 'RBEND', 'SBEND']:
bdsim = "{}: {}, l={}*m".format(e.name.replace('$', ''), e.KEYWORD.lower(), e.L)
if e.get('BENDING_ANGLE') is not None and not np.isnan(e['BENDING_ANGLE']):
bdsim += f",angle=-{e['BENDING_ANGLE']}"
elif e.get('ANGLE') is not None and not np.isnan(e['ANGLE']):
bdsim += f",angle=-{e.get('ANGLE', 0)}"
else:
# Angle property not supported by the element or absent
bdsim += ""
#if pd.notnull(e['APERTYPE']):
# bdsim += ", aperture={}*m".format(str(e['APERTURE']).strip('[]'))
if pd.notnull(e.get('PLUG')) and pd.notnull(e.get('CIRCUIT')):
bdsim += ", {}={{{{ {} or '0.0' }}}}".format(e['PLUG'].lower(), e['CIRCUIT'])
bdsim += ';'
return bdsim
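# Usage sketch (not from the original source): a pandas Series standing in for one
# row of a MAD-X style sequence table; the Series name carries the element name, and
# the element/circuit names below are made up for illustration.
import numpy as np
import pandas as pd

quad = pd.Series({'KEYWORD': 'QUADRUPOLE', 'L': 0.5, 'PLUG': 'K1', 'CIRCUIT': 'KQ3.R1'},
                 name='MQ.3R1$')
print(element_to_bdsim(quad))
# -> MQ.3R1: quadrupole, l=0.5*m, k1={{ KQ3.R1 or '0.0' }};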
def element_to_mad(e):
"""Convert a pandas.Series representation onto a MAD-X sequence element."""
if e.CLASS not in SUPPORTED_CLASSES:
return ""
mad = "{}: {}, ".format(e.name, e.CLASS)
if e.get('BENDING_ANGLE') is not None and not np.isnan(e['BENDING_ANGLE']):
mad += f"ANGLE={e['BENDING_ANGLE']},"
elif e.get('ANGLE') is not None and not np.isnan(e['ANGLE']):
mad += f"ANGLE={e.get('ANGLE', 0)},"
else:
# Angle property not supported by the element or absent
mad += ""
mad += ', '.join(["{}={}".format(p, e[p]) for p in SUPPORTED_PROPERTIES if pd.notnull(e.get(p, None))])
if pd.notnull(e['LENGTH']) and e['LENGTH'] != 0.0:
mad += ", L={}".format(e['LENGTH'])
if pd.notnull(e.get('APERTYPE', None)):
mad += ", APERTURE={}".format(str(e['APERTURE']).strip('[]'))
if pd.notnull(e.get('PLUG')) and pd.notnull(e.get('CIRCUIT')) and pd.isnull(e.get('VALUE')):
mad += ", {}:={}".format(e['PLUG'], e['CIRCUIT'])
if pd.notnull(e.get('PLUG')) and pd.notnull(e.get('VALUE')):
mad += ", {}={}".format(e['PLUG'], e['VALUE'])
mad += ", AT={}".format(e['AT_CENTER'])
mad += ";"
return mad
def get_sec_spt(row):
"""
Get the secondary spectral type from the information we have. Meant to be
passed to the `apply` method of a pandas DataFrame (with axis=1).
"""
if pd.notnull(row['Sp2']):
return row['Sp2']
elif pd.notnull(row['Sp1']) and pd.notnull(row['mag1']) and pd.notnull(row['mag2']):
# TODO: Do better than assuming V band!
band = 'V'
absmag_prim = MS.GetAbsoluteMagnitude(row['Sp1'], color=band)
dm = float(row['mag1']) - absmag_prim
absmag_sec = float(row['mag2']) - dm
return MS.GetSpectralType_FromAbsMag(absmag_sec, color=band)[0]
elif pd.notnull(row['Sp1']) and pd.notnull(row['K1']) and pd.notnull(row['K2']):
mass = MS.Interpolate('mass', row['Sp1'])
q = float(row['K1']) / float(row['K2'])
sec_mass = q * mass
return MS.GetSpectralType('mass', sec_mass)[0]
else:
print(row)
raise ValueError('Must give enough information to figure out the spectral type!')
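# Usage sketch (not from the original source) of the row-wise apply pattern the
# docstring describes; these toy rows all provide 'Sp2', so the MS-based branches
# (which need the spectral-type helper object) are not exercised.
import pandas as pd

binaries = pd.DataFrame({
    'Sp1': ['G2V', 'F5V'], 'Sp2': ['K1V', 'M0V'],
    'mag1': [5.1, 6.3], 'mag2': [7.8, 11.2],
    'K1': [None, None], 'K2': [None, None]})
binaries['Sp2_filled'] = binaries.apply(get_sec_spt, axis=1)
print(binaries['Sp2_filled'].tolist())  # ['K1V', 'M0V']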
def series_is_datetime(series: pd.Series, check_num: int=5, dropna: bool=True):
"""
Checks random rows in a Series, counting how many coerce to datetime.
:param series: Series whose values are sampled
:param check_num: number of random rows to sample
:param dropna: drop missing values before sampling
:return: True if more sampled values parse as datetimes than fail
"""
if dropna:
series = series.dropna(axis=0)
got, lost = 0, 0
size = (check_num if series.index.size > check_num else series.index.size)
if size > 0:
checks = np.random.randint(0, high=series.index.size, size=size)
for x in series.iloc[checks].tolist():  # positional indexing: integer labels may be missing after dropna
try:
x = pd.Timestamp(x)
if pd.notnull(x):
got += 1
except (ValueError, OverflowError):
lost += 1
return got > lost
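# Usage sketch (not from the original source): date-like strings should report True,
# free text should not.
import numpy as np
import pandas as pd

dates = pd.Series(["2017-01-01", "2017-02-15", None, "2017-03-30"])
words = pd.Series(["apple", "banana", "cherry"])
print(series_is_datetime(dates))  # True: the sampled values coerce to Timestamps
print(series_is_datetime(words))  # False: pd.Timestamp raises for every sampled value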
def to_csv(self, filepath='hypothesis/SGD_hypothesis_header.csv'):
df = pd.DataFrame()
df = pd.concat([df, pd.DataFrame([['depth', self.depth]])], ignore_index=True)
df = pd.concat([df, pd.DataFrame([['sizes'] + [self.input_size+1] \
+ [hidden_size+1 for hidden_size in self.hidden_sizes] \
+ [self.output_size]])], ignore_index=True)
for i, weight in enumerate(self.best_weights):
df = pd.concat([df, pd.DataFrame([['W_{}'.format(i)] + weight.T.flatten().tolist()])], ignore_index=True)
# Replace NaN with None
df = df.where((pd.notnull(df)), None)
# Since pd.to_csv converts int to float if there's `None` in the same row,
# we need to handle this.
with open(filepath, 'w') as f:
for row in range(df.shape[0]):
for col in range(df.shape[1]):
if (row == 0 and col != 0) or (row == 1 and col != 0):
val = int(df[col][row]) if df[col][row] is not None else ''
else:
val = df[col][row] if df[col][row] is not None else ''
f.writelines('{},'.format(val))
if row != df.shape[0]-1: f.writelines('\n')
def execute_internal(self, context, **kwargs):
"""
the internal execution process to be implemented
:param context:
:param kwargs:
:return:
"""
df = pd.read_csv('https://raw.githubusercontent.com/bailaohe/parade/master/assets/movie_metadata.csv')
# Process projection on the dataset to get our interested attributes
df = df[['movie_title', 'genres', 'title_year', 'content_rating', 'budget', 'num_voted_users', 'imdb_score']]
# Filter out records with NaN title_year or non-positive budget
df = df[pd.notnull(df['title_year'])]
df = df[df['budget'] > 0]
# Extract the genres ROOT
df['genres_root'] = df['genres'].apply(lambda g: g.split('|')[0])
return df
def Join_Inputs(df, df_betas, df_ff_params, df_liq_prox):
# add beta values & set index to datetime from df_diff
df = pd.merge(df, df_betas, left_on='cusip_id',
right_on='cusip_id', left_index=True)
df['trd_exctn_dt_idx'] = pd.to_datetime(df['trd_exctn_dt'],\
format='%Y%m%d')
df.set_index('trd_exctn_dt_idx', inplace=True)
# Join with fama-french factors on date index
df_join_ff = df.join(df_ff_params, lsuffix="_m", rsuffix='_b')
# Drop any rows where dates in df_diff do not appear in fama-french
df_join_ff = df_join_ff[pd.notnull(df_join_ff['MKT_b'])]
# Combine liquidity factor L_t
df_liq_prox_values = df_liq_prox['residual_term']
df_join_liq = df_join_ff.join(df_liq_prox_values)
df_join_liq = df_join_liq[pd.notnull(df_join_liq['residual_term'])]
return df_join_liq
def test_decide_from_contexts_df_null_decisions():
tree = CLIENT.get_decision_tree(AGENT_ID,
COMPLEX_AGENT_DATA.last_valid_index().value // 10 ** 9)
test_df = pd.DataFrame(
[
["Jean-Pierre", "+02:00"],
["Paul"]
],
columns=["b", "tz"],
index=pd.date_range("20130201", periods=2, freq="D"))
df = CLIENT.decide_from_contexts_df(tree, test_df)
assert_equal(len(df), 2)
assert pd.isnull(df["a_predicted_value"][0])
assert pd.notnull(df["error"][0])
assert pd.notnull(df["a_predicted_value"][1])
assert pd.isnull(df["error"][1])
def add_operations(self, agent_id, operations):
if isinstance(operations, pd.DataFrame):
if not isinstance(operations.index, pd.DatetimeIndex):
raise CraftAiBadRequestError("Invalid dataframe given, it is not time indexed")
chunk_size = self.config["operationsChunksSize"]
for chunk in chunker(operations, chunk_size):
chunk_operations = [
{
"timestamp": row.name.value // 10 ** 9, # Timestamp.value returns nanoseconds
"context": {
col: row[col] for col in operations.columns if pd.notnull(row[col])
}
} for _, row in chunk.iterrows()
]
super(Client, self).add_operations(agent_id, chunk_operations)
return {
"message": "Successfully added %i operation(s) to the agent \"%s/%s/%s\" context."
% (len(operations), self.config["owner"], self.config["project"], agent_id)
}
else:
return super(Client, self).add_operations(agent_id, operations)
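# Minimal sketch (not from the original source) of the per-row conversion performed
# above, on a toy time-indexed DataFrame; the client call itself is omitted.
import pandas as pd

ops_df = pd.DataFrame({"temperature": [21.5, None], "occupancy": [3, 1]},
                      index=pd.date_range("2017-01-01", periods=2, freq="D"))
operations = [{
    "timestamp": row.name.value // 10 ** 9,
    "context": {col: row[col] for col in ops_df.columns if pd.notnull(row[col])}
} for _, row in ops_df.iterrows()]
print(operations)  # the null temperature is dropped from the second context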
def decide_from_row(tree, columns, row):
time = Time(
t=row.name.value // 10 ** 9, # Timestamp.value returns nanoseconds
timezone=row.name.tz
)
context = {
col: row[col] for col in columns if pd.notnull(row[col])
}
try:
decision = VanillaInterpreter.decide(tree, [context, time])
keys, values = zip(*[
(output + "_" + key, value)
for output, output_decision in decision["output"].items()
for key, value in output_decision.items()
])
return pd.Series(data=values, index=keys)
except CraftAiNullDecisionError as e:
return pd.Series(data=[e.message], index=["error"])
def _calculate_geographic_nullity(geo_group, x_col, y_col):
"""
Helper method which calculates the nullity of a DataFrame. Factored out of and used within `geoplot`.
"""
# Aggregate by point and fetch a list of non-null coordinate pairs, which is returned.
point_groups = geo_group.groupby([x_col, y_col])
points = [point for point in point_groups.groups.keys() if pd.notnull(point[0]) and pd.notnull(point[1])]
# Calculate nullities by location, then take their average within the overall feature.
counts = np.sum(point_groups.count().values, axis=1)
entries = point_groups.size()
width = len(geo_group.columns)
# Remove empty (NaN, NaN) points.
if len(entries) > 0: # explicit check to avoid a Runtime Warning
geographic_nullity = np.average(1 - counts / width / entries)
return points, geographic_nullity
else:
return points, np.nan
def _get_hd_args(path, high_dim_node, annotation):
"""
Create dict with meta tags that belong to a certain high dimensional node.
"""
map_file = high_dim_node.sample_mapping
s = map_file.slice_path(path).iloc[:, 5].unique()
t = map_file.slice_path(path).iloc[:, 6].unique()
hd_args = {'hd_sample': ', '.join(s.astype(str)) if pd.notnull(s[0]) else '',
'hd_tissue': ', '.join(t.astype(str)) if pd.notnull(t[0]) else '',
'hd_type': Mappings.annotation_data_types.get(high_dim_node.params.datatype),
}
if annotation:
hd_args.update({'pl_marker_type': annotation.marker_type,
'pl_genome_build': annotation.params.get('GENOME_RELEASE', ''),
'pl_title': annotation.params.get('TITLE', ''),
'pl_id': annotation.platform})
return hd_args
def extract_days(input_delta):
"""
Helper function to extract the number of days from a time delta. Returns:
- Number of days, if the input coerces to a valid time delta
- np.NaN if the time delta is null (NaT)
:param input_delta:
:return: number of days in time delta
:rtype: float
"""
# Attempt to coerce into Pandas time delta
delta = pd.Timedelta(input_delta)
# Attempt to extract number of days
days = np.NaN
if pd.notnull(delta):
days = delta.days
# Return result
return days
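# Usage sketch (not from the original source): coercible strings and Timedeltas yield
# day counts, missing values propagate as NaN.
import numpy as np
import pandas as pd

deltas = pd.Series([pd.Timedelta(days=3), "5 days 04:00:00", None])
print(deltas.apply(extract_days))  # 3.0, 5.0, NaN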
def kama(self, efficiency_ratio_periods=10, ema_fast=2, ema_slow=30,
period=20, column='adj_close'):
er = self._efficiency_ratio_computation(
period=efficiency_ratio_periods, column=column)
fast_alpha = 2 / (ema_fast + 1)
slow_alpha = 2 / (ema_slow + 1)
smoothing_constant = pd.Series(
(er * (fast_alpha - slow_alpha) + slow_alpha) ** 2,
name='smoothing_constant')
sma = pd.Series(self.ohlcv[column].rolling(period).mean(), name='SMA')
kama = []
for smooth, ma, price in zip(iter(smoothing_constant.items()),
iter(sma.shift(-1).items()),
iter(self.ohlcv[column].items())):
try:
kama.append(kama[-1] + smooth[1] * (price[1] - kama[-1]))
except IndexError:  # kama is still empty on the first pass; seed from the SMA below
if pd.notnull(ma[1]):
kama.append(ma[1] + smooth[1] * (price[1] - ma[1]))
else:
kama.append(None)
sma['KAMA'] = pd.Series(kama, index=sma.index,
name='{} days KAMA Ticker {}'.format(period,
self.ticker))
yield sma['KAMA']
def KAMA(cls, ohlc, er=10, ema_fast=2, ema_slow=30, period=20):
"""Developed by Perry Kaufman, Kaufman's Adaptive Moving Average (KAMA) is a moving average designed to account for market noise or volatility.
Its main advantage is that it takes into consideration not just the direction, but the market volatility as well."""
er = cls.ER(ohlc, er)
fast_alpha = 2 / (ema_fast + 1)
slow_alpha = 2 / (ema_slow + 1)
sc = pd.Series((er * (fast_alpha - slow_alpha) + slow_alpha)**2, name="smoothing_constant") ## smoothing constant
sma = pd.Series(ohlc["close"].rolling(period).mean(), name="SMA") ## first KAMA is SMA
kama = []
# Current KAMA = Prior KAMA + smoothing_constant * (Price - Prior KAMA)
for s, ma, price in zip(sc.iteritems(), sma.shift().iteritems(), ohlc["close"].iteritems()):
try:
kama.append(kama[-1] + s[1] * (price[1] - kama[-1]))
except IndexError:  # kama is still empty on the first pass; seed from the SMA below
if pd.notnull(ma[1]):
kama.append(ma[1] + s[1] * (price[1] - ma[1]))
else:
kama.append(None)
sma["KAMA"] = pd.Series(kama, index=sma.index, name="{0} period KAMA.".format(period)) ## apply the kama list to existing index
return sma["KAMA"]
def markGaps(self):
"""Produces dictionary of list of gaps in time series data based on the presence of nan values;
used for gantt plotting
:returns: dateranges; a dictionary with station names as keys and lists of begin and end dates as values
"""
df = self.data
stations = self.stations
dateranges = {}
for station in stations:
dateranges[station] = []
first = df.ix[:, station].first_valid_index()
last = df.ix[:, station].last_valid_index()
records = df.ix[first:last, station]
#dateranges[station].append(pd.to_datetime(first))
for i in range(len(records) - 1):
if pd.isnull(records[i + 1]) and pd.notnull(records[i]):
dateranges[station].append(pd.to_datetime(records.index)[i])
elif pd.isnull(records[i]) and pd.notnull(records[i + 1]):
dateranges[station].append(pd.to_datetime(records.index)[i])
dateranges[station].append(pd.to_datetime(last))
return dateranges
def update_last_known_values(self):
"""
Store the non-NaN values from our oldest frame in each frequency.
"""
ffillable = self.ffillable_fields
if not len(ffillable):
return
for frequency in self.unique_frequencies:
digest_panel = self.digest_panels.get(frequency, None)
if digest_panel:
oldest_known_values = digest_panel.oldest_frame(raw=True)
else:
oldest_known_values = self.buffer_panel.oldest_frame(raw=True)
oldest_vals = oldest_known_values
oldest_columns = self.fields
for field in ffillable:
f_idx = oldest_columns.get_loc(field)
field_vals = oldest_vals[f_idx]
# isnan would be fast, possible to use?
non_nan_sids = np.where(pd.notnull(field_vals))
key = (frequency.freq_str, field)
key_loc = self.last_known_prior_values.index.get_loc(key)
self.last_known_prior_values.values[
key_loc, non_nan_sids
] = field_vals[non_nan_sids]
def uniprot_reviewed_checker(uniprot_id):
"""Check if a single UniProt ID is reviewed or not.
Args:
uniprot_id: UniProt accession ID to look up
Returns:
bool: True if the entry is reviewed (Swiss-Prot), False if unreviewed (TrEMBL)
"""
query_string = 'id:' + uniprot_id
uni_rev_raw = StringIO(bsup.search(query_string, columns='id,reviewed', frmt='tab'))
uni_rev_df = pd.read_table(uni_rev_raw, sep='\t', index_col=0)
uni_rev_df = uni_rev_df.fillna(False)
uni_rev_df = uni_rev_df[pd.notnull(uni_rev_df.Status)]
uni_rev_df = uni_rev_df.replace(to_replace="reviewed", value=True)
uni_rev_df = uni_rev_df.replace(to_replace="unreviewed", value=False)
uni_rev_dict_adder = uni_rev_df.to_dict()['Status']
return uni_rev_dict_adder[uniprot_id]
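# Sketch (not from the original source) of the parsing step above on a canned
# tab-separated response, so it runs without the live `bsup` UniProt client; the
# two-column Entry/Status layout mirrors what the function expects from the query.
import pandas as pd
from io import StringIO

canned = StringIO("Entry\tStatus\nP0A799\treviewed\n")
uni_rev_df = pd.read_table(canned, sep='\t', index_col=0)
uni_rev_df = uni_rev_df.replace({"reviewed": True, "unreviewed": False})
print(uni_rev_df.to_dict()['Status']['P0A799'])  # True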
def processData(data):
df = pd.DataFrame.transpose(pd.read_json(json.dumps(data)))
df = df.dropna(subset = [key for key in df.keys() if "x_" in key])
df = df[pd.notnull(df['y_observed'])]
X = df[[key for key in df.keys() if "x_" in key]].values
y = df["y_observed"].values
return X, y
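# Usage sketch (not from the original source): a JSON-style mapping of record id ->
# feature dict, with predictor keys prefixed 'x_' and the target under 'y_observed'.
import json
import pandas as pd

data = {"r1": {"x_temp": 20.5, "x_humidity": 0.4, "y_observed": 1.0},
        "r2": {"x_temp": 22.0, "x_humidity": None, "y_observed": 0.0},
        "r3": {"x_temp": 19.0, "x_humidity": 0.5, "y_observed": None}}
X, y = processData(data)
print(X.shape, y)  # r2 is dropped by dropna on the x_ columns, r3 by the notnull filter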
# 5th: initial model
def pre_processData(train_data,file_path):
train_data.loc[(train_data.Age.isnull()), 'Age' ] = np.mean(train_data.Age) # fill missing Age with the mean age
train_data.loc[(train_data.Cabin.notnull(),'Cabin')] = 'yes' # mark a recorded Cabin as 'yes'
train_data.loc[(train_data.Cabin.isnull(),'Cabin')] = 'no' # and a missing Cabin as 'no'
'''One-hot (0/1) encode the categorical features'''
dummies_cabin = pd.get_dummies(train_data['Cabin'],prefix='Cabin') # get_dummies builds 0/1 indicator columns; prefix names them Cabin_*
dummies_Embarked = pd.get_dummies(train_data['Embarked'], prefix='Embarked')
dummies_Sex = pd.get_dummies(train_data['Sex'], prefix='Sex')
dummies_Pclass = pd.get_dummies(train_data['Pclass'],prefix='Pclass')
train_data = pd.concat([train_data,dummies_cabin,dummies_Embarked,dummies_Pclass,dummies_Sex], axis=1) # concatenate the dummy columns onto the DataFrame (axis=1: column-wise)
train_data.drop(['Pclass','Name','Sex','Embarked','Cabin','Ticket'],axis=1,inplace=True) # drop the original categorical/text columns
header_string = ','.join(train_data.columns.tolist()) # join the column names into a header string
np.savetxt(file_path+r'/pre_processData1.csv', train_data, delimiter=',',header=header_string) # save the intermediate result
'''Standardize the numeric features (Age, Fare)'''
scaler = StandardScaler()
age_scaler = scaler.fit(train_data['Age'])
train_data['Age'] = age_scaler.fit_transform(train_data['Age'])
if np.sum(train_data.Fare.isnull()): # if Fare has missing values, fill them with the mean fare
train_data.loc[(train_data.Fare.isnull(),'Fare')]=np.mean(train_data.Fare)
fare_scaler = scaler.fit(train_data['Fare'])
train_data['Fare'] = fare_scaler.transform(train_data['Fare'])
header_string = ','.join(train_data.columns.tolist()) # join the column names into a header string
np.savetxt(file_path+r'/pre_processData_scaled.csv', train_data, delimiter=',',header=header_string) # save the scaled result
return train_data
## feature engineering: data preprocessing
def generate_tokens(table, key_attr, join_attr, tokenizer):
table_nonnull = table[pd.notnull(table[join_attr])]
return dict(zip(table_nonnull[key_attr],
table_nonnull[join_attr].apply(tokenizer.tokenize)))
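# Usage sketch (not from the original source): a stand-in whitespace tokenizer; in
# practice this helper is given a tokenizer object (e.g. from py_stringmatching)
# exposing the same .tokenize(string) interface.
import pandas as pd

class _WhitespaceTokenizer:
    def tokenize(self, s):
        return s.split()

table = pd.DataFrame({'id': [1, 2, 3],
                      'name': ['data science', None, 'string matching']})
print(generate_tokens(table, 'id', 'name', _WhitespaceTokenizer()))
# -> {1: ['data', 'science'], 3: ['string', 'matching']}; the null row is skipped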
def preprocess_data(path, is_test=False):
data = pd.read_csv(path, index_col='PassengerId')
data.drop(['Name', 'Ticket', 'Cabin'], axis=1, inplace=True)
if is_test:
data = data.replace([None], [0])
else:
data = data[pd.notnull(data['Age'])]
data = data[pd.notnull(data['Embarked'])]
data.replace(["female", "male"], [0, 1], inplace=True)
data.replace(["Q", "C", "S"], [0, 1, 2], inplace=True)
if "Survived" in data:
data = data[pd.notnull(data['Survived'])]
data_norm = (data - data.mean()) / (data.max() - data.min())
return data_norm
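# Usage sketch (not from the original source): an in-memory CSV with the usual Kaggle
# Titanic headers stands in for the file path (pd.read_csv accepts file-like objects).
import io

csv_text = (
    "PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked\n"
    "1,0,3,Braund,male,22,1,0,A/5 21171,7.25,,S\n"
    "2,1,1,Cumings,female,38,0,1,PC 17599,71.28,C85,C\n"
    "3,1,3,Heikkinen,female,,0,0,STON/O2,7.92,,S\n")
train = preprocess_data(io.StringIO(csv_text))
print(train.shape)  # the row with a missing Age is dropped by the pd.notnull filter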
def plot_facet(self, data, color, **kwargs):
x = kwargs.get("x")
y = kwargs.get("y")
levels_x = kwargs.get("levels_x")
levels_y = kwargs.get("levels_y")
#num = []
#date = []
#time = data[self._time_column]
#num = data[self._time_column].apply(self.convert_to_datetime)
#date = data[self._time_column].apply(self.convert_to_timeseries)
#if pd.isnull(num).sum() <= pd.isnull(date).sum():
#data[self._time_column] = num
#else:
#data[self._time_column] = date
#data.dropna(inplace=True)
#if len(self._groupby) == 2:
#ct = pd.crosstab(data[self._time_column], data[self._groupby[0]])
#ct = ct.reindex_axis(self._levels[0], axis=1).fillna(0)
#ct = ct[pd.notnull(ct.index)]
#else:
#ct = pd.crosstab(
#data[self._time_column],
#pd.Series([""] * len(self._table[self._time_column]), name=""))
## Line plot:
#self.vmax = max(self.vmax, ct.values.max())
#ct.plot(ax=plt.gca(), color=self.get_palette())
def plot_facet(self, data, color, **kwargs):
x = kwargs.get("x")
y = kwargs.get("y")
levels_x = kwargs.get("levels_x")
levels_y = kwargs.get("levels_y")
#num = []
#date = []
#time = data[self._time_column]
#num = data[self._time_column].apply(self.convert_to_datetime)
#date = data[self._time_column].apply(self.convert_to_timeseries)
#if pd.isnull(num).sum() <= pd.isnull(date).sum():
#data[self._time_column] = num
#else:
#data[self._time_column] = date
#data.dropna(inplace=True)
#if len(self._groupby) == 2:
#ct = pd.crosstab(data[self._time_column], data[self._groupby[0]])
#ct = ct.reindex_axis(self._levels[0], axis=1).fillna(0)
#ct = ct[pd.notnull(ct.index)]
#else:
#ct = pd.crosstab(
#data[self._time_column],
#pd.Series([""] * len(self._table[self._time_column]), name=""))
## Stacked area plot:
#if len(self._groupby) == 2:
#self.vmax = max(self.vmax, ct.apply(sum, axis=1).max())
#ct.plot(ax=plt.gca(), kind="area", stacked=True, color=self.get_palette(), **kwargs)
def plot_facet(self, data, color, **kwargs):
x = kwargs.get("x")
y = kwargs.get("y")
levels_x = kwargs.get("levels_x")
levels_y = kwargs.get("levels_y")
#num = []
#date = []
#time = data[self._time_column]
#num = data[self._time_column].apply(self.convert_to_datetime)
#date = data[self._time_column].apply(self.convert_to_timeseries)
#if pd.isnull(num).sum() <= pd.isnull(date).sum():
#data[self._time_column] = num
#else:
#data[self._time_column] = date
#data.dropna(inplace=True)
#if len(self._groupby) == 2:
#ct = pd.crosstab(data[self._time_column], data[self._groupby[0]])
#ct = ct.reindex_axis(self._levels[0], axis=1).fillna(0)
#ct = ct[pd.notnull(ct.index)]
#else:
#ct = pd.crosstab(
#data[self._time_column],
#pd.Series([""] * len(self._table[self._time_column]), name=""))
## percentage area plot:
## if there is only one grouping variable (the time column),
## the cross table produces a Series, not a data frame. It
## isn't really very informative to plot it, but we provide
## for this special case anyway_
#if type(ct) == pd.Series:
#ct = ct.apply(lambda x: 100)
#else:
#ct = ct.apply(lambda x: (100 * x) / sum(x), axis=1)
#ct.plot(kind="area", ax=plt.gca(), stacked=True, color=self.get_palette(), **kwargs)
def _save_series(self, series):
data = [
[
d.strftime(self.cache_date_format), t
if pd.notnull(t) else None
]
for d, t in series.iteritems()
]
self.json_store.save_json(self._get_cache_key(), data)
def save_series(self, year, series):
key = self._get_cache_key(year)
data = [
[
d.strftime(self.cache_date_format), t
if pd.notnull(t) else None
]
for d, t in series.iteritems()
]
self.json_store.save_json(key, data)