def _prepare_data(self):
    """
    Build the dataset used for estimation from ``self.data``.

    The columns listed in ``self.varlist`` are selected, every row that
    contains a missing value in one of those columns is dropped, the columns
    are renamed through the ``self._var_to_symb`` mapping, and a constant
    column ``"Cons"`` (all ones) is appended.  When ``self.options["logit"]``
    is truthy, the ``"y"`` column is additionally recoded to 0/1.

    :return: pd.DataFrame
    :raises ValueError: if the logit option is set and the ``"y"`` column
        does not take exactly two distinct values.
    """
    # Subset the data to the columns used in the model.  Selecting with a
    # list already yields a new frame; ``dropna`` then removes incomplete
    # rows.  (Indexing with ``data[pd.notnull(data)]`` only masks cells and
    # keeps every row, so missing values would leak into the estimation.)
    data = self.data[self.varlist].dropna().reset_index(drop=True)

    # Map each variable name to its unique variable code.
    data = data.rename(columns=self._var_to_symb)

    # Add a constant to the data.
    data["Cons"] = 1

    if self.options["logit"]:
        endog = data["y"]
        uniques = np.unique(endog)
        if len(uniques) != 2:
            raise ValueError(
                "The dependent variable does not have exactly two distinct outcomes. "
                "Please provide another dataset or change the 'logit' option to 0")
        # np.unique returns sorted values: the smallest outcome becomes 0,
        # the other one becomes 1.
        data["y"] = np.where(endog == uniques[0], 0, 1)

    return data
评论列表
文章目录