def testFromCSVWithFeatureSpec(self):
if not HAS_PANDAS:
return
num_batches = 100
batch_size = 8
data_path = _make_test_csv_sparse()
feature_spec = {
"int": tf.FixedLenFeature(None, dtypes.int16, np.nan),
"float": tf.VarLenFeature(dtypes.float16),
"bool": tf.VarLenFeature(dtypes.bool),
"string": tf.FixedLenFeature(None, dtypes.string, "")
}
pandas_df = pd.read_csv(data_path, dtype={"string": object})
# Pandas insanely uses NaN for empty cells in a string column.
# And, we can't use Pandas replace() to fix them because nan != nan
s = pandas_df["string"]
for i in range(0, len(s)):
if isinstance(s[i], float) and math.isnan(s[i]):
pandas_df.set_value(i, "string", "")
tensorflow_df = df.TensorFlowDataFrame.from_csv_with_feature_spec(
[data_path],
batch_size=batch_size,
shuffle=False,
feature_spec=feature_spec)
# These columns were sparse; re-densify them for comparison
tensorflow_df["float"] = densify.Densify(np.nan)(tensorflow_df["float"])
tensorflow_df["bool"] = densify.Densify(np.nan)(tensorflow_df["bool"])
self._assert_pandas_equals_tensorflow(pandas_df,
tensorflow_df,
num_batches=num_batches,
batch_size=batch_size)
评论列表
文章目录