tensorflow_dataframe_test.py 文件源码-python代码片段

def testFromCSVWithFeatureSpec(self):
    if not HAS_PANDAS:
      return
    num_batches = 100
    batch_size = 8

    data_path = _make_test_csv_sparse()
    feature_spec = {
        "int": tf.FixedLenFeature(None, dtypes.int16, np.nan),
        "float": tf.VarLenFeature(dtypes.float16),
        "bool": tf.VarLenFeature(dtypes.bool),
        "string": tf.FixedLenFeature(None, dtypes.string, "")
    }

    pandas_df = pd.read_csv(data_path, dtype={"string": object})
    # Pandas insanely uses NaN for empty cells in a string column.
    # And, we can't use Pandas replace() to fix them because nan != nan
    s = pandas_df["string"]
    for i in range(0, len(s)):
      if isinstance(s[i], float) and math.isnan(s[i]):
        pandas_df.set_value(i, "string", "")
    tensorflow_df = df.TensorFlowDataFrame.from_csv_with_feature_spec(
        [data_path],
        batch_size=batch_size,
        shuffle=False,
        feature_spec=feature_spec)

    # These columns were sparse; re-densify them for comparison
    tensorflow_df["float"] = densify.Densify(np.nan)(tensorflow_df["float"])
    tensorflow_df["bool"] = densify.Densify(np.nan)(tensorflow_df["bool"])

    self._assert_pandas_equals_tensorflow(pandas_df,
                                          tensorflow_df,
                                          num_batches=num_batches,
                                          batch_size=batch_size)