def append_features(df, *cols):
"""Append features from columns to the features vector.
Parameters
----------
df : pyspark.sql.DataFrame
cols : list of str
Returns
-------
pyspark.sql.DataFrame
"""
def add_features(feat, *other):
raw = feat.toArray()
return Vectors.dense(np.append(raw, map(float, other)))
add_features_udf = F.udf(add_features, VectorUDT())
new_feat_list = df.schema['features'].metadata['features'] + cols
return df.withColumn('features', mjolnir.spark.add_meta(
df._sc, add_features_udf('features', *cols), {'features': new_feat_list}))
评论列表
文章目录