def cluster_song_in_playlist(self, playlist_id, cluster_n=5, is_detailed=False):
    """
    Cluster the songs of a single playlist by their song2vec vectors.

    The playlist is fetched with ``playlist_detail``. Every distinct
    (lower-cased) track name is looked up in ``self.song2vec_model`` and the
    resulting vectors are clustered with ``AffinityPropagation`` using its
    default settings.

    Args:
        playlist_id: id of the playlist, passed to ``playlist_detail``.
        cluster_n: not read by this method; kept so existing callers that
            pass it keep working.
        is_detailed: not read by this method; kept so existing callers that
            pass it keep working.

    Returns:
        A tuple ``(clusters, playlist_name, song_info_dict)`` where

        * ``clusters`` is a list of lists of lower-cased song names, one
          inner list per cluster. When the playlist has fewer than two
          distinct song names, or the clustering yields no cluster centers,
          it is a single list containing every distinct song name.
        * ``playlist_name`` is ``playlist_obj['name']``.
        * ``song_info_dict`` maps each lower-cased song name to a dict with
          the keys ``name``, ``artist``, ``id``, ``album_img_url`` and
          ``site_url``. Tracks sharing a name overwrite each other, so the
          last one in the playlist wins.
    """
    playlist_obj = playlist_detail(playlist_id)
    playlist_name = playlist_obj['name']
    model = self.song2vec_model

    song_list = []        # distinct song names, in first-seen order
    seen_songs = set()    # same names, for O(1) duplicate checks
    vec_list = []         # vec_list[i] is the vector of song_list[i]
    song_info_dict = {}

    data_process_logger.info('clustering playlist: %s' % playlist_name)
    for item in playlist_obj['tracks']:
        song = item['name'].lower()
        song_info_dict[song] = {
            'name': song,
            'artist': item['artists'][0]['name'],
            'id': item['id'],
            'album_img_url': item['album']['picUrl'],
            'site_url': 'http://music.163.com/#/song?id=%s' % item['id']
        }
        if song in seen_songs:
            continue
        seen_songs.add(song)
        song_list.append(song)

        vocab_entry = model.vocab.get(song)
        if vocab_entry and len(model.syn0norm):
            song_vec = model.syn0norm[vocab_entry.index]
        else:
            data_process_logger.warning(
                'The song %s of playlist-%s is not in dataset' % (song, playlist_name))
            # Unknown songs still take part in the clustering, as a zero vector.
            song_vec = [0] * model.vector_size
        vec_list.append(song_vec)

    # Clustering needs at least two samples.
    if len(vec_list) <= 1:
        return [song_list], playlist_name, song_info_dict

    cluster_result = AffinityPropagation().fit(vec_list, song_list)

    # When affinity propagation does not converge, scikit-learn reports no
    # cluster centers and labels every sample -1. Indexing an empty cluster
    # list with -1 would raise IndexError, so fall back to one cluster that
    # holds every song. len() is used because the attribute is an array.
    n_clusters = len(cluster_result.cluster_centers_indices_)
    if n_clusters == 0:
        data_process_logger.warning(
            'clustering of playlist-%s produced no cluster centers' % playlist_name)
        return [song_list], playlist_name, song_info_dict

    cluster_array = [[] for _ in range(n_clusters)]
    for label, song in zip(cluster_result.labels_, song_list):
        cluster_array[label].append(song)
    return cluster_array, playlist_name, song_info_dict
评论列表
文章目录