def conll2modeldata(data):
    """
    Convert a CoNLL document into the dictionary format required by the model.

    The CoNLL text is split on newlines and each line is passed, in order, to
    ``handle_line`` together with one shared ``DocumentState``. Whenever
    ``handle_line`` returns something other than ``None`` that value is kept
    as the result; a later non-``None`` value replaces an earlier one.

    Args:
        data: dict holding the CoNLL text under the key ``'conll_str'``.

    Returns:
        The last non-``None`` value returned by ``handle_line``, expected to
        be a dict like:
        {
            "clusters": [[[1024,1024],[1024,1025]],[[876,876], [767,765], [541,544]]],
            "doc_key": "nw",
            "sentences": [["This", "is", "the", "first", "sentence", "."], ["This", "is", "the", "second", "."]],
            "speakers": [["spk1", "spk1", "spk1", "spk1", "spk1", "spk1"], ["spk2", "spk2", "spk2", "spk2", "spk2"]]
        }

    Raises:
        KeyError: if ``data`` has no ``'conll_str'`` entry.
        ValueError: if no line caused ``handle_line`` to return a document.
    """
    conll_str = data['conll_str']
    document_state = DocumentState()

    # Start from None so that "no document produced" is detectable instead of
    # surfacing as an UnboundLocalError on the return statement.
    model_file = None
    for line in conll_str.split('\n'):
        document = handle_line(line, document_state)
        if document is not None:
            model_file = document

    if model_file is None:
        raise ValueError(
            "conll_str did not yield a complete document; "
            "handle_line returned None for every line"
        )
    return model_file
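# Web-page residue captured with this snippet (not part of the code):
# "Comment list" / "Article table of contents"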