def get_words(sents=None):
    """Tokenize each sentence into word and punctuation tokens.

    Args:
        sents: Iterable of sentence strings. ``None`` (the default) is
            treated as "no sentences".

    Returns:
        A list with one entry per input sentence, in input order. Each
        entry is the token list that NLTK's ``wordpunct_tokenize`` returns
        for that sentence, so the result is nested rather than flattened.
    """
    # A ``None`` sentinel replaces the original ``[]`` default, which was a
    # single list object shared by every call that omitted the argument.
    if sents is None:
        return []

    # Imported locally (as the original did) and only after the guard above,
    # so NLTK is needed only when a ``sents`` argument is actually supplied.
    from nltk.tokenize import wordpunct_tokenize

    return [wordpunct_tokenize(sent) for sent in sents]


# file_name = sys.argv[1]