from typing import List

from pycorenlp import StanfordCoreNLP

from code.helper import FileReader, FileWriter


def get_tokens(sentences: List[str],
               server_url: str = 'http://localhost:9000') -> List[List[str]]:
    """
    Tokenize every sentence with a Stanford CoreNLP server.

    Each sentence is sent to the server in its own request, using only the
    ``tokenize`` annotator and JSON output. The ``word`` field of every
    entry in the response's ``tokens`` list is collected.

    :param sentences: List containing the sentences to tokenize
    :param server_url: Base URL of the CoreNLP server to query; defaults to
        a server on localhost, port 9000
    :return: List of token-lists, one per input sentence and in the same
        order; an empty input list yields an empty list
    """
    nlp = StanfordCoreNLP(server_url)
    properties = {
        'annotators': 'tokenize',
        'outputFormat': 'json'
    }

    tokens = []
    for sentence in sentences:
        result = nlp.annotate(text=sentence, properties=properties)
        # Append in place rather than rebuilding the list with `+`, which
        # would copy every previously collected item on each iteration.
        tokens.append([word.get('word') for word in result.get('tokens')])
    return tokens


if __name__ == '__main__':
    # Read the corpus, tokenize it, and write the token lists to disk.
    # NOTE(review): presumably get_file_as_list yields one sentence per
    # list entry — confirm against code.helper.FileReader.
    file_sentences = FileReader.get_file_as_list('./corpus/projekt1_corpus.txt')
    file_tokens = get_tokens(file_sentences)
    FileWriter.write(file_tokens, './results/corenlp_tokens.txt')