Spaces:
Runtime error
Runtime error
import string | |
import nltk | |
nltk.download('punkt') | |
from nltk.tokenize import word_tokenize | |
def preprocess_text(texts):
    """Lowercase, strip ASCII punctuation from, and tokenize each text.

    Each text is lowercased, every character listed in
    ``string.punctuation`` is deleted, and the remainder is split into
    tokens with NLTK's ``word_tokenize``.

    Args:
        texts (list): List of text strings to preprocess.

    Returns:
        list: One list of tokens per input text, in input order.
    """
    # Build the deletion table once; it is identical for every text, so
    # there is no reason to recreate it on each iteration.
    strip_punctuation = str.maketrans('', '', string.punctuation)
    return [
        word_tokenize(text.lower().translate(strip_punctuation))
        for text in texts
    ]