import string

import nltk
from nltk.tokenize import word_tokenize

# Fetch the 'punkt' tokenizer models at import time so word_tokenize works.
# NOTE(review): a module-level download is a side effect; consider moving it
# behind a guard or into application setup if import cost matters.
nltk.download('punkt')


def preprocess_text(texts):
    """Preprocess a list of texts: lowercase, strip punctuation, tokenize.

    Args:
        texts (list): List of text strings to preprocess.

    Returns:
        list: One list of word tokens per input text, in input order.
    """
    # Build the punctuation-stripping translation table once — it is
    # loop-invariant, so rebuilding it per text was wasted work.
    strip_punct = str.maketrans('', '', string.punctuation)
    return [
        word_tokenize(text.lower().translate(strip_punct))
        for text in texts
    ]