from pathlib import Path

from hindi_tokenizer import load_tokenizer, encode_text, decode_text
def main():
    """Run an interactive console session that encodes and decodes text.

    The tokenizer configuration is looked up at ``output/hindi_encoder.json``
    relative to the current working directory. When that file is missing, an
    error message is printed and the function returns without prompting.

    Otherwise the tokenizer is loaded via ``load_tokenizer`` and the user is
    prompted repeatedly. For every non-blank line the tokens and token IDs
    returned by ``encode_text`` are printed, followed by the text returned by
    ``decode_text`` for those IDs.

    The session ends when the user enters ``quit`` (case-insensitive,
    surrounding whitespace ignored), when standard input reaches EOF, or when
    the user presses Ctrl-C at the prompt.

    Returns:
        None.
    """
    config_path = Path("output") / "hindi_encoder.json"
    # is_file() rather than exists(): a directory with this name is not a
    # usable configuration and should be reported the same way as a missing one.
    if not config_path.is_file():
        print("Error: Tokenizer configuration not found! Please train the tokenizer first.")
        return

    tokenizer = load_tokenizer(str(config_path))

    print("Hindi Text Encoder/Decoder (type 'quit' to exit)")
    print("-" * 50)

    while True:
        try:
            text = input("\nEnter Hindi text to encode/decode: ")
        except (EOFError, KeyboardInterrupt):
            # Closed/piped stdin or Ctrl-C: finish the prompt line and leave
            # quietly instead of dumping a traceback on the user.
            print()
            break

        command = text.strip()
        if not command:
            continue
        # Compare the stripped form so "quit " or " QUIT" also exits.
        if command.lower() == "quit":
            break

        # The raw input (not the stripped form) is what gets tokenized, so
        # the encoder sees exactly what the user typed.
        token_ids, tokens = encode_text(tokenizer, text)
        print("\nEncoding:")
        print(f"Tokens: {tokens}")
        print(f"Token IDs: {token_ids}")

        # Round-trip the IDs so the user can compare against the input.
        decoded_text = decode_text(tokenizer, token_ids)
        print("\nDecoding:")
        print(f"Text: {decoded_text}")
# Start the interactive session only when this file is executed as a script;
# importing it as a module must not open the prompt.
if __name__ == "__main__":
    main()