import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from peft import PeftModel
import gradio as gr

# =========================
# CONFIG
# =========================

# Base NLLB model
BASE_MODEL = "facebook/nllb-200-distilled-600M"

# Your LoRA repo on HF Hub
# 👉 CHANGE THIS to your actual repo if different
LORA_REPO = "flt7007/nllb-mizo-bible-lora"  # e.g. "frankiethiak/nllb-mizo-bible-lora"

# NLLB language codes
SRC_LANG = "eng_Latn"  # English
TGT_LANG = "lus_Latn"  # Mizo (Lushai)

# =========================
# LOAD TOKENIZER + MODEL
# =========================

device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32
print("Using device:", device)

# 🔴 IMPORTANT:
# Load the tokenizer from the LoRA repo, not the base model.
# This fixes decoded output showing mojibake (e.g. "â€œ" instead of real quotes).
tokenizer = AutoTokenizer.from_pretrained(
    LORA_REPO,
    src_lang=SRC_LANG
)

# Load base NLLB model
base_model = AutoModelForSeq2SeqLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=dtype
)

# Attach the LoRA adapters on top of the frozen base model
model = PeftModel.from_pretrained(
    base_model,
    LORA_REPO
)
model.to(device)
model.eval()

# Resolve the forced BOS token for Mizo so generation starts in the target language.
forced_bos_token_id = None
if hasattr(tokenizer, "lang_code_to_id"):
    forced_bos_token_id = tokenizer.lang_code_to_id.get(TGT_LANG)
else:
    # Newer transformers releases removed lang_code_to_id; NLLB language codes
    # are ordinary special tokens, so convert_tokens_to_ids works as a fallback.
    tgt_id = tokenizer.convert_tokens_to_ids(TGT_LANG)
    if tgt_id != tokenizer.unk_token_id:
        forced_bos_token_id = tgt_id
if forced_bos_token_id is None:
    print(f"Could not resolve a token id for {TGT_LANG}; continuing without forced BOS.")
print("forced_bos_token_id:", forced_bos_token_id)

# =========================
# TRANSLATION FUNCTION
# =========================

def translate_en_to_mizo(text, max_new_tokens, num_beams):
    text = text.strip()
    if not text:
        return ""

    inputs = tokenizer(
        text,
        return_tensors="pt"
    ).to(device)

    gen_kwargs = {
        "max_new_tokens": int(max_new_tokens),
        "num_beams": int(num_beams),
    }

    # Only pass forced_bos_token_id if we actually have it
    if forced_bos_token_id is not None:
        gen_kwargs["forced_bos_token_id"] = forced_bos_token_id

    with torch.no_grad():
        outputs = model.generate(**inputs, **gen_kwargs)

    decoded = tokenizer.batch_decode(
        outputs,
        skip_special_tokens=True
    )[0]
    return decoded.strip()

# =========================
# GRADIO UI
# =========================

TITLE = "English → Mizo (NLLB-200 + Bible+Dict LoRA)"
DESC = """
Low-resource MT demo for **English → Mizo** using:

- Base model: `facebook/nllb-200-distilled-600M`
- LoRA: fine-tuned on dictionary + Bible parallel data

The model leans toward Bible/educational register and is still a work in progress.
"""

with gr.Blocks() as demo:
    gr.Markdown(f"# {TITLE}")
    gr.Markdown(DESC)

    with gr.Row():
        with gr.Column():
            en_input = gr.Textbox(
                label="English input",
                lines=4,
                placeholder="Type an English sentence here…"
            )
            max_new_tokens = gr.Slider(
                minimum=10,
                maximum=200,
                value=80,
                step=5,
                label="Max new tokens"
            )
            num_beams = gr.Slider(
                minimum=1,
                maximum=8,
                value=4,
                step=1,
                label="Beam size"
            )
            translate_btn = gr.Button("Translate → Mizo")
        with gr.Column():
            mz_output = gr.Textbox(
                label="Mizo output",
                lines=6
            )

    translate_btn.click(
        fn=translate_en_to_mizo,
        inputs=[en_input, max_new_tokens, num_beams],
        outputs=mz_output
    )

demo.queue()

if __name__ == "__main__":
    demo.launch()
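
# ---------------------------------------------------------------
# Quick sanity check (a sketch, assuming this file is saved as app.py):
# importing the module loads the model but, thanks to the __main__ guard
# above, does not launch the UI, so you can exercise the translation
# function directly from the shell before deploying:
#
#     python -c "from app import translate_en_to_mizo; \
#         print(translate_en_to_mizo('Hello, world.', 60, 4))"
#
# The example sentence is arbitrary; any short English input works.
# ---------------------------------------------------------------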