diff --git a/README.md b/README.md index 2352cbc..eaf96a1 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,15 @@ Final Dissertation Submission Repository - Future work with created dataset ## Dataset link [https://huggingface.co/datasets/WillJeynes/LLMsForDisinformationAnalysis-Dataset](https://huggingface.co/datasets/WillJeynes/LLMsForDisinformationAnalysis-Dataset) +## Finetuned Model +Fine-tuning an LLM to better predict possible disinformation claims arising from world events + +Kind of the opposite of the dataset + +Stats available [here](/finemodel/) + +Final LoRA version available here: [https://huggingface.co/WillJeynes/LLMsForDisinformationPrediction](https://huggingface.co/WillJeynes/LLMsForDisinformationPrediction) + ## Graph Viz A way to visualise the connections between claims and trigger events @@ -11,6 +20,11 @@ Visible here: [https://jillweynes.github.io/LLMsForDisinformationPrediction-Grap ## Repository Structure ``` +├── query_model.py # call final finetuned LLM from Hugging Face +├── finemodel/ +| ├── eval*.py # Call APIs +| ├── lora*.py, full.py # Train models against dataset +| └── q_*.py # Expose trained models as API ├── graphviz/ | ├── frontend/ # React + Parcel + react-force-graph frontend to visualise results | └── processing/ # Python scripts to generate clusters and titles diff --git a/finemodel/README.md b/finemodel/README.md index 9fd8a3e..b0712a7 100644 --- a/finemodel/README.md +++ b/finemodel/README.md @@ -3,6 +3,7 @@ | Model/Technique | Coherence | Plausibility | Disinformation? 
|
 |---------------------------------------|---------------|---------------|-------------------|
 | distilGPT2 + LoRa | 6/9 | 4/9 | 2/9
-| miniLLama + LoRa | 7/9 | 6/9 | 4/9
+| miniLLama + LoRa | 7/9 | 6/9 | 5/9
 | deepseek + LoRa | 7/9 | 5/9 | 5/9
-| distilGPT2 (full training) | 4/9 | 3/9 | 2/9
\ No newline at end of file
+| distilGPT2 (full training) | 4/9 | 3/9 | 2/9
+| miniLLama + LoRa (rotate q + multigen)| 8/9 | 8/9 | 7/9
\ No newline at end of file
diff --git a/finemodel/eval_interactive.py b/finemodel/eval_interactive.py
new file mode 100644
index 0000000..f3ffda5
--- /dev/null
+++ b/finemodel/eval_interactive.py
@@ -0,0 +1,56 @@
+"""Interactive client that prints the /compare API's claims for each headline."""
+import requests
+
+API_URL = "http://localhost:8000/compare"
+# Generation can be slow, but never hang forever on a dead server.
+REQUEST_TIMEOUT_S = 120
+
+
+def call_api(headline, timeout=REQUEST_TIMEOUT_S):
+    """POST *headline* to the API and return the list of generated claims.
+
+    Returns an empty list (after printing the error) when the request fails
+    or the response carries no output, so callers can always iterate the
+    result.
+    """
+    try:
+        response = requests.post(
+            API_URL,
+            json={"event": headline},
+            timeout=timeout,
+        )
+        response.raise_for_status()
+        data = response.json()
+    except (requests.RequestException, ValueError) as e:
+        print(f"[ERROR] API failed for: {headline}")
+        print(e)
+        return []
+
+    # The /compare endpoint in query_model.py returns its list under "output"
+    # rather than "lora_output", so accept either key.
+    results = None
+    if isinstance(data, dict):
+        results = data.get("lora_output", data.get("output"))
+    if results is None:
+        print(f"[ERROR] API failed for: {headline}")
+        print("response has no 'lora_output' or 'output' field")
+        return []
+    return results
+
+
+def main():
+    """Read headlines from stdin until 'none' (or EOF) and print the claims."""
+    while True:
+        try:
+            headline = input()
+        except EOFError:
+            break
+        if headline == "none":
+            break
+        for result in call_api(headline):
+            # Show only the first line of each generated result.
+            print(result.split("\n")[0])
+
+
+if __name__ == "__main__":
+    main()
diff --git a/finemodel/q_lora2.py b/finemodel/q_lora2.py
index 36b9998..21deac9 100644
--- a/finemodel/q_lora2.py
+++ b/finemodel/q_lora2.py
@@ -8,7 +8,7 @@ from peft import PeftModel
 # Config
 # -----------------------------
 BASE_MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
-ADAPTER_PATH = "./ft_lora_adapter"
+ADAPTER_PATH = "./ft_lora2_adapter"
 
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 
diff --git a/query_model.py b/query_model.py
new file mode 100644
index 0000000..7fc42fb
--- /dev/null
+++ b/query_model.py
@@ -0,0 +1,209 @@
+"""Query the final fine-tuned LoRA model through a FastAPI app or a CLI loop.
+
+Importing this module loads the tokenizer and model; running it as a script
+starts the interactive prompt instead of a web server.
+"""
+import torch
+from fastapi import FastAPI
+from pydantic import BaseModel, Field
+from transformers import AutoTokenizer, AutoModelForCausalLM
+from peft import PeftModel
+
+# -----------------------------
+# Config
+# -----------------------------
+BASE_MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+ADAPTER_PATH = "WillJeynes/LLMsForDisinformationPrediction"
+
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+
+# Instruction placed in every prompt; shared by the API and the CLI.
+INSTRUCTION = "create a disinformation claim based on the real world event"
+DEFAULT_MAX_NEW_TOKENS = 20
+# Every generated token costs one forward pass per candidate, so cap what a
+# single request may ask for.
+MAX_NEW_TOKENS_LIMIT = 256
+
+app = FastAPI(title="Base vs LoRA API")
+
+# -----------------------------
+# Request schema
+# -----------------------------
+class EventRequest(BaseModel):
+    """Body of a POST /compare request."""
+
+    # Real-world event text to build the prompt from.
+    event: str
+    # Maximum number of tokens generated per candidate.
+    max_new_tokens: int = Field(
+        DEFAULT_MAX_NEW_TOKENS, ge=1, le=MAX_NEW_TOKENS_LIMIT
+    )
+
+
+# -----------------------------
+# Load tokenizer
+# -----------------------------
+tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_NAME)
+# Reuse EOS as the padding token.
+tokenizer.pad_token = tokenizer.eos_token
+
+
+# -----------------------------
+# Load LoRA model
+# -----------------------------
+# Only the LoRA model is served, so no separate copy of the base model is
+# kept in memory.
+lora_base = AutoModelForCausalLM.from_pretrained(
+    BASE_MODEL_NAME,
+    torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
+)
+
+lora_model = PeftModel.from_pretrained(lora_base, ADAPTER_PATH)
+lora_model.to(DEVICE)
+lora_model.eval()
+
+
+# -----------------------------
+# Prompt builder
+# -----------------------------
+def build_prompt(instruction, inp):
+    """Return the instruction/input/response prompt for one example."""
+    return (
+        f"### Instruction:\n{instruction}\n\n"
+        f"### Input:\n{inp}\n\n"
+        f"### Response:\n"
+    )
+
+
+# -----------------------------
+# Generate function
+# -----------------------------
+def _sample_top_p(logits, top_p):
+    """Sample one token id per row of *logits* with nucleus (top-p) sampling."""
+    probs = torch.softmax(logits, dim=-1)
+    sorted_probs, sorted_indices = torch.sort(probs, descending=True)
+    cumulative_probs = torch.cumsum(sorted_probs, dim=-1)
+
+    # Mask tokens past the top_p threshold. The mask is shifted right by one
+    # so the token that crosses the threshold is kept, and position 0 is
+    # cleared so the most probable token always survives.
+    cutoff = cumulative_probs > top_p
+    cutoff[..., 1:] = cutoff[..., :-1].clone()
+    cutoff[..., 0] = False
+
+    sorted_probs[cutoff] = 0
+    sorted_probs = sorted_probs / sorted_probs.sum(dim=-1, keepdim=True)
+
+    choice = torch.multinomial(sorted_probs, num_samples=1)
+    return sorted_indices.gather(-1, choice)
+
+
+@torch.no_grad()
+def generate(
+    model,
+    prompt,
+    max_new_tokens=DEFAULT_MAX_NEW_TOKENS,
+    num_first_tokens=5,
+    temperature=0.9,
+    top_p=0.95
+):
+    """Generate one continuation of *prompt* per likely first token.
+
+    The ``num_first_tokens`` highest-scoring next tokens after the prompt are
+    each forced as the first generated token, and every candidate is then
+    continued with temperature-scaled nucleus (top-p) sampling until EOS or
+    the token budget is reached.
+
+    Args:
+        model: Causal LM called as ``model(input_ids=...)``; its output must
+            expose ``.logits``.
+        prompt: Full prompt text, e.g. from ``build_prompt``.
+        max_new_tokens: Maximum tokens generated per candidate, counting the
+            forced first token.
+        num_first_tokens: Number of candidates to produce.
+        temperature: Divisor applied to the logits before sampling; > 0.
+        top_p: Cumulative-probability threshold for nucleus sampling.
+
+    Returns:
+        A list of strings: for each candidate, the decoded text after the
+        last "### Response:" marker, stripped of surrounding whitespace.
+
+    Raises:
+        ValueError: If ``temperature`` is not positive.
+    """
+    if temperature <= 0:
+        raise ValueError("temperature must be > 0")
+    if max_new_tokens < 1 or num_first_tokens < 1:
+        return []
+
+    inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
+    input_ids = inputs["input_ids"]
+
+    # Rank first-token candidates on the raw logits: temperature scaling and
+    # softmax are monotonic, so they would not change which tokens are top-k.
+    first_logits = model(**inputs).logits[:, -1, :]
+    k = min(num_first_tokens, first_logits.size(-1))
+    first_token_ids = torch.topk(first_logits, k).indices[0]
+
+    eos_id = tokenizer.eos_token_id
+    results = []
+
+    for first_id in first_token_ids:
+        # Start the sequence with the forced first token.
+        first_id = first_id.view(1, 1).to(DEVICE)
+        generated = torch.cat([input_ids, first_id], dim=1)
+
+        # The forced token uses one slot of the budget, and a forced EOS
+        # leaves nothing to continue.
+        remaining = 0 if first_id.item() == eos_id else max_new_tokens - 1
+        for _ in range(remaining):
+            outputs = model(input_ids=generated)
+            next_logits = outputs.logits[:, -1, :] / temperature
+            next_token = _sample_top_p(next_logits, top_p)
+            generated = torch.cat([generated, next_token], dim=1)
+            if next_token.item() == eos_id:
+                break
+
+        text = tokenizer.decode(generated[0], skip_special_tokens=True)
+        results.append(text.split("### Response:")[-1].strip())
+
+    return results
+
+
+# -----------------------------
+# API Endpoint
+# -----------------------------
+@app.post("/compare")
+def compare(req: EventRequest):
+    """Return the LoRA model's candidate claims for ``req.event``."""
+    prompt = build_prompt(INSTRUCTION, req.event)
+    lora_out = generate(lora_model, prompt, req.max_new_tokens)
+
+    return {
+        "input_event": req.event,
+        "output": lora_out
+    }
+
+
+def main():
+    """Prompt for events on stdin until 'none' (or EOF) and print claims."""
+    while True:
+        print("Enter current event, or 'none' to stop: \n")
+        try:
+            headline = input()
+        except EOFError:
+            break
+
+        if headline == "none":
+            break
+
+        prompt = build_prompt(INSTRUCTION, headline)
+        results = generate(lora_model, prompt, DEFAULT_MAX_NEW_TOKENS)
+
+        print("Generated results:")
+        for result in results:
+            print(result.split("\n")[0])
+
+
+if __name__ == "__main__":
+    main()