diff --git a/README.md b/README.md
index 2352cbc..eaf96a1 100644
--- a/README.md
+++ b/README.md
@@ -4,6 +4,15 @@ Final Dissertation Submission Repository - Future work with created dataset
 ## Dataset link
 [https://huggingface.co/datasets/WillJeynes/LLMsForDisinformationAnalysis-Dataset](https://huggingface.co/datasets/WillJeynes/LLMsForDisinformationAnalysis-Dataset)
 
+## Finetuned Model
+Fine-tuning an LLM to better predict possible disinformation claims arising from world events
+
+Kind of the opposite of the dataset
+
+Stats available [here](/finemodel/)
+
+Final LoRa version available here: [https://huggingface.co/WillJeynes/LLMsForDisinformationPrediction](https://huggingface.co/WillJeynes/LLMsForDisinformationPrediction)
+
 ## Graph Viz
 A way to visualise the connections between claims and trigger events
 
@@ -11,6 +20,11 @@ Visible here: [https://jillweynes.github.io/LLMsForDisinformationPrediction-Grap
 
 ## Repository Structure
 ```
+├── query_model.py                  # Call final finetuned LLM from Hugging Face
+├── finemodel/
+|   ├── eval*.py                    # Call APIs
+|   ├── lora*.py, full.py           # Train models against dataset
+|   └── q_*.py                      # Expose trained models as API
 ├── graphviz/
 |   ├── frontend/                   # React + Parcel + react-force-graph frontend to visualise results
 |   └── processing/                 # Python scripts to generate clusters and titles
diff --git a/finemodel/README.md b/finemodel/README.md
index 9fd8a3e..b0712a7 100644
--- a/finemodel/README.md
+++ b/finemodel/README.md
@@ -3,6 +3,7 @@
 | Model/Technique                       | Coherence     | Plausibility  | Disinformation?   |
 |---------------------------------------|---------------|---------------|-------------------|
 | distilGPT2 + LoRa                     | 6/9           | 4/9           | 2/9
-| miniLLama + LoRa                      | 7/9           | 6/9           | 4/9
+| miniLLama + LoRa                      | 7/9           | 6/9           | 5/9
 | deepseek + LoRa                       | 7/9           | 5/9           | 5/9
-| distilGPT2 (full training)            | 4/9           | 3/9           | 2/9
\ No newline at end of file
+| distilGPT2 (full training)            | 4/9           | 3/9           | 2/9
+| miniLLama + LoRa (rotate q + multigen)| 8/9           | 8/9           | 7/9
\ No newline at end of file
diff --git a/finemodel/eval_interactive.py b/finemodel/eval_interactive.py
new file mode 100644
index 0000000..f3ffda5
--- /dev/null
+++ b/finemodel/eval_interactive.py
@@ -0,0 +1,58 @@
+"""Interactive client for a locally served fine-tuned model.
+
+Reads headlines from stdin, posts each one to the ``/compare`` endpoint and
+prints the first line of every returned result. Enter ``none`` to quit.
+"""
+import requests
+
+API_URL = "http://localhost:8000/compare"
+# Generation can be slow, so allow a generous timeout rather than letting a
+# hung server block the prompt forever.
+REQUEST_TIMEOUT_SECONDS = 300
+
+
+def call_api(headline):
+    """Request generated output for ``headline`` from the local API.
+
+    Args:
+        headline: Event text sent as the ``event`` field of the JSON body.
+
+    Returns:
+        The ``lora_output`` field of the JSON response, or an empty list if
+        the request failed or the response could not be read. The caller
+        iterates over the result, so both paths return an iterable.
+    """
+    try:
+        response = requests.post(
+            API_URL,
+            json={"event": headline},
+            timeout=REQUEST_TIMEOUT_SECONDS,
+        )
+        response.raise_for_status()
+        # NOTE(review): assumes the server replies with a "lora_output" list;
+        # confirm against the server being queried.
+        return response.json()["lora_output"]
+    except (requests.RequestException, ValueError, KeyError, TypeError) as e:
+        print(f"[ERROR] API failed for: {headline}")
+        print(e)
+        # An empty list keeps the caller's loop a no-op; returning None values
+        # would crash it on ``None.split``.
+        return []
+
+
+def main():
+    """Prompt for headlines until ``none`` or end of input is reached."""
+    while True:
+        try:
+            headline = input()
+        except EOFError:
+            break
+        if headline == "none":
+            break
+
+        for result in call_api(headline):
+            print(result.split("\n")[0])
+
+
+if __name__ == "__main__":
+    main()
diff --git a/finemodel/q_lora2.py b/finemodel/q_lora2.py
index 36b9998..21deac9 100644
--- a/finemodel/q_lora2.py
+++ b/finemodel/q_lora2.py
@@ -8,7 +8,7 @@ from peft import PeftModel
 # Config
 # -----------------------------
 BASE_MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
-ADAPTER_PATH = "./ft_lora_adapter"
+ADAPTER_PATH = "./ft_lora2_adapter"
 
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 
diff --git a/query_model.py b/query_model.py
new file mode 100644
index 0000000..7fc42fb
--- /dev/null
+++ b/query_model.py
@@ -0,0 +1,213 @@
+"""Query the final fine-tuned LoRA model over HTTP or interactively.
+
+Importing this module loads the TinyLlama base model and applies the LoRA
+adapter named by ``ADAPTER_PATH``. It defines a FastAPI ``app`` with a
+``/compare`` endpoint; running the file directly starts an interactive
+prompt on stdin instead.
+"""
+import torch
+from fastapi import FastAPI
+from pydantic import BaseModel, Field
+from transformers import AutoTokenizer, AutoModelForCausalLM
+from peft import PeftModel
+
+# -----------------------------
+# Config
+# -----------------------------
+BASE_MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+ADAPTER_PATH = "WillJeynes/LLMsForDisinformationPrediction"
+
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+
+# Instruction text placed in every prompt.
+INSTRUCTION = "create a disinformation claim based on the real world event"
+
+DEFAULT_MAX_NEW_TOKENS = 20
+# Largest value accepted from API clients: each token costs a full forward
+# pass per candidate, so an unbounded request could stall the server.
+MAX_NEW_TOKENS_LIMIT = 256
+
+app = FastAPI(title="Base vs LoRA API")
+
+# -----------------------------
+# Request schema
+# -----------------------------
+class EventRequest(BaseModel):
+    """JSON body accepted by the ``/compare`` endpoint."""
+
+    # Real-world event text to generate claims for.
+    event: str
+    # Token budget per generated candidate.
+    max_new_tokens: int = Field(
+        DEFAULT_MAX_NEW_TOKENS, ge=1, le=MAX_NEW_TOKENS_LIMIT
+    )
+
+
+# -----------------------------
+# Load tokenizer
+# -----------------------------
+tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_NAME)
+tokenizer.pad_token = tokenizer.eos_token
+
+
+# -----------------------------
+# Load LoRA model
+# -----------------------------
+# Only the adapted model is loaded: a separate, unused copy of the base model
+# would double memory use. NOTE(review): if base-vs-LoRA comparison is needed
+# again, PEFT's ``disable_adapter()`` context manager may avoid a second copy.
+lora_base = AutoModelForCausalLM.from_pretrained(
+    BASE_MODEL_NAME,
+    torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
+)
+
+lora_model = PeftModel.from_pretrained(lora_base, ADAPTER_PATH)
+lora_model.to(DEVICE)
+lora_model.eval()
+
+
+# -----------------------------
+# Prompt builder
+# -----------------------------
+def build_prompt(instruction, inp):
+    """Return the prompt text for ``instruction`` applied to ``inp``.
+
+    The prompt ends with an empty ``### Response:`` section for the model to
+    complete.
+    """
+    return (
+        f"### Instruction:\n{instruction}\n\n"
+        f"### Input:\n{inp}\n\n"
+        f"### Response:\n"
+    )
+
+
+# -----------------------------
+# Generate function
+# -----------------------------
+def _sample_top_p(probs, top_p):
+    """Sample one token id per row of ``probs`` with nucleus sampling."""
+    sorted_probs, sorted_indices = torch.sort(probs, descending=True)
+    cumulative_probs = torch.cumsum(sorted_probs, dim=-1)
+
+    # Mask tokens beyond the top_p mass. The mask is shifted right by one so
+    # the token that crosses the threshold is kept and the most likely token
+    # is never removed.
+    cutoff = cumulative_probs > top_p
+    cutoff[..., 1:] = cutoff[..., :-1].clone()
+    cutoff[..., 0] = False
+
+    sorted_probs = sorted_probs.masked_fill(cutoff, 0.0)
+    sorted_probs = sorted_probs / sorted_probs.sum(dim=-1, keepdim=True)
+
+    choice = torch.multinomial(sorted_probs, num_samples=1)
+    return sorted_indices.gather(-1, choice)
+
+
+@torch.no_grad()
+def generate(
+    model,
+    prompt,
+    max_new_tokens=20,
+    num_first_tokens=5,
+    temperature=0.9,
+    top_p=0.95
+):
+    """Generate several completions of ``prompt`` that differ in first token.
+
+    The ``num_first_tokens`` most likely first tokens are each forced in turn;
+    every candidate is then continued with temperature and top-p sampling.
+
+    Args:
+        model: Causal language model called as ``model(input_ids=...)``.
+        prompt: Prompt text, tokenized with the module-level tokenizer.
+        max_new_tokens: Maximum tokens per candidate, forced token included.
+        num_first_tokens: Number of candidates (distinct first tokens).
+        temperature: Positive divisor applied to the logits before softmax.
+        top_p: Cumulative probability mass kept for nucleus sampling.
+
+    Returns:
+        A list with one decoded, stripped completion per candidate, ordered
+        from most to least likely first token. The prompt is not included.
+
+    Raises:
+        ValueError: If ``temperature`` is not positive.
+    """
+    if temperature <= 0:
+        raise ValueError("temperature must be positive")
+
+    inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
+    input_ids = inputs["input_ids"]
+    prompt_length = input_ids.shape[1]
+
+    # First-token distribution. Sampling maths runs in float32 because the
+    # model weights are float16 on CUDA.
+    first_logits = model(**inputs).logits[:, -1, :].float() / temperature
+    first_probs = torch.softmax(first_logits, dim=-1)
+
+    # Most likely first tokens, capped at the vocabulary size.
+    num_candidates = min(num_first_tokens, first_probs.shape[-1])
+    _, topk_indices = torch.topk(first_probs, num_candidates)
+
+    results = []
+    for first_token in topk_indices[0]:
+        # Start the sequence with the forced first token.
+        generated = torch.cat([input_ids, first_token.view(1, 1)], dim=1)
+
+        # The forced token already counts towards max_new_tokens.
+        for _ in range(max_new_tokens - 1):
+            # Stop once EOS was produced (including a forced EOS).
+            if generated[0, -1].item() == tokenizer.eos_token_id:
+                break
+
+            logits = model(input_ids=generated).logits[:, -1, :].float()
+            next_probs = torch.softmax(logits / temperature, dim=-1)
+            next_token = _sample_top_p(next_probs, top_p)
+            generated = torch.cat([generated, next_token], dim=1)
+
+        # Decode only the newly generated tokens so a "### Response:" marker
+        # repeated by the model cannot truncate the answer.
+        new_tokens = generated[0, prompt_length:]
+        text = tokenizer.decode(new_tokens, skip_special_tokens=True)
+        results.append(text.strip())
+
+    return results
+
+
+# -----------------------------
+# API Endpoint
+# -----------------------------
+@app.post("/compare")
+def compare(req: EventRequest):
+    """Return generated claims from the LoRA model for the posted event."""
+    prompt = build_prompt(INSTRUCTION, req.event)
+    lora_out = generate(lora_model, prompt, req.max_new_tokens)
+
+    return {
+        "input_event": req.event,
+        "output": lora_out
+    }
+
+
+def main():
+    """Read events from stdin and print the first line of each result."""
+    while True:
+        print("Enter current event, or 'none' to stop: \n")
+        try:
+            headline = input()
+        except EOFError:
+            break
+
+        if headline == "none":
+            break
+
+        prompt = build_prompt(INSTRUCTION, headline)
+        results = generate(lora_model, prompt, DEFAULT_MAX_NEW_TOKENS)
+
+        print("Generated results:")
+        for result in results:
+            print(result.split("\n")[0])
+
+
+if __name__ == "__main__":
+    main()