Finalise graph visualisation

2026-04-09 15:07:18 +01:00
parent 2d5255e148
commit c613444c43
3 changed files with 50 additions and 26 deletions
@@ -2,3 +2,4 @@
 dist/
 node_modules/
 src/data.json
+src/titles.json
@@ -3,6 +3,7 @@ import ForceGraph2D from "react-force-graph-2d";
 import * as d3 from "d3-force-3d";

 import data from "./data.json";
+import titlesData from "./titles.json";

 function drawRoundedRect(ctx, x, y, width, height, radius) {
  const r = Math.min(radius, width / 2, height / 2);
@@ -24,10 +25,13 @@ function buildGraph(data) {
  const nodes = [];
  const links = [];

+  // Index titles.json entries by cluster_id so a cluster's title override is a constant-time lookup
+  const titleMap = new Map(titlesData.map(t => [t.cluster_id, t.title]));
+
  data.claim_clusters.forEach((cluster) => {
    nodes.push({
      id: cluster.cluster_id,
-      label: cluster.title || "Unnamed Claim Cluster",
+      label: titleMap.get(cluster.cluster_id) || cluster.title || "Unnamed Claim Cluster",
      type: "claim_cluster",
      members: cluster.members
    });
@@ -36,7 +40,7 @@ function buildGraph(data) {
  data.event_clusters.forEach((cluster) => {
    nodes.push({
      id: cluster.cluster_id,
-      label: cluster.title || "Unnamed Event Cluster",
+      label: titleMap.get(cluster.cluster_id) || cluster.title || "Unnamed Event Cluster",
      type: "event_cluster",
      members: cluster.members
    });
@@ -124,13 +128,15 @@ export function App() {
    // Stronger repulsion
    fgRef.current.d3Force(
      "charge",
-      d3.forceManyBody().strength(-3000)
+      d3.forceManyBody().strength(-10000)
    );

+    
+
    // Link distance
    fgRef.current.d3Force(
      "link",
-      d3.forceLink().distance(240)
+      d3.forceLink().distance(140)
    );

    // Collision based on dynamic box size
@@ -138,7 +144,7 @@ export function App() {
      "collision",
      d3.forceCollide((node) => {
        const dims = node.__bckgDimensions;
-        return dims ? Math.max(dims[0], dims[1]) / 2 + 16 : 20;
+        return dims ? Math.max(dims[0], dims[1]) / 2 + 32 : 40;
      })
    );

@@ -224,7 +230,7 @@ export function App() {
          <br />
          <input
            type="range"
-            min="8"
+            min="9"
            max="49"
            value={minGraphSize}
            onChange={(e) => setMinGraphSize(Number(e.target.value))}
@@ -4,9 +4,14 @@ from openai import OpenAI
 from tqdm import tqdm 
 from dotenv import load_dotenv
 import os
+from concurrent.futures import ThreadPoolExecutor, as_completed

+# -------------------------------
+# Load environment and OpenAI client
+# -------------------------------
 load_dotenv()  # Load environment variables from .env file
 client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+
 # -------------------------------
 # CONFIG
 # -------------------------------
@@ -14,7 +19,6 @@ INPUT_FILE = "../../data/clustered_output.json"        # Your original JSON
 OUTPUT_FILE = "../../data/clustered_output2.json"     # Output JSON file
 OPENAI_MODEL = "gpt-5-nano"

-
 # -------------------------------
 # Load data
 # -------------------------------
@@ -62,8 +66,8 @@ for node in graph:
                    queue.append(neighbor)
        components.append(component)

-# Filter components with size > 8
-large_components = [c for c in components if len(c) > 8 and len(c) < 50]
+# Filter components with size > 8 and < 50
+large_components = [c for c in components if 8 < len(c) < 50]

 print("Connected components (size > 8):", len(large_components))
 print("Total clusters in those components:", sum(len(c) for c in large_components))
@@ -94,13 +98,14 @@ def generate_title(texts):
        "\n\nTitle:"
    )
    try:
-        response = client.chat.completions.create(model=OPENAI_MODEL,
+        response = client.chat.completions.create(
+            model=OPENAI_MODEL,
            messages=[
                {"role": "system", "content": "You are a helpful assistant who creates short, meaningful titles."},
                {"role": "user", "content": prompt}
-        ])
+            ]
+        )
        title = response.choices[0].message.content.strip()
-        
        if title.lower().startswith("title:"):
            title = title[6:].strip()
        return title
@@ -109,19 +114,31 @@ def generate_title(texts):
        return "Untitled Cluster"

 # -------------------------------
-# Generate title per cluster with progress bar
+# Per-cluster worker for the thread pool: extract texts, generate a title
+# -------------------------------
+def generate_title_for_cluster(cluster_id):
+    texts = extract_texts_for_cluster(cluster_id)
+    title = generate_title(texts)
+    return {"cluster_id": cluster_id, "title": title}
+
+# -------------------------------
+# Generate titles in parallel
 # -------------------------------
 clusters_in_large_components = [cid for comp in large_components for cid in comp]
 output = []

-print("\nGenerating GPT titles for clusters...")
-for cluster_id in tqdm(clusters_in_large_components, desc="Clusters", ncols=100):
-    texts = extract_texts_for_cluster(cluster_id)
-    title = generate_title(texts)
-    output.append({
-        "cluster_id": cluster_id,
-        "title": title
-    })
+print("\nGenerating GPT titles for clusters (parallel)...")
+
+with ThreadPoolExecutor(max_workers=10) as executor:
+    future_to_cluster = {executor.submit(generate_title_for_cluster, cid): cid for cid in clusters_in_large_components}
+    for future in tqdm(as_completed(future_to_cluster), total=len(clusters_in_large_components), desc="Clusters", ncols=100):
+        try:
+            result = future.result()
+            output.append(result)
+        except Exception as e:
+            cid = future_to_cluster[future]
+            print(f"Error processing cluster {cid}: {e}")
+            output.append({"cluster_id": cid, "title": "Untitled Cluster"})

 # -------------------------------
 # Save JSON