Finalise graph visualisation

This commit is contained in:
William Jeynes
2026-04-09 15:07:18 +01:00
parent 2d5255e148
commit c613444c43
3 changed files with 50 additions and 26 deletions
+1
View File
@@ -2,3 +2,4 @@
dist/ dist/
node_modules/ node_modules/
src/data.json src/data.json
src/titles.json
+12 -6
View File
@@ -3,6 +3,7 @@ import ForceGraph2D from "react-force-graph-2d";
import * as d3 from "d3-force-3d"; import * as d3 from "d3-force-3d";
import data from "./data.json"; import data from "./data.json";
import titlesData from "./titles.json";
function drawRoundedRect(ctx, x, y, width, height, radius) { function drawRoundedRect(ctx, x, y, width, height, radius) {
const r = Math.min(radius, width / 2, height / 2); const r = Math.min(radius, width / 2, height / 2);
@@ -24,10 +25,13 @@ function buildGraph(data) {
const nodes = []; const nodes = [];
const links = []; const links = [];
// Create a lookup map for quick access
const titleMap = new Map(titlesData.map(t => [t.cluster_id, t.title]));
data.claim_clusters.forEach((cluster) => { data.claim_clusters.forEach((cluster) => {
nodes.push({ nodes.push({
id: cluster.cluster_id, id: cluster.cluster_id,
label: cluster.title || "Unnamed Claim Cluster", label: titleMap.get(cluster.cluster_id) || cluster.title || "Unnamed Claim Cluster",
type: "claim_cluster", type: "claim_cluster",
members: cluster.members members: cluster.members
}); });
@@ -36,7 +40,7 @@ function buildGraph(data) {
data.event_clusters.forEach((cluster) => { data.event_clusters.forEach((cluster) => {
nodes.push({ nodes.push({
id: cluster.cluster_id, id: cluster.cluster_id,
label: cluster.title || "Unnamed Event Cluster", label: titleMap.get(cluster.cluster_id) || cluster.title || "Unnamed Event Cluster",
type: "event_cluster", type: "event_cluster",
members: cluster.members members: cluster.members
}); });
@@ -124,13 +128,15 @@ export function App() {
// Stronger repulsion // Stronger repulsion
fgRef.current.d3Force( fgRef.current.d3Force(
"charge", "charge",
d3.forceManyBody().strength(-3000) d3.forceManyBody().strength(-10000)
); );
// Link distance // Link distance
fgRef.current.d3Force( fgRef.current.d3Force(
"link", "link",
d3.forceLink().distance(240) d3.forceLink().distance(140)
); );
// Collision based on dynamic box size // Collision based on dynamic box size
@@ -138,7 +144,7 @@ export function App() {
"collision", "collision",
d3.forceCollide((node) => { d3.forceCollide((node) => {
const dims = node.__bckgDimensions; const dims = node.__bckgDimensions;
return dims ? Math.max(dims[0], dims[1]) / 2 + 16 : 20; return dims ? Math.max(dims[0], dims[1]) / 2 + 32 : 40;
}) })
); );
@@ -224,7 +230,7 @@ export function App() {
<br /> <br />
<input <input
type="range" type="range"
min="8" min="9"
max="49" max="49"
value={minGraphSize} value={minGraphSize}
onChange={(e) => setMinGraphSize(Number(e.target.value))} onChange={(e) => setMinGraphSize(Number(e.target.value))}
+36 -19
View File
@@ -4,17 +4,21 @@ from openai import OpenAI
from tqdm import tqdm from tqdm import tqdm
from dotenv import load_dotenv from dotenv import load_dotenv
import os import os
from concurrent.futures import ThreadPoolExecutor, as_completed
# -------------------------------
# Load environment and OpenAI client
# -------------------------------
load_dotenv() # Load environment variables from .env file load_dotenv() # Load environment variables from .env file
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
# ------------------------------- # -------------------------------
# CONFIG # CONFIG
# ------------------------------- # -------------------------------
INPUT_FILE = "../../data/clustered_output.json" # Your original JSON INPUT_FILE = "../../data/clustered_output.json" # Your original JSON
OUTPUT_FILE = "../../data/clustered_output2.json" # Output JSON file OUTPUT_FILE = "../../data/clustered_output2.json" # Output JSON file
OPENAI_MODEL = "gpt-5-nano" OPENAI_MODEL = "gpt-5-nano"
# ------------------------------- # -------------------------------
# Load data # Load data
# ------------------------------- # -------------------------------
@@ -62,8 +66,8 @@ for node in graph:
queue.append(neighbor) queue.append(neighbor)
components.append(component) components.append(component)
# Filter components with size > 8 # Filter components with size > 8 and < 50
large_components = [c for c in components if len(c) > 8 and len(c) < 50] large_components = [c for c in components if 8 < len(c) < 50]
print("Connected components (size > 8):", len(large_components)) print("Connected components (size > 8):", len(large_components))
print("Total clusters in those components:", sum(len(c) for c in large_components)) print("Total clusters in those components:", sum(len(c) for c in large_components))
@@ -94,13 +98,14 @@ def generate_title(texts):
"\n\nTitle:" "\n\nTitle:"
) )
try: try:
response = client.chat.completions.create(model=OPENAI_MODEL, response = client.chat.completions.create(
messages=[ model=OPENAI_MODEL,
{"role": "system", "content": "You are a helpful assistant who creates short, meaningful titles."}, messages=[
{"role": "user", "content": prompt} {"role": "system", "content": "You are a helpful assistant who creates short, meaningful titles."},
]) {"role": "user", "content": prompt}
]
)
title = response.choices[0].message.content.strip() title = response.choices[0].message.content.strip()
if title.lower().startswith("title:"): if title.lower().startswith("title:"):
title = title[6:].strip() title = title[6:].strip()
return title return title
@@ -109,19 +114,31 @@ def generate_title(texts):
return "Untitled Cluster" return "Untitled Cluster"
# ------------------------------- # -------------------------------
# Generate title per cluster with progress bar # Wrapper for parallel execution
# -------------------------------
def generate_title_for_cluster(cluster_id):
    """Produce the title record for a single cluster.

    Bundles text extraction and title generation into one callable so the
    per-cluster work can be handed to an executor as a single task.

    Args:
        cluster_id: Identifier of the cluster to title; passed through
            unchanged to ``extract_texts_for_cluster`` and echoed back in
            the result.

    Returns:
        A dict with two keys: ``"cluster_id"`` (the argument as given) and
        ``"title"`` (whatever ``generate_title`` returns for the cluster's
        extracted texts).
    """
    return {
        "cluster_id": cluster_id,
        "title": generate_title(extract_texts_for_cluster(cluster_id)),
    }
# -------------------------------
# Generate titles in parallel
# ------------------------------- # -------------------------------
clusters_in_large_components = [cid for comp in large_components for cid in comp] clusters_in_large_components = [cid for comp in large_components for cid in comp]
output = [] output = []
print("\nGenerating GPT titles for clusters...") print("\nGenerating GPT titles for clusters (parallel)...")
for cluster_id in tqdm(clusters_in_large_components, desc="Clusters", ncols=100):
texts = extract_texts_for_cluster(cluster_id) with ThreadPoolExecutor(max_workers=10) as executor:
title = generate_title(texts) future_to_cluster = {executor.submit(generate_title_for_cluster, cid): cid for cid in clusters_in_large_components}
output.append({ for future in tqdm(as_completed(future_to_cluster), total=len(clusters_in_large_components), desc="Clusters", ncols=100):
"cluster_id": cluster_id, try:
"title": title result = future.result()
}) output.append(result)
except Exception as e:
cid = future_to_cluster[future]
print(f"Error processing cluster {cid}: {e}")
output.append({"cluster_id": cid, "title": "Untitled Cluster"})
# ------------------------------- # -------------------------------
# Save JSON # Save JSON