Finalise graph visualisation

This commit is contained in:
William Jeynes
2026-04-09 15:07:18 +01:00
parent 2d5255e148
commit c613444c43
3 changed files with 50 additions and 26 deletions
+1
View File
@@ -2,3 +2,4 @@
dist/
node_modules/
src/data.json
src/titles.json
+12 -6
View File
@@ -3,6 +3,7 @@ import ForceGraph2D from "react-force-graph-2d";
import * as d3 from "d3-force-3d";
import data from "./data.json";
import titlesData from "./titles.json";
function drawRoundedRect(ctx, x, y, width, height, radius) {
const r = Math.min(radius, width / 2, height / 2);
@@ -24,10 +25,13 @@ function buildGraph(data) {
const nodes = [];
const links = [];
// Create a lookup map for quick access
const titleMap = new Map(titlesData.map(t => [t.cluster_id, t.title]));
data.claim_clusters.forEach((cluster) => {
nodes.push({
id: cluster.cluster_id,
label: cluster.title || "Unnamed Claim Cluster",
label: titleMap.get(cluster.cluster_id) || cluster.title || "Unnamed Claim Cluster",
type: "claim_cluster",
members: cluster.members
});
@@ -36,7 +40,7 @@ function buildGraph(data) {
data.event_clusters.forEach((cluster) => {
nodes.push({
id: cluster.cluster_id,
label: cluster.title || "Unnamed Event Cluster",
label: titleMap.get(cluster.cluster_id) || cluster.title || "Unnamed Event Cluster",
type: "event_cluster",
members: cluster.members
});
@@ -124,13 +128,15 @@ export function App() {
// Stronger repulsion
fgRef.current.d3Force(
"charge",
d3.forceManyBody().strength(-3000)
d3.forceManyBody().strength(-10000)
);
// Link distance
fgRef.current.d3Force(
"link",
d3.forceLink().distance(240)
d3.forceLink().distance(140)
);
// Collision based on dynamic box size
@@ -138,7 +144,7 @@ export function App() {
"collision",
d3.forceCollide((node) => {
const dims = node.__bckgDimensions;
return dims ? Math.max(dims[0], dims[1]) / 2 + 16 : 20;
return dims ? Math.max(dims[0], dims[1]) / 2 + 32 : 40;
})
);
@@ -224,7 +230,7 @@ export function App() {
<br />
<input
type="range"
min="8"
min="9"
max="49"
value={minGraphSize}
onChange={(e) => setMinGraphSize(Number(e.target.value))}
+36 -19
View File
@@ -4,17 +4,21 @@ from openai import OpenAI
from tqdm import tqdm
from dotenv import load_dotenv
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
# -------------------------------
# Load environment and OpenAI client
# -------------------------------
load_dotenv() # Load environment variables from .env file
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
# -------------------------------
# CONFIG
# -------------------------------
INPUT_FILE = "../../data/clustered_output.json" # Your original JSON
OUTPUT_FILE = "../../data/clustered_output2.json" # Output JSON file
OUTPUT_FILE = "../../data/clustered_output2.json" # Output JSON file
OPENAI_MODEL = "gpt-5-nano"
# -------------------------------
# Load data
# -------------------------------
@@ -62,8 +66,8 @@ for node in graph:
queue.append(neighbor)
components.append(component)
# Filter components with size > 8
large_components = [c for c in components if len(c) > 8 and len(c) < 50]
# Keep only mid-sized components: strictly more than 8 and strictly fewer
# than 50 clusters (i.e. sizes 9..49).
large_components = [c for c in components if 8 < len(c) < 50]
# The reported range mirrors the filter above so the log does not overstate
# what was selected (components of 50 or more clusters are excluded).
print("Connected components (8 < size < 50):", len(large_components))
print("Total clusters in those components:", sum(len(c) for c in large_components))
@@ -94,13 +98,14 @@ def generate_title(texts):
"\n\nTitle:"
)
try:
response = client.chat.completions.create(model=OPENAI_MODEL,
messages=[
{"role": "system", "content": "You are a helpful assistant who creates short, meaningful titles."},
{"role": "user", "content": prompt}
])
response = client.chat.completions.create(
model=OPENAI_MODEL,
messages=[
{"role": "system", "content": "You are a helpful assistant who creates short, meaningful titles."},
{"role": "user", "content": prompt}
]
)
title = response.choices[0].message.content.strip()
if title.lower().startswith("title:"):
title = title[6:].strip()
return title
@@ -109,19 +114,31 @@ def generate_title(texts):
return "Untitled Cluster"
# -------------------------------
# Generate title per cluster with progress bar
# Wrapper for parallel execution
# -------------------------------
def generate_title_for_cluster(cluster_id):
    """Produce the title record for a single cluster.

    Gathers the cluster's texts with extract_texts_for_cluster, passes them
    to generate_title, and returns a dict with the keys "cluster_id" and
    "title". Kept as a one-argument function so it can be submitted to an
    executor directly.
    """
    return {
        "cluster_id": cluster_id,
        "title": generate_title(extract_texts_for_cluster(cluster_id)),
    }
# -------------------------------
# Generate titles in parallel
# -------------------------------
clusters_in_large_components = [cid for comp in large_components for cid in comp]
output = []
print("\nGenerating GPT titles for clusters...")
for cluster_id in tqdm(clusters_in_large_components, desc="Clusters", ncols=100):
texts = extract_texts_for_cluster(cluster_id)
title = generate_title(texts)
output.append({
"cluster_id": cluster_id,
"title": title
})
print("\nGenerating GPT titles for clusters (parallel)...")
with ThreadPoolExecutor(max_workers=10) as executor:
    # Remember which cluster each future belongs to so that a failure can
    # be reported against, and recorded for, the right cluster id.
    future_to_cluster = {
        executor.submit(generate_title_for_cluster, cid): cid
        for cid in clusters_in_large_components
    }
    # as_completed yields futures as they finish, so the order of entries in
    # `output` follows completion order rather than submission order.
    for future in tqdm(
        as_completed(future_to_cluster),
        total=len(clusters_in_large_components),
        desc="Clusters",
        ncols=100,
    ):
        try:
            output.append(future.result())
        except Exception as e:
            # Record a placeholder so the failed cluster still gets an entry.
            cid = future_to_cluster[future]
            print(f"Error processing cluster {cid}: {e}")
            output.append({"cluster_id": cid, "title": "Untitled Cluster"})
# -------------------------------
# Save JSON