diff --git a/graphviz/frontend/.gitignore b/graphviz/frontend/.gitignore
index a3a8c2a..a5e11f8 100644
--- a/graphviz/frontend/.gitignore
+++ b/graphviz/frontend/.gitignore
@@ -1,4 +1,5 @@
.parcel-cache/
dist/
node_modules/
-src/data.json
\ No newline at end of file
+src/data.json
+src/titles.json
\ No newline at end of file
diff --git a/graphviz/frontend/src/App.tsx b/graphviz/frontend/src/App.tsx
index be6d512..4022174 100644
--- a/graphviz/frontend/src/App.tsx
+++ b/graphviz/frontend/src/App.tsx
@@ -3,6 +3,7 @@ import ForceGraph2D from "react-force-graph-2d";
import * as d3 from "d3-force-3d";
import data from "./data.json";
+import titlesData from "./titles.json";
function drawRoundedRect(ctx, x, y, width, height, radius) {
const r = Math.min(radius, width / 2, height / 2);
@@ -24,10 +25,13 @@ function buildGraph(data) {
const nodes = [];
const links = [];
+ // Index titles.json entries by cluster_id; a non-empty title found here takes precedence over cluster.title
+ const titleMap = new Map(titlesData.map(t => [t.cluster_id, t.title]));
+
data.claim_clusters.forEach((cluster) => {
nodes.push({
id: cluster.cluster_id,
- label: cluster.title || "Unnamed Claim Cluster",
+ label: titleMap.get(cluster.cluster_id) || cluster.title || "Unnamed Claim Cluster",
type: "claim_cluster",
members: cluster.members
});
@@ -36,7 +40,7 @@ function buildGraph(data) {
data.event_clusters.forEach((cluster) => {
nodes.push({
id: cluster.cluster_id,
- label: cluster.title || "Unnamed Event Cluster",
+ label: titleMap.get(cluster.cluster_id) || cluster.title || "Unnamed Event Cluster",
type: "event_cluster",
members: cluster.members
});
@@ -124,13 +128,15 @@ export function App() {
// Stronger repulsion
fgRef.current.d3Force(
"charge",
- d3.forceManyBody().strength(-3000)
+ d3.forceManyBody().strength(-10000)
);
+
+
// Link distance
fgRef.current.d3Force(
"link",
- d3.forceLink().distance(240)
+ d3.forceLink().distance(140)
);
// Collision based on dynamic box size
@@ -138,7 +144,7 @@ export function App() {
"collision",
d3.forceCollide((node) => {
const dims = node.__bckgDimensions;
- return dims ? Math.max(dims[0], dims[1]) / 2 + 16 : 20;
+ return dims ? Math.max(dims[0], dims[1]) / 2 + 32 : 40;
})
);
@@ -224,7 +230,7 @@ export function App() {
setMinGraphSize(Number(e.target.value))}
diff --git a/graphviz/processing/process_clusters.py b/graphviz/processing/process_clusters.py
index d62d7e6..250211e 100644
--- a/graphviz/processing/process_clusters.py
+++ b/graphviz/processing/process_clusters.py
@@ -4,17 +4,21 @@ from openai import OpenAI
from tqdm import tqdm
from dotenv import load_dotenv
import os
+from concurrent.futures import ThreadPoolExecutor, as_completed
+# -------------------------------
+# Load environment and OpenAI client
+# -------------------------------
load_dotenv() # Load environment variables from .env file
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+
# -------------------------------
# CONFIG
# -------------------------------
INPUT_FILE = "../../data/clustered_output.json" # Your original JSON
-OUTPUT_FILE = "../../data/clustered_output2.json" # Output JSON file
+OUTPUT_FILE = "../../data/clustered_output2.json" # Output JSON file
OPENAI_MODEL = "gpt-5-nano"
-
# -------------------------------
# Load data
# -------------------------------
@@ -62,8 +66,8 @@ for node in graph:
queue.append(neighbor)
components.append(component)
-# Filter components with size > 8
-large_components = [c for c in components if len(c) > 8 and len(c) < 50]
+# Filter components with size > 8 and < 50
+large_components = [c for c in components if 8 < len(c) < 50]
print("Connected components (size > 8):", len(large_components))
print("Total clusters in those components:", sum(len(c) for c in large_components))
@@ -94,13 +98,14 @@ def generate_title(texts):
"\n\nTitle:"
)
try:
- response = client.chat.completions.create(model=OPENAI_MODEL,
- messages=[
- {"role": "system", "content": "You are a helpful assistant who creates short, meaningful titles."},
- {"role": "user", "content": prompt}
- ])
+ response = client.chat.completions.create(
+ model=OPENAI_MODEL,
+ messages=[
+ {"role": "system", "content": "You are a helpful assistant who creates short, meaningful titles."},
+ {"role": "user", "content": prompt}
+ ]
+ )
title = response.choices[0].message.content.strip()
-
if title.lower().startswith("title:"):
title = title[6:].strip()
return title
@@ -109,19 +114,31 @@ def generate_title(texts):
return "Untitled Cluster"
# -------------------------------
-# Generate title per cluster with progress bar
+# Wrapper for parallel execution
+# -------------------------------
+def generate_title_for_cluster(cluster_id):
+    """Return a dict with keys cluster_id and title for one cluster: extract its texts, then title them."""
+    cluster_texts = extract_texts_for_cluster(cluster_id)
+    return {"cluster_id": cluster_id, "title": generate_title(cluster_texts)}
+
+# -------------------------------
+# Generate titles in parallel
# -------------------------------
clusters_in_large_components = [cid for comp in large_components for cid in comp]
output = []
-print("\nGenerating GPT titles for clusters...")
-for cluster_id in tqdm(clusters_in_large_components, desc="Clusters", ncols=100):
- texts = extract_texts_for_cluster(cluster_id)
- title = generate_title(texts)
- output.append({
- "cluster_id": cluster_id,
- "title": title
- })
+print("\nGenerating GPT titles for clusters (parallel)...")
+# Fan the per-cluster work out over 10 threads; records are appended in completion order.
+with ThreadPoolExecutor(max_workers=10) as pool:
+    pending = {pool.submit(generate_title_for_cluster, cid): cid for cid in clusters_in_large_components}
+    progress = tqdm(as_completed(pending), total=len(clusters_in_large_components), desc="Clusters", ncols=100)
+    for done in progress:
+        try:
+            record = done.result()
+        except Exception as err:  # the worker raised: report it and fall back to a placeholder title
+            print(f"Error processing cluster {pending[done]}: {err}")
+            record = {"cluster_id": pending[done], "title": "Untitled Cluster"}
+        output.append(record)
# -------------------------------
# Save JSON