Finalise graph visualisation
This commit is contained in:
@@ -2,3 +2,4 @@
|
|||||||
dist/
|
dist/
|
||||||
node_modules/
|
node_modules/
|
||||||
src/data.json
|
src/data.json
|
||||||
|
src/titles.json
|
||||||
@@ -3,6 +3,7 @@ import ForceGraph2D from "react-force-graph-2d";
|
|||||||
import * as d3 from "d3-force-3d";
|
import * as d3 from "d3-force-3d";
|
||||||
|
|
||||||
import data from "./data.json";
|
import data from "./data.json";
|
||||||
|
import titlesData from "./titles.json";
|
||||||
|
|
||||||
function drawRoundedRect(ctx, x, y, width, height, radius) {
|
function drawRoundedRect(ctx, x, y, width, height, radius) {
|
||||||
const r = Math.min(radius, width / 2, height / 2);
|
const r = Math.min(radius, width / 2, height / 2);
|
||||||
@@ -24,10 +25,13 @@ function buildGraph(data) {
|
|||||||
const nodes = [];
|
const nodes = [];
|
||||||
const links = [];
|
const links = [];
|
||||||
|
|
||||||
|
// Create a lookup map for quick access
|
||||||
|
const titleMap = new Map(titlesData.map(t => [t.cluster_id, t.title]));
|
||||||
|
|
||||||
data.claim_clusters.forEach((cluster) => {
|
data.claim_clusters.forEach((cluster) => {
|
||||||
nodes.push({
|
nodes.push({
|
||||||
id: cluster.cluster_id,
|
id: cluster.cluster_id,
|
||||||
label: cluster.title || "Unnamed Claim Cluster",
|
label: titleMap.get(cluster.cluster_id) || cluster.title || "Unnamed Claim Cluster",
|
||||||
type: "claim_cluster",
|
type: "claim_cluster",
|
||||||
members: cluster.members
|
members: cluster.members
|
||||||
});
|
});
|
||||||
@@ -36,7 +40,7 @@ function buildGraph(data) {
|
|||||||
data.event_clusters.forEach((cluster) => {
|
data.event_clusters.forEach((cluster) => {
|
||||||
nodes.push({
|
nodes.push({
|
||||||
id: cluster.cluster_id,
|
id: cluster.cluster_id,
|
||||||
label: cluster.title || "Unnamed Event Cluster",
|
label: titleMap.get(cluster.cluster_id) || cluster.title || "Unnamed Event Cluster",
|
||||||
type: "event_cluster",
|
type: "event_cluster",
|
||||||
members: cluster.members
|
members: cluster.members
|
||||||
});
|
});
|
||||||
@@ -124,13 +128,15 @@ export function App() {
|
|||||||
// Stronger repulsion
|
// Stronger repulsion
|
||||||
fgRef.current.d3Force(
|
fgRef.current.d3Force(
|
||||||
"charge",
|
"charge",
|
||||||
d3.forceManyBody().strength(-3000)
|
d3.forceManyBody().strength(-10000)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// Link distance
|
// Link distance
|
||||||
fgRef.current.d3Force(
|
fgRef.current.d3Force(
|
||||||
"link",
|
"link",
|
||||||
d3.forceLink().distance(240)
|
d3.forceLink().distance(140)
|
||||||
);
|
);
|
||||||
|
|
||||||
// Collision based on dynamic box size
|
// Collision based on dynamic box size
|
||||||
@@ -138,7 +144,7 @@ export function App() {
|
|||||||
"collision",
|
"collision",
|
||||||
d3.forceCollide((node) => {
|
d3.forceCollide((node) => {
|
||||||
const dims = node.__bckgDimensions;
|
const dims = node.__bckgDimensions;
|
||||||
return dims ? Math.max(dims[0], dims[1]) / 2 + 16 : 20;
|
return dims ? Math.max(dims[0], dims[1]) / 2 + 32 : 40;
|
||||||
})
|
})
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -224,7 +230,7 @@ export function App() {
|
|||||||
<br />
|
<br />
|
||||||
<input
|
<input
|
||||||
type="range"
|
type="range"
|
||||||
min="8"
|
min="9"
|
||||||
max="49"
|
max="49"
|
||||||
value={minGraphSize}
|
value={minGraphSize}
|
||||||
onChange={(e) => setMinGraphSize(Number(e.target.value))}
|
onChange={(e) => setMinGraphSize(Number(e.target.value))}
|
||||||
|
|||||||
@@ -4,17 +4,21 @@ from openai import OpenAI
|
|||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
import os
|
import os
|
||||||
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
|
|
||||||
|
# -------------------------------
|
||||||
|
# Load environment and OpenAI client
|
||||||
|
# -------------------------------
|
||||||
load_dotenv() # Load environment variables from .env file
|
load_dotenv() # Load environment variables from .env file
|
||||||
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
||||||
|
|
||||||
# -------------------------------
|
# -------------------------------
|
||||||
# CONFIG
|
# CONFIG
|
||||||
# -------------------------------
|
# -------------------------------
|
||||||
INPUT_FILE = "../../data/clustered_output.json" # Your original JSON
|
INPUT_FILE = "../../data/clustered_output.json" # Your original JSON
|
||||||
OUTPUT_FILE = "../../data/clustered_output2.json" # Output JSON file
|
OUTPUT_FILE = "../../data/clustered_output2.json" # Output JSON file
|
||||||
OPENAI_MODEL = "gpt-5-nano"
|
OPENAI_MODEL = "gpt-5-nano"
|
||||||
|
|
||||||
|
|
||||||
# -------------------------------
|
# -------------------------------
|
||||||
# Load data
|
# Load data
|
||||||
# -------------------------------
|
# -------------------------------
|
||||||
@@ -62,8 +66,8 @@ for node in graph:
|
|||||||
queue.append(neighbor)
|
queue.append(neighbor)
|
||||||
components.append(component)
|
components.append(component)
|
||||||
|
|
||||||
# Filter components with size > 8
|
# Filter components with size > 8 and < 50
|
||||||
large_components = [c for c in components if len(c) > 8 and len(c) < 50]
|
large_components = [c for c in components if 8 < len(c) < 50]
|
||||||
|
|
||||||
print("Connected components (size > 8):", len(large_components))
|
print("Connected components (size > 8):", len(large_components))
|
||||||
print("Total clusters in those components:", sum(len(c) for c in large_components))
|
print("Total clusters in those components:", sum(len(c) for c in large_components))
|
||||||
@@ -94,13 +98,14 @@ def generate_title(texts):
|
|||||||
"\n\nTitle:"
|
"\n\nTitle:"
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
response = client.chat.completions.create(model=OPENAI_MODEL,
|
response = client.chat.completions.create(
|
||||||
messages=[
|
model=OPENAI_MODEL,
|
||||||
{"role": "system", "content": "You are a helpful assistant who creates short, meaningful titles."},
|
messages=[
|
||||||
{"role": "user", "content": prompt}
|
{"role": "system", "content": "You are a helpful assistant who creates short, meaningful titles."},
|
||||||
])
|
{"role": "user", "content": prompt}
|
||||||
|
]
|
||||||
|
)
|
||||||
title = response.choices[0].message.content.strip()
|
title = response.choices[0].message.content.strip()
|
||||||
|
|
||||||
if title.lower().startswith("title:"):
|
if title.lower().startswith("title:"):
|
||||||
title = title[6:].strip()
|
title = title[6:].strip()
|
||||||
return title
|
return title
|
||||||
@@ -109,19 +114,31 @@ def generate_title(texts):
|
|||||||
return "Untitled Cluster"
|
return "Untitled Cluster"
|
||||||
|
|
||||||
# -------------------------------
|
# -------------------------------
|
||||||
# Generate title per cluster with progress bar
|
# Wrapper for parallel execution
|
||||||
|
# -------------------------------
|
||||||
|
def generate_title_for_cluster(cluster_id):
|
||||||
|
texts = extract_texts_for_cluster(cluster_id)
|
||||||
|
title = generate_title(texts)
|
||||||
|
return {"cluster_id": cluster_id, "title": title}
|
||||||
|
|
||||||
|
# -------------------------------
|
||||||
|
# Generate titles in parallel
|
||||||
# -------------------------------
|
# -------------------------------
|
||||||
clusters_in_large_components = [cid for comp in large_components for cid in comp]
|
clusters_in_large_components = [cid for comp in large_components for cid in comp]
|
||||||
output = []
|
output = []
|
||||||
|
|
||||||
print("\nGenerating GPT titles for clusters...")
|
print("\nGenerating GPT titles for clusters (parallel)...")
|
||||||
for cluster_id in tqdm(clusters_in_large_components, desc="Clusters", ncols=100):
|
|
||||||
texts = extract_texts_for_cluster(cluster_id)
|
with ThreadPoolExecutor(max_workers=10) as executor:
|
||||||
title = generate_title(texts)
|
future_to_cluster = {executor.submit(generate_title_for_cluster, cid): cid for cid in clusters_in_large_components}
|
||||||
output.append({
|
for future in tqdm(as_completed(future_to_cluster), total=len(clusters_in_large_components), desc="Clusters", ncols=100):
|
||||||
"cluster_id": cluster_id,
|
try:
|
||||||
"title": title
|
result = future.result()
|
||||||
})
|
output.append(result)
|
||||||
|
except Exception as e:
|
||||||
|
cid = future_to_cluster[future]
|
||||||
|
print(f"Error processing cluster {cid}: {e}")
|
||||||
|
output.append({"cluster_id": cid, "title": "Untitled Cluster"})
|
||||||
|
|
||||||
# -------------------------------
|
# -------------------------------
|
||||||
# Save JSON
|
# Save JSON
|
||||||
|
|||||||
Reference in New Issue
Block a user