Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/empty-yaks-follow.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@workflow/web": patch
---

Invoke the endpoints health check in web o11y and render result in toolbar
4 changes: 3 additions & 1 deletion packages/web/src/app/layout-client.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import { usePathname, useRouter, useSearchParams } from 'next/navigation';
import { ThemeProvider, useTheme } from 'next-themes';
import { useEffect, useRef } from 'react';
import { ConnectionStatus } from '@/components/display-utils/connection-status';
import { EndpointsHealthStatus } from '@/components/display-utils/endpoints-health-status';
import { SettingsDropdown } from '@/components/settings-dropdown';
import { Toaster } from '@/components/ui/sonner';
import { buildUrlWithConfig, useQueryParamConfig } from '@/lib/config';
Expand Down Expand Up @@ -144,7 +145,8 @@ function LayoutContent({ children }: LayoutClientProps) {
<Logo />
</h1>
</Link>
<div className="ml-auto flex items-center gap-2">
<div className="ml-auto flex items-center gap-4">
<EndpointsHealthStatus config={config} />
<ConnectionStatus config={config} />
<SettingsDropdown />
</div>
Expand Down
221 changes: 221 additions & 0 deletions packages/web/src/components/display-utils/endpoints-health-status.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
'use client';

import { AlertCircleIcon, CheckCircle2Icon, LoaderIcon } from 'lucide-react';
import { useEffect, useState } from 'react';
import {
Tooltip,
TooltipContent,
TooltipTrigger,
} from '@/components/ui/tooltip';
import type { WorldConfig } from '@/lib/config-world';

interface EndpointsHealthStatusProps {
config: WorldConfig;
}

interface HealthCheckResult {
flow: 'pending' | 'success' | 'error';
step: 'pending' | 'success' | 'error';
flowMessage?: string;
stepMessage?: string;
checkedAt?: string;
}

const HEALTH_CHECK_SESSION_KEY = 'workflow-endpoints-health-check';

function getSessionHealthCheck(configKey: string): HealthCheckResult | null {
try {
const stored = sessionStorage.getItem(
`${HEALTH_CHECK_SESSION_KEY}-${configKey}`
);
if (stored) {
return JSON.parse(stored);
}
} catch {
// Ignore sessionStorage errors (e.g., in SSR or private browsing)
}
return null;
}

function setSessionHealthCheck(
configKey: string,
result: HealthCheckResult
): void {
try {
sessionStorage.setItem(
`${HEALTH_CHECK_SESSION_KEY}-${configKey}`,
JSON.stringify(result)
);
} catch {
// Ignore sessionStorage errors
}
}

function getConfigKey(config: WorldConfig): string {
// Create a unique key based on relevant config values
return `${config.backend || 'local'}-${config.port || '3000'}`;
Comment on lines +55 to +56
Copy link

Copilot AI Dec 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The config key generation only considers 'backend' and 'port' fields, but this may not uniquely identify all possible backend configurations. For example, Vercel backends with different env/project/team combinations or Postgres backends with different database URLs would share the same cache key if they happen to use the same port. This could lead to cached health check results being incorrectly reused across different backend configurations. Consider including all relevant config fields that uniquely identify the backend (e.g., env, project, team, dataDir, postgresUrl).

Suggested change
// Create a unique key based on relevant config values
return `${config.backend || 'local'}-${config.port || '3000'}`;
// Create a unique key based on all relevant config values that uniquely identify the backend
// Include backend, port, and backend-specific fields
const keyObj: Record<string, unknown> = {
backend: config.backend || 'local',
port: config.port || '3000',
};
// Add backend-specific fields
if ('env' in config && config.env) keyObj.env = config.env;
if ('project' in config && config.project) keyObj.project = config.project;
if ('team' in config && config.team) keyObj.team = config.team;
if ('dataDir' in config && config.dataDir) keyObj.dataDir = config.dataDir;
if ('postgresUrl' in config && config.postgresUrl) keyObj.postgresUrl = config.postgresUrl;
// Add any other fields that may uniquely identify the backend as needed
return JSON.stringify(keyObj);

Copilot uses AI. Check for mistakes.
}

async function checkEndpointHealth(
baseUrl: string,
endpoint: 'flow' | 'step'
): Promise<{ success: boolean; message: string }> {
try {
const url = new URL(
`/.well-known/workflow/v1/${endpoint}?__health`,
baseUrl
);
const response = await fetch(url.toString(), {
method: 'POST',
// Short timeout for health checks
signal: AbortSignal.timeout(5000),
});

if (response.ok) {
const text = await response.text();
return { success: true, message: text };
}
return {
success: false,
message: `HTTP ${response.status}: ${response.statusText}`,
};
} catch (error) {
const message =
error instanceof Error ? error.message : 'Connection failed';
return { success: false, message };
}
}

export function EndpointsHealthStatus({ config }: EndpointsHealthStatusProps) {
const [healthCheck, setHealthCheck] = useState<HealthCheckResult>({
flow: 'pending',
step: 'pending',
});
const [isChecking, setIsChecking] = useState(false);

useEffect(() => {
const configKey = getConfigKey(config);
const cached = getSessionHealthCheck(configKey);

// If we have a cached result from this session, use it
if (cached) {
setHealthCheck(cached);
return;
}

// Otherwise, perform the health check
const performHealthCheck = async () => {
setIsChecking(true);

// Determine base URL based on config
const port = config.port || '3000';
const baseUrl = `http://localhost:${port}`;
Comment on lines +110 to +112
Copy link

Copilot AI Dec 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The health check always uses localhost regardless of the backend configuration. When config.backend is 'vercel' or another non-local backend, this will fail to check the actual endpoints being used. The component should derive the appropriate base URL based on the backend type, similar to how other components like ConnectionStatus handle different backends.

Copilot uses AI. Check for mistakes.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this would fail if the user deployed this observability app on vercel right? could we use an env var like NEXT_PUBLIC_URL or something?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems to me like the World/runtime should have a canonical way of getting its public URL, which is partly in the versioning spec, but in the meantime, we should disable this feature for non-local worlds

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The main motivation for this feature is to diagnose issues with Vercel deployments where workflows are not getting past the start() function (stuck in pending), so we should not disable for non-local worlds.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@TooTallNate sure, but this PR doesn't do that, since it only calls localhost from the web UI, which isn't on the same host as the vercel deployment. This PR would need to be extended to include a conditional for world-vercel that checks for DEPLOYMENT_ID or DEPLOYMENT_URL or whatever we use, and ping against that, and probably would also need to use the vercel auth to bypass the preview environment protection

Comment on lines +110 to +112
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// Determine base URL based on config
const port = config.port || '3000';
const baseUrl = `http://localhost:${port}`;
// Determine base URL based on backend type
// For local backend: use localhost with configured port
// For deployed backends (vercel, postgres): use current origin
let baseUrl: string;
const backend = config.backend || 'local';
if (backend === 'local') {
const port = config.port || '3000';
baseUrl = `http://localhost:${port}`;
} else {
// For deployed backends, use the current window origin
baseUrl = typeof window !== 'undefined' ? window.location.origin : '';
}

The health check component hard-codes http://localhost: for all backend types, but this will fail for deployed backends (Vercel, Postgres) where the server is not running on localhost.

View Details

Analysis

Health check hardcodes localhost, fails for deployed Vercel/Postgres backends

What fails: EndpointsHealthStatus component constructs health check URLs using hardcoded http://localhost: , which fails when the frontend is deployed to Vercel or a separate deployment. The component needs to check if endpoints at /.well-known/workflow/v1/flow and /.well-known/workflow/v1/step are available, but only attempts to access them on localhost.

How to reproduce:

  1. Deploy the observability UI frontend to Vercel (or any non-localhost domain)
  2. Set backend config to 'vercel' or 'postgres'
  3. Load the UI in a browser
  4. Observe the health status indicator showing "Endpoint issues" even though the endpoints are actually healthy

Result: Browser attempts to fetch from http://localhost:3000/.well-known/workflow/v1/flow which results in connection failures (localhost refers to the user's machine, not the server). The UI always shows "Endpoint issues" in the tooltip.

Expected: For 'vercel' and 'postgres' backends, the health check should use window.location.origin to construct the URL, so it checks endpoints on the current domain where the frontend is deployed. The localhost approach should only be used for the 'local' backend where the frontend actually runs on localhost.

Fix: Modified EndpointsHealthStatus to check the backend config type - if it's 'local', use http://localhost: (original behavior); otherwise, use window.location.origin to access endpoints on the current deployment domain.


const [flowResult, stepResult] = await Promise.all([
checkEndpointHealth(baseUrl, 'flow'),
checkEndpointHealth(baseUrl, 'step'),
]);

const result: HealthCheckResult = {
flow: flowResult.success ? 'success' : 'error',
step: stepResult.success ? 'success' : 'error',
flowMessage: flowResult.message,
stepMessage: stepResult.message,
checkedAt: new Date().toISOString(),
};

setHealthCheck(result);
setSessionHealthCheck(configKey, result);
setIsChecking(false);
};

performHealthCheck();
Comment on lines +107 to +132
Copy link

Copilot AI Dec 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The useEffect that performs health checks does not handle cleanup when the component unmounts or when the config changes mid-check. If the component unmounts or config changes while async health checks are in progress, the state updates on lines 127-129 will execute on an unmounted component, causing a React warning. Consider using an AbortController to cancel in-flight requests and check if the component is still mounted before calling setState.

Copilot uses AI. Check for mistakes.
}, [config]);
Comment on lines +96 to +133
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The useEffect hook starts an async health check operation but lacks a cleanup function to handle component unmounting. This can cause React state update warnings if the component unmounts before the async operation completes.

View Details
📝 Patch Details
diff --git a/packages/web/src/components/display-utils/endpoints-health-status.tsx b/packages/web/src/components/display-utils/endpoints-health-status.tsx
index bad42f3..c0b19f2 100644
--- a/packages/web/src/components/display-utils/endpoints-health-status.tsx
+++ b/packages/web/src/components/display-utils/endpoints-health-status.tsx
@@ -58,17 +58,23 @@ function getConfigKey(config: WorldConfig): string {
 
 async function checkEndpointHealth(
   baseUrl: string,
-  endpoint: 'flow' | 'step'
+  endpoint: 'flow' | 'step',
+  signal?: AbortSignal
 ): Promise<{ success: boolean; message: string }> {
   try {
     const url = new URL(
       `/.well-known/workflow/v1/${endpoint}?__health`,
       baseUrl
     );
+    // Combine provided signal with timeout signal
+    const timeoutSignal = AbortSignal.timeout(5000);
+    const combinedSignal = signal
+      ? AbortSignal.any([signal, timeoutSignal])
+      : timeoutSignal;
+
     const response = await fetch(url.toString(), {
       method: 'POST',
-      // Short timeout for health checks
-      signal: AbortSignal.timeout(5000),
+      signal: combinedSignal,
     });
 
     if (response.ok) {
@@ -103,8 +109,13 @@ export function EndpointsHealthStatus({ config }: EndpointsHealthStatusProps) {
       return;
     }
 
+    // Track whether the component is still mounted
+    let isMounted = true;
+    const abortController = new AbortController();
+
     // Otherwise, perform the health check
     const performHealthCheck = async () => {
+      if (!isMounted || abortController.signal.aborted) return;
       setIsChecking(true);
 
       // Determine base URL based on config
@@ -112,10 +123,13 @@ export function EndpointsHealthStatus({ config }: EndpointsHealthStatusProps) {
       const baseUrl = `http://localhost:${port}`;
 
       const [flowResult, stepResult] = await Promise.all([
-        checkEndpointHealth(baseUrl, 'flow'),
-        checkEndpointHealth(baseUrl, 'step'),
+        checkEndpointHealth(baseUrl, 'flow', abortController.signal),
+        checkEndpointHealth(baseUrl, 'step', abortController.signal),
       ]);
 
+      // Only update state if the component is still mounted
+      if (!isMounted || abortController.signal.aborted) return;
+
       const result: HealthCheckResult = {
         flow: flowResult.success ? 'success' : 'error',
         step: stepResult.success ? 'success' : 'error',
@@ -130,6 +144,12 @@ export function EndpointsHealthStatus({ config }: EndpointsHealthStatusProps) {
     };
 
     performHealthCheck();
+
+    // Cleanup function: cancel pending requests and mark component as unmounted
+    return () => {
+      isMounted = false;
+      abortController.abort();
+    };
   }, [config]);
 
   const allSuccess =

Analysis

Missing cleanup function in useEffect allows state updates on unmounted component

What fails: The EndpointsHealthStatus component's useEffect hook (lines 96-133 in packages/web/src/components/display-utils/endpoints-health-status.tsx) initiates async fetch operations without a cleanup function. When the component unmounts before the async operation completes, the subsequent setHealthCheck() and setIsChecking() calls attempt to update state on an unmounted component.

How to reproduce:

  1. Navigate to a page containing the EndpointsHealthStatus component
  2. Immediately navigate away before the health check completes (within 5 seconds)
  3. In development mode with React's StrictMode, observe the warning in browser console

Result: React warning appears: "Can't perform a React state update on an unmounted component. This is a no-op, but it indicates a memory leak in your application."

Expected: The component should implement proper cleanup to cancel pending async operations when unmounting, preventing state updates on unmounted components. Per React documentation on removing effect dependencies, effects with async operations should return cleanup functions that abort pending requests.

Fix implemented:

  • Added AbortController within the effect to manage async operation lifecycles
  • Added isMounted flag to track component mount state
  • Updated checkEndpointHealth() function to accept optional AbortSignal parameter
  • Combined component's abort signal with existing timeout signal using AbortSignal.any()
  • Added guard checks before state updates to prevent updates on unmounted components
  • Implemented cleanup function that aborts pending requests and marks component as unmounted

This ensures all pending fetch requests are cancelled when the component unmounts or the effect re-runs, preventing "state update on unmounted component" warnings and memory leaks.

Comment on lines +96 to +133
Copy link

Copilot AI Dec 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The useEffect hook has 'config' as a dependency, but it references the entire config object. In React, this will trigger the health check every time any field in the config object changes (even if it's a new object reference with the same values). Consider using a more stable dependency like a memoized config key from getConfigKey(config), or add specific config fields that actually affect the health check (backend, port) to the dependency array.

Copilot uses AI. Check for mistakes.

const allSuccess =
healthCheck.flow === 'success' && healthCheck.step === 'success';
const anyError = healthCheck.flow === 'error' || healthCheck.step === 'error';
const isPending =
healthCheck.flow === 'pending' || healthCheck.step === 'pending';

const getStatusIcon = () => {
if (isChecking || isPending) {
return (
<LoaderIcon className="w-4 h-4 text-muted-foreground animate-spin" />
);
}
if (allSuccess) {
return <CheckCircle2Icon className="w-4 h-4 text-green-500" />;
}
if (anyError) {
return <AlertCircleIcon className="w-4 h-4 text-amber-500" />;
}
return <LoaderIcon className="w-4 h-4 text-muted-foreground" />;
};

const getStatusText = () => {
if (isChecking || isPending) {
return 'Checking endpoints...';
}
if (allSuccess) {
return 'Endpoints healthy';
}
if (anyError) {
return 'Endpoint issues';
}
return 'Unknown status';
};

const getEndpointStatus = (status: 'pending' | 'success' | 'error') => {
if (status === 'success') {
return <CheckCircle2Icon className="w-3 h-3 text-green-500 inline" />;
}
if (status === 'error') {
return <AlertCircleIcon className="w-3 h-3 text-amber-500 inline" />;
}
return <LoaderIcon className="w-3 h-3 text-muted-foreground inline" />;
};

return (
<Tooltip>
<TooltipTrigger asChild>
<div className="flex items-center gap-1.5 text-sm text-muted-foreground cursor-default">
{getStatusIcon()}
<span>{getStatusText()}</span>
</div>
</TooltipTrigger>
<TooltipContent side="bottom" className="max-w-xs">
<div className="flex flex-col gap-2 text-xs">
<div className="font-medium">Workflow Endpoint Health</div>
<div className="flex flex-col gap-1">
<div className="flex items-center gap-2">
{getEndpointStatus(healthCheck.flow)}
<span className="font-mono">/.well-known/workflow/v1/flow</span>
</div>
{healthCheck.flowMessage && (
<div className="text-muted-foreground pl-5 truncate">
{healthCheck.flowMessage}
</div>
)}
</div>
<div className="flex flex-col gap-1">
<div className="flex items-center gap-2">
{getEndpointStatus(healthCheck.step)}
<span className="font-mono">/.well-known/workflow/v1/step</span>
</div>
{healthCheck.stepMessage && (
<div className="text-muted-foreground pl-5 truncate">
{healthCheck.stepMessage}
</div>
)}
</div>
{healthCheck.checkedAt && (
<div className="text-muted-foreground border-t pt-1 mt-1">
Checked at {new Date(healthCheck.checkedAt).toLocaleTimeString()}
</div>
)}
</div>
</TooltipContent>
</Tooltip>
);
}
Loading