diff --git a/CLAUDE.md b/CLAUDE.md
index 224ead6eb..34cddb4b6 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -24,3 +24,48 @@ Use `exec_js` to run JS in the Phoenix browser runtime. jQuery `$()` is global.
**Click AI chat buttons:** `$('.ai-edit-restore-btn:contains("Undo")').click();`
**Check logs:** `get_browser_console_logs` with `filter` regex (e.g. `"AI UI"`, `"error"`) and `tail` — includes both browser console and Node.js (PhNode) logs. Use `get_terminal_logs` for Electron process output (only available if Phoenix was launched via `start_phoenix`).
+
+## Running Tests via MCP
+
+The test runner must be open as a separate Phoenix instance (it shows up as `phoenix-test-runner-*` in `get_phoenix_status`). Use `run_tests` to trigger test runs and `get_test_results` to poll for results. `take_screenshot` also works on the test runner.
+
+### Test categories
+- **unit** — Fast, no UI. Safe to run all at once (`run_tests category=unit`).
+- **integration** — Spawns a Phoenix iframe inside the test runner. Some specs require window focus and will hang if the test runner window isn't focused.
+- **LegacyInteg** — Like integration but uses the legacy test harness. Also spawns an embedded Phoenix instance.
+- **livepreview**, **mainview** — Specialized integration tests.
+- **Do NOT use:** `all`, `performance`, `extension`, `individualrun` — not actively supported.
+
+### Hierarchy: Category → Suite → Test
+- **Category** — top-level grouping: `unit`, `integration`, `LegacyInteg`, etc. Safe to run an entire category.
+- **Suite** — a group of related tests within a category (e.g. `integration: FileFilters` has ~20 tests). This is the `spec` parameter value.
+- **Test** — a single test within a suite.
+
+### Running all tests in a category
+```
+run_tests(category="unit")
+```
+
+### Running a single suite
+Pass the exact suite name as the `spec` parameter. **Suite names do NOT always have a category prefix.** Many suites are registered with just their plain name (e.g. `"CSS Parsing"`, `"Editor"`, `"JSUtils"`), while others include a prefix (e.g. `"unit:Phoenix Platform Tests"`, `"integration: FileFilters"`, `"LegacyInteg:ExtensionLoader"`). If the suite name is wrong, the test runner will show a blank page with 0 specs and appear stuck.
+
+**To discover the exact suite name**, run this in `exec_js` on the test runner instance:
+```js
+return jasmine.getEnv().topSuite().children.map(s => s.description);
+```
+
+Examples:
+```
+run_tests(category="unit", spec="CSS Parsing")
+run_tests(category="unit", spec="unit:Phoenix Platform Tests")
+run_tests(category="integration", spec="integration: FileFilters")
+run_tests(category="LegacyInteg", spec="LegacyInteg:ExtensionLoader")
+```
+
+### Running individual tests
+ You can pass a specific test's full name as `spec` to run just that one test. It is perfectly valid to run a single test. However, suites sometimes execute tests in order with shared state, so an individual test may fail in isolation but pass within its suite. If a single-test run fails, re-run the full suite to confirm: if the test passes as part of its suite, treat the isolated failure as an ordering artifact rather than a real test failure.
+
+### Gotchas
+- **Instance name changes on reload:** The test runner gets a new random instance name each time the page reloads. Always check `get_phoenix_status` after a `run_tests` call to get the current instance name.
+- **Integration tests may hang:** Specs labeled "needs window focus" will hang indefinitely if the test runner doesn't have OS-level window focus. If `get_test_results` starts timing out, the event loop is likely blocked by a stuck spec — use `force_reload_phoenix` to recover.
+- **LegacyInteg/integration tests spawn an iframe:** These tests open an embedded Phoenix instance inside the test runner, so they are slower and more resource-intensive than unit tests.
diff --git a/phoenix-builder-mcp/mcp-tools.js b/phoenix-builder-mcp/mcp-tools.js
index 7c466d3dd..e63128a01 100644
--- a/phoenix-builder-mcp/mcp-tools.js
+++ b/phoenix-builder-mcp/mcp-tools.js
@@ -383,6 +383,81 @@ export function registerTools(server, processManager, wsControlServer, phoenixDe
}
);
+ server.tool(
+ "run_tests",
+ "Run tests in the Phoenix test runner (SpecRunner.html). Reloads the test runner with the specified " +
+ "category and optional spec filter. The test runner must already be open in a browser with MCP enabled. " +
+ "Supported categories: unit, integration, LegacyInteg, livepreview, mainview. " +
+ "WARNING: Do NOT use 'all', 'performance', 'extension', or 'individualrun' categories — they are " +
+ "not actively supported and the full 'all' suite should never be run. " +
+ "To run all tests in a category, omit the spec parameter. " +
+ "To run a single suite, pass the suite name as spec (e.g. spec='unit: HTML Code Hinting'). " +
+ "Suite names are prefixed with the category and a colon, e.g. 'unit: Editor', 'unit: CSS Parsing'. " +
+ "You can also run individual specs by passing the full spec name, but note that individual specs " +
+ "may fail when run alone because suites often run tests in order with shared state — prefer " +
+ "running the full suite instead of individual specs. " +
+ "After calling run_tests, use get_test_results to poll for results.",
+ {
+ category: z.string().describe("Test category to run: unit, integration, LegacyInteg, livepreview, or mainview."),
+ spec: z.string().optional().describe("Optional suite or spec name to run within the category. " +
+ "Use the full name including category prefix, e.g. 'unit: CSS Parsing' for a suite. " +
+ "Prefer running full suites over individual specs, as specs may depend on suite execution order. " +
+ "Omit to run all tests in the category."),
+ instance: z.string().optional().describe("Target a specific test runner instance by name. Required when multiple instances are connected.")
+ },
+ async ({ category, spec, instance }) => {
+ try {
+ const result = await wsControlServer.requestRunTests(category, spec, instance);
+ return {
+ content: [{
+ type: "text",
+ text: JSON.stringify({
+ success: true,
+ message: result.message || "Test runner is reloading with category=" + category
+ })
+ }]
+ };
+ } catch (err) {
+ return {
+ content: [{
+ type: "text",
+ text: JSON.stringify({ error: err.message })
+ }]
+ };
+ }
+ }
+ );
+
+ server.tool(
+ "get_test_results",
+ "Get structured test results from the Phoenix test runner. Returns running status, pass/fail counts, " +
+ "failure details, and the currently executing spec. The test runner must already be open with MCP enabled.",
+ {
+ instance: z.string().optional().describe("Target a specific test runner instance by name. Required when multiple instances are connected.")
+ },
+ async ({ instance }) => {
+ try {
+ const result = await wsControlServer.requestTestResults(instance);
+ // Remove internal WS fields
+ delete result.type;
+ delete result.id;
+ return {
+ content: [{
+ type: "text",
+ text: JSON.stringify(result, null, 2)
+ }]
+ };
+ } catch (err) {
+ return {
+ content: [{
+ type: "text",
+ text: JSON.stringify({ error: err.message })
+ }]
+ };
+ }
+ }
+ );
+
server.tool(
"get_phoenix_status",
"Check the status of the Phoenix process and WebSocket connection.",
diff --git a/phoenix-builder-mcp/ws-control-server.js b/phoenix-builder-mcp/ws-control-server.js
index 452a2ac86..1ed071a63 100644
--- a/phoenix-builder-mcp/ws-control-server.js
+++ b/phoenix-builder-mcp/ws-control-server.js
@@ -109,6 +109,28 @@ export function createWSControlServer(port) {
break;
}
+ case "run_tests_response": {
+ const pendingRt = pendingRequests.get(msg.id);
+ if (pendingRt) {
+ pendingRequests.delete(msg.id);
+ if (msg.success) {
+ pendingRt.resolve({ success: true, message: msg.message });
+ } else {
+ pendingRt.reject(new Error(msg.message || "run_tests failed"));
+ }
+ }
+ break;
+ }
+
+ case "get_test_results_response": {
+ const pendingTr = pendingRequests.get(msg.id);
+ if (pendingTr) {
+ pendingRequests.delete(msg.id);
+ pendingTr.resolve(msg);
+ }
+ break;
+ }
+
case "reload_response": {
const pending3 = pendingRequests.get(msg.id);
if (pending3) {
@@ -390,6 +412,80 @@ export function createWSControlServer(port) {
});
}
+ function requestRunTests(category, spec, instanceName) {
+ return new Promise((resolve, reject) => {
+ const resolved = _resolveClient(instanceName);
+ if (resolved.error) {
+ reject(new Error(resolved.error));
+ return;
+ }
+
+ const { client } = resolved;
+ if (client.ws.readyState !== 1) {
+ reject(new Error("Phoenix client \"" + resolved.name + "\" is not connected"));
+ return;
+ }
+
+ const id = ++requestIdCounter;
+ const timeout = setTimeout(() => {
+ pendingRequests.delete(id);
+ reject(new Error("run_tests request timed out (30s)"));
+ }, 30000);
+
+ pendingRequests.set(id, {
+ resolve: (data) => {
+ clearTimeout(timeout);
+ resolve(data);
+ },
+ reject: (err) => {
+ clearTimeout(timeout);
+ reject(err);
+ }
+ });
+
+ const msg = { type: "run_tests_request", id, category };
+ if (spec) {
+ msg.spec = spec;
+ }
+ client.ws.send(JSON.stringify(msg));
+ });
+ }
+
+ function requestTestResults(instanceName) {
+ return new Promise((resolve, reject) => {
+ const resolved = _resolveClient(instanceName);
+ if (resolved.error) {
+ reject(new Error(resolved.error));
+ return;
+ }
+
+ const { client } = resolved;
+ if (client.ws.readyState !== 1) {
+ reject(new Error("Phoenix client \"" + resolved.name + "\" is not connected"));
+ return;
+ }
+
+ const id = ++requestIdCounter;
+ const timeout = setTimeout(() => {
+ pendingRequests.delete(id);
+ reject(new Error("get_test_results request timed out (30s)"));
+ }, 30000);
+
+ pendingRequests.set(id, {
+ resolve: (data) => {
+ clearTimeout(timeout);
+ resolve(data);
+ },
+ reject: (err) => {
+ clearTimeout(timeout);
+ reject(err);
+ }
+ });
+
+ client.ws.send(JSON.stringify({ type: "get_test_results_request", id }));
+ });
+ }
+
function getBrowserLogs(sinceLast, instanceName) {
const resolved = _resolveClient(instanceName);
if (resolved.error) {
@@ -442,6 +538,8 @@ export function createWSControlServer(port) {
requestLogs,
requestExecJs,
requestExecJsLivePreview,
+ requestRunTests,
+ requestTestResults,
getBrowserLogs,
clearBrowserLogs,
isClientConnected,
diff --git a/src/phoenix-builder/phoenix-builder-boot.js b/src/phoenix-builder/phoenix-builder-boot.js
index c577720ec..94116ec1f 100644
--- a/src/phoenix-builder/phoenix-builder-boot.js
+++ b/src/phoenix-builder/phoenix-builder-boot.js
@@ -90,7 +90,8 @@
let name = sessionStorage.getItem(INSTANCE_NAME_KEY);
if (!name) {
const hex = Math.floor(Math.random() * 0x10000).toString(16).padStart(4, "0");
- name = "phoenix-" + _getPlatformTag() + "-" + hex;
+ const prefix = window._phoenixBuilderNamePrefix || "phoenix";
+ name = prefix + "-" + _getPlatformTag() + "-" + hex;
sessionStorage.setItem(INSTANCE_NAME_KEY, name);
}
return name;
diff --git a/test/SpecRunner.html b/test/SpecRunner.html
index 1665f48c2..095bcb4e5 100644
--- a/test/SpecRunner.html
+++ b/test/SpecRunner.html
@@ -394,6 +394,10 @@
}());
+
+
+
+
diff --git a/test/SpecRunner.js b/test/SpecRunner.js
index 0c17e0ace..99bb6e82d 100644
--- a/test/SpecRunner.js
+++ b/test/SpecRunner.js
@@ -484,6 +484,7 @@ define(function (require, exports, module) {
// Create the reporter, which is really a model class that just gathers
// spec and performance data.
reporter = new UnitTestReporter(jasmineEnv, params.get("spec"), selectedCategories);
+ window._unitTestReporter = reporter;
SpecRunnerUtils.setUnitTestReporter(reporter);
// Optionally emit JUnit XML file for automated runs
diff --git a/test/phoenix-test-runner-mcp.js b/test/phoenix-test-runner-mcp.js
new file mode 100644
index 000000000..1284a49f4
--- /dev/null
+++ b/test/phoenix-test-runner-mcp.js
@@ -0,0 +1,188 @@
+/*
+ * GNU AGPL-3.0 License
+ *
+ * Copyright (c) 2021 - present core.ai . All rights reserved.
+ *
+ * This program is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see https://opensource.org/licenses/AGPL-3.0.
+ *
+ */
+
+ // Test-runner-specific MCP WebSocket handlers.
+ // Loaded as a plain script (non-AMD) in SpecRunner.html after phoenix-builder-boot.js.
+ // Registers handlers for screenshot_request, run_tests_request and get_test_results_request.
+
+(function () {
+
+ var builder = window._phoenixBuilder;
+ if (!builder) {
+ // No builder API on window (MCP not enabled), so there is nothing to register handlers on
+ return;
+ }
+
+ // --- screenshot_request ---
+ // Handles screenshot capture in the test runner window.
+ // Reuses Phoenix.app.screenShotBinary which works on Tauri, Electron, and browser (with extension).
+ builder.registerHandler("screenshot_request", function (msg) {
+ if (!Phoenix || !Phoenix.app || !Phoenix.app.screenShotBinary) {
+ builder.sendMessage({
+ type: "error",
+ id: msg.id,
+ message: "Screenshot API not available"
+ });
+ return;
+ }
+
+ Phoenix.app.screenShotBinary(msg.selector || undefined)
+ .then(function (bytes) {
+ var binary = "";
+ var chunkSize = 8192;
+ for (var i = 0; i < bytes.length; i += chunkSize) {
+ var chunk = bytes.subarray(i, Math.min(i + chunkSize, bytes.length));
+ binary += String.fromCharCode.apply(null, chunk);
+ }
+ var base64 = btoa(binary);
+ builder.sendMessage({
+ type: "screenshot_response",
+ id: msg.id,
+ data: base64
+ });
+ })
+ .catch(function (err) {
+ builder.sendMessage({
+ type: "error",
+ id: msg.id,
+ message: err.message || "Screenshot failed"
+ });
+ });
+ });
+
+ // --- run_tests_request ---
+ // Reloads SpecRunner with the requested category and/or spec URL params.
+ builder.registerHandler("run_tests_request", function (msg) {
+ var category = msg.category || "unit";
+ var spec = msg.spec || null;
+
+ // Respond before reloading so the MCP server gets the ack
+ builder.sendMessage({
+ type: "run_tests_response",
+ id: msg.id,
+ success: true,
+ message: "Reloading test runner with category=" + category + (spec ? ", spec=" + spec : "")
+ });
+
+ // Build the new URL and reload.
+ // Construct the query string manually with encodeURIComponent so spaces
+ // become %20 (not +). The SpecRunner UrlParams parser uses decodeURIComponent
+ // which only decodes %20, not +.
+ var base = window.location.href.split("?")[0];
+ var qs = "category=" + encodeURIComponent(category) +
+ "&spec=" + encodeURIComponent(spec || "all");
+
+ setTimeout(function () {
+ window.location.href = base + "?" + qs;
+ }, 100);
+ });
+
+ // --- get_test_results_request ---
+ // Returns structured test results from the live reporter and window.testResults.
+ builder.registerHandler("get_test_results_request", function (msg) {
+ var results = _gatherTestResults();
+ results.type = "get_test_results_response";
+ results.id = msg.id;
+ builder.sendMessage(results);
+ });
+
+ function _gatherTestResults() {
+ var testResults = window.testResults || {};
+ var completed = !!window.playWrightRunComplete;
+
+ // Try to access the reporter via the global that SpecRunner sets up.
+ // The reporter is attached to the BootstrapReporterView which reads from UnitTestReporter.
+ // We get what we can from the DOM and globals.
+ var reporter = window._unitTestReporter || null;
+
+ var totalSpecCount = 0;
+ var totalPassedCount = 0;
+ var totalFailedCount = 0;
+ var activeSpecCompleteCount = 0;
+ var currentSpec = "";
+ var activeSuite = null;
+ var categories = [];
+ var running = false;
+ var passed = !!testResults.passed;
+ var failures = [];
+
+ if (reporter) {
+ totalSpecCount = reporter.totalSpecCount || 0;
+ totalPassedCount = reporter.totalPassedCount || 0;
+ totalFailedCount = reporter.totalFailedCount || 0;
+ activeSpecCompleteCount = reporter.activeSpecCompleteCount || 0;
+ activeSuite = reporter.activeSuite || null;
+ categories = reporter.selectedCategories || [];
+ passed = !!reporter.passed;
+
+ // If tests started but haven't completed, they're running
+ running = !!activeSuite && !completed;
+
+ // Current spec from the info element
+ var infoEl = document.querySelector(".alert-info");
+ if (infoEl && infoEl.textContent && infoEl.textContent.indexOf("Running ") === 0) {
+ currentSpec = infoEl.textContent.substring("Running ".length);
+ }
+
+ // Gather failures from reporter suites
+ var suiteNames = Object.keys(reporter.suites || {});
+ for (var i = 0; i < suiteNames.length; i++) {
+ var suite = reporter.suites[suiteNames[i]];
+ if (suite && suite.specs) {
+ for (var j = 0; j < suite.specs.length; j++) {
+ var spec = suite.specs[j];
+ if (spec && !spec.passed) {
+ var msgs = [];
+ if (spec.messages && spec.messages.length) {
+ for (var k = 0; k < spec.messages.length; k++) {
+ var m = spec.messages[k];
+ msgs.push(m.message || String(m));
+ }
+ }
+ failures.push({
+ suite: suite.name,
+ spec: spec.name,
+ messages: msgs
+ });
+ }
+ }
+ }
+ }
+ } else {
+ // No reporter yet — tests haven't loaded
+ running = false;
+ }
+
+ return {
+ running: running,
+ completed: completed,
+ passed: passed,
+ totalSpecCount: totalSpecCount,
+ totalPassedCount: totalPassedCount,
+ totalFailedCount: totalFailedCount,
+ activeSpecCompleteCount: activeSpecCompleteCount,
+ failures: failures,
+ currentSpec: currentSpec,
+ categories: categories,
+ activeSuite: activeSuite
+ };
+ }
+
+}());