From 6fd9d71fb20838d250b219642cac1f40064e0d70 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Rzepecki?= <divided.mind@gmail.com>
Date: Wed, 9 Apr 2025 13:22:19 +0100
Subject: [PATCH 1/2] fix: Accept simpler schema in @observe command

Gemini has some issues with consistently generating the required
schema, so accept a simpler schema too.
---
 .../navie/src/commands/observe-command.ts     | 22 ++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)
diff --git a/packages/navie/src/commands/observe-command.ts b/packages/navie/src/commands/observe-command.ts
index 32d3065dcf..9e57540260 100644
--- a/packages/navie/src/commands/observe-command.ts
+++ b/packages/navie/src/commands/observe-command.ts
@@ -38,10 +38,15 @@ const RelevantTest = z.object({
     .describe('An ordered list of terminal command(s) necessary to execute to install AppMap'),
   testCommands: z
     .array(
-      z.object({
-        command: z.string().describe('The command to execute'),
-        description: z.string().optional().describe('A description of the command'),
-      })
+      z
+        .union([
+          z.object({
+            command: z.string().describe('The command to execute'),
+            description: z.string().optional().describe('A description of the command'),
+          }),
+          z.string().describe('A command to execute'),
+        ])
+        .describe('The command to execute')
     )
     .optional()
     .describe('The ordered list of terminal command(s) that can be executed to run the test'),
@@ -205,7 +210,7 @@ ${
   testCommands?.length
     ? `I've identified the following commands that you may need to run to execute the test:
 <commands>
-${testCommands?.map((command) => `- \`${command.command}\`: ${command.description}`).join('\n')}
+${testCommands?.map((command) => `- ${commandDescription(command)}`).join('\n')}
 </commands>
 `
     : ''
@@ -250,3 +255,10 @@ Do not include:
     }
   }
 }
+
+function commandDescription(command: string | { command: string; description?: string }): string {
+  if (typeof command === 'string') {
+    return `\`${command}\``;
+  }
+  return `\`${command.command}\`: ${command.description ?? ''}`;
+}

From cae623f1dfcd06053bd6ff8ad029db9626d5abe4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Rzepecki?= <divided.mind@gmail.com>
Date: Thu, 24 Apr 2025 14:45:45 +0200
Subject: [PATCH 2/2] feat: Observe command can now suggest creating new tests

Other improvements:
- enhance UserOptions constructor to accept both Map and object formats;
- add clone method to InteractionHistory;
- implement closingTags function for unclosed XML tags;
- introduce replaceStream for async string replacement in streams;
- update ExplainCommand to handle optional classifierService.
---
 .../navie/src/commands/explain-command.ts     |   9 +-
 .../navie/src/commands/observe-command.ts     | 143 +++++++++++++++---
 packages/navie/src/interaction-history.ts     |  15 ++
 packages/navie/src/lib/closing-tags.ts        |  33 ++++
 packages/navie/src/lib/parse-options.ts       |  16 +-
 packages/navie/src/lib/replace-stream.ts      |  35 +++++
 .../test/commands/observe-command.spec.ts     |  28 +++-
 packages/navie/test/lib/closing-tags.spec.ts  |  35 +++++
 8 files changed, 281 insertions(+), 33 deletions(-)
 create mode 100644 packages/navie/src/lib/closing-tags.ts
 create mode 100644 packages/navie/src/lib/replace-stream.ts
 create mode 100644 packages/navie/test/lib/closing-tags.spec.ts

diff --git a/packages/navie/src/commands/explain-command.ts b/packages/navie/src/commands/explain-command.ts
index 5208f66326..b96b9df7d0 100644
--- a/packages/navie/src/commands/explain-command.ts
+++ b/packages/navie/src/commands/explain-command.ts
@@ -34,8 +34,11 @@ export default class ExplainCommand implements Command {
     private readonly options: ExplainOptions,
     public readonly interactionHistory: InteractionHistory,
     private readonly completionService: CompletionService,
-    private readonly classifierService: ClassificationService,
-    private readonly agentSelectionService: AgentSelectionService,
+    private readonly classifierService: ClassificationService | undefined,
+    private readonly agentSelectionService: Pick<
+      AgentSelectionService,
+      'selectAgent' | 'contextService'
+    >,
     private readonly codeSelectionService: CodeSelectionService,
     private readonly projectInfoService: ProjectInfoService,
     private readonly memoryService: MemoryService
@@ -52,7 +55,7 @@ export default class ExplainCommand implements Command {
     let contextLabelsFn: Promise<ContextV2.ContextLabel[]> | undefined;
 
     performance.mark('classifyStart');
-    if (classifyEnabled)
+    if (classifyEnabled && this.classifierService)
       contextLabelsFn = this.classifierService
         .classifyQuestion(baseQuestion, chatHistory)
         .catch((err) => {
diff --git a/packages/navie/src/commands/observe-command.ts b/packages/navie/src/commands/observe-command.ts
index 9e57540260..b71e22045b 100644
--- a/packages/navie/src/commands/observe-command.ts
+++ b/packages/navie/src/commands/observe-command.ts
@@ -1,32 +1,63 @@
 import { z } from 'zod';
 
+import { AgentMode } from '../agent';
+import TestAgent from '../agents/test-agent';
 import type Command from '../command';
 import type { CommandRequest } from '../command';
-import CompletionService from '../services/completion-service';
-import LookupContextService from '../services/lookup-context-service';
-import VectorTermsService from '../services/vector-terms-service';
 import { ContextV2 } from '../context';
-import { ExplainOptions } from './explain-command';
-import Message from '../message';
 import InteractionHistory, {
   CompletionEvent,
   PromptInteractionEvent,
 } from '../interaction-history';
+import closingTags from '../lib/closing-tags';
 import { UserOptions } from '../lib/parse-options';
+import replaceStream from '../lib/replace-stream';
+import Message from '../message';
+import ApplyContextService from '../services/apply-context-service';
+import CodeSelectionService from '../services/code-selection-service';
+import CompletionService from '../services/completion-service';
+import ContextService from '../services/context-service';
+import FileChangeExtractorService from '../services/file-change-extractor-service';
+import LookupContextService from '../services/lookup-context-service';
+import { NaiveMemoryService } from '../services/memory-service';
 import ProjectInfoService from '../services/project-info-service';
+import VectorTermsService from '../services/vector-terms-service';
+
+import ExplainCommand, { ExplainOptions } from './explain-command';
 
 const RelevantTest = z.object({
   relevantTest: z
     .object({
-      name: z.string().optional().describe('The name of the test case, if known'),
-      path: z.string().describe('The file path of the test file'),
+      name: z
+        .string()
+        .optional()
+        .describe('The name of the test case, if known')
+        .nullable()
+        .transform((value) => (value === null ? undefined : value)),
+      path: z
+        .string()
+        .describe('The file path of the test file')
+        .nullable()
+        .transform((value) => (value === null ? undefined : value)),
       language: z
         .enum(['ruby', 'python', 'java', 'javascript', 'other'])
         .describe('The programming language of the test file'),
-      framework: z.string().optional().describe('The test framework used'),
+      framework: z
+        .string()
+        .optional()
+        .describe('The test framework used')
+        .nullable()
+        .transform((value) => (value === null ? undefined : value)),
     })
     .optional()
+    .nullable()
     .describe('A descriptor of the most relevant test to the requested behavior'),
+  suggestedTest: z
+    .string()
+    .optional()
+    .nullable()
+    .transform((value) => (value === null ? undefined : value))
+    .describe('A suggested test case to write, if no relevant test is found'),
   installCommands: z
     .array(
       z.object({
@@ -126,6 +157,9 @@ export default class ObserveCommand implements Command {
       {
         role: 'system',
         content: `Given the following code snippets, identify the single most relevant test to the user request.
+          If no test seems relevant, suggest a test case to write instead. Do not provide test case code, just describe it in detail.
+          The test case should be relevant to the user request, and ideally, it should be written in the same language as the code snippets provided.
+          Do suggest a name and path for the test case and take it into account when generating the test run instructions.
 
 ${projectLanguageDirective}
 
@@ -156,24 +190,83 @@ ${context.join('\n')}
     return result;
   }
 
+  private async *suggestTestCase(
+    suggestedTest: string,
+    userOptions: UserOptions,
+    history: InteractionHistory
+  ): AsyncIterable<string> {
+    const applyContextService = new ApplyContextService(history);
+    const contextService = new ContextService(
+      history,
+      this.vectorTermsService,
+      this.lookupContextService,
+      applyContextService
+    );
+    const testAgent = new TestAgent(
+      history,
+      contextService,
+      new FileChangeExtractorService(history, this.completionService)
+    );
+    // create an ExplainCommand
+    const explainCommand = new ExplainCommand(
+      this.options,
+      history,
+      this.completionService,
+      undefined,
+      {
+        selectAgent: () => ({
+          agentMode: AgentMode.Test,
+          agent: testAgent,
+          question: suggestedTest,
+        }),
+        contextService,
+      },
+      new CodeSelectionService(history),
+      this.projectInfoService,
+      NaiveMemoryService
+    );
+
+    // call the explainCommand with the suggested test
+    const commandRequest: CommandRequest = {
+      question: suggestedTest + '\n\nOnly include code, no explanations.',
+      userOptions: new UserOptions({
+        ...userOptions,
+        format: 'xml',
+        classify: false,
+        tokenlimit: String(userOptions.numberValue('tokenlimit') || this.options.tokenLimit),
+      }),
+    };
+
+    const explainCommandResponse: string[] = [];
+    yield '\n';
+    for await (const token of explainCommand.execute(commandRequest)) {
+      yield token;
+      explainCommandResponse.push(token);
+    }
+
+    yield closingTags(explainCommandResponse.join('').trim());
+
+    yield '\n';
+  }
+
   async *execute({ question: userRequest, userOptions }: CommandRequest): AsyncIterable<string> {
     const vectorTerms = await this.vectorTermsService.suggestTerms(userRequest);
     const tokenLimit = userOptions.numberValue('tokenlimit') || this.options.tokenLimit;
     const testSnippets = await this.getTestSnippets(vectorTerms, tokenLimit);
     const result = await this.getMostRelevantTest(userRequest, userOptions, testSnippets);
-    const { relevantTest, installCommands, testCommands } = result || {};
-    if (!relevantTest) {
-      yield 'Sorry, I could not find any relevant tests to record.';
-      return;
-    }
+    const { relevantTest, installCommands, testCommands, suggestedTest } = result || {};
+
+    const history = this.interactionHistory.clone();
 
-    if (relevantTest.language === 'other') {
+    if (relevantTest?.language === 'other') {
       yield `I found a relevant test at \`${relevantTest.path}\`, but I'm unable to help you record it at this time. This language does not appear to be supported.`;
       return;
     }
 
     const helpDocs = await this.lookupContextService.lookupHelp(
-      ['record', 'agent', 'tests', relevantTest.framework].filter(Boolean) as string[],
+      ['record', 'agent', 'tests', relevantTest?.language, relevantTest?.framework].filter(
+        Boolean
+      ) as string[],
       tokenLimit
     );
 
@@ -191,11 +284,15 @@ ${userRequest}
       },
       {
         role: 'assistant',
-        content: `Based on the request, the most relevant test case is:
+        content:
+          (relevantTest?.path
+            ? `Based on the request, the most relevant test case is:
 ${relevantTest.name ? `**Name:** \`${relevantTest.name}\`` : ''}
 ${relevantTest.framework ? `**Framework:** \`${relevantTest.framework}\`` : ''}
 ${relevantTest.language ? `**Language:** \`${relevantTest.language}\`` : ''}
-**Path:** \`${relevantTest.path}\`
+**Path:** \`${relevantTest.path}\``
+            : `Based on the request, a ${relevantTest?.language} ${relevantTest?.framework} test case needs to be created first:\n${suggestedTest}\n\n`) +
+          `
 
 ${
   installCommands?.length
@@ -227,11 +324,11 @@ ${helpDocs
       },
       {
         role: 'user',
-        content: `Restate the information you've provided to me, in standalone format, as a step by step guide outlining the steps required to record the single test case that you've identified.
+        content: `Restate the information you've provided to me, in standalone format, as a step by step guide outlining the steps required to record the single test case that you've identified or suggested creating.
 If possible, include the terminal command needed to run the test. Only specify test patterns that are guaranteed to match based on previous context. For example, do not include file ranges not supported by the test runner.
 In your response, please include the following:
-- The name of the test case (if known)
-- The path to the test file
+- If an existing test was found, indicate the test case name and path
+- Otherwise, steps and suggested location to create it (don't generate code itself, instead use <generated-test-case /> placeholder — DO NOT surround it with code fences)
 - Any steps and terminal commands required to install the AppMap recording agent
 - Any steps and terminal commands required to run the specific test case
 
@@ -250,9 +347,9 @@ Do not include:
     );
     const completion = this.completionService.complete(messages, { temperature });
 
-    for await (const token of completion) {
-      yield token;
-    }
+    yield* replaceStream(completion, '<generated-test-case />', () =>
+      this.suggestTestCase(suggestedTest ?? '', userOptions, history)
+    );
   }
 }
 
diff --git a/packages/navie/src/interaction-history.ts b/packages/navie/src/interaction-history.ts
index 79be2bd381..ed01b769e7 100644
--- a/packages/navie/src/interaction-history.ts
+++ b/packages/navie/src/interaction-history.ts
@@ -316,6 +316,21 @@ class InteractionHistory extends EventEmitter {
     this.events.push(event);
   }
 
+  /**
+   * Clone the interaction history.
+   * This is useful for creating a copy of the interaction history
+   * that can be modified without affecting the original.
+   * @note this is a shallow copy, so the events are not cloned.
+   * This means that modifying the events in the clone will also modify the events in the original.
+   * @returns a shallow copy of the interaction history
+   */
+  clone(): InteractionHistory {
+    const clone = new InteractionHistory();
+    clone.events.push(...this.events);
+    clone.acceptPinnedFileContext = this.acceptPinnedFileContext;
+    return clone;
+  }
+
   stopAcceptingPinnedFileContext() {
     this.acceptPinnedFileContext = false;
   }
diff --git a/packages/navie/src/lib/closing-tags.ts b/packages/navie/src/lib/closing-tags.ts
new file mode 100644
index 0000000000..01baf53363
--- /dev/null
+++ b/packages/navie/src/lib/closing-tags.ts
@@ -0,0 +1,33 @@
+/** Scan the text for any unclosed XML tags and returns the closing tags to be appended. */
+export default function closingTags(text: string): string {
+  // Stack to keep track of open tags
+  const openTags: string[] = [];
+
+  // Match opening and self-closing tags
+  const tagRegex = /<(\/)?([a-zA-Z]+)([^>]*?)(\/)?>/g;
+
+  for (const match of text.matchAll(tagRegex)) {
+    const [, close, tagName, attributes, selfClosing] = match;
+
+    // Skip self-closing tags
+    if (selfClosing) continue;
+
+    // Check if this is a closing tag
+    if (close || attributes.trim().startsWith('/')) {
+      // Found a closing tag, remove the matching opening tag if it exists
+      const lastOpenTag = openTags[openTags.length - 1];
+      if (lastOpenTag === tagName) {
+        openTags.pop();
+      }
+    } else {
+      // Found an opening tag, push it onto the stack
+      openTags.push(tagName);
+    }
+  }
+
+  // Generate closing tags for any remaining open tags in reverse order
+  return openTags
+    .reverse()
+    .map((tag) => `</${tag}>`)
+    .join('');
+}
diff --git a/packages/navie/src/lib/parse-options.ts b/packages/navie/src/lib/parse-options.ts
index faa8a1295a..c5d8bc7112 100644
--- a/packages/navie/src/lib/parse-options.ts
+++ b/packages/navie/src/lib/parse-options.ts
@@ -1,7 +1,21 @@
 import { ContextV2 } from '../context';
 
 export class UserOptions {
-  constructor(private options: Map<string, string | boolean>) {}
+  private options: Map<string, string | boolean>;
+  constructor(options: Map<string, string | boolean> | Record<string, string | boolean>) {
+    this.options = new Map<string, string | boolean>();
+    if (options instanceof Map) {
+      this.options = options;
+    } else {
+      Object.entries(options).forEach(([key, value]) => {
+        this.options.set(key.toLowerCase(), value);
+      });
+    }
+  }
+
+  [Symbol.iterator](): IterableIterator<[string, string | boolean]> {
+    return this.options.entries();
+  }
 
   has(key: string): boolean {
     return this.options.has(key);
diff --git a/packages/navie/src/lib/replace-stream.ts b/packages/navie/src/lib/replace-stream.ts
new file mode 100644
index 0000000000..8c53423403
--- /dev/null
+++ b/packages/navie/src/lib/replace-stream.ts
@@ -0,0 +1,35 @@
+import splitOn from './split-on';
+
+/**
+ * Replace a needle in a stream with a replacement function.
+ * The replacement function is called with the found string and returns an async iterable of strings.
+ * The replacement function can be async, and will be awaited.
+ * The replacement function can yield multiple strings.
+ * The replacement function can be called multiple times if the needle is found multiple times.
+ * @param source the source stream
+ * @param needle the string or regex to search
+ * @param replacement the replacement function
+ */
+export default async function* replaceStream(
+  source: AsyncIterable<string>,
+  needle: string | RegExp,
+  replacement: (found: string) => AsyncIterable<string>
+): AsyncIterable<string> {
+  let buffer = '';
+  for await (const chunk of source) {
+    buffer += chunk;
+    while (buffer) {
+      const [before, found, after] = splitOn(buffer, needle);
+      yield before;
+      if (found) {
+        if (after) {
+          yield* replacement(found);
+        } else {
+          buffer = found;
+          break;
+        }
+      }
+      buffer = after;
+    }
+  }
+}
diff --git a/packages/navie/test/commands/observe-command.spec.ts b/packages/navie/test/commands/observe-command.spec.ts
index a8d41dae10..37b12d37c5 100644
--- a/packages/navie/test/commands/observe-command.spec.ts
+++ b/packages/navie/test/commands/observe-command.spec.ts
@@ -107,19 +107,35 @@ describe('ObserveCommand', () => {
     expect(lookupContextService.lookupHelp).toBeCalledTimes(1);
   });
 
-  it('exits early if no test is identified', async () => {
+  it('suggests a test if no test is identified', async () => {
     lookupContextService.lookupContext = jest.fn().mockResolvedValue([]);
-    completionService.json = jest.fn().mockReturnValueOnce(undefined);
-    lookupContextService.lookupHelp = jest.fn();
+    completionService.json = jest.fn().mockReturnValueOnce({
+      suggestedTest: 'Write a test for the observe command that handles missing tests',
+      relevantTest: { language: 'javascript' },
+    });
+    completionService.complete = jest
+      .fn()
+      .mockImplementationOnce(function* () {
+        yield '1. Create the following test:\n<generated-test-case />\n2. Run the test';
+      })
+      .mockImplementation(function* () {
+        yield 'Generated test case code';
+      });
+
     const result = await read(
       command.execute({
         question: 'what?',
         userOptions: new UserOptions(new Map()),
       })
     );
-    expect(result).toEqual('Sorry, I could not find any relevant tests to record.');
-    expect(lookupContextService.lookupHelp).not.toBeCalled();
-    expect(completionService.complete).not.toBeCalled();
+    expect(result).toMatchInlineSnapshot(`
+      "1. Create the following test:
+
+      Generated test case code
+
+      2. Run the test"
+    `);
+    expect(completionService.complete).toHaveBeenCalledTimes(4);
   });
 
   it('exits early if the language is not supported', async () => {
diff --git a/packages/navie/test/lib/closing-tags.spec.ts b/packages/navie/test/lib/closing-tags.spec.ts
new file mode 100644
index 0000000000..f2d5ecb5e6
--- /dev/null
+++ b/packages/navie/test/lib/closing-tags.spec.ts
@@ -0,0 +1,35 @@
+import closingTags from '../../src/lib/closing-tags';
+
+describe('closingTags', () => {
+  it('returns empty string for no unclosed tags', () => {
+    expect(closingTags('<tag></tag>')).toBe('');
+  });
+
+  it('returns closing tag for single unclosed tag', () => {
+    expect(closingTags('<tag>')).toBe('</tag>');
+  });
+
+  it('handles multiple unclosed tags', () => {
+    expect(closingTags('<outer><inner>')).toBe('</inner></outer>');
+  });
+
+  it('handles self-closing tags', () => {
+    expect(closingTags('<outer><self/><inner>')).toBe('</inner></outer>');
+  });
+
+  it('handles properly closed inner tags', () => {
+    expect(closingTags('<outer><inner></inner>')).toBe('</outer>');
+  });
+
+  it('handles mixed closed and unclosed tags', () => {
+    expect(closingTags('<a><b></b><c>')).toBe('</c></a>');
+  });
+
+  it('handles tags with attributes', () => {
+    expect(closingTags('<div class="test"><span id="inner">')).toBe('</span></div>');
+  });
+
+  it('returns empty string for text without tags', () => {
+    expect(closingTags('plain text')).toBe('');
+  });
+});