Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import * as assert from 'assert'
import { unescapeHtml } from './textFormatting'

describe('textFormatting', () => {
    describe('unescapeHtml', () => {
        /**
         * Runs every `[input, expected]` pair through `unescapeHtml` and asserts strict equality,
         * in order, stopping at the first mismatch.
         */
        const expectUnescaped = (cases: ReadonlyArray<readonly [string, string]>): void => {
            for (const [raw, wanted] of cases) {
                assert.strictEqual(unescapeHtml(raw), wanted)
            }
        }

        it('unescapes HTML entities', () => {
            // One case per entity family: angle brackets, double quote, single quote, ampersand.
            expectUnescaped([
                ['&lt;div&gt;', '<div>'],
                ['&quot;hello&quot;', '"hello"'],
                ['&#39;world&#39;', "'world'"],
                ['foo &amp; bar', 'foo & bar'],
            ])
        })

        it('unescapes backslash-escaped angle brackets', () => {
            expectUnescaped([
                ['\\<tag\\>', '<tag>'],
                ['a \\< b \\> c', 'a < b > c'],
            ])
        })

        it('handles both HTML entities and backslash escaping together', () => {
            // Backslashes in front of anything other than < or > must survive untouched.
            expectUnescaped([
                [
                    '[!@#$%^&amp;*()_+\\-=\\[\\]{}|;&#39;:&quot;,./\\<\\>?]',
                    '[!@#$%^&*()_+\\-=\\[\\]{}|;\':\",./<>?]',
                ],
            ])
        })

        it('handles regex patterns with escaped characters', () => {
            expectUnescaped([
                [
                    "re.search(r'[!@#$%^&amp;*()_+\\-=\\[\\]{}|;&#39;:&quot;,./\\<\\>?]', password)",
                    "re.search(r'[!@#$%^&*()_+\\-=\\[\\]{}|;\':\",./<>?]', password)",
                ],
            ])
        })

        it('returns unchanged text when no escaping is present', () => {
            expectUnescaped([
                ['hello world', 'hello world'],
                ['no special chars', 'no special chars'],
            ])
        })

        it('handles empty string', () => {
            expectUnescaped([['', '']])
        })

        it('handles mixed content', () => {
            expectUnescaped([
                ['Text with &lt;html&gt; and \\<escaped\\> brackets', 'Text with <html> and <escaped> brackets'],
            ])
        })
    })
})
Original file line number Diff line number Diff line change
@@ -1,5 +1,29 @@
import { ToolUse } from '@amzn/codewhisperer-streaming'

/**
 * Unescapes HTML entities and backslash-escaped angle brackets in a string.
 *
 * Two kinds of escape sequence are decoded:
 * 1. HTML escaping done by escape-html: &quot; → ", &amp; → &, &#39; → ', &lt; → <, &gt; → >
 * 2. Backslash escaping of angle brackets that may appear in the LLM response: \< → <, \> → >
 *
 * Decoding happens in a single left-to-right pass, so every escape sequence in the input is
 * decoded exactly once. Text produced by one replacement is never re-interpreted as another
 * escape: `&amp;lt;` becomes `&lt;`, and `\&lt;` becomes `\<` (the backslash is kept).
 *
 * @param text - The possibly escaped text.
 * @returns The text with the recognised escape sequences replaced; all other characters,
 * including backslashes in front of anything other than `<` or `>`, are left unchanged.
 */
export function unescapeHtml(text: string): string {
    // NOTE(review): a reviewer suggested the "unescape-html": "^1.1.0" package as an alternative,
    // typed with:
    //     declare module 'unescape-html' {
    //         function unescapeHTML(str: string): string
    //         export = unescapeHTML
    //     }
    // See https://github.com/aws/language-servers/pull/2360/files#diff-fbaaeff7c4b028ae726c46e4eea2adec92e5fd886d26d5eee21fba8dc67ecfe8L71
    const replacements: Record<string, string> = {
        '&amp;': '&',
        '&lt;': '<',
        '&gt;': '>',
        '&quot;': '"',
        '&#39;': "'",
        '\\<': '<',
        '\\>': '>',
    }

    // A single alternation instead of two chained passes: with separate passes, `\&lt;` would
    // first become `\<` and then be collapsed to `<`, dropping a backslash that was part of the
    // original text.
    return text.replace(/&(?:amp|lt|gt|quot|#39);|\\[<>]/g, match => replacements[match] ?? match)
}

function codeBlocked(s: string) {
const codeBlock = `\`\`\`\``

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { CommandValidation, ExplanatoryParams, InvokeOutput, requiresPathAccepta
import { EmptyPathError, EmptyDiffsError, FileNotExistsError, TextNotFoundError, MultipleMatchesError } from '../errors'
import { Features } from '@aws/language-server-runtimes/server-interface/server'
import { sanitize } from '@aws/lsp-core/out/util/path'
import { unescapeHtml } from '../textFormatting'
import * as os from 'os'

interface BaseParams extends ExplanatoryParams {
Expand Down Expand Up @@ -138,9 +139,13 @@ const getReplaceContent = (params: ReplaceParams, fileContent: string) => {
continue
}

// Unescape HTML entities in oldStr and newStr since the prompt was HTML-escaped before being sent to the LLM
const unescapedOldStr = unescapeHtml(diff.oldStr)
const unescapedNewStr = unescapeHtml(diff.newStr)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we sure this will not introduce any regressions? This is a risky change; what is prompting us to make it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This addresses fsReplace failures that occur because the LLM is reading a sanitized prompt. When users send code to the prompt using the "Send to prompt" right-click action, we sanitize it with escapeHtml. When that code contains special characters, the LLM returns code with an incorrect oldStr.


// Normalize oldStr and newStr to match fileContent's line ending style
const normalizedOldStr = diff.oldStr.split(/\r\n|\r|\n/).join(lineEnding)
const normalizedNewStr = diff.newStr.split(/\r\n|\r|\n/).join(lineEnding)
const normalizedOldStr = unescapedOldStr.split(/\r\n|\r|\n/).join(lineEnding)
const normalizedNewStr = unescapedNewStr.split(/\r\n|\r|\n/).join(lineEnding)

// Use string indexOf and substring for safer replacement with special characters
const startIndex = fileContent.indexOf(normalizedOldStr)
Expand Down
Loading