Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
477f175
Fix UTF8 encoding of invalid character
alberk8 Jun 30, 2025
a038bea
Add missing IndexOf
alberk8 Jun 30, 2025
0d081a1
Remove the INT_MAX and replace with 0x7fffffff
alberk8 Jun 30, 2025
1c9b25a
Replace INT_MAX with 0x7FFFFFFF
alberk8 Jun 30, 2025
3756002
Update src/CLR/Core/CLR_RT_UnicodeHelper.cpp
alberk8 Jun 30, 2025
9a72e87
Update src/CLR/Core/CLR_RT_UnicodeHelper.cpp
alberk8 Jun 30, 2025
213b06f
Fix Duplicate declaration by Coderabbitai and remove unwanted comments.
alberk8 Jun 30, 2025
6f2d785
Code style fixes
nfbot Jun 30, 2025
e3a2aef
Add curly brackets to lone if statement
alberk8 Jul 1, 2025
de4ab77
Fix formating sytle and revert some comments to orginal
alberk8 Jul 1, 2025
51c0343
CodeRabbit recommended fix and more Code Style fix
alberk8 Jul 1, 2025
8e7ec96
Code style fixes
nfbot Jul 1, 2025
4d6582e
Code style fixes
nfbot Jul 1, 2025
3d9c8e2
Code style fixes
nfbot Jul 24, 2025
3078ad2
Resolve formating
alberk8 Jul 24, 2025
6c77666
Code style fixes
nfbot Jul 24, 2025
ee20438
- Add back several comments to the code (as this is a complex algorit…
josesimoes Aug 11, 2025
884b4b8
- Revert changes, unrelated with this PR.
josesimoes Aug 11, 2025
4a98b19
- Revert changes, unrelated with this PR.
josesimoes Aug 11, 2025
310056e
Fix line ending
josesimoes Aug 11, 2025
8787303
Fix formatting
josesimoes Aug 11, 2025
0904a05
Fix line endings
josesimoes Aug 11, 2025
9db45e9
Remove static from MatchString
alberk8 Aug 12, 2025
9cab2bb
Code style fixes
nfbot Aug 12, 2025
bfa6da0
Merge remote-tracking branch 'upstream/nfbot/clang-format-fix/99cbb52…
josesimoes Aug 12, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
204 changes: 106 additions & 98 deletions src/CLR/CorLib/corlib_native_System_String.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -585,6 +585,59 @@ HRESULT Library_corlib_native_System_String::ToCharArray(CLR_RT_StackFrame &stac
NANOCLR_NOCLEANUP();
}

// Tests whether the UTF-8 text at the current position of inputIter starts with searchStr.
//   inputIter     - iterator located at the candidate match position; only a copy of it is advanced here
//   searchStr     - UTF-8 string to look for
//   searchCharLen - how many characters of searchStr have to be compared (one loop pass per character)
// Returns true when every compared character is equal; returns false on the first mismatch or as soon
// as ConvertFromUTF8 reports that either string could not deliver another character.
bool MatchString(CLR_RT_UnicodeHelper &inputIter, const char *searchStr, int searchCharLen)
{
    // Decode from a private copy so the caller's iterator is not moved by this function
    CLR_RT_UnicodeHelper haystack = inputIter;

    CLR_RT_UnicodeHelper needle;
    needle.SetInputUTF8(searchStr);

    for (int remaining = searchCharLen; remaining > 0; remaining--)
    {
        // Fresh, zero-filled scratch buffers for each character
        CLR_UINT16 haystackUnits[3] = {0};
        CLR_UINT16 needleUnits[3] = {0};

        haystack.m_outputUTF16 = haystackUnits;
        haystack.m_outputUTF16_size = MAXSTRLEN(haystackUnits);

        needle.m_outputUTF16 = needleUnits;
        needle.m_outputUTF16_size = MAXSTRLEN(needleUnits);

        // The input is decoded first; the search string is only decoded when the input
        // still had a character available (short-circuit keeps that order)
        if (!haystack.ConvertFromUTF8(1, false) || !needle.ConvertFromUTF8(1, false))
        {
            return false;
        }

        // Leading UTF-16 code unit has to be identical
        if (haystackUnits[0] != needleUnits[0])
        {
            return false;
        }

        // A high surrogate (0xD800..0xDBFF) means a second code unit follows (4-byte UTF-8 sequence),
        // so the low surrogate has to match as well
        const bool leadsSurrogatePair = (haystackUnits[0] >= 0xD800) && (haystackUnits[0] <= 0xDBFF);

        if (leadsSurrogatePair && (haystackUnits[1] != needleUnits[1]))
        {
            return false;
        }
    }

    return true;
}

HRESULT Library_corlib_native_System_String::IndexOf(CLR_RT_StackFrame &stack, int mode)
{
NATIVE_PROFILE_CLR_CORE();
Expand All @@ -594,8 +647,8 @@ HRESULT Library_corlib_native_System_String::IndexOf(CLR_RT_StackFrame &stack, i
int startIndex;
int count;
int pos;
const char *pString;
const CLR_UINT16 *pChars;
const char *pString = NULL;
const CLR_UINT16 *pChars = NULL;
int iChars = 0;
CLR_RT_UnicodeHelper inputIterator;
int inputLen;
Expand All @@ -605,8 +658,6 @@ HRESULT Library_corlib_native_System_String::IndexOf(CLR_RT_StackFrame &stack, i
if (!szText)
szText = "";
pos = -1;
pString = NULL;
pChars = NULL;

if (mode & c_IndexOf__SingleChar)
{
Expand All @@ -617,23 +668,20 @@ HRESULT Library_corlib_native_System_String::IndexOf(CLR_RT_StackFrame &stack, i
{
CLR_RT_HeapBlock_Array *array = stack.Arg1().DereferenceArray();
FAULT_ON_NULL(array);

pChars = (const CLR_UINT16 *)array->GetFirstElement();
iChars = array->m_numOfElements;
}
else if (mode & c_IndexOf__String)
{
pString = stack.Arg1().RecoverString();
FAULT_ON_NULL(pString);
// how long is the search string?
inputIterator.SetInputUTF8(pString);
searchLen = inputIterator.CountNumberOfCharacters();
}

// calculate input string length
// Calculate input length
inputIterator.SetInputUTF8(szText);
inputLen = inputIterator.CountNumberOfCharacters();

if (0 == inputLen)
{
pos = -1;
Expand All @@ -647,7 +695,6 @@ HRESULT Library_corlib_native_System_String::IndexOf(CLR_RT_StackFrame &stack, i
}
else
{
// for mode LastIndex... we are searching backwards toward the start of the string
if (mode & c_IndexOf__Last)
{
startIndex = inputLen - 1;
Expand All @@ -663,49 +710,53 @@ HRESULT Library_corlib_native_System_String::IndexOf(CLR_RT_StackFrame &stack, i
NANOCLR_SET_AND_LEAVE(CLR_E_OUT_OF_RANGE);

// for mode LastIndex... with string we move the start index back by the search string length -1
// if we search forward
if ((mode & c_IndexOf__String_Last) == c_IndexOf__String_Last)
{
startIndex -= searchLen - 1;
// check the start index; if not in range skip the search
// check the start index; if not in range, skip the search
if (startIndex < 0 || startIndex > inputLen)
{
goto Exit;
}
}

// calculate the iteration count
if (mode & c_IndexOf__Count)
{
// count form parameter
// count (from parameter)
count = stack.Arg3().NumericByRefConst().s4;
}
else
{
// for mode LastIndex... we are searching from start index backwards toward the start of the string
if (mode & c_IndexOf__Last)
{
// backward until the start of string
// one more time than the startIndex because we should iterate until zero
// backwards until the start of the string
// one position ahead of the startIndex because we should iterate until position zero
count = startIndex + 1;
}
else
{
// forward until the end of string
// move forward until reaching the end of the string
count = inputLen - startIndex;
}
}

// for mode with string we reduce the count by the search string length -1
// if we search foreward
// forward search with index of string mode: adjust the count by the search string length -1
if ((mode & c_IndexOf__String_Last) == c_IndexOf__String)
{
count -= searchLen - 1;
}

// check the count
// validate count
if (mode & c_IndexOf__Last)
{
// check for backward mode; no exception; just exit
if (count > startIndex + 1)
{
goto Exit;
}
}
else
{
Expand All @@ -717,132 +768,87 @@ HRESULT Library_corlib_native_System_String::IndexOf(CLR_RT_StackFrame &stack, i
// First move to the character, then read it.
if (inputIterator.ConvertFromUTF8(startIndex, true))
{
// string mode?
// String search mode
if (pString)
{
// iterate thru all positions
while (count-- > 0)
{
CLR_RT_UnicodeHelper inputString;
inputString.SetInputUTF8((const char *)inputIterator.m_inputUTF8);
CLR_RT_UnicodeHelper searchString;
searchString.SetInputUTF8(pString);
bool finished = false;

while (true)
{
CLR_UINT16 bufInput[3];
CLR_UINT16 bufSearch[3];

inputString.m_outputUTF16 = bufInput;
inputString.m_outputUTF16_size = MAXSTRLEN(bufInput);

searchString.m_outputUTF16 = bufSearch;
searchString.m_outputUTF16_size = MAXSTRLEN(bufSearch);

// read next char from search string; if no more chars to read (false)
// then we are done and found the search string in the input string
if (searchString.ConvertFromUTF8(1, false) == false)
{
pos = startIndex;
finished = true;
break;
}

// read the next char from the input string; if no more chars to read (false)
// we didn't found the search string in the input string; we abort the search now
if (inputString.ConvertFromUTF8(1, false) == false)
{
finished = true;
break;
}

// does the char from input not match the char from the search string
if (bufInput[0] != bufSearch[0])
{
// next iteration round but not finished
break;
}
}

// finished (with or without a found) then break
if (finished)
// Use helper for proper UTF-8 comparison
if (MatchString(inputIterator, pString, searchLen))
{
pos = startIndex;
break;
}

// reading forward or backward
// Move to next candidate position (both forward or backward reading)
if (mode & c_IndexOf__Last)
{
startIndex--;
// move one chars backward
if (inputIterator.MoveBackwardInUTF8(szText, 1) == false)
// move backwards one char
if (!inputIterator.MoveBackwardInUTF8(szText, 1))
{
break;
}
}
else
{
startIndex++;
// move to the next char
if (inputIterator.ConvertFromUTF8(1, true) == false)
// move forward to the next char
if (!inputIterator.ConvertFromUTF8(1, true))
{
break;
}
}
}
}

// char mode?
if (pChars)
// Character search mode
else if (pChars)
{
// iterate thru all positions
// iterate through all positions
while (count-- > 0)
{
CLR_UINT16 buf[3];
CLR_UINT16 buf[3] = {0};

inputIterator.m_outputUTF16 = buf;
inputIterator.m_outputUTF16_size = MAXSTRLEN(buf);

// read the next char from the input string; if no more chars to read (false)
// we didn't found the search chars in the input string
if (inputIterator.ConvertFromUTF8(1, false) == false)
// the search chars weren't found in the input string
if (!inputIterator.ConvertFromUTF8(1, false))
{
break;
}

// test each search char if it's a match
// test each search char for a match
for (int i = 0; i < iChars; i++)
{
// match?
if (buf[0] == pChars[i])
{
// position found!
// found position for next char
pos = startIndex;
break;
}
}

// found? => break
// didn't find any, break
if (pos != -1)
{
break;
}

// for mode LastIndex... we are searching from start index backwards toward the start of the string
// for search mode LastIndex: we are searching from start index backwards toward the start of the string
if (mode & c_IndexOf__Last)
{
// in backward mode
// backwards mode
startIndex--;
// move two chars backward, because the current char is already read
if (inputIterator.MoveBackwardInUTF8(szText, 2) == false)
{
// have to move two chars backwards, because the current char is already read
if (!inputIterator.MoveBackwardInUTF8(szText, 2))
break;
}
}
else
{
// forward mode; simple advance the start index
// forward mode: just advance the start index
startIndex++;
}
}
Expand All @@ -851,7 +857,6 @@ HRESULT Library_corlib_native_System_String::IndexOf(CLR_RT_StackFrame &stack, i

Exit:
stack.SetResult_I4(pos);

NANOCLR_NOCLEANUP();
}

Expand Down Expand Up @@ -889,10 +894,11 @@ HRESULT Library_corlib_native_System_String::ChangeCase(CLR_RT_StackFrame &stack
*ptr++ = c;
}

NANOCLR_CHECK_HRESULT(CLR_RT_HeapBlock_String::CreateInstance(
stack.PushValue(),
(CLR_UINT16 *)arrayTmp->GetFirstElement(),
arrayTmp->m_numOfElements));
NANOCLR_CHECK_HRESULT(
CLR_RT_HeapBlock_String::CreateInstance(
stack.PushValue(),
(CLR_UINT16 *)arrayTmp->GetFirstElement(),
arrayTmp->m_numOfElements));

NANOCLR_NOCLEANUP();
}
Expand Down Expand Up @@ -923,10 +929,11 @@ HRESULT Library_corlib_native_System_String::Substring(CLR_RT_StackFrame &stack,
NANOCLR_SET_AND_LEAVE(CLR_E_OUT_OF_RANGE);
}

NANOCLR_CHECK_HRESULT(CLR_RT_HeapBlock_String::CreateInstance(
stack.PushValue(),
(CLR_UINT16 *)arrayTmp->GetElement(startIndex),
length));
NANOCLR_CHECK_HRESULT(
CLR_RT_HeapBlock_String::CreateInstance(
stack.PushValue(),
(CLR_UINT16 *)arrayTmp->GetElement(startIndex),
length));

NANOCLR_NOCLEANUP();
}
Expand Down Expand Up @@ -1102,10 +1109,11 @@ HRESULT Library_corlib_native_System_String::Split(CLR_RT_StackFrame &stack, CLR
{
CLR_RT_HeapBlock *str = (CLR_RT_HeapBlock *)arrayDst->GetElement(count);

NANOCLR_CHECK_HRESULT(CLR_RT_HeapBlock_String::CreateInstance(
*str,
pSrcStart,
(CLR_UINT32)(pSrc - pSrcStart)));
NANOCLR_CHECK_HRESULT(
CLR_RT_HeapBlock_String::CreateInstance(
*str,
pSrcStart,
(CLR_UINT32)(pSrc - pSrcStart)));

pSrcStart = pSrc + 1;
}
Expand Down
Loading