Skip to content

Conversation

@gardonkoo
Copy link

Hi there!
I noticed that case_insensitive_strcmp is a frequently used function, which is always called when searching for cJSON items.
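I found that this function can be further optimized. Below is a comparison of the disassembly of the two implementations: the original calls tolower twice per character and then twice more once a mismatch is found, while the new one calls it only twice per character and reuses the stored results for the return value.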

Original implementation:

0x0000555555555149 <+0>:     endbr64
0x000055555555514d <+4>:     push   %rbp
0x000055555555514e <+5>:     mov    %rsp,%rbp
0x0000555555555151 <+8>:     push   %rbx
0x0000555555555152 <+9>:     sub    $0x18,%rsp
0x0000555555555156 <+13>:    mov    %rdi,-0x18(%rbp)
0x000055555555515a <+17>:    mov    %rsi,-0x20(%rbp)
=> 0x000055555555515e <+21>:    cmpq   $0x0,-0x18(%rbp)
0x0000555555555163 <+26>:    je     0x55555555516c <case_insensitive_strcmp+35>
0x0000555555555165 <+28>:    cmpq   $0x0,-0x20(%rbp)
0x000055555555516a <+33>:    jne    0x555555555173 <case_insensitive_strcmp+42>
0x000055555555516c <+35>:    mov    $0x1,%eax
0x0000555555555171 <+40>:    jmp    0x5555555551f2 <case_insensitive_strcmp+169>
0x0000555555555173 <+42>:    mov    -0x18(%rbp),%rax
0x0000555555555177 <+46>:    cmp    -0x20(%rbp),%rax
0x000055555555517b <+50>:    jne    0x5555555551a0 <case_insensitive_strcmp+87>
0x000055555555517d <+52>:    mov    $0x0,%eax
0x0000555555555182 <+57>:    jmp    0x5555555551f2 <case_insensitive_strcmp+169>
0x0000555555555184 <+59>:    mov    -0x18(%rbp),%rax
0x0000555555555188 <+63>:    movzbl (%rax),%eax
0x000055555555518b <+66>:    test   %al,%al
0x000055555555518d <+68>:    jne    0x555555555196 <case_insensitive_strcmp+77>
0x000055555555518f <+70>:    mov    $0x0,%eax
0x0000555555555194 <+75>:    jmp    0x5555555551f2 <case_insensitive_strcmp+169>
0x0000555555555196 <+77>:    addq   $0x1,-0x18(%rbp)
0x000055555555519b <+82>:    addq   $0x1,-0x20(%rbp)
0x00005555555551a0 <+87>:    mov    -0x18(%rbp),%rax
0x00005555555551a4 <+91>:    movzbl (%rax),%eax
0x00005555555551a7 <+94>:    movzbl %al,%eax
0x00005555555551aa <+97>:    mov    %eax,%edi
0x00005555555551ac <+99>:    call   0x555555555050 <tolower@plt>
0x00005555555551b1 <+104>:   mov    %eax,%ebx
0x00005555555551b3 <+106>:   mov    -0x20(%rbp),%rax
0x00005555555551b7 <+110>:   movzbl (%rax),%eax
0x00005555555551ba <+113>:   movzbl %al,%eax
0x00005555555551bd <+116>:   mov    %eax,%edi
0x00005555555551bf <+118>:   call   0x555555555050 <tolower@plt>
0x00005555555551c4 <+123>:   cmp    %eax,%ebx
0x00005555555551c6 <+125>:   je     0x555555555184 <case_insensitive_strcmp+59>
0x00005555555551c8 <+127>:   mov    -0x18(%rbp),%rax
0x00005555555551cc <+131>:   movzbl (%rax),%eax
0x00005555555551cf <+134>:   movzbl %al,%eax
0x00005555555551d2 <+137>:   mov    %eax,%edi
0x00005555555551d4 <+139>:   call   0x555555555050 <tolower@plt>
0x00005555555551d9 <+144>:   mov    %eax,%ebx
0x00005555555551db <+146>:   mov    -0x20(%rbp),%rax
0x00005555555551df <+150>:   movzbl (%rax),%eax
0x00005555555551e2 <+153>:   movzbl %al,%eax
0x00005555555551e5 <+156>:   mov    %eax,%edi
0x00005555555551e7 <+158>:   call   0x555555555050 <tolower@plt>
0x00005555555551ec <+163>:   mov    %eax,%edx
0x00005555555551ee <+165>:   mov    %ebx,%eax
0x00005555555551f0 <+167>:   sub    %edx,%eax
0x00005555555551f2 <+169>:   mov    -0x8(%rbp),%rbx
0x00005555555551f6 <+173>:   leave
0x00005555555551f7 <+174>:   ret

New implementation:

0x0000000000001149 <+0>:     endbr64
0x000000000000114d <+4>:     push   %rbp
0x000000000000114e <+5>:     mov    %rsp,%rbp
0x0000000000001151 <+8>:     sub    $0x20,%rsp
0x0000000000001155 <+12>:    mov    %rdi,-0x18(%rbp)
0x0000000000001159 <+16>:    mov    %rsi,-0x20(%rbp)
0x000000000000115d <+20>:    movl   $0x0,-0x8(%rbp)
0x0000000000001164 <+27>:    movl   $0x0,-0x4(%rbp)
0x000000000000116b <+34>:    cmpq   $0x0,-0x18(%rbp)
0x0000000000001170 <+39>:    je     0x1179 <case_insensitive_strcmp+48>
0x0000000000001172 <+41>:    cmpq   $0x0,-0x20(%rbp)
0x0000000000001177 <+46>:    jne    0x1180 <case_insensitive_strcmp+55>
0x0000000000001179 <+48>:    mov    $0x1,%eax
0x000000000000117e <+53>:    jmp    0x11dd <case_insensitive_strcmp+148>
0x0000000000001180 <+55>:    mov    -0x18(%rbp),%rax
0x0000000000001184 <+59>:    cmp    -0x20(%rbp),%rax
0x0000000000001188 <+63>:    jne    0x1191 <case_insensitive_strcmp+72>
0x000000000000118a <+65>:    mov    $0x0,%eax
0x000000000000118f <+70>:    jmp    0x11dd <case_insensitive_strcmp+148>
0x0000000000001191 <+72>:    mov    -0x18(%rbp),%rax
0x0000000000001195 <+76>:    lea    0x1(%rax),%rdx
0x0000000000001199 <+80>:    mov    %rdx,-0x18(%rbp)
0x000000000000119d <+84>:    movzbl (%rax),%eax
0x00000000000011a0 <+87>:    movzbl %al,%eax
0x00000000000011a3 <+90>:    mov    %eax,%edi
0x00000000000011a5 <+92>:    call   0x1050 <tolower@plt>
0x00000000000011aa <+97>:    mov    %eax,-0x8(%rbp)
0x00000000000011ad <+100>:   mov    -0x20(%rbp),%rax
0x00000000000011b1 <+104>:   lea    0x1(%rax),%rdx
0x00000000000011b5 <+108>:   mov    %rdx,-0x20(%rbp)
0x00000000000011b9 <+112>:   movzbl (%rax),%eax
0x00000000000011bc <+115>:   movzbl %al,%eax
0x00000000000011bf <+118>:   mov    %eax,%edi
0x00000000000011c1 <+120>:   call   0x1050 <tolower@plt>
0x00000000000011c6 <+125>:   mov    %eax,-0x4(%rbp)
0x00000000000011c9 <+128>:   mov    -0x8(%rbp),%eax
0x00000000000011cc <+131>:   cmp    -0x4(%rbp),%eax
0x00000000000011cf <+134>:   jne    0x11d7 <case_insensitive_strcmp+142>
0x00000000000011d1 <+136>:   cmpl   $0x0,-0x8(%rbp)
0x00000000000011d5 <+140>:   jne    0x1191 <case_insensitive_strcmp+72>
0x00000000000011d7 <+142>:   mov    -0x8(%rbp),%eax
0x00000000000011da <+145>:   sub    -0x4(%rbp),%eax
0x00000000000011dd <+148>:   leave
0x00000000000011de <+149>:   ret

Environment:
OS: Ubuntu 24.04.3 LTS
cmake: 3.28.3
gcc: 13.3.0

@gardonkoo gardonkoo closed this Sep 10, 2025
@gardonkoo gardonkoo reopened this Sep 10, 2025

if (string1 == string2)
{
return 0;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider calling strcasecmp(3) directly, where it is available, after the NULL-pointer checks? That library function is already heavily optimized, and it is very hard to write a faster one by hand.

Copy link
Author

@gardonkoo gardonkoo Sep 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The function case_insensitive_strcmp is a native implementation inside the cJSON library.
According to the code, it is mainly used to find items in a JSON object by name.
If either argument is NULL, the function returns 1, so two NULL values are not considered equal.
It does not care about the lengths of the two strings, nor about their relative order in ASCII.
It only cares about whether they are equal.
Its functionality is not entirely consistent with that of strcasecmp(3) (at least in the current usage scenario within the source code).
Additionally, strcasecmp(3) is a POSIX function, while cJSON is a cross-platform library.
We would either need macros to select the optimized function on each platform, or have to implement a portable version ourselves. cJSON chose the latter.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants