Added checks for integer overflow

cschreib · cschreib · commit 16f936b27931 · 2024-09-02T11:54:04.000+01:00
diff --git a/README.md b/README.md
@@ -96,7 +96,7 @@ White spaces are not significant and will be ignored (except inside strings).
 
 ### Operators
 
- - Integers and floating point numbers can be mixed in all operations. If an operation involves an integer and a floating point number, the integer number is first converted to floating point before the operation. Available operations: `+`, `-`, `*`, `/`, `%` (modulo), `**` (exponentiation/power). Integer division and modulo by zero will raise an error. Integer overflow is undefined.
+ - Integers and floating point numbers can be mixed in all operations. If an operation involves an integer and a floating point number, the integer number is first converted to floating point before the operation. Available operations: `+`, `-`, `*`, `/`, `%` (modulo), `**` (exponentiation/power). Integer division/modulo by zero and integer overflow will raise an error.
  - Numbers can also be tested for equality (`==` and `!=`) and compared for ordering (`>`, `>=`, `<`, `<=`), with the same conversion rules.
  - Strings can be concatenated with `+`. They can be tested for equality and compared for ordering (using simple alphabetical ordering). Individual characters can be accessed with angled brackets (`'str'[0]`); the index must be an integer, and is zero-based (first character had index zero). Range access is also possible with `str[a:b]` (`a` is the index of the first character to extract, and `b` is 1 plus the index of the last character to extract, so `'abc'[0:2]` is `'ab'`; an empty string is returned if `a >= b`).
  - Booleans can only be tested for equality, and combined with the boolean operators `and` and `or`. The boolean operators are "short-circuiting"; namely, when evaluating `a and b` and `a` evaluates to `false`, `b` will not be evaluated. This allows bypassing evaluation of operations that would otherwise be invalid (e.g. accessing elements beyond the length of an array). Finally, booleans can also be negated (`not a`). No other operation is possible.
@@ -330,13 +330,10 @@ first_non_null(1, 1+'abc')    -> 1 (second argument was invalid, but no error si
 
 # Security
 
-All operations allowed in the language are meant to be safe, in the sense that they should not make the host process abort or behave in an unspecified manner (e.g., through out-of-bounds read or writes, use-after-free, incorrect type accesses, read of uninitialized memory, etc.). This is  tested by running the test suite with sanitizers, and by fuzzing. The underlying JSON library is also battle-tested.
+All operations allowed in the language are meant to be safe, in the sense that they should not make the host process abort or behave in an unspecified manner (e.g., through out-of-bounds read or writes, use-after-free, incorrect type accesses, read of uninitialized memory, signed overflow, division by zero, etc.). This is tested by running the test suite with sanitizers, and by fuzzing. The underlying JSON library is also battle-tested.
 
 Furthermore, the parser has a fixed maximum recursion depth to prevent stack overflows. This depth  can be changed with the CMake/compilation option `JSONEXPR_MAX_AST_DEPTH`.
 
-Despite the above, the library is not 100% risk-free. In particular, the following is currently unsafe:
- - integer overflow and underflow in evaluated expression
-
 The following would trigger an exception (or abort the process if exceptions are disabled):
  - running out of heap memory while parsing or evaluating an expression
 
diff --git a/libjsonexpr/src/functions.cpp b/libjsonexpr/src/functions.cpp
@@ -67,12 +67,44 @@ function_result safe_max(const T& lhs, const U& rhs) {
     }
 }
 
+constexpr number_integer_t int_max = std::numeric_limits<number_integer_t>::max();
+constexpr number_integer_t int_min = -int_max; // We loose the normally allowed most negative int.
+
+bool check_overflow_add(number_integer_t lhs, number_integer_t rhs) {
+    if (lhs >= 0) {
+        if (rhs > int_max - lhs) {
+            return true;
+        }
+    } else {
+        if (rhs < int_min - lhs) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+bool check_overflow_sub(number_integer_t lhs, number_integer_t rhs) {
+    return check_overflow_add(lhs, -rhs);
+}
+
+bool check_overflow_mul(number_integer_t lhs, number_integer_t rhs) {
+    return std::abs(lhs) > int_max / std::abs(rhs);
+}
+
 #define MATHS_OPERATOR(NAME, OPERATOR)                                                             \
     template<typename T, typename U>                                                               \
     function_result safe_##NAME(const T& lhs, const U& rhs) {                                      \
         if constexpr (std::is_floating_point_v<T> || std::is_floating_point_v<U>) {                \
             return static_cast<number_float_t>(lhs) OPERATOR static_cast<number_float_t>(rhs);     \
         } else {                                                                                   \
+            if constexpr (std::is_arithmetic_v<T> && std::is_arithmetic_v<U>) {                    \
+                if (check_overflow_##NAME(lhs, rhs)) {                                             \
+                    return unexpected(std::string(                                                 \
+                        "integer overflow in '" + std::to_string(lhs) + (" " #OPERATOR " ") +      \
+                        std::to_string(rhs) + "'"));                                               \
+                }                                                                                  \
+            }                                                                                      \
             return lhs OPERATOR rhs;                                                               \
         }                                                                                          \
     }
diff --git a/libjsonexpr/src/parse.cpp b/libjsonexpr/src/parse.cpp
@@ -351,6 +351,17 @@ try_parse_literal(depth_counter depth, std::span<const token>& tokens) {
         if (parsed.type() == json::value_t::discarded) {
             return unexpected(abort_parse(t, "could not parse literal"));
         }
+
+        if (parsed.type() == json::value_t::number_unsigned) {
+            const auto unsigned_value = parsed.get<json::number_unsigned_t>();
+            if (unsigned_value > static_cast<json::number_unsigned_t>(
+                                     std::numeric_limits<number_integer_t>::max())) {
+                return unexpected(
+                    abort_parse(t, "integer overflow in '" + std::string(t.content) + "'"));
+            }
+
+            parsed = static_cast<json::number_integer_t>(unsigned_value);
+        }
     }
 
     tokens = tokens.subspan(1);
diff --git a/tests/data/errors.txt b/tests/data/errors.txt
@@ -7376,6 +7376,9 @@ error: could not parse literal
 1 0
   ^
 error: unexpected content in expression
+9223372036854775808
+^~~~~~~~~~~~~~~~~~~
+error: integer overflow in '9223372036854775808'
 1 e-2
   ^
 error: unexpected content in expression
@@ -7409,6 +7412,63 @@ error: expected unary expression
 --
   ^
 error: expected unary expression
+9223372036854775807+1
+^~~~~~~~~~~~~~~~~~~~~
+error: integer overflow in '9223372036854775807 + 1'
+9223372036854775807+2
+^~~~~~~~~~~~~~~~~~~~~
+error: integer overflow in '9223372036854775807 + 2'
+-9223372036854775807+(-1)
+^~~~~~~~~~~~~~~~~~~~~~~~~
+error: integer overflow in '-9223372036854775807 + -1'
+-9223372036854775807+(-2)
+^~~~~~~~~~~~~~~~~~~~~~~~~
+error: integer overflow in '-9223372036854775807 + -2'
+1+9223372036854775807
+^~~~~~~~~~~~~~~~~~~~~
+error: integer overflow in '1 + 9223372036854775807'
+2+9223372036854775807
+^~~~~~~~~~~~~~~~~~~~~
+error: integer overflow in '2 + 9223372036854775807'
+-1+(-9223372036854775807)
+^~~~~~~~~~~~~~~~~~~~~~~~~
+error: integer overflow in '-1 + -9223372036854775807'
+-2+(-9223372036854775807)
+^~~~~~~~~~~~~~~~~~~~~~~~~
+error: integer overflow in '-2 + -9223372036854775807'
+9223372036854775807+9223372036854775807
+^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+error: integer overflow in '9223372036854775807 + 9223372036854775807'
+-9223372036854775807-1
+^~~~~~~~~~~~~~~~~~~~~~
+error: integer overflow in '-9223372036854775807 - 1'
+-9223372036854775807-2
+^~~~~~~~~~~~~~~~~~~~~~
+error: integer overflow in '-9223372036854775807 - 2'
+9223372036854775807-(-1)
+^~~~~~~~~~~~~~~~~~~~~~~~
+error: integer overflow in '9223372036854775807 - -1'
+9223372036854775807-(-2)
+^~~~~~~~~~~~~~~~~~~~~~~~
+error: integer overflow in '9223372036854775807 - -2'
+1-(-9223372036854775807)
+^~~~~~~~~~~~~~~~~~~~~~~~
+error: integer overflow in '1 - -9223372036854775807'
+2-(-9223372036854775807)
+^~~~~~~~~~~~~~~~~~~~~~~~
+error: integer overflow in '2 - -9223372036854775807'
+-1-9223372036854775807
+^~~~~~~~~~~~~~~~~~~~~~
+error: integer overflow in '-1 - 9223372036854775807'
+-2-9223372036854775807
+^~~~~~~~~~~~~~~~~~~~~~
+error: integer overflow in '-2 - 9223372036854775807'
+9223372036854775807-(-2)
+^~~~~~~~~~~~~~~~~~~~~~~~
+error: integer overflow in '9223372036854775807 - -2'
+-9223372036854775807-9223372036854775807
+^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+error: integer overflow in '-9223372036854775807 - 9223372036854775807'
 1*
   ^
 error: expected unary expression
@@ -7433,6 +7493,21 @@ error: integer division by zero
 0/0
 ^~~
 error: integer division by zero
+9223372036854775807*2
+^~~~~~~~~~~~~~~~~~~~~
+error: integer overflow in '9223372036854775807 * 2'
+9223372036854775807*9223372036854775807
+^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+error: integer overflow in '9223372036854775807 * 9223372036854775807'
+9223372036854775807*-2
+^~~~~~~~~~~~~~~~~~~~~~
+error: integer overflow in '9223372036854775807 * -2'
+9223372036854775807*-9223372036854775807
+^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+error: integer overflow in '9223372036854775807 * -9223372036854775807'
+-9223372036854775808/-1
+ ^~~~~~~~~~~~~~~~~~~
+error: integer overflow in '9223372036854775808'
 1%
   ^
 error: expected unary expression
diff --git a/tests/src/number.cpp b/tests/src/number.cpp
@@ -4,6 +4,7 @@ TEST_CASE("number literal", "[maths]") {
     SECTION("bad") {
         CHECK_ERROR("01");
         CHECK_ERROR("1 0");
+        CHECK_ERROR("9223372036854775808");
         CHECK_ERROR("1 e-2");
         CHECK_ERROR("1e -2");
         CHECK_ERROR("1e- 2");
@@ -16,6 +17,7 @@ TEST_CASE("number literal", "[maths]") {
         CHECK(evaluate("10") == "10"_json);
         CHECK(evaluate("0.1") == "0.1"_json);
         CHECK(evaluate("123456789") == "123456789"_json);
+        CHECK(evaluate("9223372036854775807") == "9223372036854775807"_json);
         CHECK(evaluate("1e2") == "1e2"_json);
         CHECK(evaluate("1E2") == "1E2"_json);
         CHECK(evaluate("1e-2") == "1e-2"_json);
@@ -68,6 +70,31 @@ TEST_CASE("adsub", "[maths]") {
         CHECK(evaluate("1 ++1") == "2"_json);
         CHECK(evaluate("1++ 1") == "2"_json);
     }
+
+    SECTION("int overflow") {
+        CHECK(evaluate("9223372036854775807+0") == "9223372036854775807"_json);
+        CHECK_ERROR("9223372036854775807+1");
+        CHECK_ERROR("9223372036854775807+2");
+        CHECK_ERROR("-9223372036854775807+(-1)");
+        CHECK_ERROR("-9223372036854775807+(-2)");
+        CHECK_ERROR("1+9223372036854775807");
+        CHECK_ERROR("2+9223372036854775807");
+        CHECK_ERROR("-1+(-9223372036854775807)");
+        CHECK_ERROR("-2+(-9223372036854775807)");
+        CHECK_ERROR("9223372036854775807+9223372036854775807");
+
+        CHECK(evaluate("-9223372036854775807-0") == "-9223372036854775807"_json);
+        CHECK_ERROR("-9223372036854775807-1");
+        CHECK_ERROR("-9223372036854775807-2");
+        CHECK_ERROR("9223372036854775807-(-1)");
+        CHECK_ERROR("9223372036854775807-(-2)");
+        CHECK_ERROR("1-(-9223372036854775807)");
+        CHECK_ERROR("2-(-9223372036854775807)");
+        CHECK_ERROR("-1-9223372036854775807");
+        CHECK_ERROR("-2-9223372036854775807");
+        CHECK_ERROR("9223372036854775807-(-2)");
+        CHECK_ERROR("-9223372036854775807-9223372036854775807");
+    }
 }
 
 TEST_CASE("muldiv", "[maths]") {
@@ -89,6 +116,17 @@ TEST_CASE("muldiv", "[maths]") {
         CHECK_ERROR("0/0");
     }
 
+    SECTION("int overflow") {
+        CHECK(evaluate("9223372036854775807*1") == "9223372036854775807"_json);
+        CHECK_ERROR("9223372036854775807*2");
+        CHECK_ERROR("9223372036854775807*9223372036854775807");
+        CHECK(evaluate("9223372036854775807*-1") == "-9223372036854775807"_json);
+        CHECK_ERROR("9223372036854775807*-2");
+        CHECK_ERROR("9223372036854775807*-9223372036854775807");
+
+        CHECK_ERROR("-9223372036854775808/-1");
+    }
+
     SECTION("float") {
         CHECK(evaluate("2.0*4.0") == "8.0"_json);
         CHECK(evaluate("4.0/2.0") == "2.0"_json);