ruby · samyron · Dec 31, 2025 · Dec 31, 2025 · Dec 31, 2025 · Dec 31, 2025
diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c
@@ -479,21 +479,16 @@ static const signed char digit_values[256] = {
 
 static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p)
 {
-    signed char b;
-    uint32_t result = 0;
-    b = digit_values[p[0]];
-    if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
-    result = (result << 4) | (unsigned char)b;
-    b = digit_values[p[1]];
-    if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
-    result = (result << 4) | (unsigned char)b;
-    b = digit_values[p[2]];
-    if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
-    result = (result << 4) | (unsigned char)b;
-    b = digit_values[p[3]];
-    if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
-    result = (result << 4) | (unsigned char)b;
-    return result;
+    signed char b0 = digit_values[p[0]]; // look up all four bytes at p before validating any of them
+    signed char b1 = digit_values[p[1]];
+    signed char b2 = digit_values[p[2]];
+    signed char b3 = digit_values[p[3]];
+    // A negative lookup marks a rejected byte; OR-ing keeps the sign bit, so one test covers all four.
+    if ((b0 | b1 | b2 | b3) < 0) {
+        raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
+    } // NOTE(review): the return below presumably relies on raise_parse_error_at not returning - confirm
+    // Combine the lookups at 4-bit steps, p[0] being the most significant.
+    return ((uint32_t)b0 << 12) | ((uint32_t)b1 << 8) | ((uint32_t)b2 << 4) | (uint32_t)b3;
 }
 
 #define GET_PARSER_CONFIG                          \
@@ -643,9 +638,58 @@ static inline VALUE json_string_fastpath(JSON_ParserState *state, JSON_ParserCon
 typedef struct _json_unescape_positions {
     long size;
     const char **positions;
-    bool has_more;
+    unsigned long additional_backslashes; // backslashes counted but not stored in positions; json_next_backslash decrements it once per find_backslash scan
 } JSON_UnescapePositions;
 
+ALWAYS_INLINE(static) void *find_backslash(const void *src, size_t n) {
+// Returns a pointer to the first backslash within the n bytes at src, or NULL when there is none.
+// HAVE_SIMD_NEON and JSON_CPU_LITTLE_ENDIAN_64BITS are implied by __APPLE__ && __aarch64__; they are spelled out for clarity and consistency with this file.
+#if defined(__APPLE__) && defined(__aarch64__) && HAVE_SIMD_NEON && JSON_CPU_LITTLE_ENDIAN_64BITS
+    const unsigned char *s = (const unsigned char *)src;
+    // offsets[i] == 16 - i: nonzero in every lane, and largest in the lowest lane.
+    static const uint8_t offsets[16] = { 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 };
+    while (n >= sizeof(uint8x16_t)) {
+        uint8x16_t chunk             = vld1q_u8(s);
+        uint8x16_t backslashes       = vdupq_n_u8('\\');
+        uint8x16_t has_backslashes   = vceqq_u8(chunk, backslashes);
+        uint8x16_t backslash_offsets = vandq_u8(has_backslashes, vld1q_u8(offsets));
+        int first_backslash_offset   = vmaxvq_u8(backslash_offsets);
+        if (first_backslash_offset) {
+            // Matching lanes keep their offsets value (16 - lane index), so the
+            // maximum belongs to the lowest-indexed, i.e. first, backslash and
+            // 16 - max recovers its position. Storing the indexes in ascending
+            // order instead would need a negation + OR before a vminvq_u8.
+            return (void *)(s + (16 - first_backslash_offset));
+        }
+        s += sizeof(uint8x16_t);
+        n -= sizeof(uint8x16_t);
+    }
+    // Fewer than 16 bytes remain: test at most one 8-byte word with integer bit tricks.
+    if (n >= sizeof(uint64_t)) {
+        uint64_t word;
+        memcpy(&word, s, sizeof(uint64_t));
+        uint64_t xor = word ^ 0x5c5c5c5c5c5c5c5c; // 0x5c is the backslash byte, so matching bytes become 0x00
+        uint64_t has_backslash = (xor - 0x0101010101010101) & ((~xor) & 0x8080808080808080); // zero-byte test: the lowest set bit is bit 7 of the first 0x00 byte in xor
+        if (has_backslash) {
+            int byte_offset = trailing_zeros64(has_backslash) / CHAR_BIT; // little-endian build: lowest bit = lowest address; trailing_zeros64 presumably counts trailing zero bits
+            return (void *)(s + byte_offset);
+        }
+        s += sizeof(uint64_t);
+        n -= sizeof(uint64_t);
+    }
+    // Scan whatever is left (fewer than 8 bytes) one byte at a time.
+    for (size_t i = 0; i < n; i++) {
+        if (s[i] == '\\') {
+            return (void *)(s + i);
+        }
+    }
+
+    return NULL;
+#else
+    return memchr(src, '\\', n); // every other configuration defers to the library memchr
+#endif
+}
+
 static inline const char *json_next_backslash(const char *pe, const char *stringEnd, JSON_UnescapePositions *positions)
 {
     while (positions->size) {
@@ -657,13 +701,43 @@ static inline const char *json_next_backslash(const char *pe, const char *string
         }
     }
 
-    if (positions->has_more) {
-        return memchr(pe, '\\', stringEnd - pe);
+    if (positions->additional_backslashes) {
+        positions->additional_backslashes--;
+        return find_backslash(pe, stringEnd - pe);
     }
 
     return NULL;
 }
 
+static inline void json_memcpy(char *dest, const char *src, size_t size) { // copies size bytes from src to dest
+#if defined(__APPLE__) && defined(__aarch64__) && HAVE_SIMD_NEON && JSON_CPU_LITTLE_ENDIAN_64BITS
+    while (size >= sizeof(uint8x16_t)) { // 16 bytes per iteration through a NEON register
+        uint8x16_t chunk;
+        chunk = vld1q_u8((const uint8_t *)src);
+        vst1q_u8((uint8_t *)dest, chunk);
+        dest += sizeof(uint8x16_t);
+        src  += sizeof(uint8x16_t);
+        size -= sizeof(uint8x16_t);
+    }
+    // Fewer than 16 bytes remain, so at most one 8-byte step is needed.
+    if (size >= sizeof(uint64_t)) {
+        uint64_t chunk;
+        memcpy(&chunk, src, sizeof(uint64_t));
+        memcpy(dest, &chunk, sizeof(uint64_t));
+        dest += sizeof(uint64_t);
+        src  += sizeof(uint64_t);
+        size -= sizeof(uint64_t);
+    }
+    // Copy the final (fewer than 8) bytes one at a time.
+    while(size) {
+        *dest++ = *src++;
+        size--;
+    }
+#else
+    memcpy(dest, src, size); // every other configuration uses the library memcpy directly
+#endif
+}
+
 NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name, JSON_UnescapePositions *positions)
 {
     bool intern = is_name || config->freeze;
@@ -681,7 +755,7 @@ NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_Parser
 
     while (pe < stringEnd && (pe = json_next_backslash(pe, stringEnd, positions))) {
         if (pe > p) {
-          MEMCPY(buffer, p, char, pe - p);
+          json_memcpy(buffer, p, pe - p);
           buffer += pe - p;
         }
         switch (*++pe) {
@@ -746,7 +820,7 @@ NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_Parser
 
                     char buf[4];
                     int unescape_len = convert_UTF32_to_UTF8(buf, ch);
-                    MEMCPY(buffer, buf, char, unescape_len);
+                    json_memcpy(buffer, buf, unescape_len);
                     buffer += unescape_len;
                     p = ++pe;
                 }
@@ -768,7 +842,7 @@ NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_Parser
 #undef APPEND_CHAR
 
     if (stringEnd > p) {
-      MEMCPY(buffer, p, char, stringEnd - p);
+      json_memcpy(buffer, p, stringEnd - p);
       buffer += stringEnd - p;
     }
     rb_str_set_len(result, buffer - bufferStart);
@@ -992,7 +1066,7 @@ static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfi
     JSON_UnescapePositions positions = {
         .size = 0,
         .positions = backslashes,
-        .has_more = false,
+        .additional_backslashes = 0,
     };
 
     do {
@@ -1007,7 +1081,7 @@ static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfi
                     backslashes[positions.size] = state->cursor;
                     positions.size++;
                 } else {
-                    positions.has_more = true;
+                    positions.additional_backslashes++;
                 }
                 state->cursor++;
                 break;

diff --git a/test/json/json_parser_test.rb b/test/json/json_parser_test.rb
@@ -543,6 +543,10 @@ def test_backslash
     json = '["\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\""]'
     assert_equal data, parse(json)
 
+    data = ['""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""']
+    json = '["\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\""]'
+    assert_equal data, parse(json)
+
     data = '["This is a "test" of the emergency broadcast system."]'
     json = "\"[\\\"This is a \\\"test\\\" of the emergency broadcast system.\\\"]\""
     assert_equal data, parse(json)
@@ -611,6 +615,10 @@ def test_backslash
     json = "\"ab\\u0002cab\\u0002cab\\u0002cab\\u0002cab\\u0002cab\\u0002c\""
     assert_equal data, parse(json)
 
+    data = "ab\u0002cab\u0002cab\u0002cab\u0002cab\u0002cab\u0002cab\u0002cab\u0002cab\u0002cab\u0002cab\u0002cab\u0002cab\u0002cab\u0002cab\u0002cab\u0002cab\u0002cab\u0002cab\u0002cab\u0002cab\u0002cab\u0002cab\u0002cab\u0002cab\u0002cab\u0002cab\u0002cab\u0002cab\u0002cab\u0002c"
+    json = "\"ab\\u0002cab\\u0002cab\\u0002cab\\u0002cab\\u0002cab\\u0002cab\\u0002cab\\u0002cab\\u0002cab\\u0002cab\\u0002cab\\u0002cab\\u0002cab\\u0002cab\\u0002cab\\u0002cab\\u0002cab\\u0002cab\\u0002cab\\u0002cab\\u0002cab\\u0002cab\\u0002cab\\u0002cab\\u0002cab\\u0002cab\\u0002cab\\u0002cab\\u0002cab\\u0002c\""
+    assert_equal data, parse(json)
+
     data = "\n\t\f\b\n\t\f\b\n\t\f\b\n\t\f"
     json = "\"\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\""
     assert_equal data, parse(json)
@@ -619,9 +627,21 @@ def test_backslash
     json = "\"\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\""
     assert_equal data, parse(json)
 
+    data = "\n\t\f\b\n\t\f\b\n\t\f\b\n\t\f\b\n\t\f\b\n\t\f\b\n\t\f\b\n\t\f\b\n\t\f\b\n\t\f\b\n\t\f\b\n\t\f\b\n\t\f\b\n\t\f\b\n\t\f\b\n\t\f\b\n\t\f\b\n\t\f\b\n\t\f\b\n\t\f\b\n\t\f\b\n\t\f\b\n\t\f\b\n\t\f\b"
+    json = "\"\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\""
+    assert_equal data, parse(json)
+
     data = "a\n\t\f\b\n\t\f\b\n\t\f\b\n\t"
     json = "\"a\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\""
     assert_equal data, parse(json)
+
+    data = "a\n\t\f\b\n\t\f\b\n\t\f\b\n\ta\n\t\f\b\n\t\f\b\n\t\f\b\n\ta\n\t\f\b\n\t\f\b\n\t\f\b\n\ta\n\t\f\b\n\t\f\b\n\t\f\b\n\ta\n\t\f\b\n\t\f\b\n\t\f\b\n\t"
+    json = "\"a\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\ta\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\ta\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\ta\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\ta\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\""
+    assert_equal data, parse(json)
+
+    data = "\n" * 63
+    json = "\""+("\\n" * 63)+"\""
+    assert_equal data, parse(json)
   end
 
   class SubArray < Array