From 17479f880c085864812f9cfb4686ca16f02b97ef Mon Sep 17 00:00:00 2001 From: divinity76 Date: Sun, 8 Dec 2024 01:13:06 +0100 Subject: [PATCH 1/7] handle apache DocumentRoot cyrillic encoding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When DocumentRoot contains Cyrillic characters, like DocumentRoot /home/hans/web/cyrillicрф.ratma.net/public_html and PHP is invoked with SetHandler (PS: not applicable to ProxyPassMatch; the problem occurs with SetHandler specifically), like DocumentRoot /home/hans/web/cyrillicрф.ratma.net/public_html SetHandler "proxy:unix:/run/php/php8.3-fpm-cyrillicрф.ratma.net.sock" then Apache will URL-encode the Cyrillic characters before sending the path to FPM, so env_script_filename will contain /home/hans/web/cyrillic%D1%80%D1%84.ratma.net/public_html/index.php and we need to URL-decode it to /home/hans/web/cyrillicрф.ratma.net/public_html/index.php; otherwise we hit the zlog(ZLOG_DEBUG, "Primary script unknown"); SG(sapi_headers).http_response_code = 404; PUTS("File not found.\n"); error code path. --- sapi/fpm/fpm/fpm_main.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/sapi/fpm/fpm/fpm_main.c b/sapi/fpm/fpm/fpm_main.c index 57006a15c7a08..df1745459f5dd 100644 --- a/sapi/fpm/fpm/fpm_main.c +++ b/sapi/fpm/fpm/fpm_main.c @@ -1035,8 +1035,17 @@ static void init_request_info(void) /* Copy path portion in place to avoid memory leak. Note * that this also affects what script_path_translated points * to. 
*/ - memmove(env_script_filename, p, strlen(p) + 1); + size_t plen = strlen(p); + memmove(env_script_filename, p, plen + 1); apache_was_here = 1; + // If DocumentRoot contains cyrillic characters and PHP is invoked with SetHandler (not applicable to ProxySetMatch), + // then the cyrillic characters are urlencoded by apache, and we need to decode them, for example with + // DocumentRoot /home/hans/web/cyrillicрф.ratma.net/public_html + // env_script_filename contains /home/hans/web/cyrillic%D1%80%D1%84.ratma.net/public_html/index.php. + // and we must decode it to /home/hans/web/cyrillicрф.ratma.net/public_html/index.php. + if(memchr(env_script_filename, '%', plen) != NULL){ + plen = php_url_decode(env_script_filename, plen); + } } /* ignore query string if sent by Apache (RewriteRule) */ p = strchr(env_script_filename, '?'); From 2aceb359d1b7b5f5525e59e67375d3f18521d92f Mon Sep 17 00:00:00 2001 From: divinity76 Date: Sun, 8 Dec 2024 01:24:55 +0100 Subject: [PATCH 2/7] typo --- sapi/fpm/fpm/fpm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sapi/fpm/fpm/fpm_main.c b/sapi/fpm/fpm/fpm_main.c index df1745459f5dd..45ec279ecc67e 100644 --- a/sapi/fpm/fpm/fpm_main.c +++ b/sapi/fpm/fpm/fpm_main.c @@ -1038,7 +1038,7 @@ static void init_request_info(void) size_t plen = strlen(p); memmove(env_script_filename, p, plen + 1); apache_was_here = 1; - // If DocumentRoot contains cyrillic characters and PHP is invoked with SetHandler (not applicable to ProxySetMatch), + // If DocumentRoot contains cyrillic characters and PHP is invoked with SetHandler (not applicable to ProxyPassMatch), // then the cyrillic characters are urlencoded by apache, and we need to decode them, for example with // DocumentRoot /home/hans/web/cyrillicрф.ratma.net/public_html // env_script_filename contains /home/hans/web/cyrillic%D1%80%D1%84.ratma.net/public_html/index.php. 
From 0f2be03e32535aeeae7b2958a3c5662b54b4cfe8 Mon Sep 17 00:00:00 2001 From: divinity76 Date: Sun, 8 Dec 2024 02:13:55 +0100 Subject: [PATCH 3/7] raw MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Improves compatibility with sapi/fpm/tests/fcgi-env-pif-apache-pp-sn-strip-encoded-plus.phpt. It does not fix that test entirely, but it does help 🤔 --- sapi/fpm/fpm/fpm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sapi/fpm/fpm/fpm_main.c b/sapi/fpm/fpm/fpm_main.c index 45ec279ecc67e..5e862218f5146 100644 --- a/sapi/fpm/fpm/fpm_main.c +++ b/sapi/fpm/fpm/fpm_main.c @@ -1044,7 +1044,7 @@ static void init_request_info(void) // env_script_filename contains /home/hans/web/cyrillic%D1%80%D1%84.ratma.net/public_html/index.php. // and we must decode it to /home/hans/web/cyrillicрф.ratma.net/public_html/index.php. if(memchr(env_script_filename, '%', plen) != NULL){ - plen = php_url_decode(env_script_filename, plen); + plen = php_raw_url_decode(env_script_filename, plen); } } /* ignore query string if sent by Apache (RewriteRule) */ From 4c95d0c9d72c34dcc8448242d2c6ab0417020b8f Mon Sep 17 00:00:00 2001 From: divinity76 Date: Sun, 8 Dec 2024 02:45:27 +0100 Subject: [PATCH 4/7] simpler --- sapi/fpm/fpm/fpm_main.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/sapi/fpm/fpm/fpm_main.c b/sapi/fpm/fpm/fpm_main.c index 5e862218f5146..8aee433a134c7 100644 --- a/sapi/fpm/fpm/fpm_main.c +++ b/sapi/fpm/fpm/fpm_main.c @@ -1035,8 +1035,17 @@ static void init_request_info(void) /* Copy path portion in place to avoid memory leak. Note * that this also affects what script_path_translated points * to. 
*/ - size_t plen = strlen(p); - memmove(env_script_filename, p, plen + 1); + memmove(env_script_filename, p, strlen(p) + 1); apache_was_here = 1; - // If DocumentRoot contains cyrillic characters and PHP is invoked with SetHandler (not applicable to ProxyPassMatch), - // then the cyrillic characters are urlencoded by apache, and we need to decode them, for example with - // DocumentRoot /home/hans/web/cyrillicрф.ratma.net/public_html - // env_script_filename contains /home/hans/web/cyrillic%D1%80%D1%84.ratma.net/public_html/index.php. - // and we must decode it to /home/hans/web/cyrillicрф.ratma.net/public_html/index.php. - if(memchr(env_script_filename, '%', plen) != NULL){ - plen = php_raw_url_decode(env_script_filename, plen); - } } /* ignore query string if sent by Apache (RewriteRule) */ p = strchr(env_script_filename, '?'); @@ -1112,6 +1103,16 @@ static void init_request_info(void) script_path_translated = __unixify(script_path_translated, 0, NULL, 1, 0); #endif + // If DocumentRoot contains cyrillic characters and PHP is invoked with SetHandler (not applicable to ProxyPassMatch), + // then the cyrillic characters are urlencoded by apache, and we need to decode them, for example with + // DocumentRoot /home/hans/web/cyrillicрф.ratma.net/public_html + // env_script_filename contains /home/hans/web/cyrillic%D1%80%D1%84.ratma.net/public_html/index.php. + // and we must decode it to /home/hans/web/cyrillicрф.ratma.net/public_html/index.php. 
+ if (apache_was_here && strchr(script_path_translated, '%')) + { + php_raw_url_decode(script_path_translated, strlen(script_path_translated)); + } + /* * if the file doesn't exist, try to extract PATH_INFO out * of it by stat'ing back through the '/' From 3e7f9f95be0bb34a1147892220648e86e9c9b3a2 Mon Sep 17 00:00:00 2001 From: divinity76 Date: Sun, 8 Dec 2024 12:18:05 +0100 Subject: [PATCH 5/7] fix some failing tests --- sapi/fpm/fpm/fpm_main.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/sapi/fpm/fpm/fpm_main.c b/sapi/fpm/fpm/fpm_main.c index 8aee433a134c7..f99872c5c8723 100644 --- a/sapi/fpm/fpm/fpm_main.c +++ b/sapi/fpm/fpm/fpm_main.c @@ -1103,15 +1103,6 @@ static void init_request_info(void) script_path_translated = __unixify(script_path_translated, 0, NULL, 1, 0); #endif - // If DocumentRoot contains cyrillic characters and PHP is invoked with SetHandler (not applicable to ProxyPassMatch), - // then the cyrillic characters are urlencoded by apache, and we need to decode them, for example with - // DocumentRoot /home/hans/web/cyrillicрф.ratma.net/public_html - // env_script_filename contains /home/hans/web/cyrillic%D1%80%D1%84.ratma.net/public_html/index.php. - // and we must decode it to /home/hans/web/cyrillicрф.ratma.net/public_html/index.php. 
- if (apache_was_here && strchr(script_path_translated, '%')) - { - php_raw_url_decode(script_path_translated, strlen(script_path_translated)); - } /* * if the file doesn't exist, try to extract PATH_INFO out @@ -1128,8 +1119,21 @@ static void init_request_info(void) char *ptr; if (pt) { + // If DocumentRoot contains cyrillic characters and PHP is invoked with SetHandler (not applicable to ProxyPassMatch), + // then the cyrillic characters are urlencoded by apache, and we need to decode them, for example with + // DocumentRoot /home/hans/web/cyrillicрф.ratma.net/public_html + // env_script_filename contains /home/hans/web/cyrillic%D1%80%D1%84.ratma.net/public_html/index.php. + // and we must decode it to /home/hans/web/cyrillicрф.ratma.net/public_html/index.php. + bool firstrun_apache_cyrillic_encoding = apache_was_here && memchr(pt, '%', len); + if(firstrun_apache_cyrillic_encoding) { + len = php_raw_url_decode(pt, len); + } while ((ptr = strrchr(pt, '/')) || (ptr = strrchr(pt, '\\'))) { - *ptr = 0; + if(firstrun_apache_cyrillic_encoding) { + firstrun_apache_cyrillic_encoding = false; + } else { + *ptr = 0; + } if (stat(pt, &st) == 0 && S_ISREG(st.st_mode)) { /* * okay, we found the base script! From 6c9d63d003fe11a14cf9c55e5eb84bc703262a16 Mon Sep 17 00:00:00 2001 From: divinity76 Date: Mon, 9 Dec 2024 10:47:36 +0100 Subject: [PATCH 6/7] less state --- sapi/fpm/fpm/fpm_main.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/sapi/fpm/fpm/fpm_main.c b/sapi/fpm/fpm/fpm_main.c index f99872c5c8723..dd5d6e6b6cea9 100644 --- a/sapi/fpm/fpm/fpm_main.c +++ b/sapi/fpm/fpm/fpm_main.c @@ -1124,16 +1124,14 @@ static void init_request_info(void) // DocumentRoot /home/hans/web/cyrillicрф.ratma.net/public_html // env_script_filename contains /home/hans/web/cyrillic%D1%80%D1%84.ratma.net/public_html/index.php. // and we must decode it to /home/hans/web/cyrillicрф.ratma.net/public_html/index.php. 
- bool firstrun_apache_cyrillic_encoding = apache_was_here && memchr(pt, '%', len); - if(firstrun_apache_cyrillic_encoding) { + if(apache_was_here && memchr(pt, '%', len)) { len = php_raw_url_decode(pt, len); - } + ptr = &pt[len]; // php_raw_url_decode() writes a trailing null byte, &pt[len] is that null byte. + goto apache_cyrillic_jump; + } while ((ptr = strrchr(pt, '/')) || (ptr = strrchr(pt, '\\'))) { - if(firstrun_apache_cyrillic_encoding) { - firstrun_apache_cyrillic_encoding = false; - } else { - *ptr = 0; - } + *ptr = 0; + apache_cyrillic_jump: if (stat(pt, &st) == 0 && S_ISREG(st.st_mode)) { /* * okay, we found the base script! From b937b597a53917a9b1dba808105bc06a4a44e056 Mon Sep 17 00:00:00 2001 From: divinity76 Date: Sun, 15 Dec 2024 00:26:09 +0100 Subject: [PATCH 7/7] not limited to cyrillic.. % is also affected. I suspect a great deal of other characters are affected as well --- sapi/fpm/fpm/fpm_main.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sapi/fpm/fpm/fpm_main.c b/sapi/fpm/fpm/fpm_main.c index dd5d6e6b6cea9..b05114e68216f 100644 --- a/sapi/fpm/fpm/fpm_main.c +++ b/sapi/fpm/fpm/fpm_main.c @@ -1119,19 +1119,19 @@ static void init_request_info(void) char *ptr; if (pt) { - // If DocumentRoot contains cyrillic characters and PHP is invoked with SetHandler (not applicable to ProxyPassMatch), - // then the cyrillic characters are urlencoded by apache, and we need to decode them, for example with - // DocumentRoot /home/hans/web/cyrillicрф.ratma.net/public_html - // env_script_filename contains /home/hans/web/cyrillic%D1%80%D1%84.ratma.net/public_html/index.php. - // and we must decode it to /home/hans/web/cyrillicрф.ratma.net/public_html/index.php. 
+ // If DocumentRoot contains special characters like '%' or cyrillic 'рф' and PHP is invoked with SetHandler (not applicable to ProxyPassMatch), + // then the special characters are urlencoded by apache, and we need to decode them, for example with + // DocumentRoot /home/hans/web/cyrillicрф.ratma.net/public_html/test%lol + // env_script_filename contains /home/hans/web/cyrillic%D1%80%D1%84.ratma.net/public_html/test%25lol/index.php. + // and we must decode it to /home/hans/web/cyrillicрф.ratma.net/public_html/test%lol/index.php. if(apache_was_here && memchr(pt, '%', len)) { len = php_raw_url_decode(pt, len); ptr = &pt[len]; // php_raw_url_decode() writes a trailing null byte, &pt[len] is that null byte. - goto apache_cyrillic_jump; + goto apache_special_jump; } while ((ptr = strrchr(pt, '/')) || (ptr = strrchr(pt, '\\'))) { *ptr = 0; - apache_cyrillic_jump: + apache_special_jump: if (stat(pt, &st) == 0 && S_ISREG(st.st_mode)) { /* * okay, we found the base script!