mirror of
https://github.com/openwrt/packages.git
synced 2025-01-31 03:41:44 +00:00
a2d801cdf3
Backport required patch for PCRE2 support and move to PCRE2 library as PCRE is now EOL and won't receive any security updates anymore. Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
1015 lines
33 KiB
Diff
1015 lines
33 KiB
Diff
From 53748ca8ca3c893025be34dd4f104546fcbd0602 Mon Sep 17 00:00:00 2001
|
|
From: Fabian Keil <fk@fabiankeil.de>
|
|
Date: Sat, 17 Jun 2023 13:20:24 +0200
|
|
Subject: [PATCH] Add pcre2 support
|
|
|
|
This is currently expected to cause crashes on Windows
|
|
when compiled with GUI support.
|
|
|
|
Closes bug #935.
|
|
Initial patch submitted by: Gagan Sidhu
|
|
---
|
|
acconfig.h | 6 ++
|
|
actions.c | 8 +++
|
|
cgi.c | 9 ++-
|
|
client-tags.c | 5 ++
|
|
configure.in | 64 ++++++++++++++++-
|
|
pcrs.c | 148 +++++++++++++++++++++++++++++----------
|
|
pcrs.h | 40 +++++++----
|
|
project.h | 54 +++++++++-----
|
|
templates/show-status | 5 +-
|
|
urlmatch.c | 159 ++++++++++++++++++++++++++++++++++++++++++
|
|
urlmatch.h | 4 ++
|
|
w32log.c | 3 +
|
|
12 files changed, 430 insertions(+), 75 deletions(-)
|
|
|
|
--- a/acconfig.h
|
|
+++ b/acconfig.h
|
|
@@ -225,11 +225,17 @@
|
|
/* Define if pcre.h must be included as <pcre/pcre.h>
|
|
*/
|
|
#undef PCRE_H_IN_SUBDIR
|
|
+#undef PCRE2_H_IN_SUBDIR
|
|
+
|
|
+#undef HAVE_PCRE2
|
|
+#undef HAVE_PCRE2POSIX
|
|
|
|
/* Define if pcreposix.h must be included as <pcre/pcreposix.h>
|
|
*/
|
|
#undef PCREPOSIX_H_IN_SUBDIR
|
|
|
|
+#undef PCRE2POSIX_H_IN_SUBDIR
|
|
+
|
|
@BOTTOM@
|
|
|
|
/*
|
|
--- a/actions.c
|
|
+++ b/actions.c
|
|
@@ -828,8 +828,12 @@ int update_action_bits_for_tag(struct cl
|
|
continue;
|
|
}
|
|
|
|
+#ifdef HAVE_PCRE2
|
|
+ if (pcre2_pattern_matches(b->url->pattern.tag_regex, tag))
|
|
+#else
|
|
/* and check if one of the tag patterns matches the tag, */
|
|
if (0 == regexec(b->url->pattern.tag_regex, tag, 0, NULL, 0))
|
|
+#endif
|
|
{
|
|
/* if it does, update the action bit map, */
|
|
if (merge_current_action(csp->action, b->action))
|
|
@@ -884,7 +888,11 @@ jb_err check_negative_tag_patterns(struc
|
|
}
|
|
for (tag = csp->tags->first; NULL != tag; tag = tag->next)
|
|
{
|
|
+#ifdef HAVE_PCRE2
|
|
+ if (pcre2_pattern_matches(b->url->pattern.tag_regex, tag->str))
|
|
+#else
|
|
if (0 == regexec(b->url->pattern.tag_regex, tag->str, 0, NULL, 0))
|
|
+#endif
|
|
{
|
|
/*
|
|
* The pattern matches at least one tag, thus the action
|
|
--- a/cgi.c
|
|
+++ b/cgi.c
|
|
@@ -2023,7 +2023,7 @@ jb_err template_fill(char **template_ptr
|
|
char buf[BUFFER_SIZE];
|
|
char *tmp_out_buffer;
|
|
char *file_buffer;
|
|
- size_t size;
|
|
+ size_t buffer_size, new_size;
|
|
int error;
|
|
const char *flags;
|
|
|
|
@@ -2032,7 +2032,7 @@ jb_err template_fill(char **template_ptr
|
|
assert(exports);
|
|
|
|
file_buffer = *template_ptr;
|
|
- size = strlen(file_buffer) + 1;
|
|
+ buffer_size = strlen(file_buffer) + 1;
|
|
|
|
/*
|
|
* Assemble pcrs joblist from exports map
|
|
@@ -2082,7 +2082,10 @@ jb_err template_fill(char **template_ptr
|
|
}
|
|
else
|
|
{
|
|
- error = pcrs_execute(job, file_buffer, size, &tmp_out_buffer, &size);
|
|
+ error = pcrs_execute(job, file_buffer, buffer_size, &tmp_out_buffer,
|
|
+ &new_size);
|
|
+
|
|
+ buffer_size = new_size;
|
|
|
|
pcrs_free_job(job);
|
|
if (NULL == tmp_out_buffer)
|
|
--- a/client-tags.c
|
|
+++ b/client-tags.c
|
|
@@ -43,6 +43,7 @@
|
|
#include "miscutil.h"
|
|
#include "errlog.h"
|
|
#include "parsers.h"
|
|
+#include "urlmatch.h"
|
|
|
|
struct client_specific_tag
|
|
{
|
|
@@ -658,7 +659,11 @@ int client_tag_match(const struct patter
|
|
|
|
for (tag = tags->first; tag != NULL; tag = tag->next)
|
|
{
|
|
+#ifdef HAVE_PCRE2
|
|
+ if (pcre2_pattern_matches(pattern->pattern.tag_regex, tag->str))
|
|
+#else
|
|
if (0 == regexec(pattern->pattern.tag_regex, tag->str, 0, NULL, 0))
|
|
+#endif
|
|
{
|
|
log_error(LOG_LEVEL_TAGGING, "Client tag '%s' matches.", tag->str);
|
|
return 1;
|
|
--- a/configure.in
|
|
+++ b/configure.in
|
|
@@ -863,12 +863,47 @@ else
|
|
])
|
|
fi
|
|
|
|
+AC_ARG_ENABLE(pcre2,
|
|
+[ --disable-pcre2 Don't try to use pcre2 even if it's available],
|
|
+[enableval2=$enableval],
|
|
+[enableval2=yes])
|
|
+if test $enableval2 = yes; then
|
|
+ try_pcre2=yes
|
|
+else
|
|
+ AC_MSG_WARN([Ignoring pcre2 even if it's available])
|
|
+ try_pcre2=no; have_pcre2=no; have_pcre2posix=no # later tests expand these unquoted, so they must be set
|
|
+fi
|
|
+
|
|
+if test $try_pcre2 != no; then
|
|
dnl =================================================================
|
|
dnl Checks for libraries.
|
|
dnl =================================================================
|
|
dnl Note: Some systems may have the library but not the system header
|
|
dnl file, so we must check for both.
|
|
dnl Also check for correct version
|
|
+AC_CHECK_LIB(pcre2-8, pcre2_compile_8, [
|
|
+ AC_CHECK_HEADER(pcre2.h, [
|
|
+ AC_EGREP_HEADER(pcre2_pattern_info, pcre2.h,[have_pcre2=yes; AC_DEFINE(HAVE_PCRE2)], [AC_MSG_WARN([[pcre2 old version installed]]); have_pcre2=no])
|
|
+ ], [
|
|
+ AC_CHECK_HEADER(pcre2/pcre2.h, [
|
|
+ AC_EGREP_HEADER(pcre2_pattern_info, pcre2/pcre2.h, [have_pcre2=yes; AC_DEFINE(HAVE_PCRE2) AC_DEFINE(PCRE2_H_IN_SUBDIR)], [AC_MSG_WARN([[pcre2 old version installed]]); have_pcre2=no])
|
|
+ ], [have_pcre2=no], [#define PCRE2_CODE_UNIT_WIDTH 8])
|
|
+ ], [#define PCRE2_CODE_UNIT_WIDTH 8])
|
|
+], [have_pcre2=no])
|
|
+
|
|
+AC_CHECK_LIB(pcre2-posix, regcomp, [
|
|
+ AC_CHECK_HEADER(pcre2posix.h, [
|
|
+ AC_EGREP_HEADER(pcre2_regerror, pcre2posix.h, [have_pcre2posix=yes],[AC_MSG_WARN([[pcre2posix old version installed]]); have_pcre2posix=no])
|
|
+ ], [
|
|
+ AC_CHECK_HEADER(pcre2/pcre2posix.h, [
|
|
+ AC_EGREP_HEADER(pcre2_regerror, pcre2/pcre2posix.h, [have_pcre2posix=yes; AC_DEFINE(PCRE2POSIX_H_IN_SUBDIR)],[AC_MSG_WARN([[pcre2posix old version installed]]); have_pcre2posix=no])
|
|
+ ], [have_pcre2posix=no])
|
|
+ ])
|
|
+], [have_pcre2posix=no], -lpcre2-8)
|
|
+fi
|
|
+
|
|
+if test $have_pcre2 = "no"; then
|
|
+
|
|
AC_CHECK_LIB(pcre, pcre_compile, [
|
|
AC_CHECK_HEADER(pcre.h, [
|
|
AC_EGREP_HEADER(pcre_fullinfo, pcre.h, [have_pcre=yes], [AC_MSG_WARN([[pcre old version installed]]); have_pcre=no])
|
|
@@ -889,6 +924,7 @@ AC_CHECK_LIB(pcreposix, regcomp, [
|
|
])
|
|
], [have_pcreposix=no], -lpcre)
|
|
|
|
+fi
|
|
dnl ================================================================
|
|
dnl libpcrs is temporarily disabled.
|
|
dnl
|
|
@@ -1095,6 +1131,31 @@ fi
|
|
# we don't need pcreposix, then link pcre dynamically; else
|
|
# build it and link statically
|
|
#
|
|
+
|
|
+#check for libpcre2 first. then regular pcre
|
|
+
|
|
+if test $have_pcre2 = "yes"; then
|
|
+ echo "using libpcre2"
|
|
+ STATIC_PCRE_ONLY=#
|
|
+ LIBS="$LIBS -lpcre2-8 -lpcre2-posix"
|
|
+ if test "$use_static_pcre" = "yes"; then
|
|
+ pcre_dyn=no
|
|
+ AC_DEFINE(PCRE_STATIC,1,[Define to statically link to pcre library on Windows.])
|
|
+# see /usr/i686-w64-mingw32/sys-root/mingw/include/pcre.h line 54
|
|
+# #if defined(_WIN32) && !defined(PCRE_STATIC)
|
|
+# # ifndef PCRE_EXP_DECL
|
|
+# # define PCRE_EXP_DECL extern __declspec(dllimport)
|
|
+# # endif
|
|
+# If you want to statically link a program against a PCRE library in the form of
|
|
+# a non-dll .a file, you must define PCRE_STATIC before including pcre.h or
|
|
+# pcrecpp.h, otherwise the pcre_malloc() and pcre_free() exported functions will
|
|
+# be declared __declspec(dllimport), with unwanted results.
|
|
+ else
|
|
+ pcre_dyn=yes
|
|
+ AC_DEFINE(FEATURE_DYNAMIC_PCRE,1,[Define to dynamically link to pcre.])
|
|
+ fi
|
|
+else
|
|
+
|
|
if test $have_pcre = "yes"; then
|
|
echo "using libpcre"
|
|
STATIC_PCRE_ONLY=#
|
|
@@ -1116,7 +1177,8 @@ if test $have_pcre = "yes"; then
|
|
AC_DEFINE(FEATURE_DYNAMIC_PCRE,1,[Define to dynamically link to pcre.])
|
|
fi
|
|
else
|
|
- AC_MSG_ERROR(pcre library not detected.)
|
|
+ AC_MSG_ERROR(Detected neither pcre2 nor pcre library.)
|
|
+fi
|
|
fi
|
|
|
|
AC_DEFINE(FEATURE_CONNECTION_KEEP_ALIVE)
|
|
--- a/pcrs.c
|
|
+++ b/pcrs.c
|
|
@@ -57,7 +57,7 @@
|
|
* Internal prototypes
|
|
*/
|
|
|
|
-static int pcrs_parse_perl_options(const char *optstring, int *flags);
|
|
+static int pcrs_parse_perl_options(const char *optstring, unsigned int *flags);
|
|
static pcrs_substitute *pcrs_compile_replacement(const char *replacement, int trivialflag,
|
|
int capturecount, int *errptr);
|
|
static int is_hex_sequence(const char *sequence);
|
|
@@ -83,25 +83,25 @@ const char *pcrs_strerror(const int erro
|
|
switch (error)
|
|
{
|
|
/* Passed-through PCRE error: */
|
|
- case PCRE_ERROR_NOMEMORY: return "(pcre:) No memory";
|
|
+ case PCREn(ERROR_NOMEMORY): return "(pcre:) No memory";
|
|
|
|
/* Shouldn't happen unless PCRE or PCRS bug, or user messed with compiled job: */
|
|
- case PCRE_ERROR_NULL: return "(pcre:) NULL code or subject or ovector";
|
|
- case PCRE_ERROR_BADOPTION: return "(pcre:) Unrecognized option bit";
|
|
- case PCRE_ERROR_BADMAGIC: return "(pcre:) Bad magic number in code";
|
|
+ case PCREn(ERROR_NULL): return "(pcre:) NULL code or subject or ovector";
|
|
+ case PCREn(ERROR_BADOPTION): return "(pcre:) Unrecognized option bit";
|
|
+ case PCREn(ERROR_BADMAGIC): return "(pcre:) Bad magic number in code";
|
|
+#if defined(PCRE_ERROR_UNKNOWN_NODE)
|
|
case PCRE_ERROR_UNKNOWN_NODE: return "(pcre:) Bad node in pattern";
|
|
-
|
|
+#endif
|
|
/* Can't happen / not passed: */
|
|
- case PCRE_ERROR_NOSUBSTRING: return "(pcre:) Fire in power supply";
|
|
- case PCRE_ERROR_NOMATCH: return "(pcre:) Water in power supply";
|
|
+ case PCREn(ERROR_NOSUBSTRING): return "(pcre:) Fire in power supply";
|
|
+ case PCREn(ERROR_NOMATCH): return "(pcre:) Water in power supply";
|
|
|
|
#ifdef PCRE_ERROR_MATCHLIMIT
|
|
/*
|
|
* Only reported by PCRE versions newer than our own.
|
|
*/
|
|
- case PCRE_ERROR_MATCHLIMIT: return "(pcre:) Match limit reached";
|
|
+ case PCREn(ERROR_MATCHLIMIT): return "(pcre:) Match limit reached";
|
|
#endif /* def PCRE_ERROR_MATCHLIMIT */
|
|
-
|
|
/* PCRS errors: */
|
|
case PCRS_ERR_NOMEM: return "(pcrs:) No memory";
|
|
case PCRS_ERR_CMDSYNTAX: return "(pcrs:) Syntax error while parsing command";
|
|
@@ -111,16 +111,14 @@ const char *pcrs_strerror(const int erro
|
|
case PCRS_WARN_TRUNCATION:
|
|
return "(pcrs:) At least one variable was too big and has been truncated before compilation";
|
|
|
|
- /*
|
|
- * XXX: With the exception of PCRE_ERROR_MATCHLIMIT we
|
|
- * only catch PCRE errors that can happen with our internal
|
|
- * version. If Privoxy is linked against a newer
|
|
- * PCRE version all bets are off ...
|
|
- */
|
|
default:
|
|
+#ifdef HAVE_PCRE2
|
|
+ pcre2_get_error_message(error, (PCRE2_UCHAR8*)buf, sizeof(buf));
|
|
+#else
|
|
snprintf(buf, sizeof(buf),
|
|
"Error code %d. For details, check the pcre documentation.",
|
|
error);
|
|
+#endif
|
|
return buf;
|
|
}
|
|
}
|
|
@@ -149,7 +147,7 @@ const char *pcrs_strerror(const int erro
|
|
* Returns : option integer suitable for pcre
|
|
*
|
|
*********************************************************************/
|
|
-static int pcrs_parse_perl_options(const char *optstring, int *flags)
|
|
+static int pcrs_parse_perl_options(const char *optstring, unsigned int *flags)
|
|
{
|
|
size_t i;
|
|
int rc = 0;
|
|
@@ -163,13 +161,13 @@ static int pcrs_parse_perl_options(const
|
|
{
|
|
case 'e': break; /* ToDo ;-) */
|
|
case 'g': *flags |= PCRS_GLOBAL; break;
|
|
- case 'i': rc |= PCRE_CASELESS; break;
|
|
- case 'm': rc |= PCRE_MULTILINE; break;
|
|
+ case 'i': rc |= PCREn(CASELESS); break;
|
|
+ case 'm': rc |= PCREn(MULTILINE); break;
|
|
case 'o': break;
|
|
- case 's': rc |= PCRE_DOTALL; break;
|
|
- case 'x': rc |= PCRE_EXTENDED; break;
|
|
+ case 's': rc |= PCREn(DOTALL); break;
|
|
+ case 'x': rc |= PCREn(EXTENDED); break;
|
|
case 'D': *flags |= PCRS_DYNAMIC; break;
|
|
- case 'U': rc |= PCRE_UNGREEDY; break;
|
|
+ case 'U': rc |= PCREn(UNGREEDY); break;
|
|
case 'T': *flags |= PCRS_TRIVIAL; break;
|
|
default: break;
|
|
}
|
|
@@ -471,7 +469,15 @@ pcrs_job *pcrs_free_job(pcrs_job *job)
|
|
else
|
|
{
|
|
next = job->next;
|
|
- if (job->pattern != NULL) free(job->pattern);
|
|
+ if (job->pattern != NULL)
|
|
+ {
|
|
+#ifdef HAVE_PCRE2
|
|
+ pcre2_code_free(job->pattern);
|
|
+#else
|
|
+ free(job->pattern);
|
|
+#endif
|
|
+ }
|
|
+#ifndef HAVE_PCRE2
|
|
if (job->hints != NULL)
|
|
{
|
|
#ifdef PCRE_CONFIG_JIT
|
|
@@ -480,6 +486,7 @@ pcrs_job *pcrs_free_job(pcrs_job *job)
|
|
free(job->hints);
|
|
#endif
|
|
}
|
|
+#endif
|
|
if (job->substitute != NULL)
|
|
{
|
|
if (job->substitute->text != NULL) free(job->substitute->text);
|
|
@@ -626,10 +633,14 @@ pcrs_job *pcrs_compile_command(const cha
|
|
pcrs_job *pcrs_compile(const char *pattern, const char *substitute, const char *options, int *errptr)
|
|
{
|
|
pcrs_job *newjob;
|
|
- int flags;
|
|
+ unsigned int flags;
|
|
int capturecount;
|
|
- const char *error;
|
|
+#ifdef HAVE_PCRE2
|
|
+ int ret;
|
|
+#else
|
|
int pcre_study_options = 0;
|
|
+ const char *error;
|
|
+#endif
|
|
|
|
*errptr = 0;
|
|
|
|
@@ -661,25 +672,43 @@ pcrs_job *pcrs_compile(const char *patte
|
|
/*
|
|
* Compile the pattern
|
|
*/
|
|
+#ifdef HAVE_PCRE2
|
|
+ PCRE2_SIZE error_offset;
|
|
+ newjob->pattern = pcre2_compile((const unsigned char *)pattern,
|
|
+ PCRE2_ZERO_TERMINATED, (unsigned)newjob->options, errptr,
|
|
+ &error_offset, NULL);
|
|
+#else
|
|
newjob->pattern = pcre_compile(pattern, newjob->options, &error, errptr, NULL);
|
|
+#endif
|
|
if (newjob->pattern == NULL)
|
|
{
|
|
pcrs_free_job(newjob);
|
|
return NULL;
|
|
}
|
|
|
|
-
|
|
-#ifdef PCRE_STUDY_JIT_COMPILE
|
|
+#if defined(PCRE_STUDY_JIT_COMPILE) || defined(HAVE_PCRE2)
|
|
#ifdef DISABLE_PCRE_JIT_COMPILATION
|
|
#warning PCRE_STUDY_JIT_COMPILE is supported but Privoxy has been configured not to use it
|
|
#else
|
|
if (!(flags & PCRS_DYNAMIC))
|
|
{
|
|
+#ifdef HAVE_PCRE2
|
|
+ /* Try to enable JIT compilation but continue if it's unsupported. */
|
|
+ if ((ret = pcre2_jit_compile(newjob->pattern, PCRE2_JIT_COMPLETE)) &&
|
|
+ (ret != PCRE2_ERROR_JIT_BADOPTION))
|
|
+ {
|
|
+ *errptr = ret;
|
|
+ pcrs_free_job(newjob);
|
|
+ return NULL;
|
|
+ }
|
|
+#else
|
|
pcre_study_options = PCRE_STUDY_JIT_COMPILE;
|
|
+#endif
|
|
}
|
|
#endif
|
|
#endif
|
|
|
|
+#ifndef HAVE_PCRE2
|
|
/*
|
|
* Generate hints. This has little overhead, since the
|
|
* hints will be NULL for a boring pattern anyway.
|
|
@@ -691,13 +720,17 @@ pcrs_job *pcrs_compile(const char *patte
|
|
pcrs_free_job(newjob);
|
|
return NULL;
|
|
}
|
|
-
|
|
+#endif
|
|
|
|
/*
|
|
* Determine the number of capturing subpatterns.
|
|
* This is needed for handling $+ in the substitute.
|
|
*/
|
|
+#ifdef HAVE_PCRE2
|
|
+ if (0 > (*errptr = pcre2_pattern_info(newjob->pattern, PCRE2_INFO_CAPTURECOUNT, &capturecount)))
|
|
+#else
|
|
if (0 > (*errptr = pcre_fullinfo(newjob->pattern, newjob->hints, PCRE_INFO_CAPTURECOUNT, &capturecount)))
|
|
+#endif
|
|
{
|
|
pcrs_free_job(newjob);
|
|
return NULL;
|
|
@@ -809,14 +842,20 @@ int pcrs_execute_list(pcrs_job *joblist,
|
|
*********************************************************************/
|
|
int pcrs_execute(pcrs_job *job, const char *subject, size_t subject_length, char **result, size_t *result_length)
|
|
{
|
|
- int offsets[3 * PCRS_MAX_SUBMATCHES],
|
|
- offset,
|
|
+ int offset,
|
|
i, k,
|
|
matches_found,
|
|
submatches,
|
|
max_matches = PCRS_MAX_MATCH_INIT;
|
|
size_t newsize;
|
|
+#ifdef HAVE_PCRE2
|
|
pcrs_match *matches, *dummy;
|
|
+ pcre2_match_data *pcre2_matches;
|
|
+ size_t *offsets;
|
|
+#else
|
|
+ pcrs_match *matches, *dummy;
|
|
+ int offsets[3 * PCRS_MAX_SUBMATCHES];
|
|
+#endif
|
|
char *result_offset;
|
|
|
|
offset = i = 0;
|
|
@@ -830,27 +869,38 @@ int pcrs_execute(pcrs_job *job, const ch
|
|
return(PCRS_ERR_BADJOB);
|
|
}
|
|
|
|
+#ifdef HAVE_PCRE2
|
|
+ if (NULL == (pcre2_matches = pcre2_match_data_create_from_pattern(job->pattern, NULL)))
|
|
+ {
|
|
+ return(PCRS_ERR_NOMEM);
|
|
+ }
|
|
+ offsets = pcre2_get_ovector_pointer(pcre2_matches);
|
|
+#endif
|
|
if (NULL == (matches = (pcrs_match *)malloc((size_t)max_matches * sizeof(pcrs_match))))
|
|
{
|
|
return(PCRS_ERR_NOMEM);
|
|
}
|
|
memset(matches, '\0', (size_t)max_matches * sizeof(pcrs_match));
|
|
|
|
-
|
|
/*
|
|
* Find the pattern and calculate the space
|
|
* requirements for the result
|
|
*/
|
|
newsize = subject_length;
|
|
|
|
+#ifdef HAVE_PCRE2
|
|
+ while ((submatches = pcre2_match(job->pattern, (const unsigned char *)subject,
|
|
+ subject_length, (size_t)offset, 0, pcre2_matches, NULL)) > 0)
|
|
+#else
|
|
while ((submatches = pcre_exec(job->pattern, job->hints, subject, (int)subject_length, offset, 0, offsets, 3 * PCRS_MAX_SUBMATCHES)) > 0)
|
|
+#endif
|
|
{
|
|
job->flags |= PCRS_SUCCESS;
|
|
matches[i].submatches = submatches;
|
|
|
|
for (k = 0; k < submatches; k++)
|
|
{
|
|
- matches[i].submatch_offset[k] = offsets[2 * k];
|
|
+ matches[i].submatch_offset[k] = (int)offsets[2 * k];
|
|
|
|
/* Note: Non-found optional submatches have length -1-(-1)==0 */
|
|
matches[i].submatch_length[k] = (size_t)(offsets[2 * k + 1] - offsets[2 * k]);
|
|
@@ -867,7 +917,7 @@ int pcrs_execute(pcrs_job *job, const ch
|
|
newsize += (size_t)offsets[0] * (size_t)job->substitute->backref_count[PCRS_MAX_SUBMATCHES];
|
|
|
|
/* chunk after match */
|
|
- matches[i].submatch_offset[PCRS_MAX_SUBMATCHES + 1] = offsets[1];
|
|
+ matches[i].submatch_offset[PCRS_MAX_SUBMATCHES + 1] = (int)offsets[1];
|
|
matches[i].submatch_length[PCRS_MAX_SUBMATCHES + 1] = subject_length - (size_t)offsets[1] - 1;
|
|
newsize += (subject_length - (size_t)offsets[1]) * (size_t)job->substitute->backref_count[PCRS_MAX_SUBMATCHES + 1];
|
|
|
|
@@ -894,12 +944,19 @@ int pcrs_execute(pcrs_job *job, const ch
|
|
break;
|
|
/* Go find the next one */
|
|
else
|
|
- offset = offsets[1];
|
|
+ offset = (int)offsets[1];
|
|
}
|
|
/* Pass pcre error through if (bad) failure */
|
|
+#ifdef HAVE_PCRE2
|
|
+ if (submatches < PCRE2_ERROR_NOMATCH)
|
|
+#else
|
|
if (submatches < PCRE_ERROR_NOMATCH)
|
|
+#endif
|
|
{
|
|
free(matches);
|
|
+#ifdef HAVE_PCRE2
|
|
+ pcre2_match_data_free(pcre2_matches);
|
|
+#endif
|
|
return submatches;
|
|
}
|
|
matches_found = i;
|
|
@@ -909,9 +966,19 @@ int pcrs_execute(pcrs_job *job, const ch
|
|
* Get memory for the result (must be freed by caller!)
|
|
* and append terminating null byte.
|
|
*/
|
|
- if ((*result = (char *)malloc(newsize + 1)) == NULL)
|
|
+ if ((*result = (char *)malloc(newsize + 1
|
|
+#ifdef HAVE_PCRE2
|
|
+ /*
|
|
+ * Work around to prevent invalid reads in the jit code.
|
|
+ */
|
|
+ + 16
|
|
+#endif
|
|
+ )) == NULL)
|
|
{
|
|
free(matches);
|
|
+#ifdef HAVE_PCRE2
|
|
+ pcre2_match_data_free(pcre2_matches);
|
|
+#endif
|
|
return PCRS_ERR_NOMEM;
|
|
}
|
|
else
|
|
@@ -964,6 +1031,9 @@ int pcrs_execute(pcrs_job *job, const ch
|
|
memcpy(result_offset, subject + offset, subject_length - (size_t)offset);
|
|
|
|
*result_length = newsize;
|
|
+#ifdef HAVE_PCRE2
|
|
+ pcre2_match_data_free(pcre2_matches);
|
|
+#endif
|
|
free(matches);
|
|
return matches_found;
|
|
|
|
@@ -1101,7 +1171,7 @@ char pcrs_get_delimiter(const char *stri
|
|
*********************************************************************/
|
|
char *pcrs_execute_single_command(const char *subject, const char *pcrs_command, int *hits)
|
|
{
|
|
- size_t size;
|
|
+ size_t buffer_size, new_size;
|
|
char *result = NULL;
|
|
pcrs_job *job;
|
|
|
|
@@ -1109,12 +1179,14 @@ char *pcrs_execute_single_command(const
|
|
assert(pcrs_command);
|
|
|
|
*hits = 0;
|
|
- size = strlen(subject);
|
|
+ buffer_size = strlen(subject);
|
|
|
|
job = pcrs_compile_command(pcrs_command, hits);
|
|
if (NULL != job)
|
|
{
|
|
- *hits = pcrs_execute(job, subject, size, &result, &size);
|
|
+ *hits = pcrs_execute(job, subject, buffer_size, &result, &new_size);
|
|
+ buffer_size = new_size;
|
|
+
|
|
if (*hits < 0)
|
|
{
|
|
freez(result);
|
|
--- a/pcrs.h
|
|
+++ b/pcrs.h
|
|
@@ -33,9 +33,18 @@
|
|
*********************************************************************/
|
|
|
|
|
|
+#ifdef HAVE_PCRE2
|
|
+#define PCRE2_CODE_UNIT_WIDTH 8
|
|
+#define PCREn(x) PCRE2_ ## x
|
|
+#ifndef _PCRE2_H
|
|
+#include <pcre2.h>
|
|
+#endif
|
|
+#else
|
|
+#define PCREn(x) PCRE_ ## x
|
|
#ifndef _PCRE_H
|
|
#include <pcre.h>
|
|
#endif
|
|
+#endif
|
|
|
|
/*
|
|
* Constants:
|
|
@@ -55,22 +64,23 @@
|
|
* They are supposed to be handled together with PCRE error
|
|
* codes and have to start with an offset to prevent overlaps.
|
|
*
|
|
- * PCRE 6.7 uses error codes from -1 to -21, PCRS error codes
|
|
- * below -100 should be safe for a while.
|
|
+ * PCRE 6.7 uses error codes from -1 to -21,
|
|
+ * PCRE2 10.42 uses error codes from -66 to 101.
|
|
+ * PCRS error codes below -300 should be safe for a while.
|
|
*/
|
|
-#define PCRS_ERR_NOMEM -100 /* Failed to acquire memory. */
|
|
-#define PCRS_ERR_CMDSYNTAX -101 /* Syntax of s///-command */
|
|
-#define PCRS_ERR_STUDY -102 /* pcre error while studying the pattern */
|
|
-#define PCRS_ERR_BADJOB -103 /* NULL job pointer, pattern or substitute */
|
|
-#define PCRS_WARN_BADREF -104 /* Backreference out of range */
|
|
-#define PCRS_WARN_TRUNCATION -105 /* At least one pcrs variable was too big,
|
|
+#define PCRS_ERR_NOMEM -300 /* Failed to acquire memory. */
|
|
+#define PCRS_ERR_CMDSYNTAX -301 /* Syntax of s///-command */
|
|
+#define PCRS_ERR_STUDY -302 /* pcre error while studying the pattern */
|
|
+#define PCRS_ERR_BADJOB -303 /* NULL job pointer, pattern or substitute */
|
|
+#define PCRS_WARN_BADREF -304 /* Backreference out of range */
|
|
+#define PCRS_WARN_TRUNCATION -305 /* At least one pcrs variable was too big,
|
|
* only the first part was used. */
|
|
|
|
/* Flags */
|
|
-#define PCRS_GLOBAL 1 /* Job should be applied globally, as with perl's g option */
|
|
-#define PCRS_TRIVIAL 2 /* Backreferences in the substitute are ignored */
|
|
-#define PCRS_SUCCESS 4 /* Job did previously match */
|
|
-#define PCRS_DYNAMIC 8 /* Job is dynamic (used to disable JIT compilation) */
|
|
+#define PCRS_GLOBAL 0x08000000u /* Job should be applied globally, as with perl's g option */
|
|
+#define PCRS_TRIVIAL 0x10000000u /* Backreferences in the substitute are ignored */
|
|
+#define PCRS_SUCCESS 0x20000000u /* Job did previously match */
|
|
+#define PCRS_DYNAMIC 0x40000000u /* Job is dynamic (used to disable JIT compilation) */
|
|
|
|
|
|
/*
|
|
@@ -107,10 +117,14 @@ typedef struct {
|
|
/* A PCRS job */
|
|
|
|
typedef struct PCRS_JOB {
|
|
+#ifdef HAVE_PCRE2
|
|
+ pcre2_code *pattern;
|
|
+#else
|
|
pcre *pattern; /* The compiled pcre pattern */
|
|
pcre_extra *hints; /* The pcre hints for the pattern */
|
|
+#endif
|
|
int options; /* The pcre options (numeric) */
|
|
- int flags; /* The pcrs and user flags (see "Flags" above) */
|
|
+ unsigned int flags; /* The pcrs and user flags (see "Flags" above) */
|
|
pcrs_substitute *substitute; /* The compiled pcrs substitute */
|
|
struct PCRS_JOB *next; /* Pointer for chaining jobs to joblists */
|
|
} pcrs_job;
|
|
--- a/project.h
|
|
+++ b/project.h
|
|
@@ -94,12 +94,38 @@
|
|
*/
|
|
|
|
#ifdef STATIC_PCRE
|
|
+#ifdef HAVE_PCRE2
|
|
+# include "pcre2.h"
|
|
+# include "pcre2posix.h"
|
|
+#else
|
|
# include "pcre.h"
|
|
+# include "pcreposix.h"
|
|
+#endif
|
|
#else
|
|
-# ifdef PCRE_H_IN_SUBDIR
|
|
-# include <pcre/pcre.h>
|
|
+# ifdef HAVE_PCRE2
|
|
+# ifdef PCRE2_H_IN_SUBDIR
|
|
+# define PCRE2_CODE_UNIT_WIDTH 8
|
|
+# include <pcre2/pcre2.h>
|
|
+# else
|
|
+# define PCRE2_CODE_UNIT_WIDTH 8
|
|
+# include <pcre2.h>
|
|
+# endif
|
|
+# ifdef PCRE2POSIX_H_IN_SUBDIR
|
|
+# include <pcre2/pcre2posix.h>
|
|
+# else
|
|
+# include <pcre2posix.h>
|
|
+# endif
|
|
# else
|
|
-# include <pcre.h>
|
|
+# ifdef PCRE_H_IN_SUBDIR
|
|
+# include <pcre/pcre.h>
|
|
+# else
|
|
+# include <pcre.h>
|
|
+# endif
|
|
+# ifdef PCREPOSIX_H_IN_SUBDIR
|
|
+# include <pcre/pcreposix.h>
|
|
+# else
|
|
+# include <pcreposix.h>
|
|
+# endif
|
|
# endif
|
|
#endif
|
|
|
|
@@ -109,16 +135,6 @@
|
|
# include <pcrs.h>
|
|
#endif
|
|
|
|
-#ifdef STATIC_PCRE
|
|
-# include "pcreposix.h"
|
|
-#else
|
|
-# ifdef PCRE_H_IN_SUBDIR
|
|
-# include <pcre/pcreposix.h>
|
|
-# else
|
|
-# include <pcreposix.h>
|
|
-# endif
|
|
-#endif
|
|
-
|
|
#ifdef _WIN32
|
|
/*
|
|
* I don't want to have to #include all this just for the declaration
|
|
@@ -404,10 +420,16 @@ struct http_response
|
|
enum crunch_reason crunch_reason; /**< Why the response was generated in the first place. */
|
|
};
|
|
|
|
+#ifdef HAVE_PCRE2
|
|
+#define REGEX_TYPE pcre2_code
|
|
+#else
|
|
+#define REGEX_TYPE regex_t
|
|
+#endif
|
|
+
|
|
struct url_spec
|
|
{
|
|
#ifdef FEATURE_PCRE_HOST_PATTERNS
|
|
- regex_t *host_regex;/**< Regex for host matching */
|
|
+ REGEX_TYPE *host_regex;/**< Regex for host matching */
|
|
enum host_regex_type { VANILLA_HOST_PATTERN, PCRE_HOST_PATTERN } host_regex_type;
|
|
#endif /* defined FEATURE_PCRE_HOST_PATTERNS */
|
|
int dcount; /**< How many parts to this domain? (length of dvec) */
|
|
@@ -417,7 +439,7 @@ struct url_spec
|
|
|
|
char *port_list; /**< List of acceptable ports, or NULL to match all ports */
|
|
|
|
- regex_t *preg; /**< Regex for matching path part */
|
|
+ REGEX_TYPE *preg; /**< Regex for matching path part */
|
|
};
|
|
|
|
/**
|
|
@@ -432,7 +454,7 @@ struct pattern_spec
|
|
union
|
|
{
|
|
struct url_spec url_spec;
|
|
- regex_t *tag_regex;
|
|
+ REGEX_TYPE *tag_regex;
|
|
} pattern;
|
|
|
|
unsigned int flags; /**< Bitmap with various pattern properties. */
|
|
--- a/templates/show-status
|
|
+++ b/templates/show-status
|
|
@@ -298,10 +298,7 @@
|
|
<tr>
|
|
<td><code>FEATURE_DYNAMIC_PCRE</code></td>
|
|
<td>@if-FEATURE_DYNAMIC_PCRE-then@ Yes @else-not-FEATURE_DYNAMIC_PCRE@ No @endif-FEATURE_DYNAMIC_PCRE@</td>
|
|
- <td>Dynamically link to the PCRE library. This is set automatically
|
|
- by <code>./configure</code> if you do not have libpcre installed.
|
|
- Dynamically linking to an external libpcre is recommended as the one that is distributed
|
|
- with Privoxy itself is outdated and lacks various features and bug-fixes you may be interested in.</td>
|
|
+ <td>Dynamically link to the PCRE(2) library (recommended).</td>
|
|
</tr>
|
|
<tr>
|
|
<td><code>FEATURE_EXTENDED_STATISTICS</code></td>
|
|
--- a/urlmatch.c
|
|
+++ b/urlmatch.c
|
|
@@ -604,6 +604,100 @@ jb_err parse_http_request(const char *re
|
|
}
|
|
|
|
|
|
+#ifdef HAVE_PCRE2
|
|
+/*********************************************************************
|
|
+ *
|
|
+ * Function : compile_pattern
|
|
+ *
|
|
+ * Description : Anchors a host, domain or TAG pattern and compiles it with pcre2.
|
|
+ *
|
|
+ * Parameters :
|
|
+ * 1 : pattern = The pattern to compile.
|
|
+ * 2 : anchoring = How the regex should be modified
|
|
+ * before compilation. Can be either
|
|
+ * one of NO_ANCHORING, LEFT_ANCHORED,
|
|
+ * RIGHT_ANCHORED or RIGHT_ANCHORED_HOST.
|
|
+ * 3 : url = In case of failures, the spec member is
|
|
+ * logged to show where the pattern came from.
|
|
+ * 4 : regex = Where the compiled regex should be stored.
|
|
+ *
|
|
+ * Returns : JB_ERR_OK - Success (*regex is NULL for an empty pattern)
|
|
+ * JB_ERR_PARSE - Cannot parse regex
|
|
+ *
|
|
+ *********************************************************************/
|
|
+static jb_err compile_pattern(const char *pattern, enum regex_anchoring anchoring,
|
|
+ struct pattern_spec *url, pcre2_code **regex)
|
|
+{
|
|
+ int errcode;
|
|
+ const char *fmt = NULL;
|
|
+ char *rebuf;
|
|
+ size_t rebuf_size;
|
|
+ PCRE2_SIZE error_offset;
|
|
+ int ret;
|
|
+
|
|
+ assert(pattern);
|
|
+
|
|
+ if (pattern[0] == '\0')
|
|
+ {
|
|
+ *regex = NULL;
|
|
+ return JB_ERR_OK;
|
|
+ }
|
|
+
|
|
+ switch (anchoring)
|
|
+ {
|
|
+ case NO_ANCHORING:
|
|
+ fmt = "%s";
|
|
+ break;
|
|
+ case RIGHT_ANCHORED:
|
|
+ fmt = "%s$";
|
|
+ break;
|
|
+ case RIGHT_ANCHORED_HOST:
|
|
+ fmt = "%s\\.?$";
|
|
+ break;
|
|
+ case LEFT_ANCHORED:
|
|
+ fmt = "^%s";
|
|
+ break;
|
|
+ default:
|
|
+ log_error(LOG_LEVEL_FATAL,
|
|
+ "Invalid anchoring in compile_pattern %d", anchoring);
|
|
+ }
|
|
+ rebuf_size = strlen(pattern) + strlen(fmt);
|
|
+ rebuf = malloc_or_die(rebuf_size);
|
|
+
|
|
+ snprintf(rebuf, rebuf_size, fmt, pattern);
|
|
+ /* Compile the anchored copy; compiling the raw pattern would drop the anchoring. */
|
|
+ *regex = pcre2_compile((const unsigned char *)rebuf,
|
|
+ PCRE2_ZERO_TERMINATED, PCRE2_CASELESS, &errcode,
|
|
+ &error_offset, NULL);
|
|
+ if (*regex == NULL)
|
|
+ {
|
|
+ log_error(LOG_LEVEL_ERROR, "error compiling %s from %s: %s",
|
|
+ pattern, url->spec, rebuf);
|
|
+ freez(rebuf);
|
|
+
|
|
+ return JB_ERR_PARSE;
|
|
+ }
|
|
+
|
|
+#ifndef DISABLE_PCRE_JIT_COMPILATION
|
|
+ /* Try to enable JIT compilation but continue if it's unsupported. */
|
|
+ if ((ret = pcre2_jit_compile(*regex, PCRE2_JIT_COMPLETE)) &&
|
|
+ (ret != PCRE2_ERROR_JIT_BADOPTION))
|
|
+ {
|
|
+ log_error(LOG_LEVEL_ERROR,
|
|
+ "Unexpected error enabling JIT compilation for %s from %s: %s",
|
|
+ pattern, url->spec, rebuf);
|
|
+ freez(rebuf);
|
|
+
|
|
+ return JB_ERR_PARSE;
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ freez(rebuf);
|
|
+
|
|
+ return JB_ERR_OK;
|
|
+
|
|
+}
|
|
+#else
|
|
/*********************************************************************
|
|
*
|
|
* Function : compile_pattern
|
|
@@ -686,6 +780,7 @@ static jb_err compile_pattern(const char
|
|
return JB_ERR_OK;
|
|
|
|
}
|
|
+#endif
|
|
|
|
|
|
/*********************************************************************
|
|
@@ -1051,6 +1146,49 @@ static int simplematch(const char *patte
|
|
}
|
|
|
|
|
|
+#ifdef HAVE_PCRE2
|
|
+/*********************************************************************
|
|
+ *
|
|
+ * Function : pcre2_pattern_matches
|
|
+ *
|
|
+ * Description : Checks if a compiled pcre2 pattern matches a string.
|
|
+ *
|
|
+ * Parameters :
|
|
+ * 1 : pattern = The compiled pattern (must not be NULL)
|
|
+ * 2 : string = The NUL-terminated string to check (must not be NULL)
|
|
+ *
|
|
+ * Returns : TRUE if the pattern matches, FALSE if it doesn't or on error.
|
|
+ *
|
|
+ *********************************************************************/
|
|
+int pcre2_pattern_matches(const pcre2_code *pattern, const char *string)
|
|
+{
|
|
+ PCRE2_SIZE offset;
|
|
+ int ret;
|
|
+ pcre2_match_data *pcre2_matches;
|
|
+
|
|
+ assert(pattern != NULL);
|
|
+ assert(string != NULL);
|
|
+ /* Always start matching at the beginning of the string. */
|
|
+ offset = 0;
|
|
+ /* Match data block for pcre2_match(), created from the compiled pattern. */
|
|
+ pcre2_matches = pcre2_match_data_create_from_pattern(pattern, NULL);
|
|
+ if (NULL == pcre2_matches)
|
|
+ {
|
|
+ log_error(LOG_LEVEL_ERROR,
|
|
+ "Out of memory while matching pattern against %s", string);
|
|
+ return FALSE;
|
|
+ }
|
|
+
|
|
+ ret = pcre2_match(pattern, (const unsigned char *)string, strlen(string),
|
|
+ offset, 0, pcre2_matches, NULL);
|
|
+ /* Only the return code is used below, so the match data can be released now. */
|
|
+ pcre2_match_data_free(pcre2_matches);
|
|
+ /* Any negative return code from pcre2_match() is reported as "no match". */
|
|
+ return (ret >= 0);
|
|
+}
|
|
+#endif
|
|
+
|
|
+
|
|
/*********************************************************************
|
|
*
|
|
* Function : simple_domaincmp
|
|
@@ -1268,8 +1406,12 @@ void free_pattern_spec(struct pattern_sp
|
|
{
|
|
if (pattern->pattern.tag_regex)
|
|
{
|
|
+#ifdef HAVE_PCRE2
|
|
+ pcre2_code_free(pattern->pattern.tag_regex);
|
|
+#else
|
|
regfree(pattern->pattern.tag_regex);
|
|
freez(pattern->pattern.tag_regex);
|
|
+#endif
|
|
}
|
|
return;
|
|
}
|
|
@@ -1277,8 +1419,12 @@ void free_pattern_spec(struct pattern_sp
|
|
#ifdef FEATURE_PCRE_HOST_PATTERNS
|
|
if (pattern->pattern.url_spec.host_regex)
|
|
{
|
|
+#ifdef HAVE_PCRE2
|
|
+ pcre2_code_free(pattern->pattern.url_spec.host_regex);
|
|
+#else
|
|
regfree(pattern->pattern.url_spec.host_regex);
|
|
freez(pattern->pattern.url_spec.host_regex);
|
|
+#endif
|
|
}
|
|
#endif /* def FEATURE_PCRE_HOST_PATTERNS */
|
|
freez(pattern->pattern.url_spec.dbuffer);
|
|
@@ -1287,8 +1433,12 @@ void free_pattern_spec(struct pattern_sp
|
|
freez(pattern->pattern.url_spec.port_list);
|
|
if (pattern->pattern.url_spec.preg)
|
|
{
|
|
+#ifdef HAVE_PCRE2
|
|
+ pcre2_code_free(pattern->pattern.url_spec.preg);
|
|
+#else
|
|
regfree(pattern->pattern.url_spec.preg);
|
|
freez(pattern->pattern.url_spec.preg);
|
|
+#endif
|
|
}
|
|
}
|
|
|
|
@@ -1333,8 +1483,13 @@ static int host_matches(const struct htt
|
|
if (pattern->pattern.url_spec.host_regex_type == PCRE_HOST_PATTERN)
|
|
{
|
|
return ((NULL == pattern->pattern.url_spec.host_regex)
|
|
+#ifdef HAVE_PCRE2
|
|
+ || pcre2_pattern_matches(pattern->pattern.url_spec.host_regex,
|
|
+ http->host));
|
|
+#else
|
|
|| (0 == regexec(pattern->pattern.url_spec.host_regex,
|
|
http->host, 0, NULL, 0)));
|
|
+#endif
|
|
}
|
|
#endif
|
|
return ((NULL == pattern->pattern.url_spec.dbuffer) || (0 == domain_match(pattern, http)));
|
|
@@ -1357,7 +1512,11 @@ static int host_matches(const struct htt
|
|
static int path_matches(const char *path, const struct pattern_spec *pattern)
|
|
{
|
|
return ((NULL == pattern->pattern.url_spec.preg)
|
|
+#ifdef HAVE_PCRE2
|
|
+ || (pcre2_pattern_matches(pattern->pattern.url_spec.preg, path)));
|
|
+#else
|
|
|| (0 == regexec(pattern->pattern.url_spec.preg, path, 0, NULL, 0)));
|
|
+#endif
|
|
}
|
|
|
|
|
|
--- a/urlmatch.h
|
|
+++ b/urlmatch.h
|
|
@@ -50,6 +50,10 @@ extern int url_requires_percent_encoding
|
|
extern int url_match(const struct pattern_spec *pattern,
|
|
const struct http_request *http);
|
|
|
|
+#ifdef HAVE_PCRE2
|
|
+extern int pcre2_pattern_matches(const pcre2_code *pattern, const char *string);
|
|
+#endif
|
|
+
|
|
extern jb_err create_pattern_spec(struct pattern_spec *url, char *buf);
|
|
extern void free_pattern_spec(struct pattern_spec *url);
|
|
extern int match_portlist(const char *portlist, int port);
|
|
--- a/w32log.c
|
|
+++ b/w32log.c
|
|
@@ -316,6 +316,9 @@ void TermLogWindow(void)
|
|
void LogCreatePatternMatchingBuffers(void)
|
|
{
|
|
int i;
|
|
+#ifdef HAVE_PCRE2
|
|
+#warning The win32 build of Privoxy is expected to crash when compiled with pcre2 support.
|
|
+#endif
|
|
for (i = 0; patterns_to_highlight[i].str != NULL; i++)
|
|
{
|
|
regcomp(&patterns_to_highlight[i].buffer, patterns_to_highlight[i].str, REG_ICASE);
|