From 66a1bb320deca30a7000e69e549439c6e0c02d39 Mon Sep 17 00:00:00 2001 From: Tomasz Pytel Date: Sun, 22 Nov 2020 09:03:31 -0300 Subject: [PATCH] [Enhancement] Optimized fast_path_match() --- skywalking/config.py | 2 +- skywalking/plugins/__init__.py | 10 ++-- skywalking/utils/ant_matcher.py | 87 +++++++-------------------------- 3 files changed, 24 insertions(+), 75 deletions(-) diff --git a/skywalking/config.py b/skywalking/config.py index 35540b12..339747bf 100644 --- a/skywalking/config.py +++ b/skywalking/config.py @@ -46,7 +46,7 @@ correlation_value_max_length = int(os.getenv('SW_CORRELATION_VALUE_MAX_LENGTH') or '128') # type: int trace_ignore = True if os.getenv('SW_TRACE_IGNORE') and \ os.getenv('SW_TRACE_IGNORE') == 'True' else False # type: bool -trace_ignore_path = (os.getenv('SW_TRACE_IGNORE_PATH') or '').split(',') # type: List[str] +trace_ignore_path = [s.strip() for s in (os.getenv('SW_TRACE_IGNORE_PATH') or '').split(',')] # type: List[str] elasticsearch_trace_dsl = True if os.getenv('SW_ELASTICSEARCH_TRACE_DSL') and \ os.getenv('SW_ELASTICSEARCH_TRACE_DSL') == 'True' else False # type: bool kafka_bootstrap_servers = os.getenv('SW_KAFKA_REPORTER_BOOTSTRAP_SERVERS') or "localhost:9092" # type: str diff --git a/skywalking/plugins/__init__.py b/skywalking/plugins/__init__.py index 217ef816..01d297f1 100644 --- a/skywalking/plugins/__init__.py +++ b/skywalking/plugins/__init__.py @@ -32,12 +32,12 @@ def install(): + disable_patterns = config.disable_plugins + if isinstance(disable_patterns, str): + disable_patterns = [re.compile(p.strip()) for p in disable_patterns.split(',') if p.strip()] + else: + disable_patterns = [re.compile(p.strip()) for p in disable_patterns if p.strip()] for importer, modname, ispkg in pkgutil.iter_modules(skywalking.plugins.__path__): - disable_patterns = config.disable_plugins - if isinstance(disable_patterns, str): - disable_patterns = [re.compile(p.strip()) for p in disable_patterns.split(',') if p.strip()] - else: - 
import re

# Matches every character that has a special meaning in a regular expression so
# that literal pieces of an ant-style pattern can be backslash-escaped.
reesc = re.compile(r'([.*+?^=!:${}()|\[\]\\])')

# Compiled regular expressions keyed by the ant-style pattern they were built
# from.  Entries are never evicted: one entry is kept per distinct pattern.
recache = {}


def _ant_to_regex(pattern: str) -> str:
    """Translate an ant-style path pattern into regular-expression source.

    The pattern is split on the wildcards from the widest to the narrowest
    ("**", then "*", then "?"); the remaining literal pieces are escaped and
    the pieces are joined back together with the regex fragment that
    implements each wildcard.
    """
    return '(?:(?:[^/]+/)*[^/]+)?'.join(  # replaces "**"
        '[^/]*'.join(  # replaces "*"
            '[^/]'.join(  # replaces "?"
                reesc.sub(r'\\\1', literal) for literal in single.split('?')
            ) for single in multi.split('*')
        ) for multi in pattern.split('**')
    )


def fast_path_match(pattern: str, path: str) -> bool:
    """Return True if ``path`` matches the ant-style ``pattern`` in full.

    Wildcards supported in ``pattern``:

    * ``?``  - exactly one character other than ``/``
    * ``*``  - any run (possibly empty) of characters other than ``/``
    * ``**`` - nothing, or one or more non-empty ``/``-separated segments
      with no trailing ``/``

    Every other character is matched literally.  The compiled regular
    expression is stored in ``recache`` so each pattern is translated and
    compiled only once.

    :param pattern: ant-style pattern to match against
    :param path: the path to test
    :return: True when the whole of ``path`` matches, False otherwise
    """
    repat = recache.get(pattern)

    if repat is None:
        repat = recache[pattern] = re.compile(_ant_to_regex(pattern))

    # fullmatch() rather than "^...$" with match(): "$" also matches just
    # before a trailing newline, which would accept "/a\n" for pattern "/a".
    return repat.fullmatch(path) is not None