From 2f703afe5e5f0542d89089be8e7047b8bd2d560c Mon Sep 17 00:00:00 2001
From: David Cain
Date: Mon, 1 Aug 2022 14:45:37 -0700
Subject: [PATCH] Fix invalid escape sequences in regex strings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Summary
=======
This commit fixes deprecation warnings that arise from using backslashes
in strings, but *not* as part of an escape sequence. It will help this
library be used with newer versions of Python.

String literals do not change (for current versions of Python)
==============================================================
```python
>>> r'[\[\]]' == '[\[\]]'
True
```

Examples
========
```bash
$ python -Wd -c 'print("\d")'
DeprecationWarning: invalid escape sequence \d

$ python -W error -c 'print("\d")'
SyntaxError: invalid escape sequence \d
```

Explanation
===========
For an explanation of the problem (and the recommended solution), see:
https://docs.python.org/3/library/re.html

> Also, please note that any invalid escape sequences in Python’s usage
> of the backslash in string literals now generate a DeprecationWarning
> and in the future this will become a SyntaxError. This behaviour will
> happen even if it is a valid escape sequence for a regular expression.
>
> The solution is to use Python’s raw string notation for regular
> expression patterns; backslashes are not handled in any special way in
> a string literal prefixed with 'r'.

How to keep these errors out of source code
===========================================
I haven't made any of the following changes in this commit, but there
are a few ways to make sure that *new* invalid escape sequences are not
introduced:

- Use a linter!
    - `pylint` has `anomalous-backslash-in-string`
    - `flake8` has `W605`
    - other linters work too!
- Escalate deprecation warnings to full errors at test time (e.g.
`error:invalid escape sequence:DeprecationWarning` with `filterwarnings` will change these warnings to errors) --- vcf/parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vcf/parser.py b/vcf/parser.py index c3c3d082..e454ea41 100644 --- a/vcf/parser.py +++ b/vcf/parser.py @@ -275,7 +275,7 @@ def __init__(self, fsock=None, filename=None, compressed=None, prepend_chr=False self._separator = '\t| +' self._row_pattern = re.compile(self._separator) - self._alt_pattern = re.compile('[\[\]]') + self._alt_pattern = re.compile(r'[\[\]]') self.reader = (line.strip() for line in self._reader if line.strip()) @@ -533,7 +533,7 @@ def _parse_alt(self, str): withinMainAssembly = True pos = remoteCoords[1] orientation = (str[0] == '[' or str[0] == ']') - remoteOrientation = (re.search('\[', str) is not None) + remoteOrientation = (re.search(r'\[', str) is not None) if orientation: connectingSequence = items[2] else: