charset detection: ignore case when searching in HTML (#4785)

2024-11-22 07:08:10 +00:00 · 2021-08-31 11:13:28 +02:00 · 2021-08-31 11:13:28 +02:00 · 4e5a0ae71d
commit 4e5a0ae71d
parent d5bba9878b
1 changed file with 2 additions and 2 deletions
--- a/mitmproxy/http.py
+++ b/mitmproxy/http.py
@@ -408,13 +408,13 @@ class Message(serializable.Serializable):
            if "json" in self.headers.get("content-type", ""):
                enc = "utf8"
        if not enc:
-            meta_charset = re.search(rb"""<meta[^>]+charset=['"]?([^'">]+)""", content)
+            meta_charset = re.search(rb"""<meta[^>]+charset=['"]?([^'">]+)""", content, re.IGNORECASE)
            if meta_charset:
                enc = meta_charset.group(1).decode("ascii", "ignore")
        if not enc:
            if "text/css" in self.headers.get("content-type", ""):
                # @charset rule must be the very first thing.
-                css_charset = re.match(rb"""@charset "([^"]+)";""", content)
+                css_charset = re.match(rb"""@charset "([^"]+)";""", content, re.IGNORECASE)
                if css_charset:
                    enc = css_charset.group(1).decode("ascii", "ignore")
        if not enc: