From fa2c55d34737c3b2b115b2b0e0f931c4eef06886 Mon Sep 17 00:00:00 2001 From: Dryusdan Date: Thu, 23 Jan 2025 14:32:03 +0100 Subject: [PATCH 1/2] Remove cgi usage --- setup.py | 3 --- src/metadata_parser/__init__.py | 12 ++++++------ 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/setup.py b/setup.py index 8862633..47008eb 100644 --- a/setup.py +++ b/setup.py @@ -34,9 +34,6 @@ if sys.version_info.major == 2: requires.append("backports.html") -if sys.version_info >= (3, 13): - requires.append("legacy-cgi") - tests_require = [ "httpbin", "pytest", diff --git a/src/metadata_parser/__init__.py b/src/metadata_parser/__init__.py index fd97e05..da62cee 100644 --- a/src/metadata_parser/__init__.py +++ b/src/metadata_parser/__init__.py @@ -1,7 +1,6 @@ import _socket # noqa: I100,I201 # peername hack, see below # stdlib -import cgi # noqa: I100,I201 import collections import datetime from html import unescape as html_unescape @@ -46,7 +45,7 @@ # ============================================================================== -__VERSION__ = "0.12.2" +__VERSION__ = "0.12.3" # ------------------------------------------------------------------------------ @@ -313,12 +312,13 @@ def get_encoding_from_headers(headers: CaseInsensitiveDict) -> Optional[str]: content_type = headers.get("content-type") if not content_type: return None - content_type, params = cgi.parse_header(content_type) - if "charset" in params: - return params["charset"].strip("'\"") + if not "charset" in content_type: + return None + for param in content_type.replace(" ", "").split(";"): + if "charset=" in param: + return param.split("=")[-1] return None - # ------------------------------------------------------------------------------ From 126ce0711046a9c4fb6315cfda0c2bfa66698e8f Mon Sep 17 00:00:00 2001 From: Dryusdan Date: Thu, 23 Jan 2025 14:40:42 +0100 Subject: [PATCH 2/2] Run ruff (code linter) --- src/metadata_parser/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/metadata_parser/__init__.py b/src/metadata_parser/__init__.py index da62cee..185b961 100644 --- a/src/metadata_parser/__init__.py +++ b/src/metadata_parser/__init__.py @@ -319,6 +319,7 @@ def get_encoding_from_headers(headers: CaseInsensitiveDict) -> Optional[str]: return param.split("=")[-1] return None + # ------------------------------------------------------------------------------ @@ -1948,8 +1949,7 @@ def _run_in_session(_requests_session: requests.Session): ) log.error("NotParsable | %s", self.url) raise NotParsable( - "NotParseable document detected! " - "content-type:'[%s]" % content_type, + "NotParseable document detected! content-type:'[%s]" % content_type, metadataParser=self, )