diff --git a/.gitignore b/.gitignore index 015c6bc..18412f5 100644 --- a/.gitignore +++ b/.gitignore @@ -160,4 +160,4 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. .idea/ -tls_requests/bin/*xgo* +tls_requests/bin/* diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7a8b289..6f2073b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,12 +1,9 @@ exclude: '^docs.sh/|scripts/' default_stages: [pre-commit] -default_language_version: - python: python3.10 - repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 + rev: v6.0.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer @@ -20,14 +17,21 @@ repos: - id: check-docstring-first - id: detect-private-key + # run the autoflake. + - repo: https://github.com/PyCQA/autoflake + rev: v2.3.1 + hooks: + - id: autoflake + args: [--remove-all-unused-imports, --in-place, --ignore-init-module-imports] + # run the isort. - repo: https://github.com/PyCQA/isort - rev: 5.13.2 + rev: 6.1.0 hooks: - id: isort # run the flake8. - repo: https://github.com/PyCQA/flake8 - rev: 7.0.0 + rev: 7.3.0 hooks: - id: flake8 diff --git a/Makefile b/Makefile index 94544a0..eb06efe 100644 --- a/Makefile +++ b/Makefile @@ -2,10 +2,12 @@ init-actions: python -m pip install --upgrade pip python -m pip install -r requirements-dev.txt + python -m autoflake --in-place --remove-all-unused-imports --ignore-init-module-imports . 
python -m black tls_requests python -m isort tls_requests python -m flake8 tls_requests + test: tox -p rm -rf *.egg-info diff --git a/README.md b/README.md index 3919d32..1411012 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,31 @@ Start using TLS Requests with just a few lines of code: 200 ``` +Basic usage with automatic rotation: + +```pycon +>>> import tls_requests +>>> proxy_list = [ + "http://user1:pass1@proxy.example.com:8080", + "http://user2:pass2@proxy.example.com:8081", + "socks5://proxy.example.com:8082", + "proxy.example.com:8083", # (defaults to http) + "http://user:pass@proxy.example.com:8084|1.0|US", # http://user:pass@host:port|weight|region +] +>>> r = tls_requests.get( + "https://httpbin.org/get", + proxy=proxy_list, + headers=tls_requests.HeaderRotator(), + tls_identifier=tls_requests.TLSIdentifierRotator() +) +>>> r +<Response [200]> +>>> r.status_code +200 +>>> tls_requests.HeaderRotator(strategy = "round_robin") # strategy: Literal["round_robin", "random", "weighted"] +>>> tls_requests.Proxy("http://user1:pass1@proxy.example.com:8080", weight=0.1) # default weight: 1.0 +``` + **Introduction** ---------------- diff --git a/docs/advanced/rotators.md b/docs/advanced/rotators.md new file mode 100644 index 0000000..3f14e98 --- /dev/null +++ b/docs/advanced/rotators.md @@ -0,0 +1,145 @@ +# Using Rotators + +The `tls_requests` library is designed to be smart out of the box. By default, it automatically rotates through realistic headers and client identifiers to make your requests appear authentic and avoid detection. + +This guide explains how these default rotators work and how you can customize or disable them. + +* * * + +### Header Rotator + +**Default Behavior: Automatic Rotation** + +When you initialize a `Client` without specifying the `headers` parameter, it will **automatically rotate** through a built-in collection of header templates that mimic popular browsers like Chrome, Firefox, and Safari across different operating systems. 
+ +```python +import tls_requests + +# No extra configuration needed! +# This client will automatically use a different, realistic header set for each request. +with tls_requests.Client(headers=tls_requests.HeaderRotator()) as client: + # Request 1 might have Chrome headers + res1 = client.get("https://httpbin.org/headers") + print(f"Request 1 UA: {res1.json()['headers']['User-Agent']}") + + # Request 2 might have Firefox headers + res2 = client.get("https://httpbin.org/headers") + print(f"Request 2 UA: {res2.json()['headers']['User-Agent']}") +``` + +**How to Override the Default Behavior:** + +- **To rotate through your own list of headers**, pass a `list` of `dict`s: + ```python + my_headers = [{"User-Agent": "MyBot/1.0"}, {"User-Agent": "MyBot/2.0"}] + client = tls_requests.Client(headers=my_headers) + ``` + +- **To use a single, static set of headers (no rotation)**, pass a single `dict`: + ```python + static_headers = {"User-Agent": "Always-The-Same-Bot/1.0"} + client = tls_requests.Client(headers=static_headers) + ``` + +- **To completely disable default headers**, pass `None`: + ```python + # This client will not add any default headers (like User-Agent). + client = tls_requests.Client(headers=None) + ``` + +* * * + +### TLS Client Identifier Rotator + +**Default Behavior: Automatic Rotation** + +Similar to headers, the `Client` **defaults to rotating** through all supported client identifier profiles (e.g., `chrome_120`, `firefox_120`, `safari_16_0`, etc.). This changes your TLS fingerprint with every request, an advanced technique to evade sophisticated anti-bot systems. + +```python +import tls_requests + +# This client automatically changes its TLS fingerprint for each request. +with tls_requests.Client(client_identifier=tls_requests.TLSIdentifierRotator()) as client: + # These two requests will have different TLS profiles. 
+ res1 = client.get("https://tls.browserleaks.com/json") + res2 = client.get("https://tls.browserleaks.com/json") +``` + +**How to Override the Default Behavior:** + +- **To rotate through a specific list of identifiers**, pass a `list` of strings: + ```python + my_identifiers = ["chrome_120", "safari_16_0"] + client = tls_requests.Client(client_identifier=my_identifiers) + ``` + +- **To use a single, static identifier**, pass a string: + ```python + client = tls_requests.Client(client_identifier="chrome_120") + ``` +- **To disable rotation and use the library's single default identifier**, pass `None`: + ```python + client = tls_requests.Client(client_identifier=None) + ``` + +* * * + +### Proxy Rotator + +Unlike headers and client identifiers, proxy rotation is **not enabled by default**, as the library cannot provide a list of free proxies. You must provide your own list to enable this feature. + +To enable proxy rotation, pass a list of proxy strings to the `proxy` parameter. The library will automatically use a `weighted` strategy, prioritizing proxies that perform well. + +```python +import tls_requests + +proxy_list = [ + "http://user1:pass1@proxy.example.com:8080", + "http://user2:pass2@proxy.example.com:8081", + "socks5://proxy.example.com:8082", + "proxy.example.com:8083", # (defaults to http) + "http://user:pass@proxy.example.com:8084|1.0|US", # http://user:pass@host:port|weight|region +] + +# Provide a list to enable proxy rotation. 
+with tls_requests.Client(proxy=proxy_list) as client: + response = client.get("https://httpbin.org/get") +``` + +For more control, you can create a `ProxyRotator` instance with a specific strategy: + +```python +from tls_requests.models.rotators import ProxyRotator + +rotator = ProxyRotator.from_file(proxy_list, strategy="round_robin") + +with tls_requests.Client(proxy=rotator) as client: + response = client.get("https://httpbin.org/get") +``` + +> **Note:** The `Client` automatically provides performance feedback (success/failure, latency) to the `ProxyRotator`, making the `weighted` strategy highly effective. + +* * * + +### Asynchronous Support + +All rotator features, including the smart defaults, work identically with `AsyncClient`. + +```python +import tls_requests +import asyncio + +async def main(): + # This async client automatically uses default header and identifier rotation. + async with tls_requests.AsyncClient( + headers=tls_requests.HeaderRotator(), + client_identifier=tls_requests.TLSIdentifierRotator() + ) as client: + tasks = [client.get("https://httpbin.org/get") for _ in range(2)] + responses = await asyncio.gather(*tasks) + + for i, r in enumerate(responses): + print(f"Async Request {i+1} status: {r.status_code}") + +asyncio.run(main()) +``` diff --git a/mkdocs.yml b/mkdocs.yml index a07540f..80dcce4 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -30,6 +30,7 @@ nav: - Authentication: 'advanced/authentication.md' - Hooks: 'advanced/hooks.md' - Proxies: 'advanced/proxies.md' + - Rotators: 'advanced/rotators.md' - TLS Client: - Install: 'tls/install.md' - Wrapper TLS Client: 'tls/index.md' diff --git a/pyproject.toml b/pyproject.toml index d2839f6..2c10617 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,3 +4,47 @@ build-backend = 'setuptools.build_meta' [tool.pytest.ini_options] asyncio_mode = "auto" + +[tool.black] +line-length = 120 +target-version = ['py38', 'py39', 'py310', 'py311', 'py312'] +unstable = true +exclude = ''' +/( + \.git + | 
\.hg + | \.mypy_cache + | \.tox + | \.venv + | _build + | buck-out + | build + | dist +)/ +''' + + +[tool.flake8] +max-line-length = 120 +max-complexity = 10 +extend-ignore = [ + "E203", # Whitespace before ':', which black handles differently than flake8. + "W503", # Line break before binary operator, black's preferred style. +] + +# Comma-separated list of directories to exclude from linting. +exclude = [ + ".git", + "__pycache__", + "docs/source/conf.py", + "old", + "build", + "dist", + ".venv", +] + +# Per-file ignores are very useful for specific cases. +# For example, __init__.py files often have unused imports on purpose. +per-file-ignores = [ + "__init__.py:F401", # Ignore "unused import" in __init__.py files +] diff --git a/requirements-dev.txt b/requirements-dev.txt index 0c95c2a..58b5f04 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -17,6 +17,7 @@ black==24.3.0 coverage[toml]==7.6.1 isort==5.13.2 flake8==7.1.1 +autoflake==2.3.1 mypy==1.11.2 pytest==8.3.3 pytest-asyncio==0.24.0 diff --git a/requirements.txt b/requirements.txt index 7a91425..4077289 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,3 @@ # Base chardet~=5.2.0 -requests~=2.32.3 -tqdm~=4.67.1 idna~=3.10 diff --git a/setup.cfg b/setup.cfg index 4128f96..1b47838 100644 --- a/setup.cfg +++ b/setup.cfg @@ -6,8 +6,6 @@ license_file = LICENSE python_requires = >=3.8 install_requires = chardet ~= 5.2.0 - requests ~= 2.32.3 - tqdm ~= 4.67.1 idna ~= 3.10 classifiers = Development Status :: 5 - Production/Stable diff --git a/tests/test_headers.py b/tests/test_headers.py index 49f764c..38d27c2 100644 --- a/tests/test_headers.py +++ b/tests/test_headers.py @@ -23,18 +23,18 @@ def test_request_headers(httpserver: HTTPServer): httpserver.expect_request("/headers").with_post_hook(hook_request_headers).respond_with_data(b"OK") response = tls_requests.get(httpserver.url_for("/headers"), headers={"foo": "bar"}) assert response.status_code == 200 - assert 
response.headers.get("foo") == "bar" + assert response.request.headers["foo"] == "bar" def test_response_headers(httpserver: HTTPServer): httpserver.expect_request("/headers").with_post_hook(hook_response_headers).respond_with_data(b"OK") response = tls_requests.get(httpserver.url_for("/headers")) assert response.status_code, 200 - assert response.headers.get("foo") == "bar" + assert response.headers["foo"] == "bar" def test_response_case_insensitive_headers(httpserver: HTTPServer): httpserver.expect_request("/headers").with_post_hook(hook_response_case_insensitive_headers).respond_with_data(b"OK") response = tls_requests.get(httpserver.url_for("/headers")) assert response.status_code, 200 - assert response.headers.get("foo") == "bar" + assert response.headers["foo"] == "bar" diff --git a/tests/test_rotators.py b/tests/test_rotators.py new file mode 100644 index 0000000..7244faa --- /dev/null +++ b/tests/test_rotators.py @@ -0,0 +1,262 @@ +import itertools +import json +from collections import Counter +from pathlib import Path + +import pytest + +from tls_requests.models.headers import Headers +from tls_requests.models.rotators import (HeaderRotator, ProxyRotator, + TLSIdentifierRotator) +from tls_requests.models.urls import Proxy + + +@pytest.fixture +def proxy_list_fixture(): + return ["proxy1:8000", "proxy2:8000", "proxy3:8000"] + + +@pytest.fixture +def proxy_txt_file_fixture(tmp_path: Path): + content = """ + # This is a comment, should be skipped + proxy1:8000 + proxy2:8000 + + proxy3:8000|2.5|us-east # proxy with weight and region + """ + file_path = tmp_path / "proxies.txt" + file_path.write_text(content) + return file_path + + +@pytest.fixture +def proxy_json_file_fixture(tmp_path: Path): + data = [ + {"url": "http://proxy1:8000", "weight": 1.0, "region": "eu"}, + {"url": "http://proxy2:8000", "weight": 3.0, "region": "us"}, + ] + file_path = tmp_path / "proxies.json" + file_path.write_text(json.dumps(data)) + return file_path + + +@pytest.fixture +def 
header_list_fixture(): + return [ + { + "Accept": "application/json", + "User-Agent": "Test-UA-1", + }, + { + "Accept": "text/html", + "User-Agent": "Test-UA-2", + }, + ] + + +class TestBaseRotator: + def test_initialization(self, proxy_list_fixture): + rotator = ProxyRotator(items=[Proxy(p) for p in proxy_list_fixture]) + assert len(rotator) == 3 + assert isinstance(rotator.items[0], Proxy) + + def test_from_file_list(self, proxy_list_fixture): + rotator = ProxyRotator.from_file(proxy_list_fixture) + assert len(rotator) == 3 + assert rotator.items[0].url == "http://proxy1:8000" + + def test_from_file_txt(self, proxy_txt_file_fixture): + rotator = ProxyRotator.from_file(proxy_txt_file_fixture) + assert len(rotator) == 3, "Blank lines and comments should be ignored." + assert rotator.items[2].weight == 2.5 + assert rotator.items[2].region == "us-east" + + def test_from_file_json(self, proxy_json_file_fixture): + rotator = ProxyRotator.from_file(proxy_json_file_fixture) + assert len(rotator) == 2 + assert rotator.items[1].url == "http://proxy2:8000" + assert rotator.items[1].weight == 3.0 + + def test_from_file_not_found(self): + with pytest.raises(FileNotFoundError): + ProxyRotator.from_file("non_existent_file.txt") + + def test_empty_rotator_raises_error(self): + rotator = ProxyRotator.from_file([]) + with pytest.raises(ValueError, match="Rotator is empty"): + rotator.next() + + @pytest.mark.asyncio + async def test_async_empty_rotator_raises_error(self): + rotator = ProxyRotator.from_file([]) + with pytest.raises(ValueError, match="Rotator is empty"): + await rotator.anext() + + def test_add_remove(self): + rotator = TLSIdentifierRotator.from_file(["chrome_120"]) + assert len(rotator) == 1 + rotator.add("firefox_120") + assert len(rotator) == 2 + assert "firefox_120" in rotator.items + rotator.remove("chrome_120") + assert len(rotator) == 1 + assert "chrome_120" not in rotator.items + + @pytest.mark.asyncio + async def test_async_add_remove(self): + rotator = 
TLSIdentifierRotator.from_file(["chrome_120"]) + assert len(rotator) == 1 + await rotator.aadd("firefox_120") + assert len(rotator) == 2 + assert "firefox_120" in rotator.items + await rotator.aremove("chrome_120") + assert len(rotator) == 1 + assert "chrome_120" not in rotator.items + + def test_default_strategy_is_random(self, proxy_list_fixture): + """ + Tests that the default strategy is 'random' when none is specified. + This is crucial for stateless shortcut API usage like `tls_requests.get()`. + """ + # Initialize the rotator without specifying a `strategy` + rotator = ProxyRotator.from_file(proxy_list_fixture) + assert rotator.strategy == "random" + + # Check behavior: the results should not be a predictable round-robin sequence. + # With 10 iterations, the probability of a random choice perfectly + # matching a round-robin sequence is extremely low. + results = [rotator.next() for _ in range(10)] + + # Generate the expected round-robin sequence for comparison + round_robin_cycle = itertools.cycle(rotator.items) + expected_round_robin_results = [next(round_robin_cycle) for _ in range(10)] + + assert results != expected_round_robin_results, \ + "Default strategy produced a predictable round-robin sequence." 
+ + +class TestRotationStrategies: + @pytest.mark.parametrize("strategy", ["round_robin", "random", "weighted"]) + def test_sync_strategies(self, strategy, proxy_list_fixture): + proxies = [Proxy(p, weight=i + 1) for i, p in enumerate(proxy_list_fixture)] + rotator = ProxyRotator(proxies, strategy=strategy) + num_iterations = 100 if strategy == "weighted" else 10 + + results = [rotator.next() for _ in range(num_iterations)] + + if strategy == "round_robin": + assert results[0].url == "http://proxy1:8000" + assert results[1].url == "http://proxy2:8000" + assert results[2].url == "http://proxy3:8000" + assert results[3].url == "http://proxy1:8000" + + elif strategy == "random": + for res in results: + assert res in proxies + + elif strategy == "weighted": + counts = Counter(p.url for p in results) + assert counts["http://proxy3:8000"] > counts["http://proxy1:8000"] + + @pytest.mark.asyncio + @pytest.mark.parametrize("strategy", ["round_robin", "random", "weighted"]) + async def test_async_strategies(self, strategy, proxy_list_fixture): + proxies = [Proxy(p, weight=i + 1) for i, p in enumerate(proxy_list_fixture)] + rotator = ProxyRotator(proxies, strategy=strategy) + num_iterations = 100 if strategy == "weighted" else 10 + + results = [await rotator.anext() for _ in range(num_iterations)] + + if strategy == "round_robin": + assert results[0].url == "http://proxy1:8000" + assert results[3].url == "http://proxy1:8000" + + elif strategy == "random": + for res in results: + assert res in proxies + + elif strategy == "weighted": + counts = Counter(p.url for p in results) + assert counts["http://proxy3:8000"] > counts["http://proxy1:8000"] + + +class TestProxyRotator: + def test_mark_result_weighted(self): + proxy = Proxy("proxy.example.com:8080", weight=2.0) + rotator = ProxyRotator([proxy], strategy="weighted") + + initial_weight = proxy.weight + rotator.mark_result(proxy, success=True) + assert proxy.weight > initial_weight + + initial_weight = proxy.weight + 
rotator.mark_result(proxy, success=False) + assert proxy.weight < initial_weight + + @pytest.mark.asyncio + async def test_async_mark_result_weighted(self): + proxy = Proxy("proxy.example.com:8080", weight=2.0) + rotator = ProxyRotator([proxy], strategy="weighted") + + initial_weight = proxy.weight + await rotator.amark_result(proxy, success=True) + assert proxy.weight > initial_weight + + initial_weight = proxy.weight + await rotator.amark_result(proxy, success=False) + assert proxy.weight < initial_weight + + +class TestHeaderRotator: + def test_rebuild_item(self): + item = HeaderRotator.rebuild_item({"User-Agent": "Test"}) + assert isinstance(item, Headers) + assert item["user-agent"] == "Test" + + def test_next_with_user_agent_override(self, header_list_fixture): + """ + Tests that overriding the User-Agent returns a modified COPY, + and does NOT mutate the original header object in the rotator. + This test is independent of the rotation strategy. + """ + rotator = HeaderRotator.from_file(header_list_fixture) # Uses default (random) strategy + custom_ua = "My-Custom-Bot/1.0" + + # Get a header (randomly) and override its UA + modified_header = rotator.next(user_agent=custom_ua) + assert modified_header["User-Agent"] == custom_ua + + # Find the original header object in the rotator's list that corresponds + # to the one we pulled (using the unique 'Accept' header from our fixture). + original_header_in_list = next( + h for h in rotator.items if h["Accept"] == modified_header["Accept"] + ) + + # The most important check: ensure the original object was NOT changed. + assert original_header_in_list["User-Agent"] != custom_ua + assert "Test-UA-" in original_header_in_list["User-Agent"] + + # Ensure it is a copy, not the same object. 
+ assert modified_header is not original_header_in_list + + @pytest.mark.asyncio + async def test_anext_with_user_agent_override(self, header_list_fixture): + """ + Tests the async version of the User-Agent override, ensuring the + original object in the rotator remains unchanged. + """ + rotator = HeaderRotator.from_file(header_list_fixture) # Uses default (random) strategy + custom_ua = "My-Custom-Bot/1.0" + + # Get a header (randomly) and override its UA + modified_header = await rotator.anext(user_agent=custom_ua) + assert modified_header["User-Agent"] == custom_ua + + # Find the original header object in the list + original_header_in_list = next( + h for h in rotator.items if h["Accept"] == modified_header["Accept"] + ) + + # Ensure the original object was NOT changed + assert original_header_in_list["User-Agent"] != custom_ua diff --git a/tls_requests/__init__.py b/tls_requests/__init__.py index 277d5b4..9a9077e 100644 --- a/tls_requests/__init__.py +++ b/tls_requests/__init__.py @@ -33,9 +33,11 @@ "patch", "post", "put", - "request", ] +from .api import request + +__all__ += ["request"] __locals = locals() for __name in __all__: diff --git a/tls_requests/__version__.py b/tls_requests/__version__.py index c8cb460..5d7a62e 100644 --- a/tls_requests/__version__.py +++ b/tls_requests/__version__.py @@ -1,7 +1,9 @@ __title__ = "wrapper-tls-requests" -__description__ = "A powerful and lightweight Python library for making secure and reliable HTTP/TLS Fingerprint requests." +__description__ = ( + "A powerful and lightweight Python library for making secure and reliable HTTP/TLS Fingerprint requests." 
+) __url__ = "https://github.com/thewebscraping/tls-requests" __author__ = "Tu Pham" __author_email__ = "thetwofarm@gmail.com" -__version__ = "1.1.5" +__version__ = "1.1.6" __license__ = "MIT" diff --git a/tls_requests/client.py b/tls_requests/client.py index 579b948..5a625fe 100644 --- a/tls_requests/client.py +++ b/tls_requests/client.py @@ -9,8 +9,9 @@ TypeVar, Union) from .exceptions import ProxyError, RemoteProtocolError, TooManyRedirects -from .models import (URL, Auth, BasicAuth, Cookies, Headers, Proxy, Request, - Response, StatusCodes, TLSClient, TLSConfig, URLParams) +from .models import (URL, Auth, BasicAuth, Cookies, HeaderRotator, Headers, + Proxy, ProxyRotator, Request, Response, StatusCodes, + TLSClient, TLSConfig, TLSIdentifierRotator, URLParams) from .settings import (DEFAULT_FOLLOW_REDIRECTS, DEFAULT_HEADERS, DEFAULT_MAX_REDIRECTS, DEFAULT_TIMEOUT, DEFAULT_TLS_HTTP2, DEFAULT_TLS_IDENTIFIER) @@ -102,16 +103,27 @@ def __init__( self._params = URLParams(params) self._cookies = Cookies(cookies) self._state = ClientState.UNOPENED - self._headers = Headers(headers) + self._header_rotator: Optional[HeaderRotator] = None + self._headers: Headers = Headers() + if isinstance(headers, HeaderRotator): + self._header_rotator = headers + elif isinstance(headers, list): + self._header_rotator = HeaderRotator.from_file(headers) + elif headers is not None: + self._headers = Headers(headers) self._hooks = hooks if isinstance(hooks, dict) else {} self.auth = auth - self.proxy = self.prepare_proxy(proxy) + self.proxy = ProxyRotator.from_file(proxy) if isinstance(proxy, list) else proxy self.timeout = timeout self.follow_redirects = follow_redirects self.max_redirects = max_redirects self.http2 = http2 self.verify = verify - self.client_identifier = client_identifier + self.client_identifier = ( + TLSIdentifierRotator.from_file(client_identifier) + if isinstance(client_identifier, list) + else client_identifier + ) self.encoding = encoding @property @@ -135,7 +147,12 
@@ def headers(self) -> Headers: @headers.setter def headers(self, headers: HeaderTypes) -> None: - self._headers = Headers(headers) + if isinstance(headers, HeaderRotator): + self._header_rotator = headers + elif isinstance(headers, list): + self._header_rotator = HeaderRotator.from_file(headers) + elif headers is not None: + self._headers = Headers(headers) @property def cookies(self) -> Cookies: @@ -161,9 +178,7 @@ def hooks(self) -> Mapping[Literal["request", "response"], list[Callable]]: def hooks(self, hooks: HookTypes) -> None: self._hooks = self._rebuild_hooks(hooks) - def prepare_auth( - self, request: Request, auth: AuthTypes, *args, **kwargs - ) -> Union[Request, Any]: + def prepare_auth(self, request: Request, auth: AuthTypes, *args, **kwargs) -> Union[Request, Any]: """Build Auth Request instance""" if isinstance(auth, tuple) and len(auth) == 2: @@ -176,11 +191,15 @@ def prepare_auth( if isinstance(auth, Auth): return auth.build_auth(request) - def prepare_headers(self, headers: HeaderTypes = None) -> Headers: - """Prepare Headers""" + return auth - merged_headers = self.headers.copy() - return merged_headers.update(headers) + def prepare_headers(self, headers: HeaderTypes = None, user_agent: Optional[str] = None) -> Headers: + """Prepare Headers. 
Gets base headers from rotator if available.""" + if headers is None: + return self.headers.copy() + if isinstance(headers, HeaderRotator): + return headers.next(user_agent=user_agent) + return Headers(headers) def prepare_cookies(self, cookies: CookieTypes = None) -> Cookies: """Prepare Cookies""" @@ -194,14 +213,27 @@ def prepare_params(self, params: URLParamTypes = None) -> URLParams: merged_params = self.params.copy() return merged_params.update(params) - def prepare_proxy(self, proxy: ProxyTypes = None) -> Optional[Proxy]: - if proxy is not None: - if isinstance(proxy, (bytes, str, URL, Proxy)): - return Proxy(proxy) - - raise ProxyError("Invalid proxy.") - - def prepare_config(self, request: Request): + def prepare_proxy(self, proxy: Optional[ProxyTypes] = None) -> Optional[Proxy]: + if proxy is None: + return None + if isinstance(proxy, ProxyRotator): + return proxy.next() + if isinstance(proxy, (str, bytes)): + return Proxy(proxy) + if isinstance(proxy, Proxy): + return proxy + if isinstance(proxy, URL): + return Proxy(str(proxy)) + raise ProxyError(f"Unsupported proxy type: {type(proxy)}") + + def prepare_tls_identifier(self, identifier: Optional[Union[str, TLSIdentifierRotator]]) -> str: + if isinstance(identifier, str): + return identifier + if isinstance(identifier, TLSIdentifierRotator): + return identifier.next() + return DEFAULT_TLS_IDENTIFIER + + def prepare_config(self, request: Request, tls_identifier: str = DEFAULT_TLS_IDENTIFIER): """Prepare TLS Config""" config = self.config.copy_with( @@ -214,7 +246,7 @@ def prepare_config(self, request: Request): timeout=request.timeout, http2=True if self.http2 in ["auto", "http2", True, None] else False, verify=self.verify, - tls_identifier=self.client_identifier, + tls_identifier=tls_identifier, ) # Set Request SessionId. 
@@ -245,27 +277,25 @@ def build_request( params=self.prepare_params(params), headers=self.prepare_headers(headers), cookies=self.prepare_cookies(cookies), - proxy=self.proxy, + proxy=self.prepare_proxy(self.proxy), timeout=timeout or self.timeout, ) - def build_hook_request( - self, request: Request, *args, **kwargs - ) -> Union[Request, Any]: + def build_hook_request(self, request: Request, *args, **kwargs) -> Union[Request, Any]: request_hooks = self._rebuild_hooks(self.hooks).get("request") if isinstance(request_hooks, Sequence): for hook in request_hooks: if callable(hook): return hook(request) + return None - def build_hook_response( - self, response: Response, *args, **kwargs - ) -> Union[Response, Any]: + def build_hook_response(self, response: Response, *args, **kwargs) -> Union[Response, Any]: request_hooks = self._rebuild_hooks(self.hooks).get("response") if isinstance(request_hooks, Sequence): for hook in request_hooks: if callable(hook): return hook(response) + return None def _rebuild_hooks(self, hooks: HookTypes): if isinstance(hooks, dict): @@ -274,10 +304,9 @@ def _rebuild_hooks(self, hooks: HookTypes): for k, items in hooks.items() if str(k) in ["request", "response"] and isinstance(items, Sequence) } + return None - def _rebuild_redirect_request( - self, request: Request, response: Response - ) -> Request: + def _rebuild_redirect_request(self, request: Request, response: Response) -> Request: """Rebuild Redirect Request""" return Request( @@ -324,7 +353,8 @@ def _rebuild_redirect_url(self, request: Request, response: Response) -> URL: self.config.sessionId = str(uuid.uuid4()) else: raise RemoteProtocolError( - "Switching remote scheme from HTTP/2 to HTTP/1 is not supported. Please initialize Client with parameter `http2` to `auto`." + "Switching remote scheme from HTTP/2 to HTTP/1 is not supported. Please initialize Client with" + " parameter `http2` to `auto`." 
) setattr(url, "_url", None) # reset url @@ -333,11 +363,10 @@ def _rebuild_redirect_url(self, request: Request, response: Response) -> URL: return url - def _send( - self, request: Request, *, history: list = None, start: float = None - ) -> Response: + def _send(self, request: Request, *, history: list = None, start: float = None) -> Response: start = start or time.perf_counter() - config = self.prepare_config(request) + tls_identifier = self.prepare_tls_identifier(self.client_identifier) + config = self.prepare_config(request, tls_identifier=tls_identifier) response = Response.from_tls_response( self.session.request(config.to_dict()), is_byte_response=config.isByteResponse, @@ -367,14 +396,10 @@ def close(self) -> None: def __enter__(self: T) -> T: if self._state == ClientState.OPENED: - raise RuntimeError( - "It is not possible to open a client instance more than once." - ) + raise RuntimeError("It is not possible to open a client instance more than once.") if self._state == ClientState.CLOSED: - raise RuntimeError( - "The client instance cannot be reopened after it has been closed." - ) + raise RuntimeError("The client instance cannot be reopened after it has been closed.") self._state = ClientState.OPENED return self @@ -481,6 +506,14 @@ def send( self.follow_redirects = follow_redirects response = self._send(request, start=time.perf_counter(), history=[]) + if isinstance(self.proxy, ProxyRotator) and response.request.proxy: + proxy_success = 200 <= response.status_code < 500 and response.status_code not in [407] + self.proxy.mark_result( + proxy=response.request.proxy, + success=proxy_success, + latency=response.elapsed, + ) + if self.hooks.get("response"): response_ = self.build_hook_response(response) if isinstance(response_, Response): @@ -721,6 +754,62 @@ class AsyncClient(BaseClient): **Parameters:** See `tls_requests.BaseClient`. 
""" + async def aprepare_headers(self, headers: HeaderTypes = None, user_agent: Optional[str] = None) -> Headers: + """Prepare Headers. Gets base headers from rotator if available.""" + if headers is None: + return self.headers.copy() + if isinstance(headers, HeaderRotator): + return await headers.anext(user_agent=user_agent) + return Headers(headers) + + async def aprepare_proxy(self, proxy: ProxyTypes | None) -> Optional[Proxy]: + if proxy is None: + return None + if isinstance(proxy, ProxyRotator): + return await proxy.anext() + if isinstance(proxy, (str, bytes)): + return Proxy(proxy) + if isinstance(proxy, Proxy): + return proxy + if isinstance(proxy, URL): + return Proxy(str(proxy)) + raise ProxyError(f"Unsupported proxy type: {type(proxy)}") + + async def aprepare_tls_identifier(self, identifier) -> str: + if isinstance(identifier, str): + return identifier + if isinstance(identifier, TLSIdentifierRotator): + return await identifier.anext() + return DEFAULT_TLS_IDENTIFIER + + async def abuild_request( + self, + method: str, + url: URLTypes, + *, + data: RequestData = None, + files: RequestFiles = None, + json: typing.Any = None, + params: URLParamTypes = None, + headers: HeaderTypes = None, + cookies: CookieTypes = None, + timeout: TimeoutTypes = None, + ) -> Request: + headers = await self.aprepare_headers(headers) + proxy = await self.aprepare_proxy(self.proxy) + return Request( + method, + url, + data=data, + files=files, + json=json, + params=self.prepare_params(params), + headers=headers, + cookies=self.prepare_cookies(cookies), + proxy=proxy, + timeout=timeout or self.timeout, + ) + async def request( self, method: str, @@ -738,7 +827,7 @@ async def request( ) -> Response: """Async Request""" - request = self.build_request( + request = await self.abuild_request( method=method, url=url, data=data, @@ -967,6 +1056,7 @@ async def send( follow_redirects: bool = DEFAULT_FOLLOW_REDIRECTS, ) -> Response: if self._state == ClientState.CLOSED: + pass # pass 
duplicated code raise RuntimeError("Cannot send a request, as the client has been closed.") self._state = ClientState.OPENED @@ -978,6 +1068,14 @@ async def send( self.follow_redirects = follow_redirects response = await self._send(request, start=time.perf_counter(), history=[]) + if isinstance(self.proxy, ProxyRotator) and response.request.proxy: + proxy_success = 200 <= response.status_code < 500 and response.status_code not in [407] + await self.proxy.amark_result( + proxy=response.request.proxy, + success=proxy_success, + latency=response.elapsed, + ) + if self.hooks.get("response"): response_ = self.build_hook_response(response) if isinstance(response_, Response): @@ -988,11 +1086,10 @@ async def send( await response.aclose() return response - async def _send( - self, request: Request, *, history: list = None, start: float = None - ) -> Response: + async def _send(self, request: Request, *, history: list = None, start: float = None) -> Response: start = start or time.perf_counter() - config = self.prepare_config(request) + tls_identifier = await self.aprepare_tls_identifier(self.client_identifier) + config = self.prepare_config(request, tls_identifier=tls_identifier) response = Response.from_tls_response( await self.session.arequest(config.to_dict()), is_byte_response=config.isByteResponse, @@ -1019,14 +1116,10 @@ async def aclose(self) -> None: async def __aenter__(self: A) -> A: if self._state == ClientState.OPENED: - raise RuntimeError( - "It is not possible to open a client instance more than once." - ) + raise RuntimeError("It is not possible to open a client instance more than once.") if self._state == ClientState.CLOSED: - raise RuntimeError( - "The client instance cannot be reopened after it has been closed." 
- ) + raise RuntimeError("The client instance cannot be reopened after it has been closed.") self._state = ClientState.OPENED return self diff --git a/tls_requests/exceptions.py b/tls_requests/exceptions.py index 3938d3a..3908887 100644 --- a/tls_requests/exceptions.py +++ b/tls_requests/exceptions.py @@ -6,15 +6,18 @@ pass __all__ = [ + "AuthenticationError", "CookieConflictError", "HTTPError", - "URLError", - "RemoteProtocolError", + "HeaderError", "ProtocolError", + "RemoteProtocolError", "StreamConsumed", "StreamError", - "TooManyRedirects", "TLSError", + "RotatorError", + "TooManyRedirects", + "URLError", ] @@ -28,7 +31,15 @@ def __init__(self, message: str, **kwargs) -> None: self.request = kwargs.pop("request", None) if response is not None and not self.request and hasattr(response, "request"): self.request = self.response.request - super().__init__(message, **kwargs) + super().__init__(message) + + +class AuthenticationError(HTTPError): + """Authentication Error""" + + +class HeaderError(HTTPError): + """Header Error""" class ProtocolError(HTTPError): @@ -81,3 +92,7 @@ class StreamConsumed(StreamError): class StreamClosed(StreamError): pass + + +class RotatorError(HTTPError): + pass diff --git a/tls_requests/models/__init__.py b/tls_requests/models/__init__.py index 30049c3..c7333e2 100644 --- a/tls_requests/models/__init__.py +++ b/tls_requests/models/__init__.py @@ -6,6 +6,8 @@ from .libraries import TLSLibrary from .request import Request from .response import Response +from .rotators import (BaseRotator, HeaderRotator, ProxyRotator, + TLSIdentifierRotator) from .status_codes import StatusCodes from .tls import CustomTLSClientConfig, TLSClient, TLSConfig, TLSResponse from .urls import URL, Proxy, URLParams diff --git a/tls_requests/models/auth.py b/tls_requests/models/auth.py index 63089e1..faf258d 100644 --- a/tls_requests/models/auth.py +++ b/tls_requests/models/auth.py @@ -1,7 +1,8 @@ from base64 import b64encode from typing import Any, Union -from 
tls_requests.models.request import Request +from ..exceptions import AuthenticationError +from .request import Request class Auth: @@ -22,9 +23,7 @@ def build_auth(self, request: Request): return self._build_auth_headers(request) def _build_auth_headers(self, request: Request): - auth_token = b64encode( - b":".join([self._encode(self.username), self._encode(self.password)]) - ).decode() + auth_token = b64encode(b":".join([self._encode(self.username), self._encode(self.password)])).decode() request.headers["Authorization"] = "Basic %s" % auth_token def _encode(self, value: Union[str, bytes]) -> bytes: @@ -32,6 +31,6 @@ def _encode(self, value: Union[str, bytes]) -> bytes: value = value.encode("latin1") if not isinstance(value, bytes): - raise TypeError("`username` or `password` parameter must be str or byte.") + raise AuthenticationError("`username` or `password` parameter must be str or byte.") return value diff --git a/tls_requests/models/cookies.py b/tls_requests/models/cookies.py index ae799a6..061eacd 100644 --- a/tls_requests/models/cookies.py +++ b/tls_requests/models/cookies.py @@ -3,7 +3,6 @@ from __future__ import annotations import copy -from abc import ABC from email.message import Message from http import cookiejar as cookielib from http.cookiejar import Cookie @@ -11,8 +10,8 @@ from typing import TYPE_CHECKING, Iterator, MutableMapping, Optional from urllib.parse import urlparse, urlunparse -from tls_requests.exceptions import CookieConflictError -from tls_requests.types import CookieTypes +from ..exceptions import CookieConflictError, CookieError +from ..types import CookieTypes if TYPE_CHECKING: from .request import Request @@ -137,10 +136,8 @@ def set(self, name, value, **kwargs): """ # support client code that unsets cookies by assignment of a None value: if value is None: - remove_cookie_by_name( - self, name, domain=kwargs.get("domain"), path=kwargs.get("path") - ) - return + remove_cookie_by_name(self, name, domain=kwargs.get("domain"), 
path=kwargs.get("path")) + return None if isinstance(value, Morsel): c = morsel_to_cookie(value) @@ -239,9 +236,7 @@ def get_dict(self, domain=None, path=None): """ dictionary = {} for cookie in iter(self): - if (domain is None or cookie.domain == domain) and ( - path is None or cookie.path == path - ): + if (domain is None or cookie.domain == domain) and (path is None or cookie.path == path): dictionary[cookie.name] = cookie.value return dictionary @@ -274,13 +269,9 @@ def __delitem__(self, name): remove_cookie_by_name(self, name) def set_cookie(self, cookie, *args, **kwargs): - if ( - hasattr(cookie.value, "startswith") - and cookie.value.startswith('"') - and cookie.value.endswith('"') - ): + if hasattr(cookie.value, "startswith") and cookie.value.startswith('"') and cookie.value.endswith('"'): cookie.value = cookie.value.replace('\\"', "") - return super().set_cookie(cookie, *args, **kwargs) + return super().set_cookie(cookie, *args, **kwargs) # type: ignore def update(self, other): # noqa """Updates this jar with cookies from another CookieJar or dict-like""" @@ -329,9 +320,7 @@ def _find_no_duplicates(self, name, domain=None, path=None): if path is None or cookie.path == path: if toReturn is not None: # if there are multiple cookies that meet passed in criteria - raise CookieConflictError( - f"There are multiple cookies with name, {name!r}" - ) + raise CookieConflictError(f"There are multiple cookies with name, {name!r}") # we will eventually return this as long as no cookie conflict toReturn = cookie.value @@ -351,6 +340,7 @@ def __setstate__(self, state): self.__dict__.update(state) if "_cookies_lock" not in self.__dict__: import threading + self._cookies_lock = threading.RLock() def copy(self): @@ -447,9 +437,7 @@ def create_cookie(name, value, **kwargs): badargs = set(kwargs) - set(result) if badargs: - raise TypeError( - f"create_cookie() got unexpected keyword arguments: {list(badargs)}" - ) + raise CookieError(f"create_cookie() got unexpected keyword 
arguments: {list(badargs)}") result.update(kwargs) result["port_specified"] = bool(result["port"]) @@ -468,7 +456,7 @@ def morsel_to_cookie(morsel): try: expires = int(time.time() + int(morsel["max-age"])) except ValueError: - raise TypeError(f"max-age: {morsel['max-age']} must be integer") + raise CookieError(f"max-age: {morsel['max-age']} must be integer") elif morsel["expires"]: time_template = "%a, %d-%b-%Y %H:%M:%S GMT" expires = calendar.timegm(time.strptime(morsel["expires"], time_template)) @@ -532,7 +520,7 @@ def merge_cookies(cookiejar, cookies): return cookiejar -class Cookies(MutableMapping[str, str], ABC): +class Cookies(MutableMapping[str, str]): def __init__(self, cookies: CookieTypes = None) -> None: self.cookiejar = self._prepare_cookiejar(cookies) @@ -562,10 +550,8 @@ def get_cookie_header(self, request: Request) -> str: def set(self, name, value, **kwargs) -> Optional[Cookie]: if value is None: - remove_cookie_by_name( - self, name, domain=kwargs.get("domain"), path=kwargs.get("path") - ) - return + remove_cookie_by_name(self, name, domain=kwargs.get("domain"), path=kwargs.get("path")) + return None if isinstance(value, Morsel): cookie = morsel_to_cookie(value) diff --git a/tls_requests/models/encoders.py b/tls_requests/models/encoders.py index 3bca4c6..1b78b6e 100644 --- a/tls_requests/models/encoders.py +++ b/tls_requests/models/encoders.py @@ -5,9 +5,9 @@ from typing import Any, AsyncIterator, Dict, Iterator, Mapping, Tuple, TypeVar from urllib.parse import urlencode -from tls_requests.types import (BufferTypes, ByteOrStr, RequestData, - RequestFiles, RequestFileValue, RequestJson) -from tls_requests.utils import to_bytes, to_str +from ..types import (BufferTypes, ByteOrStr, RequestData, RequestFiles, + RequestFileValue, RequestJson) +from ..utils import to_bytes, to_str __all__ = [ "JsonEncoder", @@ -64,9 +64,7 @@ def render_parts(self) -> bytes: def render_headers(self) -> bytes: headers = self.get_headers() - return ( - b"\r\n".join(b"%s: 
%s" % (k, v) for k, v in headers.items()) + b"\r\n\r\n" - ) + return b"\r\n".join(b"%s: %s" % (k, v) for k, v in headers.items()) + b"\r\n\r\n" def render_data(self, chunk_size: int = 65_536) -> Iterator[bytes]: yield b"" @@ -80,9 +78,7 @@ def get_headers(self) -> Dict[bytes, bytes]: content_type = getattr(self, "content_type", None) if content_type: self._headers[b"Content-Type"] = ( - self.content_type.encode("ascii") - if isinstance(content_type, str) - else content_type + self.content_type.encode("ascii") if isinstance(content_type, str) else content_type ) return self._headers @@ -194,9 +190,7 @@ def __init__( self._chunk_size = chunk_size self._is_closed = False self.fields = self._prepare_fields(data, files) - self.boundary = ( - boundary if boundary and isinstance(boundary, bytes) else get_boundary() - ) + self.boundary = boundary if boundary and isinstance(boundary, bytes) else get_boundary() @property def headers(self) -> dict: diff --git a/tls_requests/models/headers.py b/tls_requests/models/headers.py index c10acf6..d079bf1 100644 --- a/tls_requests/models/headers.py +++ b/tls_requests/models/headers.py @@ -1,10 +1,10 @@ -from abc import ABC from collections.abc import Mapping, MutableMapping from enum import Enum from typing import Any, ItemsView, KeysView, List, Literal, Tuple, ValuesView -from tls_requests.types import ByteOrStr, HeaderTypes -from tls_requests.utils import to_str +from ..exceptions import HeaderError +from ..types import ByteOrStr, HeaderTypes +from ..utils import to_str __all__ = ["Headers"] @@ -23,16 +23,9 @@ def __contains__(self, key: str) -> bool: return False -class Headers(MutableMapping, ABC): - def __init__( - self, - headers: HeaderTypes = None, - *, - alias: HeaderAliasTypes = HeaderAlias.LOWER - ): - self.alias = ( - alias if alias in HeaderAlias._value2member_map_ else HeaderAlias.LOWER - ) +class Headers(MutableMapping): + def __init__(self, headers: HeaderTypes = None, *, alias: HeaderAliasTypes = HeaderAlias.LOWER): + 
self.alias = alias if alias in HeaderAlias._value2member_map_ else HeaderAlias.LOWER self._items = self._prepare_items(headers) def get(self, key: str, default: Any = None) -> Any: @@ -76,7 +69,7 @@ def _prepare_items(self, headers: HeaderTypes) -> List[Tuple[str, Any]]: return items except IndexError: pass - raise TypeError + raise HeaderError def _normalize_key(self, key: ByteOrStr) -> str: key = to_str(key, encoding="ascii") @@ -90,13 +83,13 @@ def _normalize_key(self, key: ByteOrStr) -> str: def _normalize_value(self, value) -> List[str]: if isinstance(value, dict): - raise TypeError + raise HeaderError if isinstance(value, (list, tuple, set)): items = [] for item in value: if isinstance(item, dict): - raise TypeError + raise HeaderError items.append(to_str(item)) return items @@ -110,8 +103,7 @@ def __setitem__(self, key, value) -> None: key, value = self._normalize(key, value) for idx, (k, _) in enumerate(self._items): if k == key: - values = [v for v in value if v not in self._items[idx][1]] - self._items[idx][1].extend(values) + self._items[idx] = (k, value) found = True break @@ -151,9 +143,7 @@ def __eq__(self, other: HeaderTypes): return items == other def __repr__(self): - SECURE = [ - self._normalize_key(key) for key in ["Authorization", "Proxy-Authorization"] - ] + SECURE = [self._normalize_key(key) for key in ["Authorization", "Proxy-Authorization"]] return "<%s: %s>" % ( self.__class__.__name__, {k: "[secure]" if k in SECURE else ",".join(v) for k, v in self._items}, diff --git a/tls_requests/models/libraries.py b/tls_requests/models/libraries.py index ef81f67..c214890 100644 --- a/tls_requests/models/libraries.py +++ b/tls_requests/models/libraries.py @@ -1,19 +1,20 @@ import ctypes import glob +import json import os import platform import re import sys +import urllib.error +import urllib.request from dataclasses import dataclass, field, fields from pathlib import Path from platform import machine -from typing import List, Optional - -import 
requests -from tqdm import tqdm +from typing import List, Optional, Tuple __all__ = ["TLSLibrary"] +LATEST_VERSION_TAG_NAME = "v1.11.2" BIN_DIR = os.path.join(Path(__file__).resolve(strict=True).parent.parent / "bin") GITHUB_API_URL = "https://api.github.com/repos/bogdanfinn/tls-client/releases" PLATFORM = sys.platform @@ -73,7 +74,7 @@ def model_fields_set(cls) -> set: @classmethod def from_kwargs(cls, **kwargs): model_fields_set = cls.model_fields_set() - return cls(**{k: v for k, v in kwargs.items() if k in model_fields_set}) + return cls(**{k: v for k, v in kwargs.items() if k in model_fields_set}) # noqa @dataclass @@ -92,9 +93,7 @@ class Release(BaseRelease): def from_kwargs(cls, **kwargs): model_fields_set = cls.model_fields_set() assets = kwargs.pop("assets", []) or [] - kwargs["assets"] = [ - ReleaseAsset.from_kwargs(**asset_kwargs) for asset_kwargs in assets - ] + kwargs["assets"] = [ReleaseAsset.from_kwargs(**asset_kwargs) for asset_kwargs in assets] return cls(**{k: v for k, v in kwargs.items() if k in model_fields_set}) @@ -133,116 +132,140 @@ class TLSLibrary: _PATH: str = None _STATIC_API_DATA = { - "name": "v1.7.10", - "tag_name": "v1.7.10", + "name": "v1.11.2", + "tag_name": "v1.11.2", "assets": [ { - "browser_download_url": "https://github.com/bogdanfinn/tls-client/releases/download/v1.7.10/tls-client-darwin-amd64-1.7.10.dylib", - "name": "tls-client-darwin-amd64-1.7.10.dylib", + "browser_download_url": "https://github.com/bogdanfinn/tls-client/releases/download/v1.11.2/tls-client-darwin-amd64-1.11.2.dylib", + "name": "tls-client-darwin-amd64-1.11.2.dylib", + }, + { + "browser_download_url": "https://github.com/bogdanfinn/tls-client/releases/download/v1.11.2/tls-client-darwin-arm64-1.11.2.dylib", + "name": "tls-client-darwin-arm64-1.11.2.dylib", }, { - "browser_download_url": "https://github.com/bogdanfinn/tls-client/releases/download/v1.7.10/tls-client-darwin-arm64-1.7.10.dylib", - "name": "tls-client-darwin-arm64-1.7.10.dylib", + 
"browser_download_url": "https://github.com/bogdanfinn/tls-client/releases/download/v1.11.2/tls-client-linux-alpine-amd64-1.11.2.so", + "name": "tls-client-linux-alpine-amd64-1.11.2.so", }, { - "browser_download_url": "https://github.com/bogdanfinn/tls-client/releases/download/v1.7.10/tls-client-linux-arm64-1.7.10.so", - "name": "tls-client-linux-arm64-1.7.10.so", + "browser_download_url": "https://github.com/bogdanfinn/tls-client/releases/download/v1.11.2/tls-client-linux-arm64-1.11.2.so", + "name": "tls-client-linux-arm64-1.11.2.so", }, { - "browser_download_url": "https://github.com/bogdanfinn/tls-client/releases/download/v1.7.10/tls-client-linux-armv7-1.7.10.so", - "name": "tls-client-linux-armv7-1.7.10.so", + "browser_download_url": "https://github.com/bogdanfinn/tls-client/releases/download/v1.11.2/tls-client-linux-armv7-1.11.2.so", + "name": "tls-client-linux-armv7-1.11.2.so", }, { - "browser_download_url": "https://github.com/bogdanfinn/tls-client/releases/download/v1.7.10/tls-client-linux-ubuntu-amd64-1.7.10.so", - "name": "tls-client-linux-ubuntu-amd64-1.7.10.so", + "browser_download_url": "https://github.com/bogdanfinn/tls-client/releases/download/v1.11.2/tls-client-linux-ubuntu-amd64-1.11.2.so", + "name": "tls-client-linux-ubuntu-amd64-1.11.2.so", }, { - "browser_download_url": "https://github.com/bogdanfinn/tls-client/releases/download/v1.7.10/tls-client-windows-32-1.7.10.dll", - "name": "tls-client-windows-32-1.7.10.dll", + "browser_download_url": "https://github.com/bogdanfinn/tls-client/releases/download/v1.11.2/tls-client-windows-32-1.11.2.dll", + "name": "tls-client-windows-32-1.11.2.dll", }, { - "browser_download_url": "https://github.com/bogdanfinn/tls-client/releases/download/v1.7.10/tls-client-windows-64-1.7.10.dll", - "name": "tls-client-windows-64-1.7.10.dll", + "browser_download_url": "https://github.com/bogdanfinn/tls-client/releases/download/v1.11.2/tls-client-windows-64-1.11.2.dll", + "name": "tls-client-windows-64-1.11.2.dll", }, { - 
"browser_download_url": "https://github.com/bogdanfinn/tls-client/releases/download/v1.7.10/tls-client-xgo-1.7.10-darwin-amd64.dylib", - "name": "tls-client-xgo-1.7.10-darwin-amd64.dylib", + "browser_download_url": "https://github.com/bogdanfinn/tls-client/releases/download/v1.11.2/tls-client-xgo-1.11.2-darwin-amd64.dylib", + "name": "tls-client-xgo-1.11.2-darwin-amd64.dylib", }, { - "browser_download_url": "https://github.com/bogdanfinn/tls-client/releases/download/v1.7.10/tls-client-xgo-1.7.10-darwin-arm64.dylib", - "name": "tls-client-xgo-1.7.10-darwin-arm64.dylib", + "browser_download_url": "https://github.com/bogdanfinn/tls-client/releases/download/v1.11.2/tls-client-xgo-1.11.2-darwin-arm64.dylib", + "name": "tls-client-xgo-1.11.2-darwin-arm64.dylib", }, { - "browser_download_url": "https://github.com/bogdanfinn/tls-client/releases/download/v1.7.10/tls-client-xgo-1.7.10-linux-386.so", - "name": "tls-client-xgo-1.7.10-linux-386.so", + "browser_download_url": "https://github.com/bogdanfinn/tls-client/releases/download/v1.11.2/tls-client-xgo-1.11.2-linux-386.so", + "name": "tls-client-xgo-1.11.2-linux-386.so", }, { - "browser_download_url": "https://github.com/bogdanfinn/tls-client/releases/download/v1.7.10/tls-client-xgo-1.7.10-linux-amd64.so", - "name": "tls-client-xgo-1.7.10-linux-amd64.so", + "browser_download_url": "https://github.com/bogdanfinn/tls-client/releases/download/v1.11.2/tls-client-xgo-1.11.2-linux-amd64.so", + "name": "tls-client-xgo-1.11.2-linux-amd64.so", }, { - "browser_download_url": "https://github.com/bogdanfinn/tls-client/releases/download/v1.7.10/tls-client-xgo-1.7.10-linux-arm-5.so", - "name": "tls-client-xgo-1.7.10-linux-arm-5.so", + "browser_download_url": "https://github.com/bogdanfinn/tls-client/releases/download/v1.11.2/tls-client-xgo-1.11.2-linux-arm-5.so", + "name": "tls-client-xgo-1.11.2-linux-arm-5.so", }, { - "browser_download_url": 
"https://github.com/bogdanfinn/tls-client/releases/download/v1.7.10/tls-client-xgo-1.7.10-linux-arm-6.so", - "name": "tls-client-xgo-1.7.10-linux-arm-6.so", + "browser_download_url": "https://github.com/bogdanfinn/tls-client/releases/download/v1.11.2/tls-client-xgo-1.11.2-linux-arm-6.so", + "name": "tls-client-xgo-1.11.2-linux-arm-6.so", }, { - "browser_download_url": "https://github.com/bogdanfinn/tls-client/releases/download/v1.7.10/tls-client-xgo-1.7.10-linux-arm-7.so", - "name": "tls-client-xgo-1.7.10-linux-arm-7.so", + "browser_download_url": "https://github.com/bogdanfinn/tls-client/releases/download/v1.11.2/tls-client-xgo-1.11.2-linux-arm-7.so", + "name": "tls-client-xgo-1.11.2-linux-arm-7.so", }, { - "browser_download_url": "https://github.com/bogdanfinn/tls-client/releases/download/v1.7.10/tls-client-xgo-1.7.10-linux-arm64.so", - "name": "tls-client-xgo-1.7.10-linux-arm64.so", + "browser_download_url": "https://github.com/bogdanfinn/tls-client/releases/download/v1.11.2/tls-client-xgo-1.11.2-linux-arm64.so", + "name": "tls-client-xgo-1.11.2-linux-arm64.so", }, { - "browser_download_url": "https://github.com/bogdanfinn/tls-client/releases/download/v1.7.10/tls-client-xgo-1.7.10-linux-ppc64le.so", - "name": "tls-client-xgo-1.7.10-linux-ppc64le.so", + "browser_download_url": "https://github.com/bogdanfinn/tls-client/releases/download/v1.11.2/tls-client-xgo-1.11.2-linux-ppc64le.so", + "name": "tls-client-xgo-1.11.2-linux-ppc64le.so", }, { - "browser_download_url": "https://github.com/bogdanfinn/tls-client/releases/download/v1.7.10/tls-client-xgo-1.7.10-linux-riscv64.so", - "name": "tls-client-xgo-1.7.10-linux-riscv64.so", + "browser_download_url": "https://github.com/bogdanfinn/tls-client/releases/download/v1.11.2/tls-client-xgo-1.11.2-linux-riscv64.so", + "name": "tls-client-xgo-1.11.2-linux-riscv64.so", }, { - "browser_download_url": "https://github.com/bogdanfinn/tls-client/releases/download/v1.7.10/tls-client-xgo-1.7.10-linux-s390x.so", - "name": 
"tls-client-xgo-1.7.10-linux-s390x.so", + "browser_download_url": "https://github.com/bogdanfinn/tls-client/releases/download/v1.11.2/tls-client-xgo-1.11.2-linux-s390x.so", + "name": "tls-client-xgo-1.11.2-linux-s390x.so", }, { - "browser_download_url": "https://github.com/bogdanfinn/tls-client/releases/download/v1.7.10/tls-client-xgo-1.7.10-windows-386.dll", - "name": "tls-client-xgo-1.7.10-windows-386.dll", + "browser_download_url": "https://github.com/bogdanfinn/tls-client/releases/download/v1.11.2/tls-client-xgo-1.11.2-windows-386.dll", + "name": "tls-client-xgo-1.11.2-windows-386.dll", }, { - "browser_download_url": "https://github.com/bogdanfinn/tls-client/releases/download/v1.7.10/tls-client-xgo-1.7.10-windows-amd64.dll", - "name": "tls-client-xgo-1.7.10-windows-amd64.dll", + "browser_download_url": "https://github.com/bogdanfinn/tls-client/releases/download/v1.11.2/tls-client-xgo-1.11.2-windows-amd64.dll", + "name": "tls-client-xgo-1.11.2-windows-amd64.dll", }, ], } + @staticmethod + def _parse_version(version_string: str) -> Tuple[int, ...]: + """Converts a version string (e.g., "v1.11.2") to a comparable tuple (1, 11, 2).""" + try: + parts = version_string.lstrip("v").split(".") + return tuple(map(int, parts)) + except (ValueError, AttributeError): + return 0, 0, 0 + + @staticmethod + def _parse_version_from_filename(filename: str) -> Tuple[int, ...]: + """Extracts and parses the version from a library filename.""" + match = re.search(r"v?(\d+\.\d+\.\d+)", Path(filename).name) + if match: + return TLSLibrary._parse_version(match.group(1)) + return 0, 0, 0 + + @classmethod + def cleanup_files(cls, keep_file: str = None): + """Removes all library files in the BIN_DIR except for the one to keep.""" + for file_path in cls.find_all(): + is_remove = True + if keep_file and Path(file_path).name == Path(keep_file).name: + is_remove = False + + if is_remove: + try: + os.remove(file_path) + print(f"Removed old library file: {file_path}") + except OSError as e: + 
print(f"Error removing old library file {file_path}: {e}") + @classmethod def fetch_api(cls, version: str = None, retries: int = 3): def _find_release(data, version_: str = None): - releases = [ - Release.from_kwargs(**kwargs) for kwargs in data - ] + releases = [Release.from_kwargs(**kwargs) for kwargs in data] if version_ is not None: - version_ = ( - "v%s" % version_ - if not str(version_).startswith("v") - else str(version_) - ) - releases = [ - release - for release in releases - if re.search(version_, release.name, re.I) - ] + version_ = "v%s" % version_ if not str(version_).startswith("v") else str(version_) + releases = [release for release in releases if re.search(version_, release.name, re.I)] for release in releases: for asset in release.assets: - if IS_UBUNTU and PATTERN_UBUNTU_RE.search( - asset.browser_download_url - ): + if IS_UBUNTU and PATTERN_UBUNTU_RE.search(asset.browser_download_url): ubuntu_urls.append(asset.browser_download_url) if PATTERN_RE.search(asset.browser_download_url): asset_urls.append(asset.browser_download_url) @@ -250,11 +273,12 @@ def _find_release(data, version_: str = None): asset_urls, ubuntu_urls = [], [] for _ in range(retries): try: - response = requests.get(GITHUB_API_URL) - if response.ok: - _find_release(response.json()) - break - + # Use standard library's urllib to fetch API data + with urllib.request.urlopen(GITHUB_API_URL, timeout=10) as response: + if response.status == 200: + content = response.read().decode("utf-8") + _find_release(json.loads(content)) + break except Exception as ex: print("Unable to fetch GitHub API: %s" % ex) @@ -272,14 +296,11 @@ def find(cls) -> str: for fp in cls.find_all(): if PATTERN_RE.search(fp): return fp + return None @classmethod def find_all(cls) -> List[str]: - return [ - src - for src in glob.glob(os.path.join(BIN_DIR, r"*")) - if src.lower().endswith(("so", "dll", "dylib")) - ] + return [src for src in glob.glob(os.path.join(BIN_DIR, r"*")) if src.lower().endswith(("so", "dll", 
"dylib"))] @classmethod def download(cls, version: str = None) -> str: @@ -293,35 +314,51 @@ def download(cls, version: str = None) -> str: ) ) download_url = None - for download_url in cls.fetch_api(version): - if download_url: + for url in cls.fetch_api(version): + if url: + download_url = url break print("Library Download URL: %s" % download_url) if download_url: - destination = os.path.join(BIN_DIR, download_url.split("/")[-1]) - with requests.get(download_url, stream=True) as response: - response.raise_for_status() + destination_name = download_url.split("/")[-1] + destination = os.path.join(BIN_DIR, destination_name) + + # Use standard library's urllib to download the file + with urllib.request.urlopen(download_url, timeout=10) as response: + if response.status != 200: + raise urllib.error.URLError(f"Failed to download file: HTTP {response.status}") + os.makedirs(BIN_DIR, exist_ok=True) total_size = int(response.headers.get("content-length", 0)) - chunk_size = 1024 - with open( - os.path.join(BIN_DIR, download_url.split("/")[-1]), "wb" - ) as file, tqdm( - desc=destination, - total=total_size, - unit="iB", - unit_scale=True, - unit_divisor=chunk_size, - ) as progress_bar: - for chunk in response.iter_content(chunk_size): - size = file.write(chunk) - progress_bar.update(size) - + chunk_size = 8192 # 8KB + + with open(destination, "wb") as file: + downloaded = 0 + while True: + chunk = response.read(chunk_size) + if not chunk: + break + + file.write(chunk) + downloaded += len(chunk) + + # Simple text-based progress bar + if total_size > 0: + percent = downloaded / total_size * 100 + bar_length = 50 + filled_length = int(bar_length * downloaded // total_size) + bar = "=" * filled_length + "-" * (bar_length - filled_length) + sys.stdout.write(f"\rDownloading {destination_name}: [{bar}] {percent:.1f}%") + sys.stdout.flush() + + print() # Newline after download completes return destination - except requests.exceptions.HTTPError as ex: + except 
(urllib.error.URLError, urllib.error.HTTPError) as ex: print("Unable to download file: %s" % ex) + except Exception as e: + print("An unexpected error occurred during download: %s" % e) @classmethod def set_path(cls, fp: str): @@ -329,44 +366,60 @@ def set_path(cls, fp: str): @classmethod def load(cls): - """Load libraries""" + """ + Loads the TLS library. It checks for the correct version, downloads it if + the local version is outdated or missing, and then loads it into memory. + """ - def _load_libraries(fp_): + def _load_library(fp_): try: lib = ctypes.cdll.LoadLibrary(fp_) cls.set_path(fp_) + print(f"Successfully loaded TLS library: {fp_}") return lib except Exception as ex: - print("Unable to load TLS Library, details: %s" % ex) + print(f"Unable to load TLS library '{fp_}', details: {ex}") try: os.remove(fp_) - except FileNotFoundError: + except (FileNotFoundError, PermissionError): pass - if cls._PATH is not None: - library = _load_libraries(cls._PATH) - if library: - return library - - if TLS_LIBRARY_PATH: - library = _load_libraries(TLS_LIBRARY_PATH) - if library: - return library - - for fp in cls.find_all(): - if IS_UBUNTU and PATTERN_UBUNTU_RE.search(fp): - library = _load_libraries(fp) - if library: - return library - if PATTERN_RE.search(fp): - library = _load_libraries(fp) + target_version = cls._parse_version(LATEST_VERSION_TAG_NAME) + print(f"Required library version: {LATEST_VERSION_TAG_NAME}") + local_files = cls.find_all() + newest_local_version = (0, 0, 0) + newest_local_file = None + + if local_files: + for file_path in local_files: + file_version = cls._parse_version_from_filename(file_path) + if file_version > newest_local_version: + newest_local_version = file_version + newest_local_file = file_path + print( + f"Found newest local library: {newest_local_file} (version {'.'.join(map(str, newest_local_version))})" + ) + else: + print("No local library found.") + + if newest_local_version < target_version: + if newest_local_file: + 
print(f"Local library is outdated. Upgrading to {LATEST_VERSION_TAG_NAME}...") + else: + print(f"Downloading required library version {LATEST_VERSION_TAG_NAME}...") + + downloaded_fp = cls.download(version=LATEST_VERSION_TAG_NAME) + if downloaded_fp: + cls.cleanup_files(keep_file=downloaded_fp) + library = _load_library(downloaded_fp) if library: return library + raise OSError("Failed to download the required TLS library.") - download_fp = cls.download() - if download_fp: - library = _load_libraries(download_fp) + if newest_local_file: + library = _load_library(newest_local_file) if library: + cls.cleanup_files(keep_file=newest_local_file) return library - raise OSError("Your system does not support TLS Library.") + raise OSError("Could not find or load a compatible TLS library.") diff --git a/tls_requests/models/request.py b/tls_requests/models/request.py index ca961f0..be37809 100644 --- a/tls_requests/models/request.py +++ b/tls_requests/models/request.py @@ -1,13 +1,13 @@ from typing import Any -from tls_requests.models.cookies import Cookies -from tls_requests.models.encoders import StreamEncoder -from tls_requests.models.headers import Headers -from tls_requests.models.urls import URL, Proxy -from tls_requests.settings import DEFAULT_TIMEOUT -from tls_requests.types import (CookieTypes, HeaderTypes, MethodTypes, - ProxyTypes, RequestData, RequestFiles, - TimeoutTypes, URLParamTypes, URLTypes) +from ..settings import DEFAULT_TIMEOUT +from ..types import (CookieTypes, HeaderTypes, MethodTypes, ProxyTypes, + RequestData, RequestFiles, TimeoutTypes, URLParamTypes, + URLTypes) +from .cookies import Cookies +from .encoders import StreamEncoder +from .headers import Headers +from .urls import URL, Proxy __all__ = ["Request"] diff --git a/tls_requests/models/response.py b/tls_requests/models/response.py index 1adbeef..e164c8b 100644 --- a/tls_requests/models/response.py +++ b/tls_requests/models/response.py @@ -4,16 +4,16 @@ from email.message import Message from 
typing import Any, Callable, Optional, TypeVar, Union -from tls_requests.exceptions import Base64DecodeError, HTTPError -from tls_requests.models.cookies import Cookies -from tls_requests.models.encoders import StreamEncoder -from tls_requests.models.headers import Headers -from tls_requests.models.request import Request -from tls_requests.models.status_codes import StatusCodes -from tls_requests.models.tls import TLSResponse -from tls_requests.settings import CHUNK_SIZE -from tls_requests.types import CookieTypes, HeaderTypes, ResponseHistory -from tls_requests.utils import b64decode, chardet, to_json +from ..exceptions import Base64DecodeError, HTTPError +from ..settings import CHUNK_SIZE +from ..types import CookieTypes, HeaderTypes, ResponseHistory +from ..utils import b64decode, chardet, to_json +from .cookies import Cookies +from .encoders import StreamEncoder +from .headers import Headers +from .request import Request +from .status_codes import StatusCodes +from .tls import TLSResponse __all__ = ["Response"] @@ -76,9 +76,7 @@ def elapsed(self, elapsed: datetime.timedelta) -> None: @property def request(self) -> Request: if self._request is None: - raise RuntimeError( - "The request instance has not been set on this response." 
- ) + raise RuntimeError("The request instance has not been set on this response.") return self._request @request.setter @@ -191,11 +189,7 @@ def raise_for_status(self) -> "Response": raise HTTPError( http_error_msg.format( self.status_code, - ( - self.reason - if self.status_code < 100 - else StatusCodes.get_reason(self.status_code) - ), + (self.reason if self.status_code < 100 else StatusCodes.get_reason(self.status_code)), self.url, ), response=self, @@ -237,9 +231,7 @@ async def aclose(self) -> None: return self.close() @classmethod - def from_tls_response( - cls, response: TLSResponse, is_byte_response: bool = False - ) -> "Response": + def from_tls_response(cls, response: TLSResponse, is_byte_response: bool = False) -> "Response": def _parse_response_body(value: Optional[str]) -> bytes: if value: if is_byte_response and response.status > 0: @@ -247,9 +239,7 @@ def _parse_response_body(value: Optional[str]) -> bytes: value = b64decode(value.split(",")[-1]) return value except (binascii.Error, AssertionError): - raise Base64DecodeError( - "Couldn't decode the base64 string into bytes." 
- ) + raise Base64DecodeError("Couldn't decode the base64 string into bytes.") return value.encode("utf-8") return b"" diff --git a/tls_requests/models/rotators.py b/tls_requests/models/rotators.py new file mode 100644 index 0000000..975d5d2 --- /dev/null +++ b/tls_requests/models/rotators.py @@ -0,0 +1,545 @@ +from __future__ import annotations + +import asyncio +import itertools +import json +import random +import threading +from abc import ABC, abstractmethod +from pathlib import Path +from typing import (Any, Generic, Iterable, Iterator, List, Literal, Optional, + TypeVar, Union) + +from ..exceptions import RotatorError +from ..types import HeaderTypes, TLSIdentifierTypes +from .headers import Headers +from .urls import Proxy + +T = TypeVar("T") + +TLS_IDENTIFIER_TEMPLATES = [ + "chrome_120", + "chrome_124", + "chrome_131", + "chrome_133", + "firefox_120", + "firefox_123", + "firefox_132", + "firefox_133", + "safari_16_0", + "safari_ios_16_0", + "safari_ios_17_0", + "safari_ios_18_0", + "safari_ios_18_5", +] + +USER_AGENTS = [ + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36", + "Mozilla/5.0 (Windows NT 11.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", + ( + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0" + " Safari/537.36" + ), + ( + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0" + " Safari/537.36" + ), + ( + "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_2_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0" + " Safari/537.36" + ), + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) 
Chrome/121.0.0.0 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:121.0) Gecko/20100101 Firefox/121.0", + "Mozilla/5.0 (X11; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:122.0) Gecko/20100101 Firefox/122.0", + ( + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2" + " Safari/605.1.15" + ), + ( + "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_2_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2" + " Safari/605.1.15" + ), + ( + "Mozilla/5.0 (iPhone; CPU iPhone OS 17_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2" + " Mobile/15E148 Safari/604.1" + ), + ( + "Mozilla/5.0 (iPad; CPU OS 17_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2" + " Mobile/15E148 Safari/604.1" + ), + ( + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0" + " Safari/537.36 Edg/120.0.0.0" + ), + ( + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0" + " Safari/537.36 Edg/120.0.0.0" + ), + ( + "Mozilla/5.0 (Linux; Android 14; SM-G998B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Mobile" + " Safari/537.36" + ), + ( + "Mozilla/5.0 (Linux; Android 14; SM-S918B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Mobile" + " Safari/537.36" + ), + ( + "Mozilla/5.0 (iPhone; CPU iPhone OS 17_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1" + " Mobile/15E148 Safari/604.1" + ), + ( + "Mozilla/5.0 (Linux; Android 15; SM-S931B Build/AP3A.240905.015.A2; wv) AppleWebKit/537.36 (KHTML, like Gecko)" + " Version/4.0 Chrome/127.0.6533.103 Mobile Safari/537.36" + ), + ( + "Mozilla/5.0 (Linux; Android 14; SM-S928B/DS) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.6099.230" + " Mobile Safari/537.36" + ), + ( + 
"Mozilla/5.0 (Linux; Android 14; SM-F956U) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0" + " Chrome/80.0.3987.119 Mobile Safari/537.36" + ), + ( + "Mozilla/5.0 (Linux; Android 13; SM-S911U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Mobile" + " Safari/537.36" + ), + ( + "Mozilla/5.0 (Linux; Android 13; SM-S901B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Mobile" + " Safari/537.36" + ), + ( + "Mozilla/5.0 (Linux; Android 13; SM-S908U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Mobile" + " Safari/537.36" + ), + ( + "Mozilla/5.0 (Linux; Android 13; SM-G991B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Mobile" + " Safari/537.36" + ), + ( + "Mozilla/5.0 (Linux; Android 13; SM-G998U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Mobile" + " Safari/537.36" + ), + ( + "Mozilla/5.0 (Linux; Android 14; Pixel 9 Pro Build/AD1A.240418.003; wv) AppleWebKit/537.36 (KHTML, like Gecko)" + " Version/4.0 Chrome/124.0.6367.54 Mobile Safari/537.36" + ), + ( + "Mozilla/5.0 (Linux; Android 14; Pixel 9 Build/AD1A.240411.003.A5; wv) AppleWebKit/537.36 (KHTML, like Gecko)" + " Version/4.0 Chrome/124.0.6367.54 Mobile Safari/537.36" + ), + ( + "Mozilla/5.0 (Linux; Android 15; Pixel 8 Pro Build/AP4A.250105.002; wv) AppleWebKit/537.36 (KHTML, like Gecko)" + " Version/4.0 Chrome/132.0.6834.163 Mobile Safari/537.36" + ), + ( + "Mozilla/5.0 (Linux; Android 15; Pixel 8 Build/AP4A.250105.002; wv) AppleWebKit/537.36 (KHTML, like Gecko)" + " Version/4.0 Chrome/132.0.6834.163 Mobile Safari/537.36" + ), + ( + "Mozilla/5.0 (Linux; Android 13; Pixel 7 Pro) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Mobile" + " Safari/537.36" + ), + ( + "Mozilla/5.0 (Linux; Android 13; Pixel 7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Mobile" + " Safari/537.36" + ), + ( + "Mozilla/5.0 (Linux; Android 15; moto g - 2025 Build/V1VK35.22-13-2; wv) AppleWebKit/537.36 (KHTML, like Gecko)" + " Version/4.0 Chrome/132.0.6834.163 Mobile 
Safari/537.36" + ), + ( + "Mozilla/5.0 (Linux; Android 13; 23129RAA4G Build/TKQ1.221114.001; wv) AppleWebKit/537.36 (KHTML, like Gecko)" + " Version/4.0 Chrome/116.0.0.0 Mobile Safari/537.36" + ), + ( + "Mozilla/5.0 (Linux; Android 15; 24129RT7CC Build/AP3A.240905.015.A2; wv) AppleWebKit/537.36 (KHTML, like" + " Gecko) Version/4.0 Chrome/130.0.6723.86 Mobile Safari/537.36" + ), + ( + "Mozilla/5.0 (Linux; Android 12; HBP-LX9 Build/HUAWEIHBP-L29; wv) AppleWebKit/537.36 (KHTML, like Gecko)" + " Version/4.0 Chrome/99.0.4844.88 Mobile Safari/537.36" + ), + ( + "Mozilla/5.0 (Linux; U; Android 12; zh-Hans-CN; ADA-AL00 Build/HUAWEIADA-AL00) AppleWebKit/537.36 (KHTML, like" + " Gecko) Version/4.0 Chrome/100.0.4896.58 Quark/6.11.2.531 Mobile Safari/537.36" + ), + ( + "Mozilla/5.0 (Linux; Android 12; PSD-AL00 Build/HUAWEIPSD-AL00; wv) AppleWebKit/537.36 (KHTML, like Gecko)" + " Version/4.0 Chrome/99.0.4844.88 Mobile Safari/537.36" + ), + ( + "Mozilla/5.0 (Linux; Android 14; 24030PN60G Build/UKQ1.231003.002; wv) AppleWebKit/537.36 (KHTML, like Gecko)" + " Version/4.0 Chrome/122.0.6261.119 Mobile Safari/537.36" + ), + ( + "Mozilla/5.0 (Linux; Android 10; VOG-L29) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Mobile" + " Safari/537.36" + ), + ( + "Mozilla/5.0 (Linux; Android 10; MAR-LX1A) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Mobile" + " Safari/537.36" + ), +] + +HEADER_TEMPLATES = [ + { + "accept": "*/*", + "connection": "keep-alive", + "accept-encoding": "gzip, deflate, br, zstd", + "User-Agent": ua, + } + for ua in USER_AGENTS +] + + +class BaseRotator(ABC, Generic[T]): + """ + A unified, thread-safe and coroutine-safe abstract base class for a + generic rotating data source. + + This class provides a dual API for both synchronous and asynchronous contexts. + - For synchronous, thread-safe operations, use methods like `next()`, `add()`. + - For asynchronous, coroutine-safe operations, use methods prefixed with 'a', + like `anext()`, `aadd()`. 
+ + It uses a `threading.Lock` for thread safety and an `asyncio.Lock` for + coroutine safety. + + Attributes: + items (List[T]): The list of items to rotate through. + strategy (str): The rotation strategy in use. + """ + + def __init__( + self, + items: Optional[Iterable[T]] = None, + strategy: Literal["round_robin", "random", "weighted"] = "random", + ) -> None: + """ + Initializes the BaseRotator. + + Args: + items: An iterable of initial items. + strategy: The rotation strategy to use. + """ + self.items: List[T] = list(items or []) + self.strategy = strategy + self._iterator: Optional[Iterator[T]] = None + self._lock = threading.Lock() + self._async_lock = asyncio.Lock() + self._rebuild_iterator() + + @classmethod + def from_file( + cls, + source: Union[str, Path, list], + strategy: Literal["round_robin", "random", "weighted"] = "random", + ) -> "BaseRotator": + """ + Factory method to create a rotator from a file or a list. This method + is synchronous as it's typically used during setup. + """ + + items = [] + if isinstance(source, (str, Path)): + path = Path(source) + if not path.exists(): + raise FileNotFoundError(f"Source file not found: {path}") + + if path.suffix == ".json": + data = json.loads(path.read_text()) + items = [cls.rebuild_item(item) for item in data] + else: + lines = path.read_text().splitlines() + for line in lines: + line_content = line.split("#", 1)[0].strip() + if not line_content: + continue + items.append(cls.rebuild_item(line_content)) + elif isinstance(source, list): + items = [cls.rebuild_item(item) for item in source] + else: + raise RotatorError(f"Unsupported source type: {type(source)}") + + valid_items = [item for item in items if item is not None] + return cls(valid_items, strategy) + + @classmethod + @abstractmethod + def rebuild_item(cls, item: Any) -> Optional[T]: + """ + Abstract method to convert a raw item into a typed object. Must be + implemented by subclasses. 
+ """ + raise NotImplementedError + + def _rebuild_iterator(self) -> None: + """ + Reconstructs the internal iterator. This is a core logic method and + should be called only after acquiring a lock. + """ + if not self.items: + self._iterator = None + return + + if self.strategy == "round_robin": + self._iterator = itertools.cycle(self.items) + elif self.strategy == "random": + self._iterator = None + elif self.strategy == "weighted": + weights = [getattr(item, "weight", 1.0) for item in self.items] + self._iterator = self._weighted_cycle(self.items, weights) + else: + raise ValueError(f"Unsupported strategy: {self.strategy}") + + def _weighted_cycle(self, items: List[T], weights: List[float]) -> Iterator[T]: + """Creates an infinite iterator that yields items based on weights.""" + while True: + yield random.choices(items, weights=weights, k=1)[0] + + def next(self, *args, **kwargs) -> T: + """ + Retrieves the next item using a thread-safe mechanism. + + Returns: + The next item from the collection. + + Raises: + ValueError: If the rotator contains no items. + """ + with self._lock: + if not self.items: + raise ValueError("Rotator is empty.") + if self.strategy == "random": + return random.choice(self.items) + return next(self._iterator) + + def add(self, item: T) -> None: + """ + Adds a new item to the rotator in a thread-safe manner. + """ + with self._lock: + self.items.append(item) + self._rebuild_iterator() + + def remove(self, item: T) -> None: + """ + Removes an item from the rotator in a thread-safe manner. + """ + with self._lock: + self.items = [i for i in self.items if i != item] + self._rebuild_iterator() + + async def anext(self, *args, **kwargs) -> T: + """ + Retrieves the next item using a coroutine-safe mechanism. + + Returns: + The next item from the collection. + + Raises: + ValueError: If the rotator contains no items. 
+ """ + async with self._async_lock: + if not self.items: + raise ValueError("Rotator is empty.") + if self.strategy == "random": + return random.choice(self.items) + return next(self._iterator) + + async def aadd(self, item: T) -> None: + """ + Adds a new item to the rotator in a coroutine-safe manner. + """ + async with self._async_lock: + self.items.append(item) + self._rebuild_iterator() + + async def aremove(self, item: T) -> None: + """ + Removes an item from the rotator in a coroutine-safe manner. + """ + async with self._async_lock: + self.items = [i for i in self.items if i != item] + self._rebuild_iterator() + + def __len__(self) -> int: + return len(self.items) + + def __iter__(self) -> Iterator[T]: + return iter(self.items) + + +class ProxyRotator(BaseRotator[Proxy]): + """ + A unified rotator for managing `Proxy` objects, supporting both sync and + async operations. + """ + + @classmethod + def rebuild_item(cls, item: Any) -> Optional[Proxy]: + """Constructs a `Proxy` object from various input types.""" + try: + if isinstance(item, Proxy): + return item + if isinstance(item, dict): + return Proxy.from_dict(item) + if isinstance(item, str): + return Proxy.from_string(item) + except Exception: + return None + return None + + def mark_result(self, proxy: Proxy, success: bool, latency: Optional[float] = None) -> None: + """ + Thread-safely updates a proxy's performance statistics. + """ + with self._lock: + self._update_proxy_stats(proxy, success, latency) + + async def amark_result(self, proxy: Proxy, success: bool, latency: Optional[float] = None) -> None: + """ + Coroutine-safely updates a proxy's performance statistics. + """ + async with self._async_lock: + self._update_proxy_stats(proxy, success, latency) + + def _update_proxy_stats(self, proxy: Proxy, success: bool, latency: Optional[float] = None): + """Internal logic for updating proxy stats. 
Must be called from a locked context.""" + if success: + proxy.mark_success(latency) + else: + proxy.mark_failed() + + if self.strategy == "weighted": + self._rebuild_iterator() + + +class TLSIdentifierRotator(BaseRotator[TLSIdentifierTypes]): + """ + A unified rotator for TLS Identifiers, supporting both sync and async operations. + """ + + def __init__( + self, + items: Optional[Iterable[T]] = None, + strategy: Literal["round_robin", "random", "weighted"] = "round_robin", + ) -> None: + super().__init__(items or TLS_IDENTIFIER_TEMPLATES, strategy) + + @classmethod + def rebuild_item(cls, item: Any) -> Optional[TLSIdentifierTypes]: + """Processes a raw item to be used as a TLS identifier.""" + if isinstance(item, str): + return item + return None + + +class HeaderRotator(BaseRotator[Headers]): + """ + A unified rotator for managing `Headers` objects, supporting both sync and + async operations. + + This rotator can dynamically update the 'User-Agent' header for each request + without modifying the original header templates. + + Examples: + >>> common_headers = { + ... "Accept": "application/json", + ... "Accept-Language": "en-US,en;q=0.9", + ... "User-Agent": "Default-Bot/1.0" + ... } + >>> mobile_headers = { + ... "Accept": "*/*", + ... "User-Agent": "Default-Mobile/1.0", + ... "X-Custom-Header": "mobile" + ... 
} + >>> + >>> rotator = HeaderRotator.from_file([common_headers, mobile_headers]) + >>> + >>> # Get headers without modification + >>> h1 = rotator.next() + >>> print(h1['User-Agent']) + Default-Bot/1.0 + >>> + >>> # Get headers with a new, dynamic User-Agent + >>> h2 = rotator.next(user_agent="My-Custom-UA/2.0") + >>> print(h2['User-Agent']) + My-Custom-UA/2.0 + >>> + >>> # The original header set remains unchanged + >>> h3 = rotator.next() + >>> print(h3['User-Agent']) + Default-Mobile/1.0 + """ + + def __init__( + self, + items: Optional[Iterable[T]] = None, + strategy: Literal["round_robin", "random", "weighted"] = "random", + ) -> None: + super().__init__(items or HEADER_TEMPLATES, strategy) + + @classmethod + def rebuild_item(cls, item: HeaderTypes) -> Optional[Headers]: + """ + Constructs a `Headers` object from various input types. + + It can process existing `Headers` objects, dictionaries, or lists of tuples. + + Args: + item: The raw data to convert into a `Headers` object. + + Returns: + A `Headers` instance, or None if the input is invalid. + """ + try: + if isinstance(item, Headers): + return item + return Headers(item) + except Exception: + return None + + def next(self, user_agent: Optional[str] = None, **kwargs) -> Headers: + """ + Retrieves the next `Headers` object in a thread-safe manner and + optionally updates its User-Agent. + + Args: + user_agent: If provided, this string will replace the 'User-Agent' + header in the returned object. + + Returns: + A copy of the next `Headers` object, potentially with a modified User-Agent. + """ + headers = super().next() + headers_copy = headers.copy() + if not isinstance(headers_copy, Headers): + headers_copy = Headers(headers_copy) + if user_agent: + headers_copy["User-Agent"] = user_agent + return headers_copy + + async def anext(self, user_agent: Optional[str] = None, **kwargs) -> Headers: + """ + Retrieves the next `Headers` object in a coroutine-safe manner and + optionally updates its User-Agent. 
+ + Args: + user_agent: If provided, this string will replace the 'User-Agent' + header in the returned object. + + Returns: + A copy of the next `Headers` object, potentially with a modified User-Agent. + """ + headers = await super().anext() + headers_copy = headers.copy() + if not isinstance(headers_copy, Headers): + headers_copy = Headers(headers_copy) + if user_agent: + headers_copy["User-Agent"] = user_agent + return headers_copy diff --git a/tls_requests/models/tls.py b/tls_requests/models/tls.py index 7b6271e..f7f9a2a 100644 --- a/tls_requests/models/tls.py +++ b/tls_requests/models/tls.py @@ -4,15 +4,14 @@ from dataclasses import fields as get_fields from typing import Any, List, Mapping, Optional, Set, TypeVar, Union -from tls_requests.models.encoders import StreamEncoder -from tls_requests.models.libraries import TLSLibrary -from tls_requests.models.status_codes import StatusCodes -from tls_requests.settings import (DEFAULT_HEADERS, DEFAULT_TIMEOUT, - DEFAULT_TLS_DEBUG, DEFAULT_TLS_HTTP2, - DEFAULT_TLS_IDENTIFIER) -from tls_requests.types import (MethodTypes, TLSCookiesTypes, - TLSIdentifierTypes, TLSSessionId, URLTypes) -from tls_requests.utils import to_base64, to_bytes, to_json +from ..settings import (DEFAULT_HEADERS, DEFAULT_TIMEOUT, DEFAULT_TLS_DEBUG, + DEFAULT_TLS_HTTP2, DEFAULT_TLS_IDENTIFIER) +from ..types import (MethodTypes, TLSCookiesTypes, TLSIdentifierTypes, + TLSSessionId, URLTypes) +from ..utils import to_base64, to_bytes, to_json +from .encoders import StreamEncoder +from .libraries import TLSLibrary +from .status_codes import StatusCodes __all__ = [ "TLSClient", @@ -115,9 +114,7 @@ def initialize(cls): @classmethod def get_cookies(cls, session_id: TLSSessionId, url: str) -> "TLSResponse": - response = cls._send( - cls._getCookiesFromSession, {"sessionId": session_id, "url": url} - ) + response = cls._send(cls._getCookiesFromSession, {"sessionId": session_id, "url": url}) return response @classmethod @@ -179,16 +176,12 @@ class 
_BaseConfig: @classmethod def model_fields_set(cls) -> Set[str]: - return { - model_field.name - for model_field in get_fields(cls) - if not model_field.name.startswith("_") - } + return {model_field.name for model_field in get_fields(cls) if not model_field.name.startswith("_")} @classmethod def from_kwargs(cls, **kwargs: Any) -> T: model_fields_set = cls.model_fields_set() - return cls(**{k: v for k, v in kwargs.items() if k in model_fields_set and v}) + return cls(**{k: v for k, v in kwargs.items() if k in model_fields_set and v}) # noqa def to_dict(self) -> dict: return {k: v for k, v in asdict(self).items() if not k.startswith("_")} @@ -462,9 +455,7 @@ def to_dict(self) -> dict: self.requestBody = None self.timeoutSeconds = ( - int(self.timeoutSeconds) - if isinstance(self.timeoutSeconds, (float, int)) - else DEFAULT_TIMEOUT + int(self.timeoutSeconds) if isinstance(self.timeoutSeconds, (float, int)) else DEFAULT_TIMEOUT ) return asdict(self) @@ -509,7 +500,7 @@ def copy_with( if kwargs.get(k) is not None: current_kwargs[k] = kwargs[k] - return self.__class__(**current_kwargs) + return self.__class__(**current_kwargs) # noqa @classmethod def from_kwargs( @@ -542,11 +533,7 @@ def from_kwargs( isByteRequest=is_byte_request, proxyUrl=proxy, forceHttp1=bool(not http2), - timeoutSeconds=( - int(timeout) - if isinstance(timeout, (float, int)) - else DEFAULT_TIMEOUT - ), + timeoutSeconds=(int(timeout) if isinstance(timeout, (float, int)) else DEFAULT_TIMEOUT), insecureSkipVerify=not verify, tlsClientIdentifier=tls_identifier, withDebug=tls_debug, diff --git a/tls_requests/models/urls.py b/tls_requests/models/urls.py index a2502bd..3a5418c 100644 --- a/tls_requests/models/urls.py +++ b/tls_requests/models/urls.py @@ -1,104 +1,126 @@ from __future__ import annotations -from abc import ABC +import time from collections.abc import Mapping from typing import Any, ItemsView, KeysView, Union, ValuesView from urllib.parse import ParseResult, quote, unquote, urlencode, 
urlparse import idna -from tls_requests.exceptions import ProxyError, URLError, URLParamsError -from tls_requests.types import (URL_ALLOWED_PARAMS, ProxyTypes, URLParamTypes, - URLTypes) +from ..exceptions import ProxyError, URLError, URLParamsError +from ..types import URL_ALLOWED_PARAMS, ProxyTypes, URLParamTypes, URLTypes __all__ = ["URL", "URLParams", "Proxy"] -class URLParams(Mapping, ABC): - """URLParams +class URLParams(Mapping): + """ + A mapping-like object for managing URL query parameters. - Represents a mapping of URL parameters with utilities for normalization, encoding, and updating. - This class provides a dictionary-like interface for managing URL parameters, ensuring that keys - and values are properly validated and normalized. + This class provides a dictionary-like interface for URL parameters, + handling the normalization of keys and values into the correct string format + and encoding them into a query string. It supports multi-value parameters. Attributes: - - params (str): Returns the encoded URL parameters as a query string. - - Methods: - - update(params: URLParamTypes = None, **kwargs): Updates the current parameters with new ones. - - keys() -> KeysView: Returns a view of the parameter keys. - - values() -> ValuesView: Returns a view of the parameter values. - - items() -> ItemsView: Returns a view of the parameter key-value pairs. - - copy() -> URLParams: Returns a copy of the current instance. - - normalize(s: URL_ALLOWED_PARAMS): Normalizes a key or value to a string. - - Raises: - - URLParamsError: Raised for invalid keys, values, or parameter types during initialization or updates. + params (str): The URL-encoded query string representation of the parameters. 
- Example Usage: + Examples: >>> params = URLParams({'key1': 'value1', 'key2': ['value2', 'value3']}) >>> print(str(params)) 'key1=value1&key2=value2&key2=value3' - >>> params.update({'key3': 'value4'}) + >>> params.update({'key3': 4, 'active': True}) >>> print(params) - 'key1=value1&key2=value2&key2=value3&key3=value4' + 'key1=value1&key2=value2&key2=value3&key3=4&active=true' >>> 'key1' in params True """ def __init__(self, params: URLParamTypes = None, **kwargs): + """ + Initializes the URLParams object. + + Args: + params: A dictionary, another URLParams instance, or a list of tuples + to initialize the parameters. + **kwargs: Additional key-value pairs to add or overwrite parameters. + + Raises: + URLParamsError: If `params` is not a valid mapping type. + """ self._data = self._prepare(params, **kwargs) @property def params(self) -> str: + """Returns the encoded URL parameters as a query string.""" return str(self) def update(self, params: URLParamTypes = None, **kwargs): + """ + Updates the current parameters with new ones from a mapping or keyword args. + + Args: + params: A dictionary-like object of parameters to add. + **kwargs: Additional key-value pairs to add. 
+ """ self._data.update(self._prepare(params, **kwargs)) return self def keys(self) -> KeysView: + """Returns a view of the parameter keys.""" return self._data.keys() def values(self) -> ValuesView: + """Returns a view of the parameter values.""" return self._data.values() def items(self) -> ItemsView: + """Returns a view of the parameter key-value pairs.""" return self._data.items() def copy(self) -> URLParams: + """Returns a shallow copy of the current instance.""" return self.__class__(self._data.copy()) def __str__(self): + """Returns the URL-encoded string representation of the parameters.""" return urlencode(self._data, doseq=True) def __repr__(self): + """Returns the official string representation of the object.""" return "<%s: %s>" % (self.__class__.__name__, self.items()) def __contains__(self, key: Any) -> bool: + """Checks if a key exists in the parameters.""" return key in self._data def __setitem__(self, key, value): + """Sets a parameter key-value pair, normalizing the input.""" self._data.update(self._prepare({key: value})) def __getitem__(self, key): + """Retrieves a parameter value by its key.""" return self._data[key] def __delitem__(self, key): + """Deletes a parameter by its key.""" del self._data[key] def __iter__(self): + """Returns an iterator over the parameter keys.""" return (k for k in self.keys()) def __len__(self) -> int: + """Returns the number of unique parameter keys.""" return len(self._data) def __hash__(self) -> int: + """Returns the hash of the encoded parameter string.""" return hash(str(self)) def __eq__(self, other) -> bool: + """Checks for equality based on the encoded parameter string.""" if not isinstance(other, self.__class__): if isinstance(other, Mapping): other = self.__class__(other) @@ -107,6 +129,19 @@ def __eq__(self, other) -> bool: return bool(self.params == other.params) def _prepare(self, params: URLParamTypes = None, **kwargs) -> Mapping: + """ + Normalizes and prepares the input parameters. 
+ + Args: + params: A dictionary-like object of parameters. + **kwargs: Additional keyword arguments. + + Returns: + A mapping with normalized keys and values. + + Raises: + URLParamsError: If keys or values are of an invalid type. + """ params = params or {} if not isinstance(params, (dict, self.__class__)): raise URLParamsError("Invalid parameters.") @@ -125,6 +160,18 @@ def _prepare(self, params: URLParamTypes = None, **kwargs) -> Mapping: return params def normalize(self, s: URL_ALLOWED_PARAMS): + """ + Converts a supported type into a string. + + Args: + s: The value to normalize (str, bytes, int, float, bool). + + Returns: + The normalized string value. + + Raises: + URLParamsError: If the value type is not supported. + """ if not isinstance(s, (str, bytes, int, float, bool)): raise URLParamsError("Invalid parameters value type.") @@ -138,53 +185,41 @@ def normalize(self, s: URL_ALLOWED_PARAMS): class URL: - """URL + """ + A class for parsing, manipulating, and constructing URLs. - A utility class for parsing, manipulating, and constructing URLs. It integrates with the - `URLParams` class for managing query parameters and provides easy access to various components - of a URL, such as scheme, host, port, and path. + This class provides a structured way to interact with URL components, + integrating with `URLParams` for easy query string management. It handles + IDNA encoding for hostnames and ensures proper URL construction. Attributes: - - url (str): The raw or prepared URL string. - - params (URLParams): An instance of URLParams to manage query parameters. - - parsed (ParseResult): A `ParseResult` object containing the parsed components of the URL. - - auth (tuple): A tuple of (username, password) extracted from the URL. - - fragment (str): The fragment identifier of the URL. - - host (str): The hostname (IDNA-encoded if applicable). - - path (str): The path component of the URL. - - netloc (str): The network location (host:port if port is present). 
- - password (str): The password extracted from the URL. - - port (str): The port number of the URL. - - query (str): The query string, incorporating both existing and additional parameters. - - scheme (str): The URL scheme (e.g., "http", "https"). - - username (str): The username extracted from the URL. - - Methods: - - _prepare(url: Union[U, str, bytes]) -> str: Prepares and validates a URL string or bytes to ParseResult. - - _build(secure: bool = False) -> str: Constructs a URL string from its components. - - Raises: - - URLError: Raised when an invalid URL or component is encountered. - - Example Usage: + url (str): The full URL string. Can be set to re-parse. + params (URLParams): An object managing the URL's query parameters. + parsed (ParseResult): The result from `urllib.parse.urlparse`. + scheme (str): The URL scheme (e.g., "https"). + netloc (str): The network location part (e.g., "user:pass@host:port"). + host (str): The hostname, IDNA-encoded. + port (str): The port number as a string, if present. + path (str): The hierarchical path. + query (str): The complete query string, combining original and added params. + fragment (str): The fragment identifier. + username (str): The username for authentication. + password (str): The password for authentication. + auth (tuple): A (username, password) tuple. 
+ + Examples: >>> url = URL("https://example.com/path?q=1#fragment", params={"key": "value"}) >>> print(url.scheme) 'https' >>> print(url.host) 'example.com' >>> print(url.query) - 'q%3D1&key%3Dvalue' - >>> print(url.params) - 'key=value' + 'q=1&key=value' + >>> url.params.update({'key2': 'value2'}) - >>> print(url.url) - 'https://example.com/path?q%3D1&key%3Dvalue%26key2%3Dvalue2#fragment' + >>> from urllib.parse import unquote >>> print(unquote(url.url)) 'https://example.com/path?q=1&key=value&key2=value2#fragment' - >>> url.url = 'https://example.org/' - >>> print(unquote(url.url)) - 'https://example.org/?key=value&key2=value2' + >>> url.url = 'https://httpbin.org/get' >>> print(unquote(url.url)) 'https://httpbin.org/get?key=value&key2=value2' @@ -202,40 +237,58 @@ class URL: ) def __init__(self, url: URLTypes, params: URLParamTypes = None, **kwargs): + """ + Initializes the URL object. + + Args: + url: The URL string, bytes, or another URL object. + params: A dictionary-like object to be used as query parameters. + **kwargs: Additional keyword arguments for URLParams. + + Raises: + URLError: If the provided URL is invalid.
+ """ self._parsed = self._prepare(url) self._url = None self._params = URLParams(params) @property def url(self): + """The full, reconstructed URL string.""" if self._url is None: self._url = self._build(False) return self._url @url.setter def url(self, value): + """Allows setting a new URL, which will be parsed.""" self._parsed = self._prepare(value) self._url = self._build(False) @property def params(self): + """The `URLParams` object for managing query parameters.""" return self._params @params.setter def params(self, value): + """Sets a new `URLParams` object.""" self._url = None self._params = URLParams(value) @property def parsed(self) -> ParseResult: + """The `ParseResult` object from the standard library.""" return self._parsed @property def netloc(self) -> str: + """The network location, including host and port.""" return ":".join([self.host, self.port]) if self.port else self.host @property def query(self) -> str: + """The combined query string from the original URL and the `params`.""" query = "" if self.parsed.query and self.params.params: query = "&".join([quote(self.parsed.query), self.params.params]) @@ -246,17 +299,29 @@ def query(self) -> str: return query def __str__(self): + """Returns the full URL string with the real password.""" return self._build() def __repr__(self): + """Returns a representation of the URL with a secured password.""" return "<%s: %s>" % (self.__class__.__name__, unquote(self._build(True))) def _prepare(self, url: Union[T, str, bytes]) -> ParseResult: + """ + Validates, decodes, and parses the input URL. + + Args: + url: The URL to prepare. + + Returns: + A `ParseResult` object. + + Raises: + URLError: For invalid URL types or formats. 
+ """ if isinstance(url, bytes): url = url.decode("utf-8") - elif isinstance(url, self.__class__) or issubclass( - self.__class__, url.__class__ - ): + elif isinstance(url, self.__class__) or issubclass(self.__class__, url.__class__): url = str(url) if not isinstance(url, str): @@ -291,6 +356,15 @@ def _prepare(self, url: Union[T, str, bytes]) -> ParseResult: return parsed def _build(self, secure: bool = False) -> str: + """ + Constructs the URL string from its components. + + Args: + secure: If True, masks the password in the output. + + Returns: + The final URL string. + """ urls = [self.scheme, "://"] authority = self.netloc if self.username or self.password: @@ -318,36 +392,109 @@ def _build(self, secure: bool = False) -> str: class Proxy(URL): - """Proxy + """ + A specialized URL class for managing proxy configurations and performance. - A specialized subclass of `URL` designed to handle proxy URLs with specific schemes and additional - validations. The class restricts allowed schemes to "http", "https", "socks5", and "socks5h". It - also modifies the URL construction process to focus on proxy-specific requirements. + This class inherits from `URL` and extends it with features for proxy + rotation strategies, such as weighting, success/failure tracking, and + metadata. It restricts the allowed URL schemes to those common for proxies. Attributes: - - ALLOWED_SCHEMES (tuple): A tuple of allowed schemes for the proxy ("http", "https", "socks5", "socks5h"). - Raises: - - ProxyError: Raised when an invalid proxy or unsupported protocol is encountered. - - Example Usage: - >>> proxy = Proxy("http://user:pass@127.0.0.1:8080") - >>> print(proxy.scheme) - 'http' - >>> print(proxy.netloc) - '127.0.0.1:8080' - >>> print(proxy) + ALLOWED_SCHEMES (tuple): Allowed proxy schemes ('http', 'https', 'socks5', 'socks5h'). + weight (float): The weight of the proxy, used in selection algorithms. + region (Optional[str]): A geographical or logical region identifier. 
+ latency (Optional[float]): The last recorded latency in seconds. + success_rate (Optional[float]): A score indicating reliability (0.0 to 1.0). + meta (Dict[str, Any]): A dictionary for arbitrary user-defined data. + failures (int): A counter for consecutive connection failures. + last_used (Optional[float]): A timestamp of the last time the proxy was used. + + Examples: + >>> proxy = Proxy("http://user:pass@127.0.0.1:8080", weight=5.0, region="us-east") + >>> proxy.mark_failed() + >>> print(proxy.failures) + 1 + >>> print(proxy.weight) + 4.25 + >>> proxy.mark_success() + >>> data = proxy.to_dict() + >>> print(data['url']) 'http://user:pass@127.0.0.1:8080' - >>> print(proxy.__repr__()) - '' - - >>> socks5 = Proxy("socks5://127.0.0.1:8080") - >>> print(socks) - 'socks5://127.0.0.1:8080' """ ALLOWED_SCHEMES = ("http", "https", "socks5", "socks5h") + def __init__( + self, + url: URLTypes, + params: URLParamTypes = None, + *, + weight: float = 1.0, + region: Optional[str] = None, + latency: Optional[float] = None, + success_rate: Optional[float] = None, + meta: Optional[Dict[str, Any]] = None, + **kwargs, + ): + """ + Initializes the Proxy object. + + Args: + url: The proxy URL string, bytes, or another URL object. + params: URL parameters (rarely used for proxies). + weight: The initial weight for proxy selection (higher is more likely). + region: An identifier for the proxy's region. + latency: The initial or last known latency in seconds. + success_rate: A score from 0.0 to 1.0 indicating reliability. + meta: A dictionary for storing arbitrary user data. + **kwargs: Additional keyword arguments passed to the parent `URL` class. + + Raises: + ProxyError: If the URL is invalid or the scheme is not supported. 
+ """ + self._weight = weight or 1.0 + self.region = region + self.latency = latency + self.success_rate = success_rate + self.meta = meta or {} + self.failures: int = 0 + self.last_used: Optional[float] = None + super().__init__(url, **kwargs) + + def __repr__(self): + """Returns a secure representation of the proxy with its weight.""" + return "<%s: %s, weight=%s>" % ( + self.__class__.__name__, + unquote(self._build(True)), + getattr(self, "weight", "unset"), + ) + + @property + def weight(self) -> float: + return self._weight or 1.0 + + @weight.setter + def weight(self, weight: float) -> None: + try: + self._weight = float(weight) + except ValueError: + raise ProxyError("Weight must be an integer or float.") + def _prepare(self, url: ProxyTypes) -> ParseResult: + """ + Parses the proxy URL, ensuring it has a valid scheme and format. + + Overrides the parent `_prepare` to enforce proxy-specific rules. + + Args: + url: The proxy URL to prepare. + + Returns: + A `ParseResult` object containing only scheme and netloc. + + Raises: + ProxyError: If the URL is invalid or the scheme is not allowed. + """ try: if isinstance(url, bytes): url = url.decode("utf-8") @@ -355,6 +502,9 @@ def _prepare(self, url: ProxyTypes) -> ParseResult: if isinstance(url, str): url = url.strip() + if "://" not in str(url): + url = f"http://{url}" + parsed = super(Proxy, self)._prepare(url) if str(parsed.scheme).lower() not in self.ALLOWED_SCHEMES: raise ProxyError( @@ -362,12 +512,24 @@ def _prepare(self, url: ProxyTypes) -> ParseResult: % parsed.scheme ) + # Re-parse to create a clean object with only scheme and netloc return urlparse("%s://%s" % (parsed.scheme, parsed.netloc)) except URLError: raise ProxyError("Invalid proxy: %s" % url) def _build(self, secure: bool = False) -> str: - urls = [self.scheme, "://"] + """ + Constructs the proxy URL string. + + Overrides the parent `_build` to exclude path, query, and fragment. + + Args: + secure: If True, masks the password. 
+ + Returns: + The proxy URL string. + """ + urls = [self.scheme or "http", "://"] authority = self.netloc if self.username or self.password: userinfo = ":".join([self.username, self.password]) @@ -383,3 +545,119 @@ def _build(self, secure: bool = False) -> str: urls.append(authority) return "".join(urls) + + def mark_used(self): + """Sets the `last_used` timestamp to the current time.""" + self.last_used = time.time() + + def mark_failed(self): + """ + Records a connection failure. + + Increments the failure count and applies a decay factor to the weight. + """ + self.failures += 1 + self.weight = max(0.1, self.weight * 0.85) + + def mark_success(self, latency: Optional[float] = None): + """ + Records a connection success. + + Resets failure count, updates latency, and improves success rate and weight. + + Args: + latency: The observed connection latency in seconds for this success. + """ + if latency: + self.latency = latency + + self.failures = max(0, self.failures - 1) + self.success_rate = (self.success_rate or 1.0) * 0.95 + 0.05 + self.weight = min(10.0, self.weight * 1.05) + + def to_dict(self): + """ + Serializes the proxy's state to a dictionary. + + Returns: + A dictionary containing the proxy's URL and performance metrics. + """ + return { + "url": self.url, + "weight": self.weight, + "region": self.region, + "latency": self.latency, + "success_rate": self.success_rate, + "last_used": self.last_used, + } + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "Proxy": + """ + Creates a Proxy object from a dictionary. + + Args: + data: A dictionary containing a 'url' key and other optional + proxy attributes (`weight`, `region`, etc.). + + Returns: + A new `Proxy` instance. + + Raises: + ProxyError: If the 'url' key is missing from the dictionary. + """ + if "url" not in data: + raise ProxyError("Missing required key: 'url'. 
The proxy configuration dictionary must include a 'url'.") + + url = data.pop("url") + return cls( + url=url, + **data, + ) + + @classmethod + def from_string(cls, raw: str, separator: str = "|") -> "Proxy": + """ + Parses a proxy from a string with optional attributes. + + Handles various common formats for representing proxies in text files. + Comments (#) and blank lines should be handled by the calling code. + + Supported Formats: + - `http://user:pass@host:port` + - `socks5://host:port` + - `host:port` (defaults to http) + - `host:port|weight` + - `host:port|weight|region` + - `http://user:pass@host:port|weight|region` + + Args: + raw: The raw proxy string. + separator: The character used to separate attributes (default: '|'). + + Returns: + A new `Proxy` instance. + + Raises: + ProxyError: If the proxy string is empty or malformed. + """ + raw = raw.strip() + if not raw: + raise ProxyError("Empty proxy string.") + + parts = [p.strip() for p in raw.split(separator)] + url = parts[0] + weight = 1.0 + region = None + if len(parts) >= 2 and parts[1]: + try: + weight = float(parts[1]) + except Exception: + pass + if len(parts) >= 3 and parts[2]: + region = parts[2] + + if "://" not in url: + url = "http://" + url + + return cls(url, weight=weight, region=region) diff --git a/tls_requests/settings.py b/tls_requests/settings.py index ab80ff2..a561aef 100644 --- a/tls_requests/settings.py +++ b/tls_requests/settings.py @@ -9,7 +9,7 @@ DEFAULT_TLS_DEBUG = False DEFAULT_TLS_INSECURE_SKIP_VERIFY = False DEFAULT_TLS_HTTP2 = "auto" -DEFAULT_TLS_IDENTIFIER = "chrome_120" +DEFAULT_TLS_IDENTIFIER = "chrome_133" DEFAULT_HEADERS = { "accept": "*/*", "connection": "keep-alive", diff --git a/tls_requests/types.py b/tls_requests/types.py index ffe0b35..be2a0b1 100644 --- a/tls_requests/types.py +++ b/tls_requests/types.py @@ -8,7 +8,9 @@ from uuid import UUID if TYPE_CHECKING: # pragma: no cover - from .models import Cookies, Headers, Request # noqa: F401 + from .models import 
Headers # noqa: F401 + from .models import (Cookies, HeaderRotator, ProxyRotator, + TLSIdentifierRotator) AuthTypes = Optional[ Union[ @@ -19,7 +21,7 @@ ] ] URLTypes = Union["URL", str, bytes] -ProxyTypes = Union[str, bytes, "Proxy", "URL"] +ProxyTypes = Union[str, bytes, "Proxy", "URL", "ProxyRotator"] URL_ALLOWED_PARAMS = Union[str, bytes, int, float, bool] URLParamTypes = Optional[ Union[ @@ -35,68 +37,84 @@ ], ] ] -MethodTypes = Union[ - "Method", Literal["GET", "OPTIONS", "HEAD", "POST", "PUT", "PATCH", "DELETE"] -] +MethodTypes = Union["Method", Literal["GET", "OPTIONS", "HEAD", "POST", "PUT", "PATCH", "DELETE"]] ProtocolTypes = Optional[Union[Literal["auto", "http1", "http2"], bool]] HookTypes = Optional[Mapping[Literal["request", "response"], Sequence[Callable]]] TLSSession = Union["TLSSession", None] TLSSessionId = Union[str, UUID] TLSPayload = Union[dict, str, bytes, bytearray] TLSCookiesTypes = Optional[List[Dict[str, str]]] -TLSIdentifierTypes = Literal[ - "chrome_103", - "chrome_104", - "chrome_105", - "chrome_106", - "chrome_107", - "chrome_108", - "chrome_109", - "chrome_110", - "chrome_111", - "chrome_112", - "chrome_116_PSK", - "chrome_116_PSK_PQ", - "chrome_117", - "chrome_120", - "chrome_124", - "safari_15_6_1", - "safari_16_0", - "safari_ios_15_5", - "safari_ios_15_6", - "safari_ios_16_0", - "firefox_102", - "firefox_104", - "firefox_105", - "firefox_106", - "firefox_108", - "firefox_110", - "firefox_117", - "firefox_120", - "opera_89", - "opera_90", - "opera_91", - "okhttp4_android_7", - "okhttp4_android_8", - "okhttp4_android_9", - "okhttp4_android_10", - "okhttp4_android_11", - "okhttp4_android_12", - "okhttp4_android_13", - "zalando_ios_mobile", - "zalando_android_mobile", - "nike_ios_mobile", - "nike_android_mobile", - "mms_ios", - "mms_ios_2", - "mms_ios_3", - "mesh_ios", - "mesh_ios_2", - "mesh_android", - "mesh_android_2", - "confirmed_ios", - "confirmed_android", - "confirmed_android_2", +TLSIdentifierTypes = Union[ + Literal[ + 
"chrome_103", + "chrome_104", + "chrome_105", + "chrome_106", + "chrome_107", + "chrome_108", + "chrome_109", + "chrome_110", + "chrome_111", + "chrome_112", + "chrome_116_PSK", + "chrome_116_PSK_PQ", + "chrome_117", + "chrome_120", + "chrome_124", + "chrome_130_PSK", + "chrome_131", + "chrome_131_PSK", + "chrome_133", + "chrome_133_PSK", + "confirmed_android", + "confirmed_android_2", + "confirmed_ios", + "firefox_102", + "firefox_104", + "firefox_105", + "firefox_106", + "firefox_108", + "firefox_110", + "firefox_117", + "firefox_120", + "firefox_123", + "firefox_132", + "firefox_133", + "mesh_android", + "mesh_android_1", + "mesh_android_2", + "mesh_ios", + "mesh_ios_1", + "mesh_ios_2", + "mms_ios", + "mms_ios_1", + "mms_ios_2", + "mms_ios_3", + "nike_android_mobile", + "nike_ios_mobile", + "okhttp4_android_10", + "okhttp4_android_11", + "okhttp4_android_12", + "okhttp4_android_13", + "okhttp4_android_7", + "okhttp4_android_8", + "okhttp4_android_9", + "opera_89", + "opera_90", + "opera_91", + "safari_15_6_1", + "safari_16_0", + "safari_ipad_15_6", + "safari_ios_15_5", + "safari_ios_15_6", + "safari_ios_16_0", + "safari_ios_17_0", + "safari_ios_18_0", + "safari_ios_18_5", + "zalando_android_mobile", + "zalando_ios_mobile", + ], + "TLSIdentifierRotator", ] AnyList = List[ @@ -113,6 +131,7 @@ HeaderTypes = Optional[ Union[ "Headers", + "HeaderRotator", Mapping[str, Union[str, int, float]], Mapping[bytes, bytes], AnyList, @@ -136,9 +155,7 @@ RequestFileValue = Union[ FileContent, # file (or file path, str and bytes) Tuple[ByteOrStr, FileContent], # filename, file (or file path, str and bytes)) - Tuple[ - ByteOrStr, FileContent, ByteOrStr - ], # filename, file (or file path, str and bytes)), content type + Tuple[ByteOrStr, FileContent, ByteOrStr], # filename, file (or file path, str and bytes)), content type ] RequestData = Mapping[str, Any] RequestJson = Mapping[str, Any] diff --git a/tls_requests/utils.py b/tls_requests/utils.py index ba36416..1be6af6 100644 --- 
a/tls_requests/utils.py +++ b/tls_requests/utils.py @@ -30,9 +30,7 @@ def import_module(name: Union[str, list[str]]): jsonlib = json -def get_logger( - name: str = "TLSRequests", level: int | str = logging.INFO -) -> logging.Logger: +def get_logger(name: str = "TLSRequests", level: int | str = logging.INFO) -> logging.Logger: logging.basicConfig(format=FORMAT, datefmt=DATE_FORMAT, level=level) logger = logging.getLogger(name) logger.setLevel(level)