feat: timezone="auto" derives the zone from the proxy egress IP
A proxy in a different country paired with the host timezone is the classic timezone_mismatch signal, so a session with a proxy and no explicit timezone now resolves the zone automatically.

- Discover the egress IP through the proxy (SOCKS via requests[socks]) and map it to an IANA zone with an offline mmdb (daijro/geoip-all-in-one; downloaded and cached like the Firefox binary; GPL, so not vendored).
- Precedence: an explicit zone wins; ""+proxy and "auto"+proxy resolve from the egress IP; ""/"auto" without a proxy stay on the host timezone; "host"/"local" force the host timezone.
- Fail early when a proxy is set but the zone cannot be resolved; never fall back silently to the host timezone.
- New dependencies: requests[socks], maxminddb, tzdata (zoneinfo ships no database on Windows).
- resolve_session_timezone / ensure_geoip_mmdb are exported for integrations.
This commit is contained in:
@@ -141,6 +141,33 @@ with InvisiblePlaywright(proxy=proxy) as browser:
|
|||||||
|
|
||||||
Schemes supported: `socks5`, `socks4`, `http`, `https`. Auth works on all of them (SOCKS5 via patched `nsProtocolProxyService.cpp`, HTTP/HTTPS via Playwright). DNS is routed through the proxy by default, no local leak.
|
Schemes supported: `socks5`, `socks4`, `http`, `https`. Auth works on all of them (SOCKS5 via patched `nsProtocolProxyService.cpp`, HTTP/HTTPS via Playwright). DNS is routed through the proxy by default, no local leak.
|
||||||
|
|
||||||
|
### Timezone
|
||||||
|
|
||||||
|
The browser timezone follows `timezone=`:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# default: with a proxy, the timezone is auto-derived from the proxy egress IP
|
||||||
|
with InvisiblePlaywright(proxy=proxy) as browser:
|
||||||
|
...
|
||||||
|
|
||||||
|
# explicit IANA zone always wins
|
||||||
|
with InvisiblePlaywright(proxy=proxy, timezone="America/New_York") as browser:
|
||||||
|
...
|
||||||
|
|
||||||
|
# opt out and keep the host timezone even behind a proxy
|
||||||
|
with InvisiblePlaywright(proxy=proxy, timezone="host") as browser:
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
| `timezone=` | with proxy | without proxy |
|
||||||
|
|---|---|---|
|
||||||
|
| `""` (default) | auto-derived from egress IP | host timezone |
|
||||||
|
| `"auto"` | auto-derived from egress IP | host timezone |
|
||||||
|
| `"Area/City"` | that zone | that zone |
|
||||||
|
| `"host"` / `"local"` | host timezone | host timezone |
|
||||||
|
|
||||||
|
A proxy in a different country paired with the host timezone is the classic `timezone_mismatch` signal, so a proxy with no explicit timezone now resolves automatically. The egress IP is looked up through the proxy and mapped to its IANA zone with an offline database ([`daijro/geoip-all-in-one`](https://github.com/daijro/geoip-all-in-one)), which is downloaded and cached on first use. If a proxy is set but the zone can't be resolved, the launch raises `GeoTimezoneError` rather than silently falling back to the host zone. To skip the lookup, pass an explicit IANA zone (`timezone="Area/City"`) or `timezone="host"`. Point `STEALTHFOX_GEOIP_MMDB` at your own `.mmdb` to skip the download.
|
||||||
|
|
||||||
### Pinning specific fingerprint fields
|
### Pinning specific fingerprint fields
|
||||||
|
|
||||||
By default everything comes from `seed`. To force specific values while the rest stays seed-derived:
|
By default everything comes from `seed`. To force specific values while the rest stays seed-derived:
|
||||||
|
|||||||
+3
-1
@@ -22,7 +22,9 @@ classifiers = [
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"playwright>=1.40",
|
"playwright>=1.40",
|
||||||
"platformdirs>=4",
|
"platformdirs>=4",
|
||||||
"requests>=2.31",
|
"requests[socks]>=2.31",
|
||||||
|
"maxminddb>=2.2",
|
||||||
|
"tzdata>=2024.1",
|
||||||
"tqdm>=4.66",
|
"tqdm>=4.66",
|
||||||
"pywin32>=306; sys_platform == 'win32'",
|
"pywin32>=306; sys_platform == 'win32'",
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -17,7 +17,8 @@ Quickstart:
|
|||||||
"""
|
"""
|
||||||
from .config import get_default_args, get_default_stealth_prefs
|
from .config import get_default_args, get_default_stealth_prefs
|
||||||
from .constants import BINARY_VERSION, FIREFOX_UPSTREAM_VERSION
|
from .constants import BINARY_VERSION, FIREFOX_UPSTREAM_VERSION
|
||||||
from .download import ensure_binary
|
from ._geo import GeoTimezoneError, resolve_session_timezone
|
||||||
|
from .download import ensure_binary, ensure_geoip_mmdb
|
||||||
from .launcher import InvisiblePlaywright
|
from .launcher import InvisiblePlaywright
|
||||||
|
|
||||||
from importlib.metadata import PackageNotFoundError, version as _pkg_version
|
from importlib.metadata import PackageNotFoundError, version as _pkg_version
|
||||||
@@ -32,8 +33,11 @@ except PackageNotFoundError:
|
|||||||
__all__ = [
|
__all__ = [
|
||||||
"InvisiblePlaywright",
|
"InvisiblePlaywright",
|
||||||
"ensure_binary",
|
"ensure_binary",
|
||||||
|
"ensure_geoip_mmdb",
|
||||||
"get_default_stealth_prefs",
|
"get_default_stealth_prefs",
|
||||||
"get_default_args",
|
"get_default_args",
|
||||||
|
"resolve_session_timezone",
|
||||||
|
"GeoTimezoneError",
|
||||||
"BINARY_VERSION",
|
"BINARY_VERSION",
|
||||||
"FIREFOX_UPSTREAM_VERSION",
|
"FIREFOX_UPSTREAM_VERSION",
|
||||||
"__version__",
|
"__version__",
|
||||||
|
|||||||
@@ -0,0 +1,160 @@
|
|||||||
|
"""Resolve the session timezone from the proxy egress IP (``timezone="auto"``).
|
||||||
|
|
||||||
|
Approach B: discover the egress IP with one HTTP request routed *through the
|
||||||
|
configured proxy*, then map IP → IANA timezone with an offline mmdb
|
||||||
|
(``daijro/geoip-all-in-one``, downloaded + cached by ``download.py``).
|
||||||
|
|
||||||
|
Precedence (see ``resolve_session_timezone``):
|
||||||
|
|
||||||
|
"host" / "local" → "" force host TZ (escape hatch)
|
||||||
|
explicit IANA → unchanged explicit always wins
|
||||||
|
"" + no proxy → "" host TZ (default, unchanged behaviour)
|
||||||
|
"" + proxy → egress NEW default: a proxy with no timezone is
|
||||||
|
exactly the timezone_mismatch trap, so we
|
||||||
|
auto-resolve it.
|
||||||
|
"auto" + no proxy → "" nothing to resolve, fall back to host TZ
|
||||||
|
"auto" + proxy → egress
|
||||||
|
|
||||||
|
When a proxy IS set we fail loudly rather than silently fall back to the host
|
||||||
|
TZ — a foreign proxy paired with the host timezone is the precise signal
|
||||||
|
detectors flag as ``timezone_mismatch``.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import ipaddress
|
||||||
|
from typing import Any, Dict, Optional
|
||||||
|
from urllib.parse import quote
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
|
||||||
|
class GeoTimezoneError(RuntimeError):
    """Error for a failed egress-based timezone lookup.

    Signals that no valid IANA zone could be derived for the session, e.g.
    when ``timezone="auto"`` is requested behind a proxy.
    """
|
||||||
|
|
||||||
|
|
||||||
|
# Plain-text IP echo endpoints (each returns just the caller's public IP).
# ``discover_egress_ip`` walks this tuple in order and stops at the first
# endpoint whose response parses as an IP address.
_IP_ECHO_ENDPOINTS = (
    "https://api.ipify.org",
    "https://icanhazip.com",
    "https://checkip.amazonaws.com",
)

# URL prefixes that denote a SOCKS proxy.
# NOTE(review): not referenced by the code visible in this module — confirm
# whether another module uses it.
_SOCKS_SCHEMES = ("socks5://", "socks4://", "socks://")
|
||||||
|
|
||||||
|
|
||||||
|
def _proxy_is_set(proxy: Optional[Dict[str, str]]) -> bool:
|
||||||
|
if not proxy:
|
||||||
|
return False
|
||||||
|
server = (proxy.get("server") or "").strip()
|
||||||
|
return bool(server) and server.lower() != "direct://"
|
||||||
|
|
||||||
|
|
||||||
|
def _proxies_for_requests(proxy: Dict[str, str]) -> Dict[str, str]:
|
||||||
|
"""Translate our proxy dict into a ``requests`` proxies mapping.
|
||||||
|
|
||||||
|
SOCKS5 uses the ``socks5h`` scheme so DNS is resolved proxy-side (matches
|
||||||
|
``network.proxy.socks_remote_dns=True`` in the Firefox path). HTTP/HTTPS
|
||||||
|
pass through unchanged. Credentials are URL-encoded.
|
||||||
|
"""
|
||||||
|
server = (proxy.get("server") or "").strip()
|
||||||
|
low = server.lower()
|
||||||
|
if low.startswith("socks5://") or low.startswith("socks://"):
|
||||||
|
scheme = "socks5h"
|
||||||
|
elif low.startswith("socks4://"):
|
||||||
|
scheme = "socks4"
|
||||||
|
elif low.startswith("https://"):
|
||||||
|
scheme = "https"
|
||||||
|
else:
|
||||||
|
scheme = "http"
|
||||||
|
|
||||||
|
host_port = server.split("://", 1)[1] if "://" in server else server
|
||||||
|
user = proxy.get("username") or ""
|
||||||
|
pwd = proxy.get("password") or ""
|
||||||
|
if user:
|
||||||
|
auth = f"{quote(user, safe='')}:{quote(pwd, safe='')}@"
|
||||||
|
else:
|
||||||
|
auth = ""
|
||||||
|
url = f"{scheme}://{auth}{host_port}"
|
||||||
|
return {"http": url, "https": url}
|
||||||
|
|
||||||
|
|
||||||
|
def discover_egress_ip(
    proxy: Dict[str, str], *, timeout: float = 10.0
) -> str:
    """Return the public IP seen when routing through ``proxy``.

    Each URL in ``_IP_ECHO_ENDPOINTS`` is requested in turn through the
    proxy, with ``timeout`` passed to every individual request. The first
    response that has a success status and whose stripped body parses as an
    IP address is returned.

    Raises :class:`GeoTimezoneError` if no endpoint yields a valid IP. The
    last underlying failure is attached as ``__cause__`` so the traceback
    shows why the final attempt failed (SOCKS support requires
    ``requests[socks]`` / PySocks).
    """
    proxies = _proxies_for_requests(proxy)
    last_err: Optional[Exception] = None
    for url in _IP_ECHO_ENDPOINTS:
        try:
            resp = requests.get(url, proxies=proxies, timeout=timeout)
            resp.raise_for_status()
            ip = resp.text.strip()
            ipaddress.ip_address(ip)  # validate (raises ValueError if not an IP)
        except Exception as exc:  # noqa: BLE001 - try the next endpoint
            # Deliberately broad: any failure on one endpoint (network,
            # HTTP status, junk body) just means "try the next one".
            last_err = exc
            continue
        return ip
    raise GeoTimezoneError(
        f"could not discover the proxy egress IP via {len(_IP_ECHO_ENDPOINTS)} "
        f"endpoints (last error: {last_err!r}). For SOCKS proxies make sure "
        f"requests[socks] / PySocks is installed."
    ) from last_err
|
||||||
|
|
||||||
|
|
||||||
|
def ip_to_timezone(ip: str, mmdb_path: Any) -> str:
    """Map ``ip`` to its IANA timezone using the offline mmdb.

    Opens the database at ``mmdb_path``, reads the ``location.time_zone``
    field of the record for ``ip`` and validates the value by constructing
    a :class:`zoneinfo.ZoneInfo` from it.

    Raises :class:`GeoTimezoneError` if the IP is absent from the DB, if the
    record carries no usable ``location.time_zone`` value, or if that value
    is not a loadable IANA zone name.
    """
    import maxminddb

    with maxminddb.open_database(str(mmdb_path)) as reader:
        record = reader.get(ip)
    if not record:
        raise GeoTimezoneError(f"egress IP {ip} not present in the geoip database")

    # Be defensive about the record shape: a non-dict record or a non-dict
    # "location" entry is treated as "no timezone" instead of raising
    # AttributeError.
    location = record.get("location") if isinstance(record, dict) else None
    tz = location.get("time_zone") if isinstance(location, dict) else None
    if not tz:
        raise GeoTimezoneError(f"no timezone for egress IP {ip} in the geoip database")

    from zoneinfo import ZoneInfo, ZoneInfoNotFoundError

    try:
        if not isinstance(tz, str):
            raise TypeError(f"expected a string, got {type(tz).__name__}")
        ZoneInfo(tz)
    except (ZoneInfoNotFoundError, ValueError, TypeError, OSError) as exc:
        # OSError is included because some Python versions raise e.g.
        # IsADirectoryError for region-only keys such as "America" instead
        # of ZoneInfoNotFoundError.
        raise GeoTimezoneError(
            f"geoip returned an invalid IANA zone {tz!r} for {ip}: {exc}"
        ) from exc
    return tz
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_session_timezone(
    timezone: str, proxy: Optional[Dict[str, str]]
) -> str:
    """Turn the user's ``timezone`` setting into an IANA zone, or ``""`` for host TZ.

    Decision order (comparisons are case-insensitive and ignore surrounding
    whitespace):

    1. ``"host"`` / ``"local"`` → ``""`` (host timezone, even behind a proxy).
    2. any other non-empty value except ``"auto"`` → returned stripped.
    3. ``""`` / ``"auto"`` with no usable proxy → ``""``.
    4. ``""`` / ``"auto"`` with a proxy → zone derived from the egress IP.

    In case 4 failures are not swallowed: :class:`GeoTimezoneError` from the
    egress lookup propagates, so a proxied session never silently keeps the
    host timezone.
    """
    requested = (timezone or "").strip()
    keyword = requested.lower()

    if keyword in ("host", "local"):
        return ""
    if requested and keyword != "auto":
        # An explicit zone always wins over any proxy-based resolution.
        return requested
    if not _proxy_is_set(proxy):
        return ""

    # From here on a proxy is configured and the zone is "" or "auto".
    assert proxy is not None
    # Imported at call time rather than at module import.
    from .download import ensure_geoip_mmdb

    # Discover the egress IP first so a dead proxy fails before any
    # database download is attempted.
    egress_ip = discover_egress_ip(proxy)
    return ip_to_timezone(egress_ip, ensure_geoip_mmdb())
|
||||||
@@ -9,6 +9,7 @@ from typing import Any, Dict, Optional, Union
|
|||||||
from playwright.async_api import Browser, BrowserContext, Playwright, async_playwright
|
from playwright.async_api import Browser, BrowserContext, Playwright, async_playwright
|
||||||
|
|
||||||
from ._fpforge import Profile, generate_profile
|
from ._fpforge import Profile, generate_profile
|
||||||
|
from ._geo import resolve_session_timezone
|
||||||
from ._headless import make_virtual_display
|
from ._headless import make_virtual_display
|
||||||
from ._proxy import configure_proxy as _configure_proxy_shared
|
from ._proxy import configure_proxy as _configure_proxy_shared
|
||||||
from .download import ensure_binary
|
from .download import ensure_binary
|
||||||
@@ -75,6 +76,13 @@ class InvisiblePlaywright:
|
|||||||
|
|
||||||
async def __aenter__(self) -> Union[Browser, BrowserContext]:
|
async def __aenter__(self) -> Union[Browser, BrowserContext]:
|
||||||
import sys as _sys
|
import sys as _sys
|
||||||
|
# Resolve timezone="auto" (and the proxy-set-but-unset default) to a
|
||||||
|
# concrete IANA zone before anything reads self._timezone. Run the
|
||||||
|
# blocking geo lookup off the event loop. Fail-early if a proxy is set
|
||||||
|
# but the egress zone can't be resolved.
|
||||||
|
self._timezone = await asyncio.to_thread(
|
||||||
|
resolve_session_timezone, self._timezone, self._proxy
|
||||||
|
)
|
||||||
executable = self._binary_path or ensure_binary()
|
executable = self._binary_path or ensure_binary()
|
||||||
prefs = translate_profile_to_prefs(
|
prefs = translate_profile_to_prefs(
|
||||||
self._profile,
|
self._profile,
|
||||||
|
|||||||
@@ -46,3 +46,19 @@ BINARY_ENTRY_REL = {
|
|||||||
RELEASE_URL_TEMPLATE = (
|
RELEASE_URL_TEMPLATE = (
|
||||||
"https://github.com/feder-cr/invisible_playwright/releases/download/{tag}/{asset}"
|
"https://github.com/feder-cr/invisible_playwright/releases/download/{tag}/{asset}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# ─────────────────────────────────────────────────────────────────────────
# GeoIP database (timezone="auto" → resolve IANA zone from proxy egress IP)
# ─────────────────────────────────────────────────────────────────────────
# daijro/geoip-all-in-one merges IP2Location LITE + GeoLite2 + DB-IP into a
# single mmdb (country ISO + coordinates + IANA timezone via tzfpy), rebuilt
# weekly. GPL-3.0, so we DOWNLOAD it at runtime into the user cache (like the
# Firefox binary) rather than bundling it into this MIT package. Pinned to a
# known-good weekly tag; bump to refresh. The `-all` variant covers IPv4+IPv6.

# GitHub "owner/name" of the upstream project. NOTE(review): the URL template
# below spells the repo out literally rather than using this constant.
GEOIP_REPO: str = "daijro/geoip-all-in-one"
# Pinned release tag; substituted for ``{tag}`` in the URL template and used
# as the per-version cache directory name.
GEOIP_MMDB_VERSION: str = "2026.06.03"
# Release asset to download; substituted for ``{asset}`` in the URL template.
GEOIP_ASSET: str = "geoip-aio-all.mmdb.zip"
# File name of the database expected after extraction.
GEOIP_MMDB_NAME: str = "geoip-aio-all.mmdb"
# Download URL pattern with ``{tag}`` and ``{asset}`` placeholders.
GEOIP_RELEASE_URL_TEMPLATE: str = (
    "https://github.com/daijro/geoip-all-in-one/releases/download/{tag}/{asset}"
)
|
||||||
|
|||||||
@@ -18,6 +18,10 @@ from .constants import (
|
|||||||
ARCHIVE_NAME,
|
ARCHIVE_NAME,
|
||||||
BINARY_ENTRY_REL,
|
BINARY_ENTRY_REL,
|
||||||
BINARY_VERSION,
|
BINARY_VERSION,
|
||||||
|
GEOIP_ASSET,
|
||||||
|
GEOIP_MMDB_NAME,
|
||||||
|
GEOIP_MMDB_VERSION,
|
||||||
|
GEOIP_RELEASE_URL_TEMPLATE,
|
||||||
RELEASE_URL_TEMPLATE,
|
RELEASE_URL_TEMPLATE,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -151,3 +155,49 @@ def ensure_binary(version: str = BINARY_VERSION) -> Path:
|
|||||||
if not entry.exists():
|
if not entry.exists():
|
||||||
raise RuntimeError(f"binary not found after extraction: {entry}")
|
raise RuntimeError(f"binary not found after extraction: {entry}")
|
||||||
return entry
|
return entry
|
||||||
|
|
||||||
|
|
||||||
|
# ─────────────────────────────────────────────────────────────────────────
|
||||||
|
# GeoIP mmdb (used by timezone="auto" to map proxy egress IP → IANA zone)
|
||||||
|
# ─────────────────────────────────────────────────────────────────────────
|
||||||
|
def geoip_mmdb_path(version: str = GEOIP_MMDB_VERSION) -> Path:
    """Return where the extracted geoip mmdb for ``version`` lives in the cache.

    The path is ``<cache_root>/geoip/<version>/<GEOIP_MMDB_NAME>``; nothing
    is created or checked on disk.
    """
    version_dir = cache_root() / "geoip" / version
    return version_dir / GEOIP_MMDB_NAME
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_geoip_mmdb(version: str = GEOIP_MMDB_VERSION) -> Path:
    """Return a path to the geoip mmdb, downloading + caching it if needed.

    Set ``STEALTHFOX_GEOIP_MMDB`` to point at a user-supplied mmdb (or a test
    fixture) to skip the download entirely. Otherwise the pinned weekly build
    of ``daijro/geoip-all-in-one`` is fetched from GitHub Releases (public, no
    token) into the user cache and unzipped once.

    The archive is downloaded and extracted in a temporary directory created
    next to the final location, and the database is then moved into place
    with a single ``os.replace``. An interrupted download or extraction
    therefore never leaves a partial file at the cached path, and the cached
    file always carries the expected name, so later calls hit the cache.

    Raises ``RuntimeError`` if the override does not point to an existing
    file or if the archive contains no ``.mmdb`` file.
    """
    override = os.environ.get("STEALTHFOX_GEOIP_MMDB")
    if override:
        p = Path(override)
        # is_file() rather than exists(): a directory is not a usable mmdb.
        if not p.is_file():
            raise RuntimeError(
                f"STEALTHFOX_GEOIP_MMDB points to a missing file: {p}"
            )
        return p

    dst = geoip_mmdb_path(version)
    if dst.exists():
        return dst

    url = GEOIP_RELEASE_URL_TEMPLATE.format(tag=version, asset=GEOIP_ASSET)
    dst.parent.mkdir(parents=True, exist_ok=True)
    # Stage inside the cache directory so the final os.replace stays on one
    # filesystem (a rename across filesystems would fail).
    with tempfile.TemporaryDirectory(dir=dst.parent) as td:
        staging = Path(td)
        archive = staging / GEOIP_ASSET
        _download_file(url, archive)

        unpacked = staging / "unpacked"
        unpacked.mkdir()
        _extract(archive, unpacked)

        extracted = unpacked / GEOIP_MMDB_NAME
        if not extracted.exists():
            # The asset name inside the zip may differ from GEOIP_MMDB_NAME —
            # fall back to the first .mmdb the archive produced.
            candidates = sorted(unpacked.rglob("*.mmdb"))
            if not candidates:
                raise RuntimeError(
                    f"geoip mmdb not found after extraction in {dst.parent}"
                )
            extracted = candidates[0]

        # Publish under the canonical name in one step.
        os.replace(extracted, dst)
    return dst
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ from typing import Any, Dict, Optional, Union
|
|||||||
from playwright.sync_api import Browser, BrowserContext, Playwright, sync_playwright
|
from playwright.sync_api import Browser, BrowserContext, Playwright, sync_playwright
|
||||||
|
|
||||||
from ._fpforge import Profile, generate_profile
|
from ._fpforge import Profile, generate_profile
|
||||||
|
from ._geo import resolve_session_timezone
|
||||||
from ._headless import make_virtual_display
|
from ._headless import make_virtual_display
|
||||||
from ._proxy import configure_proxy as _configure_proxy_shared
|
from ._proxy import configure_proxy as _configure_proxy_shared
|
||||||
from .download import ensure_binary
|
from .download import ensure_binary
|
||||||
@@ -178,6 +179,10 @@ class InvisiblePlaywright:
|
|||||||
self._virtual_display: Any = None
|
self._virtual_display: Any = None
|
||||||
|
|
||||||
def __enter__(self) -> Union[Browser, BrowserContext]:
|
def __enter__(self) -> Union[Browser, BrowserContext]:
|
||||||
|
# Resolve timezone="auto" (and the proxy-set-but-unset default) to a
|
||||||
|
# concrete IANA zone before anything reads self._timezone. Fail-early
|
||||||
|
# if a proxy is set but the egress zone can't be resolved.
|
||||||
|
self._timezone = resolve_session_timezone(self._timezone, self._proxy)
|
||||||
executable = self._binary_path or ensure_binary()
|
executable = self._binary_path or ensure_binary()
|
||||||
prefs = self._build_prefs()
|
prefs = self._build_prefs()
|
||||||
playwright_proxy = _configure_proxy_shared(self._proxy, prefs)
|
playwright_proxy = _configure_proxy_shared(self._proxy, prefs)
|
||||||
|
|||||||
@@ -0,0 +1,263 @@
|
|||||||
|
"""Unit tests for `invisible_playwright._geo` (timezone="auto" resolution).
|
||||||
|
|
||||||
|
Covers: the precedence policy (resolve_session_timezone), proxy→requests
|
||||||
|
translation, egress IP discovery (mocked HTTP), and IP→IANA mapping (mocked
|
||||||
|
mmdb). No real network or mmdb is touched.
|
||||||
|
"""
|
||||||
|
import sys
|
||||||
|
import types
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from invisible_playwright import _geo
|
||||||
|
from invisible_playwright._geo import (
|
||||||
|
GeoTimezoneError,
|
||||||
|
_proxies_for_requests,
|
||||||
|
_proxy_is_set,
|
||||||
|
discover_egress_ip,
|
||||||
|
ip_to_timezone,
|
||||||
|
resolve_session_timezone,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Proxy dicts shared by the tests below: one SOCKS5 and one HTTP gateway,
# both with credentials.
SOCKS = {"server": "socks5://gw.example:1080", "username": "u", "password": "p"}
HTTP = {"server": "http://gw.example:8080", "username": "u", "password": "p"}
|
||||||
|
|
||||||
|
|
||||||
|
# ──────────────────────────────────────────────────────────────────────
|
||||||
|
# _proxy_is_set
|
||||||
|
# ──────────────────────────────────────────────────────────────────────
|
||||||
|
@pytest.mark.unit
@pytest.mark.parametrize(
    "proxy,expected",
    [
        (None, False),
        ({}, False),
        ({"server": ""}, False),
        ({"server": "  "}, False),
        ({"server": "direct://"}, False),
        ({"server": "DIRECT://"}, False),
        ({"server": "socks5://h:1"}, True),
        ({"server": "http://h:8080"}, True),
    ],
)
def test_proxy_is_set(proxy, expected):
    """Only a non-blank, non-``direct://`` server counts as a proxy."""
    outcome = _proxy_is_set(proxy)
    assert outcome is expected
|
||||||
|
|
||||||
|
|
||||||
|
# ──────────────────────────────────────────────────────────────────────
|
||||||
|
# _proxies_for_requests — scheme + credential translation
|
||||||
|
# ──────────────────────────────────────────────────────────────────────
|
||||||
|
@pytest.mark.unit
def test_proxies_socks5_uses_socks5h_remote_dns():
    """A ``socks5://`` server becomes ``socks5h://`` for both URL schemes."""
    mapping = _proxies_for_requests(SOCKS)
    assert mapping["http"] == "socks5h://u:p@gw.example:1080"
    assert mapping["https"] == mapping["http"]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_proxies_socks4_scheme():
    """A ``socks4://`` server keeps its scheme unchanged."""
    mapping = _proxies_for_requests({"server": "socks4://gw:1080"})
    assert mapping["http"] == "socks4://gw:1080"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_proxies_http_and_https_schemes():
    """``http://`` and ``https://`` servers pass through with their scheme."""
    http_mapping = _proxies_for_requests(HTTP)
    assert http_mapping["http"] == "http://u:p@gw.example:8080"

    https_mapping = _proxies_for_requests({"server": "https://gw:8443"})
    assert https_mapping["https"] == "https://gw:8443"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_proxies_no_scheme_defaults_to_http():
    """A bare ``host:port`` server is treated as an HTTP proxy."""
    mapping = _proxies_for_requests({"server": "gw.example:3128"})
    assert mapping["http"] == "http://gw.example:3128"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_proxies_credentials_are_url_encoded():
    """Reserved characters in credentials are percent-encoded."""
    awkward = {
        "server": "socks5://gw:1080",
        "username": "user@x",
        "password": "p:w/d",
    }
    proxy_url = _proxies_for_requests(awkward)["http"]
    # '@', ':' and '/' in creds must be percent-encoded so they don't break
    # the proxy URL parsing.
    assert "user%40x:p%3Aw%2Fd@gw:1080" in proxy_url
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_proxies_no_credentials_has_no_auth_prefix():
    """Without a username the URL carries no ``user:pass@`` part."""
    mapping = _proxies_for_requests({"server": "socks5://gw:1080"})
    assert mapping["http"] == "socks5h://gw:1080"
|
||||||
|
|
||||||
|
|
||||||
|
# ──────────────────────────────────────────────────────────────────────
|
||||||
|
# discover_egress_ip — mocked requests
|
||||||
|
# ──────────────────────────────────────────────────────────────────────
|
||||||
|
class _FakeResp:
|
||||||
|
def __init__(self, text, status=200):
|
||||||
|
self.text = text
|
||||||
|
self._status = status
|
||||||
|
|
||||||
|
def raise_for_status(self):
|
||||||
|
if self._status >= 400:
|
||||||
|
raise RuntimeError(f"HTTP {self._status}")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_discover_egress_ip_first_endpoint_wins(monkeypatch):
    """A valid answer from the first endpoint ends the search."""
    requested_urls = []

    def fake_get(url, **kw):
        requested_urls.append(url)
        return _FakeResp("203.0.113.7\n")

    monkeypatch.setattr(_geo.requests, "get", fake_get)

    discovered = discover_egress_ip(SOCKS)
    assert discovered == "203.0.113.7"
    assert len(requested_urls) == 1  # stopped at the first success
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_discover_egress_ip_falls_through_to_next_on_error(monkeypatch):
    """A non-IP body from one endpoint makes the lookup try the next one."""
    responses = iter([_FakeResp("junk-not-an-ip"), _FakeResp("198.51.100.42")])

    def fake_get(url, **kw):
        return next(responses)

    monkeypatch.setattr(_geo.requests, "get", fake_get)

    discovered = discover_egress_ip(HTTP)
    assert discovered == "198.51.100.42"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_discover_egress_ip_all_fail_raises(monkeypatch):
    """When every endpoint errors out, ``GeoTimezoneError`` is raised."""

    def always_refused(url, **kw):
        raise OSError("connection refused")

    monkeypatch.setattr(_geo.requests, "get", always_refused)

    with pytest.raises(GeoTimezoneError):
        discover_egress_ip(SOCKS)
|
||||||
|
|
||||||
|
|
||||||
|
# ──────────────────────────────────────────────────────────────────────
|
||||||
|
# ip_to_timezone — mocked mmdb reader
|
||||||
|
# ──────────────────────────────────────────────────────────────────────
|
||||||
|
class _FakeReader:
|
||||||
|
def __init__(self, record):
|
||||||
|
self._record = record
|
||||||
|
|
||||||
|
def __enter__(self):
|
||||||
|
return self
|
||||||
|
|
||||||
|
def __exit__(self, *a):
|
||||||
|
return False
|
||||||
|
|
||||||
|
def get(self, ip):
|
||||||
|
return self._record
|
||||||
|
|
||||||
|
|
||||||
|
def _install_fake_maxminddb(monkeypatch, record):
    """Register a fake ``maxminddb`` module whose reader always yields ``record``."""

    def open_database(path):
        return _FakeReader(record)

    fake_module = types.ModuleType("maxminddb")
    fake_module.open_database = open_database
    monkeypatch.setitem(sys.modules, "maxminddb", fake_module)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_ip_to_timezone_reads_location_time_zone(monkeypatch):
    """The zone is taken from the record's ``location.time_zone`` field."""
    canned = {"location": {"time_zone": "Europe/Rome"}}
    _install_fake_maxminddb(monkeypatch, canned)
    zone = ip_to_timezone("1.2.3.4", "x.mmdb")
    assert zone == "Europe/Rome"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_ip_to_timezone_ip_absent_raises(monkeypatch):
    """A lookup that returns no record raises ``GeoTimezoneError``."""
    missing_record = None
    _install_fake_maxminddb(monkeypatch, missing_record)
    with pytest.raises(GeoTimezoneError):
        ip_to_timezone("1.2.3.4", "x.mmdb")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_ip_to_timezone_missing_zone_raises(monkeypatch):
    """A record without ``time_zone`` raises ``GeoTimezoneError``."""
    zoneless = {"location": {}}
    _install_fake_maxminddb(monkeypatch, zoneless)
    with pytest.raises(GeoTimezoneError):
        ip_to_timezone("1.2.3.4", "x.mmdb")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_ip_to_timezone_invalid_iana_raises(monkeypatch):
    """A zone name that is not a real IANA zone raises ``GeoTimezoneError``."""
    bogus = {"location": {"time_zone": "Not/AZone"}}
    _install_fake_maxminddb(monkeypatch, bogus)
    with pytest.raises(GeoTimezoneError):
        ip_to_timezone("1.2.3.4", "x.mmdb")
|
||||||
|
|
||||||
|
|
||||||
|
# ──────────────────────────────────────────────────────────────────────
|
||||||
|
# resolve_session_timezone — the precedence policy
|
||||||
|
# ──────────────────────────────────────────────────────────────────────
|
||||||
|
@pytest.fixture
def stub_egress(monkeypatch):
    """Replace the egress lookup with offline stubs and report whether it ran.

    Returns a dict whose ``"called"`` entry flips to ``True`` once the stubbed
    ``discover_egress_ip`` is invoked.
    """
    import invisible_playwright.download as dl

    state = {"called": False}

    def fake_discover(proxy, **kw):
        state["called"] = True
        return "203.0.113.7"

    def fake_ip_to_timezone(ip, mmdb):
        return "America/New_York"

    def fake_ensure_geoip_mmdb(*a, **k):
        return "fake.mmdb"

    monkeypatch.setattr(_geo, "discover_egress_ip", fake_discover)
    monkeypatch.setattr(_geo, "ip_to_timezone", fake_ip_to_timezone)
    # ensure_geoip_mmdb is imported from .download at call time, so patching
    # the attribute on the download module is what takes effect.
    monkeypatch.setattr(dl, "ensure_geoip_mmdb", fake_ensure_geoip_mmdb)
    return state
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
@pytest.mark.parametrize("sentinel", ["host", "local", "HOST", "Local"])
def test_resolve_host_sentinel_forces_host_tz(sentinel, stub_egress):
    """``host`` / ``local`` (any case) yield ``""`` without an egress lookup."""
    # Even with a proxy set, "host"/"local" force the host TZ and never resolve.
    resolved = resolve_session_timezone(sentinel, SOCKS)
    assert resolved == ""
    assert stub_egress["called"] is False
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_resolve_explicit_iana_wins_over_proxy(stub_egress):
    """An explicit zone is returned as-is even when a proxy is configured."""
    resolved = resolve_session_timezone("Asia/Tokyo", SOCKS)
    assert resolved == "Asia/Tokyo"
    assert stub_egress["called"] is False  # no resolution when explicit
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_resolve_empty_no_proxy_is_host(stub_egress):
    """No timezone and no proxy keeps the host timezone."""
    resolved = resolve_session_timezone("", None)
    assert resolved == ""
    assert stub_egress["called"] is False
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_resolve_auto_no_proxy_is_host(stub_egress):
    """``"auto"`` without a proxy has nothing to resolve and keeps host TZ."""
    resolved = resolve_session_timezone("auto", None)
    assert resolved == ""
    assert stub_egress["called"] is False
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_resolve_empty_with_proxy_defaults_to_auto(stub_egress):
    """An unset timezone behind a proxy resolves from the egress IP."""
    # NEW default: a proxy with no timezone auto-resolves from the egress.
    resolved = resolve_session_timezone("", SOCKS)
    assert resolved == "America/New_York"
    assert stub_egress["called"] is True
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_resolve_auto_with_proxy_resolves(stub_egress):
    """``"auto"`` behind a proxy resolves from the egress IP."""
    resolved = resolve_session_timezone("auto", HTTP)
    assert resolved == "America/New_York"
    assert stub_egress["called"] is True
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_resolve_direct_proxy_treated_as_no_proxy(stub_egress):
    """A ``direct://`` server counts as no proxy, so host TZ is kept."""
    direct = {"server": "direct://"}
    resolved = resolve_session_timezone("auto", direct)
    assert resolved == ""
    assert stub_egress["called"] is False
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_resolve_fail_early_propagates(monkeypatch):
    """A failing egress discovery surfaces as ``GeoTimezoneError``."""

    # With a proxy set, a discovery failure must raise — never silent host TZ.
    def failing_discover(proxy, **kw):
        raise GeoTimezoneError("no egress")

    monkeypatch.setattr(_geo, "discover_egress_ip", failing_discover)

    with pytest.raises(GeoTimezoneError):
        resolve_session_timezone("auto", SOCKS)
|
||||||
Reference in New Issue
Block a user