Compare commits
10 Commits
c2103ed0db
...
ba2a67290d
| Author | SHA1 | Date | |
|---|---|---|---|
| ba2a67290d | |||
| 29262a644e | |||
| 2dfa4e7bd7 | |||
| b34ecf2a21 | |||
| 090baa6155 | |||
| 3f2834d8c2 | |||
| b3608771ed | |||
| c9cc0f1743 | |||
| a950537f0a | |||
| d4db15d37b |
@@ -104,6 +104,24 @@ jobs:
|
||||
ref: ${{ env.SOURCE_REF }}
|
||||
fetch-depth: 1
|
||||
|
||||
# Record which invisible_firefox commit this build came from. The publish
|
||||
# job turns the range previous-release..this commit into the release notes
|
||||
# (scripts/gen_release_notes.py), and re-publishes it as a source-commit.txt
|
||||
# asset so the NEXT release knows where to start the changelog. One leg is
|
||||
# enough — all legs check out the same SOURCE_REF.
|
||||
- name: Record source commit (for auto release notes)
|
||||
if: matrix.leg == 'linux-x86_64'
|
||||
shell: bash
|
||||
run: git rev-parse HEAD > source-commit.txt && cat source-commit.txt
|
||||
- name: Upload source-commit artifact
|
||||
if: matrix.leg == 'linux-x86_64'
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
|
||||
with:
|
||||
name: source-commit
|
||||
path: source-commit.txt
|
||||
if-no-files-found: error
|
||||
retention-days: 7
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
|
||||
with: { python-version: '3.11' }
|
||||
@@ -344,18 +362,23 @@ jobs:
|
||||
# CLOAK + WEBGL-MASKING GUARDS — run the wrapper's e2e cloak/gamma checks
|
||||
# against THIS leg's freshly-built artifact, on its native runner. The
|
||||
# wrapper's headless=True is headed+hidden (cloak on Win/macOS, its own
|
||||
# Xvfb on Linux), so software-GL rendering works on the GPU-less hosts.
|
||||
# test_cloak asserts the window is hidden (Windows DWMWA_CLOAKED / macOS
|
||||
# CGWindowAlpha) AND still renders — the macOS leg is the only place the
|
||||
# cocoa cloak patch gets RUN. The webgl guard catches a regression of the
|
||||
# gamma readPixels noise back to the pixelscan-maskable ±1 spike form.
|
||||
# Xvfb on Linux). Linux (Xvfb + llvmpipe) and Windows (WARP) give a
|
||||
# software WebGL context on the GPU-less hosts, so the WebGL-dependent
|
||||
# assertions run there. macOS GitHub runners expose NO WebGL in the CI
|
||||
# session at all (even vanilla Firefox; macOS has no software-GL fallback),
|
||||
# so on the mac legs the WebGL checks self-skip and the cloak is validated
|
||||
# via its non-blank screenshot + CGWindowAlpha == 0. test_cloak asserts the
|
||||
# window is hidden (Windows DWMWA_CLOAKED / macOS CGWindowAlpha) AND still
|
||||
# renders — the macOS leg is the only place the cocoa cloak patch gets RUN.
|
||||
# The webgl guard catches a regression of the gamma readPixels noise back to
|
||||
# the pixelscan-maskable ±1 spike form (covered on Linux + Windows).
|
||||
- name: Install pyobjc Quartz (macOS — to read the cloak window alpha)
|
||||
if: matrix.kind == 'mac'
|
||||
run: python -m pip install --quiet pyobjc-framework-Quartz
|
||||
- name: Cloak + WebGL-masking guards (headed)
|
||||
shell: bash
|
||||
run: |
|
||||
python -m pip install --quiet -e .
|
||||
python -m pip install --quiet ".[dev]"
|
||||
INVPW_BINARY_PATH="$FF_EXE" python -m pytest \
|
||||
tests/test_cloak.py \
|
||||
"tests/test_fingerprint_surface.py::test_webgl_readpixels_no_masking_signature" \
|
||||
@@ -368,9 +391,18 @@ jobs:
|
||||
permissions:
|
||||
contents: write
|
||||
steps:
|
||||
- name: Checkout wrapper (for scripts/gen_release_notes.py)
|
||||
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
with: { fetch-depth: 1 }
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
|
||||
with: { python-version: '3.11' }
|
||||
- name: Download all build assets
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
|
||||
with: { pattern: asset-*, path: dl, merge-multiple: true }
|
||||
- name: Download source-commit metadata
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
|
||||
with: { name: source-commit, path: src-meta }
|
||||
- name: Assert all 5 target archives present (no silent partial release)
|
||||
run: |
|
||||
cd dl
|
||||
@@ -397,9 +429,38 @@ jobs:
|
||||
TAG="${{ github.event.inputs.release_tag }}"
|
||||
[ -z "$TAG" ] && TAG="${GITHUB_REF_NAME}"
|
||||
echo "tag=$TAG" >> "$GITHUB_OUTPUT"
|
||||
# bare revision number for the release title: firefox-9 -> 9
|
||||
echo "num=${TAG#firefox-}" >> "$GITHUB_OUTPUT"
|
||||
# bare revision number for the release title: firefox-10 -> 10
|
||||
N="${TAG#firefox-}"
|
||||
echo "num=$N" >> "$GITHUB_OUTPUT"
|
||||
# previous release tag, for the changelog range (firefox-10 -> firefox-9)
|
||||
case "$N" in (*[!0-9]*|'') echo "prevtag=" >> "$GITHUB_OUTPUT";;
|
||||
(*) echo "prevtag=firefox-$((N-1))" >> "$GITHUB_OUTPUT";; esac
|
||||
echo "publishing DRAFT release for tag: $TAG"
|
||||
- name: Build release notes from the source commits
|
||||
id: notes
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
set -e
|
||||
CUR="$(cat src-meta/source-commit.txt 2>/dev/null | tr -d '[:space:]')"
|
||||
echo "this build's source commit: ${CUR:-<none>}"
|
||||
# previous release's recorded source commit — gives the changelog range.
|
||||
# Missing (first automated notes / firefox-0) -> notes omit the changelog.
|
||||
PREV=""
|
||||
PREVTAG="${{ steps.tag.outputs.prevtag }}"
|
||||
if [ -n "$PREVTAG" ] && gh release download "$PREVTAG" -R "${{ github.repository }}" \
|
||||
--pattern source-commit.txt --dir prev 2>/dev/null; then
|
||||
PREV="$(cat prev/source-commit.txt | tr -d '[:space:]')"
|
||||
echo "previous ($PREVTAG) source commit: $PREV"
|
||||
else
|
||||
echo "no previous source-commit.txt — changelog section omitted this time"
|
||||
fi
|
||||
python scripts/gen_release_notes.py --tag "${{ steps.tag.outputs.tag }}" \
|
||||
--current "$CUR" --prev-sha "$PREV" --source-repo "${{ env.SOURCE_REPO }}" > body.md
|
||||
echo "----- generated body.md -----"; cat body.md
|
||||
# publish THIS build's source commit so the next release can diff from it
|
||||
cp src-meta/source-commit.txt dl/source-commit.txt
|
||||
- name: Create DRAFT release with all assets
|
||||
uses: softprops/action-gh-release@3bb12739c298aeb8a4eeaf626c5b8d85266b0e65 # v2
|
||||
with:
|
||||
@@ -412,13 +473,7 @@ jobs:
|
||||
dl/*.tar.gz
|
||||
dl/*.zip
|
||||
dl/checksums.txt
|
||||
body: |
|
||||
Patched Firefox 150.0.1 — built on GitHub Actions ($0, no mold).
|
||||
Targets: linux-x86_64, linux-arm64, win-x86_64, macos-arm64, macos-x86_64.
|
||||
|
||||
DRAFT — do not publish until validate_release.py + realness gate pass on all archives.
|
||||
|
||||
macOS: ad-hoc signed (not notarized). After download run:
|
||||
xattr -dr com.apple.quarantine Firefox.app
|
||||
dl/source-commit.txt
|
||||
body_path: body.md
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
@@ -0,0 +1,103 @@
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# verify-cloak.yml — re-runnable CLOAK + WEBGL-MASKING GUARDS for an EXISTING
|
||||
# build run's artifacts, WITHOUT rebuilding Firefox (~3h on the mac legs).
|
||||
#
|
||||
# release.yml runs these same guards in its `gate` job against each freshly-built
|
||||
# artifact. This re-runs them against the artifacts of a PRIOR build run (input
|
||||
# `run_id`) using the CURRENT wrapper code on the default branch — so a test-only
|
||||
# fix (e.g. making the macOS leg tolerant of the runner's missing WebGL) can be
|
||||
# validated against the real binaries in ~10 min instead of paying a full rebuild.
|
||||
#
|
||||
# Same guard command as release.yml's gate. Headed-but-cloaked; zero proxy / zero
|
||||
# secrets. The macOS legs are the only place the cocoa cloak patch actually RUNS.
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
name: verify-cloak
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
run_id:
|
||||
description: 'build run id whose asset-* artifacts to re-gate (e.g. 27346856197)'
|
||||
required: true
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
actions: read # download-artifact needs this to read another run's artifacts
|
||||
|
||||
jobs:
|
||||
guard:
|
||||
name: guard-${{ matrix.leg }}
|
||||
runs-on: ${{ matrix.runner }}
|
||||
timeout-minutes: 25
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
# Same legs/runners/assets as release.yml's gate matrix.
|
||||
include:
|
||||
- leg: linux-x86_64
|
||||
runner: ubuntu-24.04
|
||||
kind: linux
|
||||
asset: firefox-150.0.1-stealth-linux-x86_64.tar.gz
|
||||
- leg: linux-arm64
|
||||
runner: ubuntu-24.04-arm
|
||||
kind: linux
|
||||
asset: firefox-150.0.1-stealth-linux-arm64.tar.gz
|
||||
- leg: win-x86_64
|
||||
runner: windows-latest
|
||||
kind: win
|
||||
asset: firefox-150.0.1-stealth-win-x86_64.zip
|
||||
- leg: macos-arm64
|
||||
runner: macos-15
|
||||
kind: mac
|
||||
asset: firefox-150.0.1-stealth-macos-arm64.tar.gz
|
||||
- leg: macos-x86_64
|
||||
runner: macos-15-intel
|
||||
kind: mac
|
||||
asset: firefox-150.0.1-stealth-macos-x86_64.tar.gz
|
||||
steps:
|
||||
- name: Checkout wrapper (current default branch — the FIXED tests)
|
||||
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
with: { fetch-depth: 1 }
|
||||
- name: Download build asset from the prior run (no rebuild)
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
|
||||
with:
|
||||
name: asset-${{ matrix.leg }}
|
||||
path: art
|
||||
run-id: ${{ github.event.inputs.run_id }}
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
|
||||
with: { python-version: '3.11' }
|
||||
- name: Install Playwright driver (no bundled browser — we override executable_path)
|
||||
# Single-source pin (see release.yml); the wrapper enforces juggler compat.
|
||||
shell: bash
|
||||
run: python -m pip install --quiet "playwright==$(cat scripts/playwright_pin.txt)"
|
||||
- name: Linux system deps for headless firefox
|
||||
if: matrix.kind == 'linux'
|
||||
run: sudo "$(which python)" -m playwright install-deps firefox
|
||||
- name: Extract + locate firefox binary
|
||||
shell: bash
|
||||
run: |
|
||||
set -e
|
||||
mkdir -p ff
|
||||
A="art/${{ matrix.asset }}"
|
||||
case "${{ matrix.kind }}" in
|
||||
win) python -c "import zipfile; zipfile.ZipFile('$A').extractall('ff')"; EXE="ff/firefox.exe";;
|
||||
linux) tar xzf "$A" -C ff; EXE="ff/firefox";;
|
||||
mac) tar xzf "$A" -C ff; EXE="ff/Firefox.app/Contents/MacOS/firefox";;
|
||||
esac
|
||||
[ -e "$EXE" ] || { echo "ERROR: firefox binary not found at $EXE"; exit 1; }
|
||||
chmod +x "$EXE" 2>/dev/null || true
|
||||
echo "FF_EXE=$EXE" >> "$GITHUB_ENV"
|
||||
echo "located: $EXE"
|
||||
- name: Install pyobjc Quartz (macOS — to read the cloak window alpha)
|
||||
if: matrix.kind == 'mac'
|
||||
run: python -m pip install --quiet pyobjc-framework-Quartz
|
||||
- name: Cloak + WebGL-masking guards (headed)
|
||||
shell: bash
|
||||
run: |
|
||||
python -m pip install --quiet ".[dev]"
|
||||
INVPW_BINARY_PATH="$FF_EXE" python -m pytest \
|
||||
tests/test_cloak.py \
|
||||
"tests/test_fingerprint_surface.py::test_webgl_readpixels_no_masking_signature" \
|
||||
-m e2e -o addopts='' -q
|
||||
@@ -6,3 +6,9 @@ build/
|
||||
.pytest_cache/
|
||||
.venv/
|
||||
firefox-source/
|
||||
# scraper runtime artifacts (examples/) — data outputs & browser profiles
|
||||
*.db
|
||||
zhaopin_*.json
|
||||
zhaopin_*.csv
|
||||
ai_jobs.csv
|
||||
examples/.zhipin_profile/
|
||||
|
||||
@@ -0,0 +1,169 @@
|
||||
"""智联招聘抓取数据分析 —— 读取 SQLite, 按城市/薪资/学历/经验/技能做统计。
|
||||
|
||||
纯标准库, 无需 pandas。
|
||||
|
||||
用法:
|
||||
python examples/analyze_zhaopin.py # 全量分析
|
||||
python examples/analyze_zhaopin.py --keyword AI # 只看某关键词
|
||||
python examples/analyze_zhaopin.py --city 北京 # 只看某城市
|
||||
python examples/analyze_zhaopin.py --top 15 # 排行榜取前 15
|
||||
python examples/analyze_zhaopin.py --db zhaopin_jobs.db
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
import sqlite3
|
||||
import statistics
|
||||
from collections import Counter
|
||||
|
||||
WORKDAYS_PER_MONTH = 21.75 # 日薪 -> 月薪折算
|
||||
|
||||
|
||||
def parse_salary(s: str) -> tuple[int, int] | None:
|
||||
"""把薪资字符串解析成 (月薪下限, 月薪上限) 元。无法解析返回 None。"""
|
||||
if not s or "面议" in s:
|
||||
return None
|
||||
s = s.strip()
|
||||
is_daily = "/天" in s or "元/天" in s
|
||||
is_wan = "万" in s
|
||||
nums = re.findall(r"\d+(?:\.\d+)?", s)
|
||||
if not nums:
|
||||
return None
|
||||
vals = [float(x) for x in nums[:2]]
|
||||
if len(vals) == 1:
|
||||
vals = [vals[0], vals[0]]
|
||||
lo, hi = vals[0], vals[1]
|
||||
if is_wan:
|
||||
lo, hi = lo * 10000, hi * 10000
|
||||
if is_daily:
|
||||
lo, hi = lo * WORKDAYS_PER_MONTH, hi * WORKDAYS_PER_MONTH
|
||||
return int(lo), int(hi)
|
||||
|
||||
|
||||
def city_of(location: str) -> str:
|
||||
return (location or "").split("·")[0].strip() or "未知"
|
||||
|
||||
|
||||
SALARY_BUCKETS = [
|
||||
(0, 5000, "<5k"),
|
||||
(5000, 8000, "5-8k"),
|
||||
(8000, 12000, "8-12k"),
|
||||
(12000, 18000, "12-18k"),
|
||||
(18000, 25000, "18-25k"),
|
||||
(25000, 10**9, ">=25k"),
|
||||
]
|
||||
|
||||
|
||||
def bucket_of(mid: float) -> str:
|
||||
for lo, hi, label in SALARY_BUCKETS:
|
||||
if lo <= mid < hi:
|
||||
return label
|
||||
return "?"
|
||||
|
||||
|
||||
def bar(n: int, maxn: int, width: int = 30) -> str:
|
||||
if maxn <= 0:
|
||||
return ""
|
||||
return "█" * max(1, round(n / maxn * width))
|
||||
|
||||
|
||||
def section(title: str) -> None:
|
||||
print("\n" + "=" * 56)
|
||||
print(title)
|
||||
print("=" * 56)
|
||||
|
||||
|
||||
def rank_table(counter: Counter, top: int, label: str) -> None:
|
||||
if not counter:
|
||||
print(" (无数据)")
|
||||
return
|
||||
maxn = counter.most_common(1)[0][1]
|
||||
for name, n in counter.most_common(top):
|
||||
print(f" {name[:18]:<18} {n:>4} {bar(n, maxn)}")
|
||||
|
||||
|
||||
def main() -> None:
|
||||
ap = argparse.ArgumentParser()
|
||||
ap.add_argument("--db", default="zhaopin_jobs.db")
|
||||
ap.add_argument("--keyword", default=None, help="按关键词过滤")
|
||||
ap.add_argument("--city", default=None, help="按城市过滤")
|
||||
ap.add_argument("--top", type=int, default=12, help="排行榜条数")
|
||||
args = ap.parse_args()
|
||||
|
||||
conn = sqlite3.connect(args.db)
|
||||
q = ("SELECT title,salary,experience,education,location,company,"
|
||||
"job_tags,company_tags,skills FROM jobs")
|
||||
params: list = []
|
||||
if args.keyword:
|
||||
q += " WHERE keyword=?"
|
||||
params.append(args.keyword)
|
||||
rows = conn.execute(q, params).fetchall()
|
||||
|
||||
if args.city:
|
||||
rows = [r for r in rows if city_of(r[4]) == args.city]
|
||||
|
||||
if not rows:
|
||||
print("没有匹配的数据。先用 scrape_zhaopin_full.py 抓一些, 或检查过滤条件。")
|
||||
return
|
||||
|
||||
n_total = len(rows)
|
||||
print(f"分析样本: {n_total} 条"
|
||||
+ (f" | 关键词={args.keyword}" if args.keyword else "")
|
||||
+ (f" | 城市={args.city}" if args.city else ""))
|
||||
|
||||
cities, edus, exps = Counter(), Counter(), Counter()
|
||||
skill_freq, salary_buckets = Counter(), Counter()
|
||||
mids: list[float] = []
|
||||
n_salary_parsed = 0
|
||||
|
||||
for (title, salary, exp, edu, loc, comp, jtags, ctags, skills) in rows:
|
||||
cities[city_of(loc)] += 1
|
||||
edus[(edu or "未知").strip() or "未知"] += 1
|
||||
exps[(exp or "未知").strip() or "未知"] += 1
|
||||
# 技能: 合并 skills + job_tags
|
||||
for src in (skills, jtags):
|
||||
try:
|
||||
for t in json.loads(src or "[]"):
|
||||
t = t.strip()
|
||||
if t:
|
||||
skill_freq[t] += 1
|
||||
except Exception:
|
||||
pass
|
||||
rng = parse_salary(salary)
|
||||
if rng:
|
||||
n_salary_parsed += 1
|
||||
mid = (rng[0] + rng[1]) / 2
|
||||
mids.append(mid)
|
||||
salary_buckets[bucket_of(mid)] += 1
|
||||
|
||||
section("城市分布 (Top)")
|
||||
rank_table(cities, args.top, "城市")
|
||||
|
||||
section("薪资区间分布 (按月薪中位点)")
|
||||
if mids:
|
||||
order = {label: i for i, (_, _, label) in enumerate(SALARY_BUCKETS)}
|
||||
maxn = max(salary_buckets.values())
|
||||
for label in sorted(salary_buckets, key=lambda x: order.get(x, 99)):
|
||||
n = salary_buckets[label]
|
||||
print(f" {label:<8} {n:>4} {bar(n, maxn)}")
|
||||
print(f"\n 可解析薪资: {n_salary_parsed}/{n_total} 条 (面议等已排除)")
|
||||
print(f" 月薪中位点 平均: {statistics.mean(mids):>8,.0f} 元")
|
||||
print(f" 中位数: {statistics.median(mids):>8,.0f} 元")
|
||||
print(f" 最低/最高: {min(mids):,.0f} / {max(mids):,.0f} 元")
|
||||
else:
|
||||
print(" (无可解析薪资)")
|
||||
|
||||
section("学历要求")
|
||||
rank_table(edus, args.top, "学历")
|
||||
|
||||
section("经验要求")
|
||||
rank_table(exps, args.top, "经验")
|
||||
|
||||
section("技能/标签词频 (Top)")
|
||||
rank_table(skill_freq, args.top, "技能")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,120 @@
|
||||
"""抓取智联招聘「AI 相关」岗位 —— 免登录, 渲染搜索结果页直接解析。
|
||||
|
||||
合规提醒:
|
||||
- 仅抓取公开展示的岗位标题/薪资/公司/标签等字段。
|
||||
- 不抓取招聘者个人联系方式; 低频请求, 遵守目标站点服务条款, 风险自负。
|
||||
|
||||
用法:
|
||||
python examples/scrape_zhaopin_ai.py # 默认抓 "AI" 前 3 页
|
||||
python examples/scrape_zhaopin_ai.py --keyword 大模型 --pages 5
|
||||
python examples/scrape_zhaopin_ai.py --keyword AI --jl 530 # 530=北京
|
||||
python examples/scrape_zhaopin_ai.py --headful # 显示浏览器窗口
|
||||
|
||||
城市编码(jl): 北京530 上海538 广州763 深圳765 杭州653 成都801 武汉736 南京635
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import json
|
||||
import random
|
||||
import time
|
||||
from urllib.parse import quote
|
||||
|
||||
from invisible_playwright import InvisiblePlaywright
|
||||
|
||||
# 在渲染后的页面里一次性提取所有卡片, 比逐个 query 更快更稳。
|
||||
# 选择器来自对真实页面的探测 (见 _probe_zhaopin.py)。
|
||||
_EXTRACT_JS = r"""
|
||||
() => {
|
||||
const txt = (el) => el ? el.innerText.trim() : "";
|
||||
const cards = Array.from(document.querySelectorAll(".joblist-box__item"));
|
||||
return cards.map(card => {
|
||||
const nameA = card.querySelector("a.jobinfo__name");
|
||||
const info = Array.from(card.querySelectorAll(".jobinfo__other-info-item"))
|
||||
.map(e => e.innerText.trim());
|
||||
// 第一个 other-info 是地点(带图标), 取其 span; 其余按顺序是经验/学历
|
||||
const locSpan = card.querySelector(".jobinfo__other-info-item span");
|
||||
const companyA = card.querySelector("a.companyinfo__name");
|
||||
return {
|
||||
title: txt(nameA),
|
||||
link: nameA ? nameA.href : "",
|
||||
salary: txt(card.querySelector(".jobinfo__salary")),
|
||||
job_tags: Array.from(card.querySelectorAll(".jobinfo__tag .joblist-box__item-tag"))
|
||||
.map(e => e.innerText.trim()),
|
||||
location: locSpan ? locSpan.innerText.trim() : (info[0] || ""),
|
||||
experience: info[1] || "",
|
||||
education: info[2] || "",
|
||||
company: companyA ? (companyA.getAttribute("title") || companyA.innerText).trim()
|
||||
: "",
|
||||
company_url: companyA ? companyA.href : "",
|
||||
company_tags: Array.from(card.querySelectorAll(".companyinfo__tag .joblist-box__item-tag"))
|
||||
.map(e => e.innerText.trim()),
|
||||
};
|
||||
});
|
||||
}
|
||||
"""
|
||||
|
||||
|
||||
def scrape(keyword: str, pages: int, jl: str | None, headful: bool) -> list[dict]:
|
||||
results: list[dict] = []
|
||||
seen: set[str] = set()
|
||||
with InvisiblePlaywright(seed=42, headless=not headful) as browser:
|
||||
page = browser.new_page()
|
||||
for n in range(1, pages + 1):
|
||||
url = f"https://sou.zhaopin.com/?kw={quote(keyword)}&p={n}"
|
||||
if jl:
|
||||
url += f"&jl={jl}"
|
||||
try:
|
||||
page.goto(url, wait_until="domcontentloaded", timeout=60000)
|
||||
page.wait_for_selector(".joblist-box__item", timeout=20000)
|
||||
except Exception:
|
||||
print(f"第 {n} 页未加载出岗位列表, 跳过 (可能触发风控)。")
|
||||
continue
|
||||
page.wait_for_timeout(1500) # 让懒加载内容补齐
|
||||
|
||||
rows = page.evaluate(_EXTRACT_JS)
|
||||
new = 0
|
||||
for r in rows:
|
||||
key = r.get("link") or (r.get("title", "") + r.get("company", ""))
|
||||
if key and key not in seen:
|
||||
seen.add(key)
|
||||
results.append(r)
|
||||
new += 1
|
||||
print(f"第 {n} 页: 抓到 {len(rows)} 条 (新增 {new})")
|
||||
|
||||
time.sleep(random.uniform(3, 7)) # 低频, 降低风控
|
||||
return results
|
||||
|
||||
|
||||
def save(rows: list[dict], stem: str) -> None:
|
||||
with open(f"{stem}.json", "w", encoding="utf-8") as f:
|
||||
json.dump(rows, f, ensure_ascii=False, indent=2)
|
||||
if rows:
|
||||
fields = ["title", "salary", "experience", "education", "location",
|
||||
"company", "job_tags", "company_tags", "link", "company_url"]
|
||||
with open(f"{stem}.csv", "w", newline="", encoding="utf-8-sig") as f:
|
||||
w = csv.DictWriter(f, fieldnames=fields, extrasaction="ignore")
|
||||
w.writeheader()
|
||||
for r in rows:
|
||||
row = dict(r)
|
||||
row["job_tags"] = " / ".join(row.get("job_tags") or [])
|
||||
row["company_tags"] = " / ".join(row.get("company_tags") or [])
|
||||
w.writerow(row)
|
||||
print(f"\n已保存 {len(rows)} 条 → {stem}.json / {stem}.csv")
|
||||
|
||||
|
||||
def main() -> None:
|
||||
ap = argparse.ArgumentParser()
|
||||
ap.add_argument("--keyword", default="AI", help="搜索关键词")
|
||||
ap.add_argument("--pages", type=int, default=3, help="抓取页数")
|
||||
ap.add_argument("--jl", default=None, help="城市编码, 不填=全国")
|
||||
ap.add_argument("--headful", action="store_true", help="显示浏览器窗口")
|
||||
args = ap.parse_args()
|
||||
|
||||
rows = scrape(args.keyword, args.pages, args.jl, args.headful)
|
||||
save(rows, stem=f"zhaopin_{args.keyword}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,277 @@
|
||||
"""智联招聘 AI 岗位爬虫(完整版)—— 免登录 + SQLite + 断点续抓 + 详情页 JD 全文。
|
||||
|
||||
特性:
|
||||
* 写入 SQLite (INSERT OR IGNORE 去重, 每条提交, 中断不丢数据)
|
||||
* 断点续抓:
|
||||
- 列表阶段: 记录每个 (keyword, jl) 已抓到的最大页码, 重跑从下一页继续
|
||||
- 详情阶段: 只抓还没有 JD 的岗位, 重跑自动补齐
|
||||
* 详情页: 进入每个岗位页面抓 职位描述(JD)全文 + 技能标签
|
||||
|
||||
合规提醒: 仅抓公开岗位字段, 不抓招聘者个人联系方式; 低频自用, 遵守站点条款。
|
||||
|
||||
用法:
|
||||
# 抓列表(前5页) + 进详情页抓 JD
|
||||
python examples/scrape_zhaopin_full.py --keyword AI --pages 5
|
||||
|
||||
# 只抓列表, 不进详情
|
||||
python examples/scrape_zhaopin_full.py --keyword 大模型 --pages 5 --no-detail
|
||||
|
||||
# 中断后直接重跑同一命令 => 自动从断点继续
|
||||
python examples/scrape_zhaopin_full.py --keyword AI --pages 5
|
||||
|
||||
# 导出已抓数据到 CSV
|
||||
python examples/scrape_zhaopin_full.py --export ai_jobs.csv
|
||||
|
||||
# 重置某关键词的列表进度(重新从第1页抓)
|
||||
python examples/scrape_zhaopin_full.py --keyword AI --reset
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import sqlite3
|
||||
import time
|
||||
from contextlib import closing
|
||||
from urllib.parse import quote
|
||||
|
||||
from invisible_playwright import InvisiblePlaywright
|
||||
|
||||
DB_DEFAULT = "zhaopin_jobs.db"
|
||||
|
||||
_LIST_JS = r"""
|
||||
() => {
|
||||
const txt = (el) => el ? el.innerText.trim() : "";
|
||||
return Array.from(document.querySelectorAll(".joblist-box__item")).map(card => {
|
||||
const nameA = card.querySelector("a.jobinfo__name");
|
||||
const info = Array.from(card.querySelectorAll(".jobinfo__other-info-item"))
|
||||
.map(e => e.innerText.trim());
|
||||
const locSpan = card.querySelector(".jobinfo__other-info-item span");
|
||||
const companyA = card.querySelector("a.companyinfo__name");
|
||||
return {
|
||||
title: txt(nameA),
|
||||
link: nameA ? nameA.href : "",
|
||||
salary: txt(card.querySelector(".jobinfo__salary")),
|
||||
job_tags: Array.from(card.querySelectorAll(".jobinfo__tag .joblist-box__item-tag"))
|
||||
.map(e => e.innerText.trim()),
|
||||
location: locSpan ? locSpan.innerText.trim() : (info[0] || ""),
|
||||
experience: info[1] || "",
|
||||
education: info[2] || "",
|
||||
company: companyA ? (companyA.getAttribute("title") || companyA.innerText).trim() : "",
|
||||
company_url: companyA ? companyA.href : "",
|
||||
company_tags: Array.from(card.querySelectorAll(".companyinfo__tag .joblist-box__item-tag"))
|
||||
.map(e => e.innerText.trim()),
|
||||
};
|
||||
});
|
||||
}
|
||||
"""
|
||||
|
||||
_DETAIL_JS = r"""
|
||||
() => {
|
||||
const c = document.querySelector(".describtion-card__detail-content");
|
||||
const skills = Array.from(document.querySelectorAll(".describtion-card__skills-item"))
|
||||
.map(e => e.innerText.trim());
|
||||
return { jd: c ? c.innerText.trim() : "", skills };
|
||||
}
|
||||
"""
|
||||
|
||||
|
||||
# ── DB ──────────────────────────────────────────────────────────────────
|
||||
def init_db(path: str) -> sqlite3.Connection:
|
||||
conn = sqlite3.connect(path)
|
||||
conn.execute("""
|
||||
CREATE TABLE IF NOT EXISTS jobs (
|
||||
job_id TEXT PRIMARY KEY,
|
||||
title TEXT,
|
||||
salary TEXT,
|
||||
experience TEXT,
|
||||
education TEXT,
|
||||
location TEXT,
|
||||
company TEXT,
|
||||
company_url TEXT,
|
||||
job_tags TEXT,
|
||||
company_tags TEXT,
|
||||
link TEXT,
|
||||
jd_text TEXT,
|
||||
skills TEXT,
|
||||
keyword TEXT,
|
||||
created_at TEXT DEFAULT (datetime('now','localtime')),
|
||||
detail_at TEXT
|
||||
)
|
||||
""")
|
||||
conn.execute("""
|
||||
CREATE TABLE IF NOT EXISTS progress (
|
||||
scope TEXT PRIMARY KEY, -- f"{keyword}|{jl}"
|
||||
last_page INTEGER DEFAULT 0
|
||||
)
|
||||
""")
|
||||
conn.commit()
|
||||
return conn
|
||||
|
||||
|
||||
def job_id_from_link(link: str) -> str:
|
||||
m = re.search(r"/jobdetail/([^.?/]+)\.htm", link)
|
||||
return m.group(1) if m else link
|
||||
|
||||
|
||||
def get_last_page(conn: sqlite3.Connection, scope: str) -> int:
|
||||
row = conn.execute("SELECT last_page FROM progress WHERE scope=?", (scope,)).fetchone()
|
||||
return row[0] if row else 0
|
||||
|
||||
|
||||
def set_last_page(conn: sqlite3.Connection, scope: str, page: int) -> None:
|
||||
conn.execute(
|
||||
"INSERT INTO progress(scope,last_page) VALUES(?,?) "
|
||||
"ON CONFLICT(scope) DO UPDATE SET last_page=excluded.last_page",
|
||||
(scope, page),
|
||||
)
|
||||
conn.commit()
|
||||
|
||||
|
||||
def upsert_job(conn: sqlite3.Connection, r: dict, keyword: str) -> bool:
|
||||
jid = job_id_from_link(r.get("link", ""))
|
||||
if not jid:
|
||||
return False
|
||||
cur = conn.execute(
|
||||
"""INSERT OR IGNORE INTO jobs
|
||||
(job_id,title,salary,experience,education,location,company,
|
||||
company_url,job_tags,company_tags,link,keyword)
|
||||
VALUES(?,?,?,?,?,?,?,?,?,?,?,?)""",
|
||||
(jid, r.get("title"), r.get("salary"), r.get("experience"),
|
||||
r.get("education"), r.get("location"), r.get("company"),
|
||||
r.get("company_url"),
|
||||
json.dumps(r.get("job_tags") or [], ensure_ascii=False),
|
||||
json.dumps(r.get("company_tags") or [], ensure_ascii=False),
|
||||
r.get("link"), keyword),
|
||||
)
|
||||
conn.commit()
|
||||
return cur.rowcount > 0
|
||||
|
||||
|
||||
# ── 抓取 ────────────────────────────────────────────────────────────────
|
||||
def crawl_list(conn, page, keyword: str, pages: int, jl: str | None) -> None:
|
||||
scope = f"{keyword}|{jl or ''}"
|
||||
start = get_last_page(conn, scope) + 1
|
||||
if start > pages:
|
||||
print(f"[列表] '{keyword}' 已抓到第 {start-1} 页, 目标 {pages} 页, 无需续抓。")
|
||||
return
|
||||
print(f"[列表] '{keyword}' 从第 {start} 页抓到第 {pages} 页")
|
||||
for n in range(start, pages + 1):
|
||||
url = f"https://sou.zhaopin.com/?kw={quote(keyword)}&p={n}"
|
||||
if jl:
|
||||
url += f"&jl={jl}"
|
||||
try:
|
||||
page.goto(url, wait_until="domcontentloaded", timeout=60000)
|
||||
page.wait_for_selector(".joblist-box__item", timeout=20000)
|
||||
except Exception:
|
||||
print(f" 第 {n} 页未加载出列表, 停止本轮 (重跑可从此页续)。")
|
||||
break
|
||||
page.wait_for_timeout(1200)
|
||||
rows = page.evaluate(_LIST_JS)
|
||||
new = sum(upsert_job(conn, r, keyword) for r in rows)
|
||||
set_last_page(conn, scope, n)
|
||||
print(f" 第 {n} 页: {len(rows)} 条 (新增 {new})")
|
||||
time.sleep(random.uniform(3, 7))
|
||||
|
||||
|
||||
def crawl_details(conn, page, keyword: str | None, limit: int | None) -> None:
|
||||
q = "SELECT job_id,link FROM jobs WHERE (jd_text IS NULL OR jd_text='') AND link!=''"
|
||||
params: list = []
|
||||
if keyword:
|
||||
q += " AND keyword=?"
|
||||
params.append(keyword)
|
||||
q += " ORDER BY created_at"
|
||||
if limit:
|
||||
q += f" LIMIT {int(limit)}"
|
||||
todo = conn.execute(q, params).fetchall()
|
||||
if not todo:
|
||||
print("[详情] 没有待补充 JD 的岗位。")
|
||||
return
|
||||
print(f"[详情] 待抓 JD: {len(todo)} 条")
|
||||
for i, (jid, link) in enumerate(todo, 1):
|
||||
try:
|
||||
page.goto(link, wait_until="domcontentloaded", timeout=60000)
|
||||
page.wait_for_selector(".describtion-card__detail-content", timeout=15000)
|
||||
page.wait_for_timeout(800)
|
||||
data = page.evaluate(_DETAIL_JS)
|
||||
except Exception as e:
|
||||
print(f" [{i}/{len(todo)}] {jid} 抓取失败: {str(e)[:60]}")
|
||||
continue
|
||||
conn.execute(
|
||||
"UPDATE jobs SET jd_text=?, skills=?, detail_at=datetime('now','localtime') "
|
||||
"WHERE job_id=?",
|
||||
(data.get("jd", ""),
|
||||
json.dumps(data.get("skills") or [], ensure_ascii=False), jid),
|
||||
)
|
||||
conn.commit() # 逐条提交 => 中断安全
|
||||
jd_len = len(data.get("jd", ""))
|
||||
print(f" [{i}/{len(todo)}] {jid} JD {jd_len} 字")
|
||||
time.sleep(random.uniform(2, 5))
|
||||
|
||||
|
||||
# ── 导出 ────────────────────────────────────────────────────────────────
|
||||
def export_csv(conn, path: str, keyword: str | None) -> None:
|
||||
q = ("SELECT title,salary,experience,education,location,company,"
|
||||
"job_tags,company_tags,skills,jd_text,link FROM jobs")
|
||||
params: list = []
|
||||
if keyword:
|
||||
q += " WHERE keyword=?"
|
||||
params.append(keyword)
|
||||
rows = conn.execute(q, params).fetchall()
|
||||
cols = ["title", "salary", "experience", "education", "location", "company",
|
||||
"job_tags", "company_tags", "skills", "jd_text", "link"]
|
||||
with open(path, "w", newline="", encoding="utf-8-sig") as f:
|
||||
w = csv.writer(f)
|
||||
w.writerow(cols)
|
||||
for row in rows:
|
||||
row = list(row)
|
||||
for idx in (6, 7, 8): # json 数组列 -> 用 / 连接
|
||||
try:
|
||||
row[idx] = " / ".join(json.loads(row[idx] or "[]"))
|
||||
except Exception:
|
||||
pass
|
||||
w.writerow(row)
|
||||
print(f"已导出 {len(rows)} 条 -> {path}")
|
||||
|
||||
|
||||
# ── main ────────────────────────────────────────────────────────────────
|
||||
def main() -> None:
|
||||
ap = argparse.ArgumentParser()
|
||||
ap.add_argument("--keyword", default="AI", help="搜索关键词")
|
||||
ap.add_argument("--pages", type=int, default=3, help="列表抓取页数")
|
||||
ap.add_argument("--jl", default=None, help="城市编码, 不填=全国")
|
||||
ap.add_argument("--db", default=DB_DEFAULT, help="SQLite 文件路径")
|
||||
ap.add_argument("--no-detail", action="store_true", help="只抓列表, 不进详情页")
|
||||
ap.add_argument("--detail-limit", type=int, default=None, help="本轮最多抓多少条 JD")
|
||||
ap.add_argument("--headful", action="store_true", help="显示浏览器窗口")
|
||||
ap.add_argument("--reset", action="store_true", help="重置该关键词的列表进度")
|
||||
ap.add_argument("--export", metavar="CSV", help="导出已抓数据到 CSV 后退出")
|
||||
args = ap.parse_args()
|
||||
|
||||
with closing(init_db(args.db)) as conn:
|
||||
if args.export:
|
||||
export_csv(conn, args.export, args.keyword if args.keyword != "AI" else None)
|
||||
return
|
||||
if args.reset:
|
||||
scope = f"{args.keyword}|{args.jl or ''}"
|
||||
conn.execute("DELETE FROM progress WHERE scope=?", (scope,))
|
||||
conn.commit()
|
||||
print(f"已重置进度: {scope}")
|
||||
|
||||
with InvisiblePlaywright(seed=42, headless=not args.headful) as browser:
|
||||
page = browser.new_page()
|
||||
crawl_list(conn, page, args.keyword, args.pages, args.jl)
|
||||
if not args.no_detail:
|
||||
crawl_details(conn, page, args.keyword, args.detail_limit)
|
||||
|
||||
total = conn.execute("SELECT COUNT(*) FROM jobs").fetchone()[0]
|
||||
with_jd = conn.execute(
|
||||
"SELECT COUNT(*) FROM jobs WHERE jd_text IS NOT NULL AND jd_text!=''"
|
||||
).fetchone()[0]
|
||||
print(f"\n库内合计 {total} 条, 其中含 JD 全文 {with_jd} 条 -> {args.db}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,112 @@
|
||||
"""抓取 BOSS 直聘「AI 相关」岗位(自用 / 学习用途)。
|
||||
|
||||
合规提醒:
|
||||
- 仅抓取公开展示的岗位标题/薪资/公司等字段, 不抓取招聘者个人联系方式。
|
||||
- 低频请求, 遵守目标站点服务条款; 风险自负。
|
||||
|
||||
首次使用:
|
||||
python scrape_zhipin_ai.py --login # 打开浏览器, 手动扫码登录一次
|
||||
之后:
|
||||
python scrape_zhipin_ai.py --keyword AI --city 101010100 --pages 3
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import json
|
||||
import random
|
||||
import time
|
||||
from pathlib import Path
|
||||
from urllib.parse import quote
|
||||
|
||||
from invisible_playwright import InvisiblePlaywright
|
||||
|
||||
PROFILE_DIR = Path(__file__).parent / ".zhipin_profile" # 持久化登录态
|
||||
SEED = 20240614 # 固定 seed → 跨会话指纹一致, 配合持久化 profile
|
||||
|
||||
|
||||
def login_flow() -> None:
|
||||
"""首次手动登录: 打开页面, 你扫码, 登录态写入 PROFILE_DIR。"""
|
||||
with InvisiblePlaywright(seed=SEED, profile_dir=PROFILE_DIR) as ctx:
|
||||
page = ctx.new_page()
|
||||
page.goto("https://www.zhipin.com/web/user/?ka=header-login",
|
||||
wait_until="domcontentloaded")
|
||||
print("请在打开的浏览器中扫码登录, 登录完成后回到终端按回车...")
|
||||
input() # 等你登录完成
|
||||
print("登录态已保存到", PROFILE_DIR)
|
||||
|
||||
|
||||
def scrape(keyword: str, city: str, pages: int) -> list[dict]:
|
||||
results: list[dict] = []
|
||||
with InvisiblePlaywright(seed=SEED, profile_dir=PROFILE_DIR) as ctx:
|
||||
page = ctx.new_page()
|
||||
for n in range(1, pages + 1):
|
||||
url = (
|
||||
"https://www.zhipin.com/web/geek/job"
|
||||
f"?query={quote(keyword)}&city={city}&page={n}"
|
||||
)
|
||||
page.goto(url, wait_until="domcontentloaded")
|
||||
|
||||
# 等列表渲染; 选择器需按实际页面结构核对/调整
|
||||
try:
|
||||
page.wait_for_selector("li.job-card-wrapper", timeout=15000)
|
||||
except Exception:
|
||||
print(f"第 {n} 页未出现岗位列表, 可能需要登录或触发了验证码。")
|
||||
# 给你时间手动过验证码
|
||||
input("处理完页面后按回车继续...")
|
||||
|
||||
rows = page.eval_on_selector_all(
|
||||
"li.job-card-wrapper",
|
||||
"""els => els.map(e => ({
|
||||
title: e.querySelector('.job-name')?.innerText?.trim(),
|
||||
salary: e.querySelector('.salary')?.innerText?.trim(),
|
||||
company: e.querySelector('.company-name')?.innerText?.trim(),
|
||||
tags: Array.from(e.querySelectorAll('.tag-list li'))
|
||||
.map(t => t.innerText.trim()),
|
||||
area: e.querySelector('.job-area')?.innerText?.trim(),
|
||||
link: e.querySelector('a.job-card-left')?.href
|
||||
|| e.querySelector('a')?.href,
|
||||
}))""",
|
||||
)
|
||||
print(f"第 {n} 页抓到 {len(rows)} 条")
|
||||
results.extend(rows)
|
||||
|
||||
# 低频: 随机停顿, 降低风控触发概率
|
||||
time.sleep(random.uniform(4, 9))
|
||||
return results
|
||||
|
||||
|
||||
def save(rows: list[dict], stem: str) -> None:
|
||||
Path(f"{stem}.json").write_text(
|
||||
json.dumps(rows, ensure_ascii=False, indent=2), encoding="utf-8"
|
||||
)
|
||||
if rows:
|
||||
keys = ["title", "salary", "company", "area", "tags", "link"]
|
||||
with open(f"{stem}.csv", "w", newline="", encoding="utf-8-sig") as f:
|
||||
w = csv.DictWriter(f, fieldnames=keys, extrasaction="ignore")
|
||||
w.writeheader()
|
||||
for r in rows:
|
||||
r = dict(r)
|
||||
r["tags"] = " / ".join(r.get("tags") or [])
|
||||
w.writerow(r)
|
||||
print(f"已保存 {len(rows)} 条 → {stem}.json / {stem}.csv")
|
||||
|
||||
|
||||
def main() -> None:
|
||||
ap = argparse.ArgumentParser()
|
||||
ap.add_argument("--login", action="store_true", help="首次手动登录")
|
||||
ap.add_argument("--keyword", default="AI", help="搜索关键词")
|
||||
ap.add_argument("--city", default="101010100", help="城市编码 (101010100=北京)")
|
||||
ap.add_argument("--pages", type=int, default=3, help="抓取页数")
|
||||
args = ap.parse_args()
|
||||
|
||||
if args.login:
|
||||
login_flow()
|
||||
return
|
||||
|
||||
rows = scrape(args.keyword, args.city, args.pages)
|
||||
save(rows, stem=f"zhipin_{args.keyword}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,114 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Generate the GitHub release body for a firefox-N build from the actual
|
||||
invisible_firefox commits that went into it.
|
||||
|
||||
The release tag (firefox-N) lives on the wrapper, but the binary's changes live
|
||||
on the SOURCE repo (feder-cr/invisible_firefox). We never deep-clone that history
|
||||
(it's a full Firefox fork); instead we use GitHub's compare API to list the
|
||||
commits between the PREVIOUS release's source commit and this one, and turn their
|
||||
subject lines into a short human-readable "What changed" list.
|
||||
|
||||
- The previous release's source commit comes from its ``source-commit.txt``
|
||||
asset (this script's own output uploads one for the next run to read).
|
||||
- If there's no previous source commit (first automated release) or the compare
|
||||
fails, we fall back to a body WITHOUT the changelog section — publishing must
|
||||
never break on note generation.
|
||||
|
||||
This is NOT an LLM and NOT a raw ``git log`` dump: it filters out the
|
||||
non-user-facing commits (docs/chore/ci/test/style) and prints the remaining
|
||||
subjects as plain bullets. Quality rides on writing good commit subjects.
|
||||
|
||||
Usage:
|
||||
python scripts/gen_release_notes.py --tag firefox-10 --current <sha> \
|
||||
[--prev-sha <sha>] [--source-repo feder-cr/invisible_firefox]
|
||||
# reads GITHUB_TOKEN from the env for the compare API (optional for public).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
|
||||
# Conventional-commit prefixes that never belong in user-facing release notes.
|
||||
_SKIP = re.compile(r"^(docs|chore|ci|test|style|build)(\(|:)", re.I)
|
||||
|
||||
|
||||
def _api(url: str, token: str | None) -> dict:
|
||||
headers = {"Accept": "application/vnd.github+json",
|
||||
"User-Agent": "invisible-playwright-release-notes"}
|
||||
if token:
|
||||
headers["Authorization"] = f"Bearer {token}"
|
||||
req = urllib.request.Request(url, headers=headers)
|
||||
with urllib.request.urlopen(req, timeout=30) as r:
|
||||
return json.load(r)
|
||||
|
||||
|
||||
def changelog_bullets(source_repo: str, prev_sha: str, current_sha: str,
|
||||
token: str | None) -> list[str]:
|
||||
"""Return the user-facing commit subjects in prev_sha..current_sha, or []."""
|
||||
if not prev_sha or not current_sha or prev_sha == current_sha:
|
||||
return []
|
||||
url = f"https://api.github.com/repos/{source_repo}/compare/{prev_sha}...{current_sha}"
|
||||
try:
|
||||
data = _api(url, token)
|
||||
except (urllib.error.URLError, urllib.error.HTTPError, ValueError) as e:
|
||||
print(f"[gen_release_notes] compare API failed ({e}); no changelog section",
|
||||
file=sys.stderr)
|
||||
return []
|
||||
bullets: list[str] = []
|
||||
for c in data.get("commits", []):
|
||||
subject = (c.get("commit", {}).get("message") or "").splitlines()[0].strip()
|
||||
if not subject or _SKIP.match(subject):
|
||||
continue
|
||||
bullets.append(subject.rstrip("."))
|
||||
return bullets
|
||||
|
||||
|
||||
def build_body(tag: str, current_sha: str, bullets: list[str]) -> str:
|
||||
m = re.search(r"(\d+)", tag)
|
||||
n = int(m.group(1)) if m else None
|
||||
prev_label = f"firefox-{n - 1}" if n else "the previous build"
|
||||
short = (current_sha or "")[:8]
|
||||
|
||||
parts = ["Patched Firefox 150.0.1, the stealth build invisible_playwright drives.", ""]
|
||||
if bullets:
|
||||
parts.append(f"What changed since {prev_label}:")
|
||||
parts += [f"- {b}" for b in bullets]
|
||||
parts.append("")
|
||||
parts += [
|
||||
"Builds: Linux x86_64, Linux arm64, Windows x86_64, macOS arm64, macOS x86_64.",
|
||||
"",
|
||||
"Most people won't grab these by hand. The wrapper fetches the right one for "
|
||||
"your platform on first run:",
|
||||
"",
|
||||
" pip install git+https://github.com/feder-cr/invisible_playwright",
|
||||
"",
|
||||
"If you do download manually, `checksums.txt` has the SHA256s. The macOS builds "
|
||||
"are ad-hoc signed (not notarized), so clear the quarantine flag: "
|
||||
"`xattr -dr com.apple.quarantine Firefox.app`",
|
||||
]
|
||||
if short:
|
||||
parts += ["", f"Built from invisible_firefox @{short}."]
|
||||
return "\n".join(parts)
|
||||
|
||||
|
||||
def main() -> int:
|
||||
ap = argparse.ArgumentParser()
|
||||
ap.add_argument("--tag", required=True, help="release tag, e.g. firefox-10")
|
||||
ap.add_argument("--current", required=True, help="invisible_firefox SHA this build was built from")
|
||||
ap.add_argument("--prev-sha", default="", help="previous release's source SHA (omit for none)")
|
||||
ap.add_argument("--source-repo", default="feder-cr/invisible_firefox")
|
||||
args = ap.parse_args()
|
||||
|
||||
token = os.environ.get("GITHUB_TOKEN") or os.environ.get("GH_TOKEN")
|
||||
bullets = changelog_bullets(args.source_repo, args.prev_sha, args.current, token)
|
||||
sys.stdout.write(build_body(args.tag, args.current, bullets))
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
@@ -75,10 +75,23 @@ class Network:
|
||||
self.nodes = _topsort(nodes)
|
||||
self.by_name = {n.name: n for n in self.nodes}
|
||||
|
||||
def sample(self, rng: random.Random) -> Dict[str, Any]:
|
||||
def sample(
|
||||
self,
|
||||
rng: random.Random,
|
||||
evidence: Optional[Dict[str, Any]] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Sample the network. ``evidence`` fixes named nodes BEFORE their children
|
||||
sample, so the children RE-CONDITION on the fixed value (not relabel after).
|
||||
Used to pin ``gpu_class`` to the validated WebGL persona's class so the whole
|
||||
bundle (cores/screen/fonts) stays coherent with the GPU we expose. Earlier
|
||||
nodes still sample (RNG stream preserved → per-seed determinism)."""
|
||||
evidence = evidence or {}
|
||||
context: Dict[str, Any] = {}
|
||||
for node in self.nodes:
|
||||
context[node.name] = node.sample(context, rng)
|
||||
if node.name in evidence:
|
||||
context[node.name] = evidence[node.name]
|
||||
else:
|
||||
context[node.name] = node.sample(context, rng)
|
||||
return context
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ oscpu, webdriver=false, maxTouchPoints=0) is locked by the compiled build.
|
||||
|
||||
Graph:
|
||||
|
||||
gpu (root, 444 real Windows ANGLE renderers)
|
||||
gpu (root, 474 real Windows ANGLE renderers)
|
||||
│
|
||||
└─> gpu_class (deterministic classifier, 6 classes)
|
||||
├─> hw_concurrency (CPT per class)
|
||||
@@ -28,7 +28,7 @@ Sampling is deterministic per stealth_seed via a private random.Random.
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
from typing import Any, Dict
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from ._network import Network, Node
|
||||
|
||||
@@ -110,6 +110,16 @@ def classify_gpu(gpu_value: Dict[str, str]) -> str:
|
||||
|
||||
if re.search(r"Intel.*HD Graphics (3000|4000|2500)", r):
|
||||
return "integrated_old"
|
||||
# Discrete Intel Arc DESKTOP/dGPU cards (A-series / B-series, e.g. A750,
|
||||
# A770, B580) are discrete GPUs (~RTX 3060 tier for A7xx), NOT the
|
||||
# integrated "Arc 130T/140T/Graphics" iGPUs in Core Ultra chips. Route the
|
||||
# discrete SKUs to a coherent discrete-GPU class so the conditioned bundle
|
||||
# (cores, screen, storage) matches a real discrete-GPU machine; A3xx are
|
||||
# entry discrete -> low_end, A5xx/A7xx/Bxxx -> mid_range. Bare "Arc 1x0(T/V)"
|
||||
# integrated names do NOT match and fall through to integrated_modern below.
|
||||
m = re.search(r"Intel.*\bArc(?:\(TM\))?\s+([AB])(\d)\d\d\b", r)
|
||||
if m:
|
||||
return "low_end" if m.group(2) == "3" else "mid_range"
|
||||
if re.search(
|
||||
r"Intel.*(HD Graphics (4[56]|5\d\d|6\d\d)|UHD Graphics|Graphics Family|Iris|Arc)",
|
||||
r,
|
||||
@@ -328,8 +338,15 @@ class Forge:
|
||||
self.seed = int(seed)
|
||||
self._rng = random.Random(self.seed)
|
||||
|
||||
def sample(self) -> Dict[str, Any]:
|
||||
bundle = _NETWORK.sample(self._rng)
|
||||
def sample(self, fixed_gpu_class: Optional[str] = None) -> Dict[str, Any]:
|
||||
# fixed_gpu_class pins gpu_class so the WHOLE bundle (cores/screen/fonts) is
|
||||
# drawn coherently for the WebGL persona's class we expose on Windows/mac.
|
||||
# The default (no fix) path calls _NETWORK.sample(rng) with one arg so existing
|
||||
# monkeypatches/tests keep working.
|
||||
if fixed_gpu_class:
|
||||
bundle = _NETWORK.sample(self._rng, evidence={"gpu_class": fixed_gpu_class})
|
||||
else:
|
||||
bundle = _NETWORK.sample(self._rng)
|
||||
gpu = bundle["gpu"]
|
||||
screen = bundle["screen"]
|
||||
audio = bundle["audio"]
|
||||
@@ -339,7 +356,7 @@ class Forge:
|
||||
"stealth_seed": self.seed,
|
||||
# Locked identity
|
||||
**_LOCKED,
|
||||
# GPU (coherent pair from 444 pool)
|
||||
# GPU (coherent pair from 474 pool)
|
||||
"webgl_renderer": gpu["renderer"],
|
||||
"webgl_vendor": gpu["vendor"],
|
||||
"gpu_class": bundle["gpu_class"],
|
||||
@@ -392,6 +409,6 @@ class Forge:
|
||||
}
|
||||
|
||||
|
||||
def sample(seed: int) -> Dict[str, Any]:
|
||||
"""Convenience: `Forge(seed).sample()`."""
|
||||
return Forge(seed).sample()
|
||||
def sample(seed: int, fixed_gpu_class: Optional[str] = None) -> Dict[str, Any]:
|
||||
"""Convenience: `Forge(seed).sample(fixed_gpu_class)`."""
|
||||
return Forge(seed).sample(fixed_gpu_class)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"_meta": "audio (rate/latency/channels) given gpu_class",
|
||||
"_meta": "audio (rate/latency/channels) given gpu_class. NOTE 2026-06-14: maxChannelCount reflects the OS DEFAULT OUTPUT DEVICE (stereo for the vast majority of users), NOT the GPU — so channels=2 dominates every class (~78-92%) with only a small 6/8 surround tail. The previous tables emitted 45-100% surround on mid/high/workstation, which is unrealistic and lifted FP Pro tampering_ml (surround on a typical consumer profile reads as a coherence anomaly). Rate/latency tuples are unchanged.",
|
||||
"table": {
|
||||
"integrated_old": [
|
||||
{
|
||||
@@ -26,7 +26,7 @@
|
||||
"latency": 30,
|
||||
"channels": 2
|
||||
},
|
||||
"prob": 0.6
|
||||
"prob": 0.62
|
||||
},
|
||||
{
|
||||
"value": {
|
||||
@@ -34,7 +34,7 @@
|
||||
"latency": 40,
|
||||
"channels": 2
|
||||
},
|
||||
"prob": 0.25
|
||||
"prob": 0.3
|
||||
},
|
||||
{
|
||||
"value": {
|
||||
@@ -42,7 +42,7 @@
|
||||
"latency": 25,
|
||||
"channels": 6
|
||||
},
|
||||
"prob": 0.15
|
||||
"prob": 0.08
|
||||
}
|
||||
],
|
||||
"low_end": [
|
||||
@@ -52,7 +52,7 @@
|
||||
"latency": 40,
|
||||
"channels": 2
|
||||
},
|
||||
"prob": 0.55
|
||||
"prob": 0.6
|
||||
},
|
||||
{
|
||||
"value": {
|
||||
@@ -60,7 +60,7 @@
|
||||
"latency": 50,
|
||||
"channels": 2
|
||||
},
|
||||
"prob": 0.3
|
||||
"prob": 0.32
|
||||
},
|
||||
{
|
||||
"value": {
|
||||
@@ -68,7 +68,7 @@
|
||||
"latency": 30,
|
||||
"channels": 6
|
||||
},
|
||||
"prob": 0.15
|
||||
"prob": 0.08
|
||||
}
|
||||
],
|
||||
"mid_range": [
|
||||
@@ -78,31 +78,39 @@
|
||||
"latency": 25,
|
||||
"channels": 2
|
||||
},
|
||||
"prob": 0.45
|
||||
"prob": 0.5
|
||||
},
|
||||
{
|
||||
"value": {
|
||||
"rate": 48000,
|
||||
"latency": 20,
|
||||
"channels": 6
|
||||
"channels": 2
|
||||
},
|
||||
"prob": 0.3
|
||||
},
|
||||
{
|
||||
"value": {
|
||||
"rate": 48000,
|
||||
"latency": 20,
|
||||
"channels": 8
|
||||
},
|
||||
"prob": 0.15
|
||||
},
|
||||
{
|
||||
"value": {
|
||||
"rate": 44100,
|
||||
"latency": 30,
|
||||
"channels": 2
|
||||
},
|
||||
"prob": 0.1
|
||||
"prob": 0.12
|
||||
},
|
||||
{
|
||||
"value": {
|
||||
"rate": 48000,
|
||||
"latency": 20,
|
||||
"channels": 6
|
||||
},
|
||||
"prob": 0.06
|
||||
},
|
||||
{
|
||||
"value": {
|
||||
"rate": 48000,
|
||||
"latency": 20,
|
||||
"channels": 8
|
||||
},
|
||||
"prob": 0.02
|
||||
}
|
||||
],
|
||||
"high_end": [
|
||||
@@ -110,51 +118,75 @@
|
||||
"value": {
|
||||
"rate": 48000,
|
||||
"latency": 15,
|
||||
"channels": 6
|
||||
"channels": 2
|
||||
},
|
||||
"prob": 0.3
|
||||
"prob": 0.6
|
||||
},
|
||||
{
|
||||
"value": {
|
||||
"rate": 48000,
|
||||
"latency": 15,
|
||||
"channels": 8
|
||||
},
|
||||
"prob": 0.3
|
||||
},
|
||||
{
|
||||
"value": {
|
||||
"rate": 48000,
|
||||
"rate": 96000,
|
||||
"latency": 15,
|
||||
"channels": 2
|
||||
},
|
||||
"prob": 0.2
|
||||
"prob": 0.18
|
||||
},
|
||||
{
|
||||
"value": {
|
||||
"rate": 96000,
|
||||
"rate": 48000,
|
||||
"latency": 15,
|
||||
"channels": 6
|
||||
},
|
||||
"prob": 0.1
|
||||
},
|
||||
{
|
||||
"value": {
|
||||
"rate": 48000,
|
||||
"latency": 15,
|
||||
"channels": 8
|
||||
},
|
||||
"prob": 0.05
|
||||
},
|
||||
{
|
||||
"value": {
|
||||
"rate": 96000,
|
||||
"latency": 15,
|
||||
"channels": 6
|
||||
},
|
||||
"prob": 0.05
|
||||
},
|
||||
{
|
||||
"value": {
|
||||
"rate": 96000,
|
||||
"latency": 15,
|
||||
"channels": 8
|
||||
},
|
||||
"prob": 0.1
|
||||
"prob": 0.02
|
||||
}
|
||||
],
|
||||
"workstation": [
|
||||
{
|
||||
"value": {
|
||||
"rate": 48000,
|
||||
"latency": 10,
|
||||
"channels": 2
|
||||
},
|
||||
"prob": 0.45
|
||||
},
|
||||
{
|
||||
"value": {
|
||||
"rate": 96000,
|
||||
"latency": 10,
|
||||
"channels": 2
|
||||
},
|
||||
"prob": 0.2
|
||||
},
|
||||
{
|
||||
"value": {
|
||||
"rate": 48000,
|
||||
"latency": 10,
|
||||
"channels": 8
|
||||
},
|
||||
"prob": 0.25
|
||||
"prob": 0.12
|
||||
},
|
||||
{
|
||||
"value": {
|
||||
@@ -162,7 +194,7 @@
|
||||
"latency": 10,
|
||||
"channels": 8
|
||||
},
|
||||
"prob": 0.3
|
||||
"prob": 0.1
|
||||
},
|
||||
{
|
||||
"value": {
|
||||
@@ -170,7 +202,7 @@
|
||||
"latency": 10,
|
||||
"channels": 6
|
||||
},
|
||||
"prob": 0.2
|
||||
"prob": 0.08
|
||||
},
|
||||
{
|
||||
"value": {
|
||||
@@ -178,16 +210,8 @@
|
||||
"latency": 10,
|
||||
"channels": 8
|
||||
},
|
||||
"prob": 0.15
|
||||
},
|
||||
{
|
||||
"value": {
|
||||
"rate": 48000,
|
||||
"latency": 15,
|
||||
"channels": 6
|
||||
},
|
||||
"prob": 0.1
|
||||
"prob": 0.05
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -36,29 +36,21 @@
|
||||
},
|
||||
{
|
||||
"value": 8,
|
||||
"prob": 0.3
|
||||
},
|
||||
{
|
||||
"value": 12,
|
||||
"prob": 0.05
|
||||
"prob": 0.35
|
||||
}
|
||||
],
|
||||
"[\"integrated_modern\", \"budget\"]": [
|
||||
{
|
||||
"value": 4,
|
||||
"prob": 0.55
|
||||
},
|
||||
{
|
||||
"value": 6,
|
||||
"prob": 0.2
|
||||
"prob": 0.45
|
||||
},
|
||||
{
|
||||
"value": 8,
|
||||
"prob": 0.2
|
||||
"prob": 0.4
|
||||
},
|
||||
{
|
||||
"value": 12,
|
||||
"prob": 0.05
|
||||
"prob": 0.15
|
||||
}
|
||||
],
|
||||
"[\"integrated_modern\", \"standard\"]": [
|
||||
@@ -178,11 +170,7 @@
|
||||
},
|
||||
{
|
||||
"value": 12,
|
||||
"prob": 0.1
|
||||
},
|
||||
{
|
||||
"value": 16,
|
||||
"prob": 0.05
|
||||
"prob": 0.15
|
||||
}
|
||||
],
|
||||
"[\"mid_range\", \"standard\"]": [
|
||||
|
||||
@@ -108,16 +108,6 @@
|
||||
}
|
||||
],
|
||||
"[\"integrated_modern\", \"budget\"]": [
|
||||
{
|
||||
"value": {
|
||||
"w": 1366,
|
||||
"h": 768,
|
||||
"aw": 1366,
|
||||
"ah": 728,
|
||||
"dpr": 1.0
|
||||
},
|
||||
"prob": 0.3
|
||||
},
|
||||
{
|
||||
"value": {
|
||||
"w": 1920,
|
||||
@@ -126,14 +116,24 @@
|
||||
"ah": 1040,
|
||||
"dpr": 1.0
|
||||
},
|
||||
"prob": 0.65
|
||||
"prob": 0.8
|
||||
},
|
||||
{
|
||||
"value": {
|
||||
"w": 1600,
|
||||
"h": 900,
|
||||
"aw": 1600,
|
||||
"ah": 860,
|
||||
"w": 2560,
|
||||
"h": 1440,
|
||||
"aw": 2560,
|
||||
"ah": 1400,
|
||||
"dpr": 1.0
|
||||
},
|
||||
"prob": 0.15
|
||||
},
|
||||
{
|
||||
"value": {
|
||||
"w": 1920,
|
||||
"h": 1200,
|
||||
"aw": 1920,
|
||||
"ah": 1160,
|
||||
"dpr": 1.0
|
||||
},
|
||||
"prob": 0.05
|
||||
@@ -758,4 +758,4 @@
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -48,29 +48,21 @@
|
||||
},
|
||||
{
|
||||
"value": 500000,
|
||||
"prob": 0.3
|
||||
},
|
||||
{
|
||||
"value": 1000000,
|
||||
"prob": 0.05
|
||||
"prob": 0.35
|
||||
}
|
||||
],
|
||||
"[\"integrated_modern\", \"budget\"]": [
|
||||
{
|
||||
"value": 64000,
|
||||
"prob": 0.2
|
||||
},
|
||||
{
|
||||
"value": 128000,
|
||||
"prob": 0.3
|
||||
},
|
||||
{
|
||||
"value": 256000,
|
||||
"prob": 0.3
|
||||
},
|
||||
{
|
||||
"value": 500000,
|
||||
"prob": 0.2
|
||||
"prob": 0.45
|
||||
},
|
||||
{
|
||||
"value": 1000000,
|
||||
"prob": 0.25
|
||||
}
|
||||
],
|
||||
"[\"integrated_modern\", \"standard\"]": [
|
||||
@@ -302,4 +294,4 @@
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -178,7 +178,11 @@ def _apply_pins_to_raw(raw: Dict[str, Any], pin: Dict[str, Any]) -> Dict[str, An
|
||||
return out
|
||||
|
||||
|
||||
def generate_profile(seed: int, pin: Optional[Dict[str, Any]] = None) -> Profile:
|
||||
def generate_profile(
|
||||
seed: int,
|
||||
pin: Optional[Dict[str, Any]] = None,
|
||||
fixed_gpu_class: Optional[str] = None,
|
||||
) -> Profile:
|
||||
"""Return a deterministic Profile for the given integer seed.
|
||||
|
||||
pin: optional dict of dotted-path keys (e.g. "screen.width", "gpu.renderer")
|
||||
@@ -215,7 +219,11 @@ def generate_profile(seed: int, pin: Optional[Dict[str, Any]] = None) -> Profile
|
||||
for key in pin:
|
||||
_validate_pin_key(key)
|
||||
|
||||
raw = _sample_raw(int(seed))
|
||||
# fixed_gpu_class re-conditions the whole bundle on a chosen class (used so the
|
||||
# bundle stays coherent with the validated WebGL persona we expose on Windows/mac).
|
||||
# An explicit gpu.class_tier pin still wins.
|
||||
eff_class = (pin or {}).get("gpu.class_tier") or fixed_gpu_class
|
||||
raw = _sample_raw(int(seed), fixed_gpu_class=eff_class)
|
||||
if pin:
|
||||
raw = _apply_pins_to_raw(raw, pin)
|
||||
|
||||
|
||||
@@ -0,0 +1,163 @@
|
||||
"""Empirically-calibrated WebGL GPU personas for Windows ANGLE D3D11.
|
||||
|
||||
We expose a FALSE GPU (this is a multi-user tool — never leak each host's real GPU),
|
||||
chosen deterministically per seed from a small set of renderer-string "buckets" that
|
||||
Firefox's SanitizeRenderer emits and that FP Pro's tampering_ml scores as CLEAN.
|
||||
|
||||
## What actually gates a persona (calibrated 2026-06-14, supersedes the old theory)
|
||||
|
||||
The blocker is NOT anti_detect and NOT a "render-vs-renderer" check. It is FP Pro's
|
||||
**tampering_ml** (gate <=0.5), a holistic ML coherence score. We reverse-engineered its
|
||||
GPU sensitivity with single-variable A/Bs on demo.fingerprint.com (deterministic per
|
||||
(seed, renderer, IP); tools in tests/_gpu_isolate.py / _gpu_landscape.py / _gpu_sweep.py /
|
||||
_gpu_sweep2.py / _gpu_persona_pure.py). Findings:
|
||||
|
||||
1. tampering_ml = f(renderer STRING, seed baseline = canvas/audio). The renderer string
|
||||
carries a STABLE per-bucket penalty; the seed sets the floor it adds to.
|
||||
2. gpu_class is IRRELEVANT to tampering_ml (nv_980 scored identically on mid_range /
|
||||
high_end / premium / workstation). So pairing a fake GPU with a "matching" hardware
|
||||
tier does NOT help the score (we still set a coherent class — see gpu_class below —
|
||||
for OTHER detectors that cross-check cores/screen, just not for this).
|
||||
3. It is NOT render-consistency: a cross-vendor AMD string is CLEAN on our Intel-Arc
|
||||
host. So the real silicon's pixels are not the dominant signal; falsifying to a
|
||||
different vendor works — IF the string is one FP Pro scores low.
|
||||
|
||||
Sweep over all 10 Windows SanitizeRenderer buckets x 10 seeds (clean = tml<=0.5 AND not
|
||||
anti_detect), on our Intel Arc A750 host:
|
||||
- amd_r9 (Radeon R9 200 Series) ...... 10/10 clean, max tml 0.346 <- SHIP
|
||||
- intel_arc (Arc A750) ............... 10/10 clean, max tml 0.377 <- SHIP
|
||||
- amd_hd5850 ......................... 9/10 (fails the hardest seed)
|
||||
- amd_hd3200 / intel_hd .............. 6/10 (seed-dependent, risky)
|
||||
- intel_hd400 ........................ 3/10
|
||||
- ALL NVIDIA (8800/480/980) .......... 0/10 (penalized everywhere, ~0.7-0.99)
|
||||
- intel_945 (ancient Intel) .......... 0/10
|
||||
So only TWO buckets are robustly clean across profiles. We ship exactly those, weighted
|
||||
to real-world prevalence ("Radeon R9 200 Series" is the bucket for ALL modern AMD = a big
|
||||
real slice; "Arc A750" covers Intel discrete = rarer). Cross-vendor, so the fleet is not a
|
||||
single-GPU cluster. More names require lowering the seed floor first (see CAVEAT 2).
|
||||
|
||||
## ⚠️ CAVEATS
|
||||
1. HOST-INDEPENDENCE NOT PROVEN. Everything above was measured on ONE host (Intel Arc
|
||||
A750). The host's real render is embedded in the seed baseline, so the clean-bucket set
|
||||
*might* be host-dependent (on a real NVIDIA host, maybe nv_980 is clean and amd_r9 is
|
||||
not). This MUST be validated on a non-Arc machine before trusting it fleet-wide; if it
|
||||
turns out host-dependent, add a pre-launch host-GPU-class probe and pick a bucket per
|
||||
detected class. Until then: safe for Arc hosts (incl. the dev's), unvalidated elsewhere.
|
||||
2. DIVERSITY CEILING = 2 names because "hard" seeds (high canvas/audio floor, e.g. seed 4
|
||||
~0.35) only stay clean on the 2 best buckets. Lowering that floor (an fpforge CPT fix —
|
||||
candidate: 8-channel audio + 1TB storage emitted on a mid_range profile) would unlock
|
||||
amd_hd5850 / intel_hd for more seeds => up to ~5 names. Follow-up, not done yet.
|
||||
|
||||
## Load-bearing format requirements (unchanged, still true)
|
||||
- renderer MUST end ", D3D11)" (full ANGLE wire format) or SanitizeRenderer returns
|
||||
"Generic Renderer" (a tell). The C++ passes our string through SanitizeRenderer, which
|
||||
buckets "AMD Radeon R9 200 Series" -> "Radeon R9 200 Series" and "Arc A750" -> itself.
|
||||
- the forced extension list MUST be the EXACT NATIVE ORDER getSupportedExtensions returns.
|
||||
The set+order is fixed by Firefox+ANGLE on D3D11 FL11_0 (VENDOR-INDEPENDENT — verified
|
||||
via 20-agent source study), so ONE list is correct for both personas. A reorder is caught
|
||||
(tampering_ml 0.34 -> 0.84). The lists below are the verbatim native-order Arc capture.
|
||||
|
||||
Calibration data + sweep tooling live in the local workbench (not shipped).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
# Vendor-independent ext lists (native order, Arc host capture). Identical for every persona
|
||||
# because the set+order is fixed by Firefox+ANGLE on D3D11 FL11_0, not by the GPU vendor.
|
||||
_EXT1 = (
|
||||
"ANGLE_instanced_arrays,EXT_blend_minmax,EXT_color_buffer_half_float,EXT_float_blend,"
|
||||
"EXT_frag_depth,EXT_shader_texture_lod,EXT_sRGB,EXT_texture_compression_bptc,"
|
||||
"EXT_texture_compression_rgtc,EXT_texture_filter_anisotropic,OES_element_index_uint,"
|
||||
"OES_fbo_render_mipmap,OES_standard_derivatives,OES_texture_float,OES_texture_float_linear,"
|
||||
"OES_texture_half_float,OES_texture_half_float_linear,OES_vertex_array_object,"
|
||||
"WEBGL_color_buffer_float,WEBGL_compressed_texture_s3tc,WEBGL_compressed_texture_s3tc_srgb,"
|
||||
"WEBGL_debug_renderer_info,WEBGL_debug_shaders,WEBGL_depth_texture,WEBGL_draw_buffers,"
|
||||
"WEBGL_lose_context,WEBGL_provoking_vertex"
|
||||
)
|
||||
_EXT2 = (
|
||||
"EXT_color_buffer_float,EXT_float_blend,EXT_texture_compression_bptc,"
|
||||
"EXT_texture_compression_rgtc,EXT_texture_filter_anisotropic,OES_draw_buffers_indexed,"
|
||||
"OES_texture_float_linear,OVR_multiview2,WEBGL_compressed_texture_s3tc,"
|
||||
"WEBGL_compressed_texture_s3tc_srgb,WEBGL_debug_renderer_info,WEBGL_debug_shaders,"
|
||||
"WEBGL_lose_context,WEBGL_provoking_vertex"
|
||||
)
|
||||
|
||||
|
||||
def _p(key, renderer, vendor, gpu_class, weight):
|
||||
return {"key": key, "renderer": renderer, "vendor": vendor,
|
||||
"gpu_class": gpu_class, "weight": weight, "ext1": _EXT1, "ext2": _EXT2}
|
||||
|
||||
|
||||
# Only the two robustly-clean Windows buckets (calibration sweep 2026-06-14). Both discrete,
|
||||
# so gpu_class=mid_range keeps cores/screen coherent with the declared GPU for OTHER detectors
|
||||
# (gpu_class does NOT affect tampering_ml). Weights ~ real-world prevalence of the BUCKET:
|
||||
# "Radeon R9 200 Series" represents ALL modern AMD (large real slice); "Arc A750" = Intel
|
||||
# discrete (rarer). Cross-vendor => the fleet is not a single-GPU cluster.
|
||||
_PERSONAS: List[Dict] = [
|
||||
_p("amd_radeon_r9", "ANGLE (AMD, AMD Radeon R9 200 Series Direct3D11 vs_5_0 ps_5_0, D3D11)",
|
||||
"Google Inc. (AMD)", "mid_range", 70), # -> bucket "Radeon R9 200 Series"; tml 0.03-0.35
|
||||
_p("intel_arc_a750", "ANGLE (Intel, Intel(R) Arc(TM) A750 Graphics Direct3D11 vs_5_0 ps_5_0, D3D11)",
|
||||
"Google Inc. (Intel)", "mid_range", 30), # -> bucket "Intel(R) Arc(TM) A750 Graphics"; tml 0.02-0.38
|
||||
]
|
||||
|
||||
_TOTAL_W = sum(p["weight"] for p in _PERSONAS)
|
||||
|
||||
# ENABLED: we falsify the GPU on Windows/mac. Validated clean on an Intel Arc host (see the
|
||||
# HOST-INDEPENDENCE caveat in the module docstring — unvalidated on non-Arc hosts). On Linux
|
||||
# select_persona returns None: there prefs.py spoofs profile.gpu.renderer directly.
|
||||
_ENABLED = True
|
||||
|
||||
|
||||
def select_persona(seed: int) -> Optional[Dict]:
|
||||
"""Deterministic, prevalence-weighted persona for this seed (None on Linux).
|
||||
|
||||
Same seed -> same persona (fppro_consistency: identity stable per seed). Different seeds
|
||||
spread across the persona mix by weight. None on Linux (the sampled profile.gpu.renderer
|
||||
is spoofed directly there).
|
||||
"""
|
||||
if not _ENABLED or sys.platform.startswith("linux") or not _PERSONAS:
|
||||
return None
|
||||
h = (int(seed) * 2654435761) % _TOTAL_W
|
||||
cum = 0
|
||||
for p in _PERSONAS:
|
||||
cum += p["weight"]
|
||||
if h < cum:
|
||||
return p
|
||||
return _PERSONAS[-1]
|
||||
|
||||
|
||||
def forced_gpu_class(seed: int) -> Optional[str]:
|
||||
"""The gpu_class the forge conditions the WHOLE bundle on (== the selected persona's class),
|
||||
so cores/screen/fonts stay coherent with the GPU we expose. Does NOT affect FP Pro
|
||||
tampering_ml (proven) but matters for detectors that cross-check hardware tier. None on Linux."""
|
||||
p = select_persona(seed)
|
||||
return p["gpu_class"] if p else None
|
||||
|
||||
|
||||
# ── Render-noise seed pool (canvas/WebGL gamma) ──────────────────────────────
|
||||
# zoom.stealth.fpp.hw_seed drives the per-seed canvas2D + WebGL readPixels gamma
|
||||
# LUT in C++. The render-image HASH it produces is the DOMINANT FP Pro tampering_ml
|
||||
# driver (proven 2026-06-14: holding a fixed profile and varying ONLY hw_seed moved
|
||||
# tml 0.25->0.75). The monotonic gamma preserves the GPU's render structure, so some
|
||||
# hw_seeds yield a "suspicious" render hash. We therefore DECOUPLE the render-noise
|
||||
# seed from the identity seed and pick from a calibrated pool of hw_seeds that score
|
||||
# CLEAN even on the hardest attribute profile (sweep 1..30 vs the worst seed: these
|
||||
# 14 all gave tml<=0.285). Diversity is preserved (14 distinct render hashes spread
|
||||
# across the population — real GPUs cluster to few canvas hashes anyway); identity
|
||||
# stays per-seed (the rest of the fingerprint differs). Same seed -> same render seed
|
||||
# (fppro_consistency holds).
|
||||
# CAVEAT: the render hash = f(host GPU render, gamma), so this pool is calibrated on
|
||||
# the Intel-Arc host. On other GPUs the clean set may differ (host-independence open,
|
||||
# same as the personas) — Option B (substitution = GPU-independent render hash) would
|
||||
# remove that dependency. Validate per-host or move to B before trusting fleet-wide.
|
||||
CLEAN_RENDER_SEEDS = [19, 10, 28, 24, 23, 16, 11, 30, 17, 22, 3, 9, 12, 26]
|
||||
|
||||
|
||||
def render_noise_seed(seed: int) -> int:
|
||||
"""Deterministic clean render-noise seed for hw_seed (decoupled from identity).
|
||||
|
||||
Maps the identity seed into CLEAN_RENDER_SEEDS so every session gets a calibrated
|
||||
clean canvas/WebGL render hash while keeping per-user diversity. Stable per seed."""
|
||||
return CLEAN_RENDER_SEEDS[(int(seed) * 2654435761) % len(CLEAN_RENDER_SEEDS)]
|
||||
@@ -9,8 +9,9 @@ from typing import Any, Dict, Optional, Union
|
||||
from playwright.async_api import Browser, BrowserContext, Playwright, async_playwright
|
||||
|
||||
from ._fpforge import Profile, generate_profile
|
||||
from ._webgl_personas import forced_gpu_class
|
||||
from ._geo import prepare_session_geo
|
||||
from ._headless import make_virtual_display
|
||||
from ._headless import cloak_prefs, make_virtual_display
|
||||
from ._proxy import configure_proxy as _configure_proxy_shared
|
||||
from .download import ensure_binary
|
||||
from .launcher import _CHROME_H, _CHROME_W, _TASKBAR_H, _tz_env
|
||||
@@ -68,7 +69,9 @@ class InvisiblePlaywright:
|
||||
self._profile_dir: Optional[Path] = Path(profile_dir) if profile_dir else None
|
||||
# reCAPTCHA pre-seed gated server-side; respect persistent profile.
|
||||
self._prep_recaptcha = bool(prep_recaptcha) and self._profile_dir is None
|
||||
self._profile: Profile = generate_profile(self.seed, pin=self._pin)
|
||||
self._profile: Profile = generate_profile(
|
||||
self.seed, pin=self._pin, fixed_gpu_class=forced_gpu_class(self.seed)
|
||||
)
|
||||
self._pw: Optional[Playwright] = None
|
||||
self._browser: Optional[Browser] = None
|
||||
self._persistent_context: Optional[BrowserContext] = None
|
||||
@@ -95,10 +98,19 @@ class InvisiblePlaywright:
|
||||
extra_prefs=self._extra_prefs,
|
||||
virtual_display=bool(self._headless and _sys.platform == "win32"),
|
||||
)
|
||||
prefs["invisible_playwright.humanize"] = bool(self._humanize)
|
||||
# Windows & macOS hide the headless window via the binary's own cloak
|
||||
# (DWMWA_CLOAK / NSWindow alpha) — inject the pref so the patched build
|
||||
# cloaks its chrome windows. setdefault: an explicit user override wins.
|
||||
# (Mirrors launcher._build_prefs; the sync path always did this, async
|
||||
# didn't — so async headless=True never cloaked AND crashed below.)
|
||||
if self._headless and _sys.platform in ("win32", "darwin"):
|
||||
for _k, _v in cloak_prefs().items():
|
||||
prefs.setdefault(_k, _v)
|
||||
# stealthfox.* is the namespace the binary's Juggler reads (see launcher.py note).
|
||||
prefs["stealthfox.humanize"] = bool(self._humanize)
|
||||
if self._humanize:
|
||||
cap = 1.5 if self._humanize is True else float(self._humanize)
|
||||
prefs["invisible_playwright.humanize.maxTime"] = str(cap)
|
||||
prefs["stealthfox.humanize.maxTime"] = str(cap)
|
||||
playwright_proxy = _configure_proxy_shared(self._proxy, prefs)
|
||||
pw_headless = self._resolve_headless()
|
||||
env = self._build_env()
|
||||
@@ -223,8 +235,13 @@ class InvisiblePlaywright:
|
||||
if not self._headless:
|
||||
return False
|
||||
vd = make_virtual_display()
|
||||
vd.start()
|
||||
self._virtual_display = vd
|
||||
# Linux: Xvfb to start. Windows/macOS: make_virtual_display() returns
|
||||
# None (the binary self-cloaks via cloak_prefs injected in __aenter__),
|
||||
# so there is nothing to start — guarding the None was the missing piece
|
||||
# that made async headless=True crash with AttributeError on Windows.
|
||||
if vd is not None:
|
||||
vd.start()
|
||||
self._virtual_display = vd
|
||||
return False
|
||||
|
||||
|
||||
|
||||
@@ -38,6 +38,7 @@ import secrets
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
|
||||
from ._fpforge import generate_profile
|
||||
from ._webgl_personas import forced_gpu_class
|
||||
from .prefs import translate_profile_to_prefs
|
||||
|
||||
|
||||
@@ -83,7 +84,7 @@ def get_default_stealth_prefs(
|
||||
``playwright.firefox.launch()`` or ``launch_persistent_context()``.
|
||||
"""
|
||||
resolved_seed = int(seed) if seed is not None else secrets.randbits(31)
|
||||
profile = generate_profile(resolved_seed, pin=pin)
|
||||
profile = generate_profile(resolved_seed, pin=pin, fixed_gpu_class=forced_gpu_class(resolved_seed))
|
||||
prefs = translate_profile_to_prefs(
|
||||
profile,
|
||||
locale=locale,
|
||||
@@ -91,10 +92,11 @@ def get_default_stealth_prefs(
|
||||
extra_prefs=extra_prefs,
|
||||
virtual_display=virtual_display,
|
||||
)
|
||||
prefs["invisible_playwright.humanize"] = bool(humanize)
|
||||
# stealthfox.* is the namespace the binary's Juggler reads (see launcher.py note).
|
||||
prefs["stealthfox.humanize"] = bool(humanize)
|
||||
if humanize:
|
||||
max_seconds = float(humanize) if not isinstance(humanize, bool) else 1.5
|
||||
prefs["invisible_playwright.humanize.maxTime"] = str(max_seconds)
|
||||
prefs["stealthfox.humanize.maxTime"] = str(max_seconds)
|
||||
return prefs
|
||||
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ bugfixes don't force a multi-hour Firefox rebuild.
|
||||
from __future__ import annotations
|
||||
|
||||
# Bump this when a new patched Firefox build is released on GitHub.
|
||||
BINARY_VERSION: str = "firefox-9"
|
||||
BINARY_VERSION: str = "firefox-10"
|
||||
|
||||
# Releases known to be broken — ensure_binary() refuses them with a clear error
|
||||
# instead of handing the user an unusable binary. firefox-8 was packaged without
|
||||
|
||||
@@ -8,6 +8,7 @@ from typing import Any, Dict, Optional, Union
|
||||
from playwright.sync_api import Browser, BrowserContext, Playwright, sync_playwright
|
||||
|
||||
from ._fpforge import Profile, generate_profile
|
||||
from ._webgl_personas import forced_gpu_class
|
||||
from ._geo import prepare_session_geo
|
||||
from ._headless import cloak_prefs, make_virtual_display
|
||||
from ._proxy import configure_proxy as _configure_proxy_shared
|
||||
@@ -178,7 +179,9 @@ class InvisiblePlaywright:
|
||||
# persistent profile_dir is in use, respect its existing cookies
|
||||
# and DON'T enable pre-seed (the profile owns its own state).
|
||||
self._prep_recaptcha = bool(prep_recaptcha) and self._profile_dir is None
|
||||
self._profile: Profile = generate_profile(self.seed, pin=self._pin)
|
||||
self._profile: Profile = generate_profile(
|
||||
self.seed, pin=self._pin, fixed_gpu_class=forced_gpu_class(self.seed)
|
||||
)
|
||||
self._pw: Optional[Playwright] = None
|
||||
self._browser: Optional[Browser] = None
|
||||
self._persistent_context: Optional[BrowserContext] = None
|
||||
@@ -346,9 +349,13 @@ class InvisiblePlaywright:
|
||||
if self._headless and _sys.platform in ("win32", "darwin"):
|
||||
for _k, _v in cloak_prefs().items():
|
||||
prefs.setdefault(_k, _v)
|
||||
prefs["invisible_playwright.humanize"] = bool(self._humanize)
|
||||
# Pref namespace MUST be stealthfox.* — that's what the binary's Juggler
|
||||
# reads (PageHandler.js gates the Bezier mouse path on `stealthfox.humanize`).
|
||||
# The old `invisible_playwright.*` name was a dead no-op (nothing read it), so
|
||||
# humanize silently never fired and every click teleported the cursor.
|
||||
prefs["stealthfox.humanize"] = bool(self._humanize)
|
||||
if self._humanize:
|
||||
prefs["invisible_playwright.humanize.maxTime"] = str(self._humanize_max_seconds())
|
||||
prefs["stealthfox.humanize.maxTime"] = str(self._humanize_max_seconds())
|
||||
return prefs
|
||||
|
||||
def _build_env(self) -> Dict[str, str]:
|
||||
|
||||
@@ -21,6 +21,7 @@ import sys
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from ._fpforge import Profile
|
||||
from ._webgl_personas import render_noise_seed, select_persona
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
@@ -231,6 +232,17 @@ _BASELINE: Dict[str, Any] = {
|
||||
"network.proxy.socks_remote_dns": True,
|
||||
"network.proxy.failover_direct": False,
|
||||
|
||||
# TLS ClientHello fingerprint — match stock Firefox byte-for-byte.
|
||||
# The Playwright/Juggler Firefox build this binary derives from re-enables
|
||||
# cipher 0xC009 (TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA), which retail Firefox
|
||||
# 150 does NOT offer. That extra (17th) cipher shifts our JA3/JA4 away from
|
||||
# any real Firefox (ja4 t13d1717h2 vs stock t13d1617h2). A ClientHello that
|
||||
# matches no real browser is itself a consistency tell. Disabling it makes
|
||||
# JA3/JA4/peetprint byte-identical to retail FF150 (verified on tls.peet.ws).
|
||||
# Stock Firefox ships without 0xC009 and works on the whole web, so this only
|
||||
# improves fingerprint consistency — it cannot break connectivity.
|
||||
"security.ssl3.ecdhe_ecdsa_aes_128_sha": False,
|
||||
|
||||
# Safebrowsing — chatty and fingerprintable.
|
||||
"browser.safebrowsing.malware.enabled": False,
|
||||
"browser.safebrowsing.phishing.enabled": False,
|
||||
@@ -437,22 +449,41 @@ def _accept_language(locale: str) -> str:
|
||||
def _font_metrics_for_platform(profile_metrics: str) -> str:
|
||||
"""Return ``zoom.stealth.font.metrics`` value.
|
||||
|
||||
Windows: empty string. The C++ width-scale hook is a no-op and
|
||||
Firefox renders Arial/Segoe/Calibri/etc. at their native canonical
|
||||
widths. Applying the Bayesian-sampled per-font factors on a Windows
|
||||
build would *distort* real metrics and surface as a font_preferences
|
||||
width anomaly to FP Pro / reCAPTCHA.
|
||||
The C++ whitelist hook (``gfxPlatformFontList::FindAndAddFamiliesLocked``)
|
||||
backs EVERY whitelisted *named* family with the list-head family on every
|
||||
platform. Without per-font width factors, that means each named font
|
||||
(Arial, Times New Roman, Courier New, …) renders with identical glyphs and
|
||||
collapses to a SINGLE canvas ``measureText`` width — a non-physical
|
||||
1-distinct-width result that strict JS-sensor anti-bots flag via their
|
||||
font probe. The per-font factors in ``profile_metrics``
|
||||
(``arial|0.978,arial black|1.168,…``) spread the fabricated families back
|
||||
to distinct, realistic, deterministic-per-seed widths, so we apply them on
|
||||
EVERY platform (previously suppressed on Windows/mac, which left the
|
||||
collapse in place — only the CSS-generic vector, which FP Pro probes, was
|
||||
ever correct there).
|
||||
|
||||
Linux: prepend generic-family compensation factors so DejaVu /
|
||||
Liberation render at the widths Windows JS expects, then append the
|
||||
per-font factors that make each fabricated family detectable by
|
||||
width-diff probes.
|
||||
These factors only key *named* families. CSS generics
|
||||
(serif/sans-serif/monospace/system-ui) bypass the whitelist entirely and
|
||||
render at the host's native widths, so they are never present in
|
||||
``profile_metrics`` and stay unfactored — FP Pro's ``font_preferences``
|
||||
probe (which measures the generics) is unaffected. That is also why
|
||||
applying named-font factors here does NOT distort the canonical generic
|
||||
widths.
|
||||
|
||||
Linux ADDITIONALLY needs generic-family compensation
|
||||
(``_LINUX_GENERIC_FONT_FACTORS``) because DejaVu/Liberation generics render
|
||||
wider/narrower than the Windows widths the spoofed profile claims; on
|
||||
Windows/mac the generics already render native, so no generic compensation
|
||||
is applied — only the named-font factors.
|
||||
"""
|
||||
if not profile_metrics:
|
||||
return ""
|
||||
if sys.platform.startswith("linux"):
|
||||
return _LINUX_GENERIC_FONT_FACTORS + profile_metrics
|
||||
return "" # Windows: NEVER apply width-scale factors.
|
||||
# Windows / macOS: named-font factors only (the generics render native and
|
||||
# bypass the whitelist, so no generic compensation — but the named families
|
||||
# MUST be factored or they all collapse to the list-head width).
|
||||
return profile_metrics
|
||||
|
||||
|
||||
def translate_profile_to_prefs(
|
||||
@@ -479,21 +510,32 @@ def translate_profile_to_prefs(
|
||||
# GPU / WebGL renderer/vendor.
|
||||
# On Linux we spoof to a Windows ANGLE renderer string (profile.gpu.renderer)
|
||||
# so cross-platform sessions report a consistent Windows GPU identity.
|
||||
# On Windows, spoofing a different GPU creates a renderer/parameters hash
|
||||
# mismatch: FP Pro hashes all 81 CN-set getParameter() values including
|
||||
# enum 7937 (RENDERER). Setting GTX 980 while ANGLE returns Intel Arc A750
|
||||
# parameters produces an OOD (hash 23d0a74b vs vanilla 66544db) that FP Pro
|
||||
# ML scores at ~0.70 (confirmed: direct SF146 vs vanilla on same machine).
|
||||
# Fix: leave renderer/vendor empty on Windows → ANGLE reports native hardware
|
||||
# (SanitizeRenderer path at ClientWebGLContext.cpp:2592-2595) → consistent.
|
||||
# On Windows/mac, spoofing a renderer string ALONE is unsafe — the ~81
|
||||
# getParameter values stay real, so a name↔params hash mismatch FP Pro flags
|
||||
# (setting GTX 980 over real Arc A750 params scored ~0.70). Instead we apply a
|
||||
# VALIDATED PERSONA (see _webgl_personas): a {renderer, vendor} whose params are
|
||||
# the shared ANGLE D3D11 caps (vendor-independent — identical on any host, per the
|
||||
# ANGLE source) and whose extension list is FORCED below. That is a coherent fake
|
||||
# GPU that passes FP Pro host-independently (the host's real GPU never leaks). If no
|
||||
# validated persona exists for the sampled gpu_class yet, fall back to the host-real
|
||||
# renderer (empty → native ANGLE; SanitizeRenderer at ClientWebGLContext.cpp:2592).
|
||||
_persona = None
|
||||
if sys.platform.startswith("linux"):
|
||||
prefs["zoom.stealth.webgl.renderer"] = profile.gpu.renderer
|
||||
prefs["zoom.stealth.webgl.vendor"] = profile.gpu.vendor
|
||||
_renderer_lo = (profile.gpu.renderer or "").lower()
|
||||
else:
|
||||
prefs["zoom.stealth.webgl.renderer"] = ""
|
||||
prefs["zoom.stealth.webgl.vendor"] = ""
|
||||
_renderer_lo = "intel" # test hardware is Intel Arc A750
|
||||
_persona = select_persona(profile.seed)
|
||||
if _persona:
|
||||
prefs["zoom.stealth.webgl.renderer"] = _persona["renderer"]
|
||||
prefs["zoom.stealth.webgl.vendor"] = _persona["vendor"]
|
||||
else:
|
||||
prefs["zoom.stealth.webgl.renderer"] = ""
|
||||
prefs["zoom.stealth.webgl.vendor"] = ""
|
||||
# Canvas-noise mask is calibrated to the REAL host GPU's rendering variance — the canvas is
|
||||
# drawn by real hardware, NOT the persona's claimed GPU, so it must NOT follow the persona
|
||||
# (a non-Intel persona on an Intel host would over-noise). Deployment host is Intel.
|
||||
_renderer_lo = "intel"
|
||||
|
||||
# MSAA: on Windows, pin to 4 (Firefox default for ANGLE) so gl.SAMPLES is
|
||||
# constant across all sessions. Different MSAA values cause different CN-set
|
||||
@@ -522,7 +564,8 @@ def translate_profile_to_prefs(
|
||||
prefs["zoom.stealth.screen.dpr"] = profile.screen.dpr
|
||||
prefs["layout.css.devPixelsPerPx"] = str(profile.screen.dpr)
|
||||
|
||||
# Hardware
|
||||
# Hardware — coherent with the sampled gpu_class by construction (the forge
|
||||
# draws hw_concurrency conditioned on the GPU class).
|
||||
prefs["zoom.stealth.hw_concurrency"] = profile.hardware.concurrency
|
||||
prefs["zoom.stealth.storage.quota_mb"] = profile.hardware.storage_quota_mb
|
||||
|
||||
@@ -566,8 +609,12 @@ def translate_profile_to_prefs(
|
||||
# Cross-process seed (canvas noise + DWrite gamma share this). Only
|
||||
# zoom.stealth.fpp.hw_seed is read by the C++; the old zoom.stealth.seed
|
||||
# alias was never declared in the yaml and read by nothing — dropped
|
||||
# 2026-06-10.
|
||||
prefs["zoom.stealth.fpp.hw_seed"] = profile.seed
|
||||
# 2026-06-10. The render-noise seed is DECOUPLED from the identity seed and
|
||||
# drawn from a calibrated CLEAN pool: the canvas/WebGL render HASH it drives
|
||||
# is the dominant FP Pro tampering_ml signal, and some hw_seeds yield a
|
||||
# "suspicious" render hash. render_noise_seed() maps to the clean pool while
|
||||
# keeping per-seed determinism + diversity. See _webgl_personas.
|
||||
prefs["zoom.stealth.fpp.hw_seed"] = render_noise_seed(profile.seed)
|
||||
|
||||
# Synthetic host ICE candidate — injected by C++ when addr_ct==0 (SOCKS5
|
||||
# proxy suppresses all local addresses so Firefox can't gather host cands).
|
||||
@@ -577,13 +624,22 @@ def translate_profile_to_prefs(
|
||||
_lan_ip = f"192.168.{(_s >> 8) % 254 + 1}.{_s % 254 + 1}"
|
||||
prefs["zoom.stealth.webrtc.host_ip"] = _lan_ip
|
||||
|
||||
# On Windows, native ANGLE extension list already matches real Windows users.
|
||||
# The baseline hard-codes a curated _WEBGL1/2_EXTENSIONS list designed for
|
||||
# Linux Mesa → clear it so Windows sessions report the native extension set
|
||||
# (hash matches real Intel Arc A750 vanilla captures).
|
||||
# Windows/mac extension list:
|
||||
# - persona active → FORCE the validated extension list. A non-Intel host's native
|
||||
# extensions would mismatch the persona's renderer (renderer says AMD/Intel-Arc but
|
||||
# extensions are the host's), so the persona must carry its own list to stay
|
||||
# host-independent.
|
||||
# - no persona → clear so the host-real renderer reports its native extension set
|
||||
# (matches real vanilla captures for that host's GPU).
|
||||
if not sys.platform.startswith("linux"):
|
||||
prefs["zoom.stealth.webgl.extensions"] = ""
|
||||
prefs["zoom.stealth.webgl2.extensions"] = ""
|
||||
if _persona:
|
||||
# The persona carries its OWN extension lists in EXACT NATIVE ORDER — a
|
||||
# reordered/foreign list is flagged by FP Pro (verified 2026-06-13).
|
||||
prefs["zoom.stealth.webgl.extensions"] = _persona["ext1"]
|
||||
prefs["zoom.stealth.webgl2.extensions"] = _persona["ext2"]
|
||||
else:
|
||||
prefs["zoom.stealth.webgl.extensions"] = ""
|
||||
prefs["zoom.stealth.webgl2.extensions"] = ""
|
||||
|
||||
# Linux Xvfb workarounds (no-op on Windows).
|
||||
if sys.platform.startswith("linux"):
|
||||
|
||||
+11
-2
@@ -91,9 +91,18 @@ def test_cloak_hides_window_but_keeps_rendering(firefox_binary):
|
||||
shot = page.screenshot()
|
||||
assert len(shot) > 3000, "cloaked window produced a blank screenshot (rendering paused)"
|
||||
|
||||
# 2) real WebGL present (native headless has none) -> headed pipeline intact.
|
||||
# 2) headed pipeline intact: a real WebGL context (Playwright's native
|
||||
# headless has none). Linux (Xvfb + llvmpipe) and Windows (WARP) give a
|
||||
# software context on the GPU-less runners, so a missing context there
|
||||
# is a real regression -> hard fail. macOS GitHub runners expose NO
|
||||
# WebGL in the CI session at all (even vanilla Firefox), and macOS has
|
||||
# no software-GL fallback; the cloak's "still rendering" property is
|
||||
# already proven by the non-blank screenshot above, so we don't also
|
||||
# require a live WebGL context there.
|
||||
renderer = page.evaluate(_WEBGL_RENDERER)
|
||||
assert renderer and renderer != "NO-WEBGL", f"no real WebGL under cloak: {renderer!r}"
|
||||
webgl_ok = bool(renderer) and renderer != "NO-WEBGL"
|
||||
if not (sys.platform == "darwin" and not webgl_ok):
|
||||
assert webgl_ok, f"no real WebGL under cloak: {renderer!r}"
|
||||
|
||||
# 3) the window is actually hidden (per-platform).
|
||||
if sys.platform == "win32":
|
||||
|
||||
+152
-29
@@ -6,21 +6,32 @@ libraries and uses their FULL API surface:
|
||||
* BotD (@fingerprintjs/botd, MIT) — the client-side bot detector that
|
||||
FingerprintJS Pro itself uses. We assert the aggregate verdict
|
||||
(``detect().bot == False``) AND every one of its ~18 individual detectors
|
||||
(``getDetections()``) returns ``bot == False``. The per-detector view is
|
||||
why we could delete our hand-rolled ``test_botd_*`` mirrors — the real
|
||||
library now covers each detector, with the same granularity.
|
||||
(``getDetections()``) returns ``bot == False``.
|
||||
* FingerprintJS open-source (MIT) — ``get()`` must return a ``visitorId``
|
||||
that is STABLE across two fresh launches with the same seed (an
|
||||
over-randomized spoof drifts), and a RICH component set (the fingerprint
|
||||
surface is real, not a stub).
|
||||
that is STABLE across two fresh launches with the same seed, and a RICH
|
||||
component set (the fingerprint surface is real, not a stub).
|
||||
* fpscanner (antoinevastel/fpscanner 1.0.6, MIT) — ``collectFingerprint()``
|
||||
runs ~21 bot-detection rules in the browser. We assert the **engine-agnostic**
|
||||
subset (webdriver / selenium / bot-UA / platform / timezone / language) is
|
||||
clean. We deliberately do NOT assert the Chrome/GPU-only rules (hasCDP,
|
||||
hasPlaywright, hasSwiftshaderRenderer, hasMissingChromeObject, …): they're
|
||||
trivially clean on Firefox, and the GPU ones can legitimately fire on a
|
||||
software-WebGL CI host (Xvfb/llvmpipe) — asserting them would false-red.
|
||||
* CreepJS (abrahamjuliot/creepjs, MIT, pinned) — the gold-standard Firefox-aware
|
||||
headless/stealth/lie detector. It exposes its result on ``window.Fingerprint``.
|
||||
We assert ``headlessRating == 0`` (webdriver + headless-UA tells) and the
|
||||
JS-proxy stealth tells are absent. ``stealthRating`` / ``totalLies`` /
|
||||
``likeHeadlessRating`` are LOGGED, not hard-asserted, because some of their
|
||||
sub-signals (hasBadWebGL, prefers-light-color) are GPU/theme-sensitive and
|
||||
differ on a GPU-less CI host.
|
||||
|
||||
Everything is hermetic: the libraries are vendored (tests/vendor/) and served
|
||||
from a localhost HTTP server — no external CDN call (Firefox tracking-protection
|
||||
blocks the CDN anyway) and no IP/network dependency. Runs identically on a dev
|
||||
box and on a GitHub runner.
|
||||
from a localhost HTTP server — no external CDN call. For CreepJS, every non-local
|
||||
request is aborted, so its optional crowd-comparison POST never runs and the
|
||||
verdict is computed purely locally. Runs identically on a dev box and a GH runner.
|
||||
|
||||
NOT covered: FingerprintJS *Pro* (commercial, server-side, IP/residential
|
||||
analysis) — can't be self-hosted, stays the local realness gate.
|
||||
NOT covered: FingerprintJS *Pro* (commercial, server-side) — stays the local
|
||||
realness gate.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
@@ -36,19 +47,33 @@ from invisible_playwright import InvisiblePlaywright
|
||||
_VENDOR = Path(__file__).parent / "vendor"
|
||||
_BOTD = "botd-2.0.0.esm.js"
|
||||
_FPJS = "fingerprintjs-5.2.0.umd.min.js"
|
||||
_FPSCANNER = "fpscanner-1.0.6.es.js"
|
||||
_CREEPJS = "creepjs-10aa672.js" # pinned abrahamjuliot/creepjs@10aa6724
|
||||
|
||||
# fpscanner rules that are MEANINGFUL on Firefox and GPU-independent — these must
|
||||
# stay clean. The omitted rules are Chrome-only (hasCDP/hasPlaywright/
|
||||
# hasMissingChromeObject/hasHighCPUCount/hasImpossibleDeviceMemory/
|
||||
# headlessChromeScreenResolution) or GPU-sensitive on a software-WebGL CI host
|
||||
# (hasSwiftshaderRenderer/hasGPUMismatch/hasMismatchWebGLInWorker).
|
||||
_FPSCANNER_AGNOSTIC = [
|
||||
"hasWebdriver", "hasWebdriverIframe", "hasWebdriverWorker", "hasWebdriverWritable",
|
||||
"hasSeleniumProperty", "hasBotUserAgent", "hasPlatformMismatch",
|
||||
"hasMismatchLanguages", "hasUTCTimezone", "hasMismatchPlatformIframe",
|
||||
"hasMismatchPlatformWorker", "hasInconsistentEtsl",
|
||||
]
|
||||
|
||||
_PAGE = f"""<!doctype html><html><head><meta charset="utf-8">
|
||||
<title>detectors</title>
|
||||
<script src="/{_FPJS}"></script>
|
||||
</head><body><h1 id="state">loading</h1>
|
||||
<script type="module">
|
||||
window.__botd = null; window.__fp = null; window.__err = "";
|
||||
window.__botd = null; window.__fp = null; window.__fps = null; window.__err = "";
|
||||
(async () => {{
|
||||
try {{
|
||||
const Botd = await import("/{_BOTD}");
|
||||
const botd = await Botd.load(); // load() collects internally
|
||||
const verdict = botd.detect(); // {{bot:false}} | {{bot:true,botKind}}
|
||||
const raw = botd.getDetections() || {{}}; // per-detector verdicts
|
||||
const botd = await Botd.load();
|
||||
const verdict = botd.detect();
|
||||
const raw = botd.getDetections() || {{}};
|
||||
const detections = {{}};
|
||||
for (const k in raw) detections[k] = {{ bot: raw[k].bot, botKind: raw[k].botKind || null }};
|
||||
window.__botd = {{ bot: verdict.bot, botKind: verdict.botKind || null, detections }};
|
||||
@@ -60,24 +85,41 @@ window.__botd = null; window.__fp = null; window.__err = "";
|
||||
const errored = keys.filter(k => r.components[k] && "error" in r.components[k]);
|
||||
window.__fp = {{ visitorId: r.visitorId, componentKeys: keys, erroredComponents: errored }};
|
||||
}} catch (e) {{ window.__err += " fp:" + e; }}
|
||||
try {{
|
||||
const M = await import("/{_FPSCANNER}");
|
||||
const scanner = new M.default();
|
||||
const fp = await scanner.collectFingerprint({{ encrypt: false }});
|
||||
window.__fps = {{ fastBotDetection: fp.fastBotDetection, details: fp.fastBotDetectionDetails }};
|
||||
}} catch (e) {{ window.__err += " fps:" + e; }}
|
||||
document.getElementById("state").textContent = "done";
|
||||
}})();
|
||||
</script></body></html>"""
|
||||
|
||||
# CreepJS gets its own page: creep.js is a plain `defer` script that runs on load
|
||||
# and populates window.Fingerprint. A minimal DOM is enough (the rich report DOM
|
||||
# is only for the visual page, not the computation).
|
||||
_CREEP_PAGE = f"""<!doctype html><html><head><meta charset="utf-8"><title>creep</title></head>
|
||||
<body><div id="fingerprint-data"></div><script src="/{_CREEPJS}" defer></script></body></html>"""
|
||||
|
||||
|
||||
class _DetectorSite:
|
||||
"""Localhost server: `/` → the page; `/<lib>` → the vendored bundle."""
|
||||
"""Localhost server: `/` → BotD+FPJS+fpscanner page, `/creepjs` → CreepJS page,
|
||||
`/<file>` → the vendored bundle."""
|
||||
|
||||
def __init__(self):
|
||||
page = _PAGE.encode()
|
||||
creep_page = _CREEP_PAGE.encode()
|
||||
vendor = _VENDOR
|
||||
|
||||
class H(http.server.BaseHTTPRequestHandler):
|
||||
def do_GET(self): # noqa: N802
|
||||
if self.path == "/" or self.path.startswith("/?"):
|
||||
p = self.path.split("?")[0]
|
||||
if p == "/":
|
||||
body, ctype = page, "text/html; charset=utf-8"
|
||||
elif p == "/creepjs":
|
||||
body, ctype = creep_page, "text/html; charset=utf-8"
|
||||
else:
|
||||
f = vendor / Path(self.path.lstrip("/")).name
|
||||
f = vendor / Path(p.lstrip("/")).name
|
||||
if not f.is_file():
|
||||
self.send_error(404); return
|
||||
body = f.read_bytes()
|
||||
@@ -99,6 +141,10 @@ class _DetectorSite:
|
||||
def url(self):
|
||||
return f"http://127.0.0.1:{self.port}/"
|
||||
|
||||
@property
|
||||
def creep_url(self):
|
||||
return f"http://127.0.0.1:{self.port}/creepjs"
|
||||
|
||||
def close(self):
|
||||
self._srv.shutdown()
|
||||
|
||||
@@ -111,7 +157,7 @@ def detector_site():
|
||||
|
||||
|
||||
def _run_detectors(firefox_binary, url):
|
||||
"""Launch the binary, load the page, return (botd, fp, err)."""
|
||||
"""Launch the binary, load the page, return (botd, fp, fps, err)."""
|
||||
with InvisiblePlaywright(seed=42, binary_path=firefox_binary) as browser:
|
||||
page = browser.new_page()
|
||||
page.goto(url, wait_until="load", timeout=45000)
|
||||
@@ -121,16 +167,48 @@ def _run_detectors(firefox_binary, url):
|
||||
)
|
||||
botd = page.evaluate("() => window.__botd")
|
||||
fp = page.evaluate("() => window.__fp")
|
||||
fps = page.evaluate("() => window.__fps")
|
||||
err = page.evaluate("() => window.__err")
|
||||
return botd, fp, err
|
||||
return botd, fp, fps, err
|
||||
|
||||
|
||||
def _run_creepjs(firefox_binary, creep_url):
|
||||
"""Launch the binary, run CreepJS fully offline, return its headless result."""
|
||||
_EV = """() => {
|
||||
const f = window.Fingerprint;
|
||||
if (!f || !f.headless) return { ready: false };
|
||||
const h = f.headless;
|
||||
return {
|
||||
ready: true,
|
||||
headlessRating: h.headlessRating,
|
||||
stealthRating: h.stealthRating,
|
||||
likeHeadlessRating: h.likeHeadlessRating,
|
||||
headless: h.headless || {},
|
||||
stealth: h.stealth || {},
|
||||
totalLies: (f.lies && f.lies.totalLies) || 0,
|
||||
};
|
||||
}"""
|
||||
with InvisiblePlaywright(seed=42, binary_path=firefox_binary) as browser:
|
||||
page = browser.new_page()
|
||||
# truly offline: abort every non-loopback request (CreepJS's optional
|
||||
# crowd-comparison POST to arh.antoinevastel.com never runs).
|
||||
page.route(
|
||||
"**/*",
|
||||
lambda r: r.abort() if "127.0.0.1" not in r.request.url else r.continue_(),
|
||||
)
|
||||
page.goto(creep_url, wait_until="domcontentloaded", timeout=45000)
|
||||
page.wait_for_function(
|
||||
"() => !!(window.Fingerprint && window.Fingerprint.headless)",
|
||||
timeout=60000,
|
||||
)
|
||||
return page.evaluate(_EV)
|
||||
|
||||
|
||||
@pytest.mark.e2e
|
||||
def test_botd_no_detector_flags_automation(firefox_binary, detector_site):
|
||||
"""The real BotD must not flag the build — aggregate AND every one of its
|
||||
individual detectors (webDriver/userAgent/appVersion/plugins/process/... ).
|
||||
"""
|
||||
botd, _fp, err = _run_detectors(firefox_binary, detector_site.url)
|
||||
individual detectors (webDriver/userAgent/appVersion/plugins/process/...)."""
|
||||
botd, _fp, _fps, err = _run_detectors(firefox_binary, detector_site.url)
|
||||
assert botd is not None, f"BotD produced no result (err:{err!r})"
|
||||
assert botd.get("bot") is False, (
|
||||
f"BotD aggregate flagged a bot: botKind={botd.get('botKind')!r}"
|
||||
@@ -146,8 +224,8 @@ def test_fingerprintjs_visitorid_stable_across_launches(firefox_binary, detector
|
||||
"""FingerprintJS visitorId must be present and identical across two fresh
|
||||
launches with the same seed — a real browser is stable; an over-randomized
|
||||
spoof drifts (and a drifting fingerprint is itself a bot tell)."""
|
||||
_b1, fp1, err1 = _run_detectors(firefox_binary, detector_site.url)
|
||||
_b2, fp2, err2 = _run_detectors(firefox_binary, detector_site.url)
|
||||
_b1, fp1, _f1, err1 = _run_detectors(firefox_binary, detector_site.url)
|
||||
_b2, fp2, _f2, err2 = _run_detectors(firefox_binary, detector_site.url)
|
||||
assert fp1 and fp1.get("visitorId"), f"no visitorId on run 1 (err:{err1!r})"
|
||||
assert fp2 and fp2.get("visitorId"), f"no visitorId on run 2 (err:{err2!r})"
|
||||
assert fp1["visitorId"] == fp2["visitorId"], (
|
||||
@@ -159,13 +237,58 @@ def test_fingerprintjs_visitorid_stable_across_launches(firefox_binary, detector
|
||||
@pytest.mark.e2e
|
||||
def test_fingerprintjs_collects_rich_fingerprint(firefox_binary, detector_site):
|
||||
"""FingerprintJS must collect a RICH component surface (a real browser
|
||||
exposes many signals; a stripped/blocked surface is itself suspicious).
|
||||
We don't assert zero errored components (some are legitimately unsupported
|
||||
per browser), only that the surface is substantial and the id computed."""
|
||||
_b, fp, err = _run_detectors(firefox_binary, detector_site.url)
|
||||
exposes many signals; a stripped/blocked surface is itself suspicious)."""
|
||||
_b, fp, _f, err = _run_detectors(firefox_binary, detector_site.url)
|
||||
assert fp and fp.get("visitorId"), f"FingerprintJS produced no id (err:{err!r})"
|
||||
keys = fp.get("componentKeys") or []
|
||||
assert len(keys) >= 15, (
|
||||
f"FingerprintJS collected only {len(keys)} components — surface too thin "
|
||||
f"(suppressed signals are themselves a tell): {keys}"
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.e2e
|
||||
def test_fpscanner_no_automation_rules(firefox_binary, detector_site):
|
||||
"""fpscanner's engine-agnostic bot rules (webdriver/selenium/bot-UA/platform/
|
||||
timezone/language) must all be clean. The Chrome/GPU-only rules are ignored
|
||||
on purpose (see module docstring) — they false-red on a software-WebGL host."""
|
||||
_b, _fp, fps, err = _run_detectors(firefox_binary, detector_site.url)
|
||||
assert fps is not None, f"fpscanner produced no result (err:{err!r})"
|
||||
details = fps.get("details") or {}
|
||||
assert details, f"fpscanner returned no detection details (err:{err!r})"
|
||||
flagged = [
|
||||
k for k in _FPSCANNER_AGNOSTIC
|
||||
if details.get(k) and details[k].get("detected")
|
||||
]
|
||||
assert not flagged, (
|
||||
f"fpscanner flagged automation on engine-agnostic rules: {flagged} "
|
||||
f"(full details: { {k: v for k, v in details.items() if v.get('detected')} })"
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.e2e
|
||||
def test_creepjs_headless_and_proxy_clean(firefox_binary, detector_site):
|
||||
"""CreepJS (Firefox-aware) must see no headless tell and no JS-proxy stealth
|
||||
tell. ``headlessRating`` aggregates webDriverIsOn + headless-UA checks (all
|
||||
GPU-independent). The proxy/runtime stealth sub-signals (hasIframeProxy,
|
||||
hasToStringProxy, hasBadChromeRuntime) must be false — a spoof implemented
|
||||
with a JS Proxy is exactly what CreepJS catches. stealthRating/totalLies/
|
||||
likeHeadlessRating are GPU/theme-sensitive, so we log them, not assert."""
|
||||
r = _run_creepjs(firefox_binary, detector_site.creep_url)
|
||||
assert r and r.get("ready"), f"CreepJS never populated window.Fingerprint: {r!r}"
|
||||
print(
|
||||
f"[creepjs] headlessRating={r['headlessRating']} stealthRating={r['stealthRating']} "
|
||||
f"likeHeadlessRating={r['likeHeadlessRating']} totalLies={r['totalLies']} "
|
||||
f"headless={r['headless']} stealth={r['stealth']}"
|
||||
)
|
||||
assert r["headlessRating"] == 0, (
|
||||
f"CreepJS headless tells fired: headless={r['headless']} "
|
||||
f"(headlessRating={r['headlessRating']})"
|
||||
)
|
||||
stealth = r.get("stealth") or {}
|
||||
proxy_tells = {
|
||||
k: stealth.get(k)
|
||||
for k in ("hasIframeProxy", "hasToStringProxy", "hasBadChromeRuntime")
|
||||
if stealth.get(k)
|
||||
}
|
||||
assert not proxy_tells, f"CreepJS JS-proxy stealth tells fired: {proxy_tells}"
|
||||
|
||||
@@ -27,6 +27,7 @@ Run only this file:
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
|
||||
@@ -296,6 +297,12 @@ def test_webgl_readpixels_no_masking_signature(page):
|
||||
~300+ 'spikes' and pixelscan flagged it as masking; the gamma remap leaves
|
||||
the gradient smooth (~0 spikes). Regression guard for the gamma fix."""
|
||||
res = _ev(page, _WEBGL_MASKING_PROBE)
|
||||
if res.get("error") == "no-webgl" and sys.platform == "darwin":
|
||||
pytest.skip(
|
||||
"macOS CI runners expose no WebGL (no software-GL fallback); the gamma "
|
||||
"readPixels remap is platform-agnostic C++ and is exercised by the Linux "
|
||||
"(Xvfb/llvmpipe) and Windows (WARP) gates."
|
||||
)
|
||||
assert "error" not in res, f"WebGL probe failed: {res}"
|
||||
# genuine / gamma -> ~0; the rejected +-1 algorithm produced ~320.
|
||||
assert res["spikes"] < 30, (
|
||||
|
||||
@@ -289,8 +289,10 @@ def test_windows_virtual_display_with_socks_proxy(monkeypatch):
|
||||
assert prefs["security.sandbox.gpu.level"] == 0 # virtual_display branch
|
||||
assert prefs["network.proxy.type"] == 1 # SOCKS branch
|
||||
assert prefs["network.proxy.socks"] == "127.0.0.1"
|
||||
# Windows still has the renderer cleared.
|
||||
assert prefs["zoom.stealth.webgl.renderer"] == ""
|
||||
# Windows exposes a validated persona renderer (calibrated clean bucket),
|
||||
# not empty/native — see _webgl_personas.
|
||||
assert prefs["zoom.stealth.webgl.renderer"].startswith("ANGLE (")
|
||||
assert prefs["zoom.stealth.webgl.renderer"].rstrip().endswith(", D3D11)")
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
+29
-10
@@ -132,12 +132,9 @@ def test_mouse_move_outside_viewport_does_not_raise(firefox_binary):
|
||||
# ────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@pytest.mark.e2e
|
||||
def test_humanize_emits_intermediate_moves(firefox_binary):
|
||||
"""A long mouse.move from one corner to another should fire several
|
||||
mousemove events on the page when the humanize hook is enabled (which
|
||||
is the StealthFox default)."""
|
||||
with InvisiblePlaywright(seed=42, binary_path=firefox_binary) as browser:
|
||||
def _humanize_move_count(firefox_binary, humanize):
|
||||
"""Count page mousemove events fired by ONE long mouse.move."""
|
||||
with InvisiblePlaywright(seed=42, binary_path=firefox_binary, humanize=humanize) as browser:
|
||||
page = browser.new_page()
|
||||
page.goto(_data_url(
|
||||
"<div id=d style='width:600px;height:400px' "
|
||||
@@ -146,8 +143,27 @@ def test_humanize_emits_intermediate_moves(firefox_binary):
|
||||
page.mouse.move(10, 10)
|
||||
page.evaluate("window.__n = 0")
|
||||
page.mouse.move(500, 300)
|
||||
moves = page.evaluate("window.__n")
|
||||
assert moves >= 1, f"expected at least 1 mousemove event, got {moves}"
|
||||
return page.evaluate("window.__n")
|
||||
|
||||
|
||||
@pytest.mark.e2e
|
||||
def test_humanize_emits_intermediate_moves(firefox_binary):
|
||||
"""A long mouse.move must expand into MANY intermediate mousemove events when
|
||||
humanize is on (Bezier), and ~1 (a teleport) when off. We assert the on/off
|
||||
CONTRAST: `moves >= 1` alone was a false-green — a teleport already fires 1 —
|
||||
and that false-green hid a pref-namespace bug (wrapper wrote
|
||||
`invisible_playwright.humanize`, the binary's Juggler reads `stealthfox.humanize`)
|
||||
that left humanize silently dead in production. This test now fails if the
|
||||
pref ever stops reaching the binary."""
|
||||
on = _humanize_move_count(firefox_binary, True)
|
||||
off = _humanize_move_count(firefox_binary, False)
|
||||
assert off <= 2, f"humanize OFF should ~teleport (<=2 moves), got {off}"
|
||||
assert on >= 4, (
|
||||
f"humanize ON must expand into many intermediate moves (Bezier); got {on} "
|
||||
f"(off={off}). moves==1 means the cursor teleports — the exact automation "
|
||||
f"tell humanize exists to remove, and a sign the stealthfox.* pref isn't "
|
||||
f"reaching the binary's Juggler."
|
||||
)
|
||||
|
||||
|
||||
# ────────────────────────────────────────────────────────────────────
|
||||
@@ -185,8 +201,11 @@ def test_hover_triggers_mouseenter(firefox_binary):
|
||||
# Wait for the event rather than reading immediately: under load / on a
|
||||
# virtual display the mouseenter can land a beat after hover() returns,
|
||||
# which made an instant read flaky. wait_for_function still fails (times
|
||||
# out) if mouseenter genuinely never fires.
|
||||
page.wait_for_function("() => window.__h === true", timeout=5000)
|
||||
# out) if mouseenter genuinely never fires. Timeout is generous (10s) so a
|
||||
# busy full-suite run — where browser startup + CPU contention can push
|
||||
# the event past a tight 5s window — doesn't flake; the event itself fires
|
||||
# in well under a second when run in isolation.
|
||||
page.wait_for_function("() => window.__h === true", timeout=10_000)
|
||||
|
||||
|
||||
# ────────────────────────────────────────────────────────────────────
|
||||
|
||||
+29
-14
@@ -15,12 +15,18 @@ from invisible_playwright.prefs import (
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_translate_includes_gpu_renderer_windows(monkeypatch):
|
||||
"""On Windows, renderer/vendor are cleared so ANGLE reports native hardware."""
|
||||
"""On Windows we falsify the GPU to one of the calibrated CLEAN buckets (FP Pro
|
||||
tampering_ml<=0.5 on every seed; sweep 2026-06-14). Only Radeon R9 200 Series and
|
||||
Intel Arc A750 ship — every NVIDIA/iGPU/945 bucket is penalized. See _webgl_personas."""
|
||||
monkeypatch.setattr(sys, "platform", "win32")
|
||||
_CLEAN = {
|
||||
"ANGLE (AMD, AMD Radeon R9 200 Series Direct3D11 vs_5_0 ps_5_0, D3D11)",
|
||||
"ANGLE (Intel, Intel(R) Arc(TM) A750 Graphics Direct3D11 vs_5_0 ps_5_0, D3D11)",
|
||||
}
|
||||
p = generate_profile(seed=42)
|
||||
prefs = translate_profile_to_prefs(p)
|
||||
assert prefs["zoom.stealth.webgl.renderer"] == ""
|
||||
assert prefs["zoom.stealth.webgl.vendor"] == ""
|
||||
assert prefs["zoom.stealth.webgl.renderer"] in _CLEAN
|
||||
assert prefs["zoom.stealth.webgl.vendor"] in {"Google Inc. (AMD)", "Google Inc. (Intel)"}
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
@@ -82,10 +88,15 @@ def test_accept_language_underscore_normalized():
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_font_metrics_windows_returns_empty(monkeypatch):
|
||||
# FM2: Windows never applies width-scale factors.
|
||||
def test_font_metrics_windows_applies_named_factors(monkeypatch):
|
||||
# FM2: Windows/mac apply the per-NAMED-font factors (so whitelisted named
|
||||
# families don't collapse to the list-head width on the canvas measureText
|
||||
# path), but WITHOUT the Linux generic-family compensation (generics bypass
|
||||
# the whitelist and render native there).
|
||||
monkeypatch.setattr(sys, "platform", "win32")
|
||||
assert _font_metrics_for_platform("Arial|1.0,Verdana|0.9,") == ""
|
||||
out = _font_metrics_for_platform("Arial|1.0,Verdana|0.9,")
|
||||
assert out == "Arial|1.0,Verdana|0.9,"
|
||||
assert "sans-serif|" not in out # no generic compensation on Windows
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
@@ -100,13 +111,14 @@ def test_font_metrics_empty_input_returns_empty():
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_gpu_renderer_empty_on_windows(monkeypatch):
|
||||
# PG2
|
||||
def test_gpu_renderer_persona_on_windows(monkeypatch):
|
||||
# PG2: Windows exposes a validated persona renderer (well-formed ANGLE bucket, NOT empty/native).
|
||||
monkeypatch.setattr(sys, "platform", "win32")
|
||||
p = generate_profile(seed=42)
|
||||
prefs = translate_profile_to_prefs(p)
|
||||
assert prefs["zoom.stealth.webgl.renderer"] == ""
|
||||
assert prefs["zoom.stealth.webgl.vendor"] == ""
|
||||
r = prefs["zoom.stealth.webgl.renderer"]
|
||||
assert r and r.startswith("ANGLE (") and r.rstrip().endswith(", D3D11)")
|
||||
assert prefs["zoom.stealth.webgl.vendor"].startswith("Google Inc. (")
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
@@ -143,13 +155,16 @@ def test_canvas_noise_mask_windows_uses_intel_path(monkeypatch):
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_webgl_extensions_cleared_on_windows(monkeypatch):
|
||||
# WE2
|
||||
def test_webgl_extensions_persona_on_windows(monkeypatch):
|
||||
# WE2: with a persona active on Windows, extensions are FORCED to the persona's native-order
|
||||
# list (host-independent), NOT cleared. Order is load-bearing (must match the persona verbatim).
|
||||
monkeypatch.setattr(sys, "platform", "win32")
|
||||
from invisible_playwright._webgl_personas import select_persona
|
||||
p = generate_profile(seed=42)
|
||||
prefs = translate_profile_to_prefs(p)
|
||||
assert prefs["zoom.stealth.webgl.extensions"] == ""
|
||||
assert prefs["zoom.stealth.webgl2.extensions"] == ""
|
||||
persona = select_persona(42)
|
||||
assert prefs["zoom.stealth.webgl.extensions"] == persona["ext1"]
|
||||
assert prefs["zoom.stealth.webgl2.extensions"] == persona["ext2"]
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
+17
-2
@@ -45,13 +45,28 @@ def test_classify_gpu_intel_hd_old_buckets(renderer):
|
||||
"ANGLE (Intel, Intel(R) HD Graphics 530 Direct3D11)",
|
||||
"ANGLE (Intel, Intel(R) UHD Graphics 630 Direct3D11)",
|
||||
"ANGLE (Intel, Intel(R) Iris Xe Graphics Direct3D11)",
|
||||
"ANGLE (Intel, Intel(R) Arc A750 Direct3D11)",
|
||||
# Integrated Arc iGPUs (Core Ultra "Arc 130T/140T/Graphics") stay integrated_modern.
|
||||
"ANGLE (Intel, Intel(R) Arc(TM) 140T GPU Direct3D11)",
|
||||
])
|
||||
def test_classify_gpu_intel_modern(renderer):
|
||||
"""CG4-CG7 [DT]: modern Intel HD/UHD/Iris/Arc → integrated_modern."""
|
||||
"""CG4-CG7 [DT]: modern Intel HD/UHD/Iris + integrated Arc → integrated_modern."""
|
||||
assert classify_gpu(_gpu(renderer)) == "integrated_modern"
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
@pytest.mark.parametrize("renderer,expected", [
|
||||
# Discrete Intel Arc DESKTOP cards are NOT integrated: A5xx/A7xx/Bxxx ~ mid-range
|
||||
# discrete (RTX 3060 tier); A3xx are entry discrete → low_end.
|
||||
("ANGLE (Intel, Intel(R) Arc(TM) A750 Graphics Direct3D11 vs_5_0 ps_5_0)", "mid_range"),
|
||||
("ANGLE (Intel, Intel(R) Arc(TM) A770 Graphics Direct3D11)", "mid_range"),
|
||||
("ANGLE (Intel, Intel(R) Arc(TM) B580 Graphics Direct3D11)", "mid_range"),
|
||||
("ANGLE (Intel, Intel(R) Arc(TM) A380 Graphics Direct3D11)", "low_end"),
|
||||
])
|
||||
def test_classify_gpu_intel_arc_discrete(renderer, expected):
|
||||
"""Discrete Intel Arc desktop SKUs map to a discrete-GPU class, not integrated."""
|
||||
assert classify_gpu(_gpu(renderer)) == expected
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
@pytest.mark.parametrize("renderer", [
|
||||
"ANGLE (AMD, AMD Radeon Graphics Direct3D11)",
|
||||
|
||||
@@ -29,8 +29,8 @@ def test_get_default_stealth_prefs_random_seed_returns_dict():
|
||||
assert isinstance(prefs, dict)
|
||||
assert len(prefs) > 0
|
||||
# humanize toggle is always set explicitly
|
||||
assert "invisible_playwright.humanize" in prefs
|
||||
assert prefs["invisible_playwright.humanize"] is True
|
||||
assert "stealthfox.humanize" in prefs
|
||||
assert prefs["stealthfox.humanize"] is True
|
||||
|
||||
|
||||
def test_get_default_stealth_prefs_seed_is_deterministic():
|
||||
@@ -50,22 +50,22 @@ def test_get_default_stealth_prefs_different_seeds_differ():
|
||||
def test_humanize_false_disables_prefs():
|
||||
"""humanize=False removes the maxTime knob and flips the toggle to False."""
|
||||
prefs = get_default_stealth_prefs(seed=42, humanize=False)
|
||||
assert prefs["invisible_playwright.humanize"] is False
|
||||
assert "invisible_playwright.humanize.maxTime" not in prefs
|
||||
assert prefs["stealthfox.humanize"] is False
|
||||
assert "stealthfox.humanize.maxTime" not in prefs
|
||||
|
||||
|
||||
def test_humanize_default_sets_max_time_1_5():
|
||||
"""humanize=True -> default maxTime is 1.5s, stored as string."""
|
||||
prefs = get_default_stealth_prefs(seed=42, humanize=True)
|
||||
assert prefs["invisible_playwright.humanize"] is True
|
||||
assert prefs["invisible_playwright.humanize.maxTime"] == "1.5"
|
||||
assert prefs["stealthfox.humanize"] is True
|
||||
assert prefs["stealthfox.humanize.maxTime"] == "1.5"
|
||||
|
||||
|
||||
def test_humanize_float_overrides_max_time():
|
||||
"""Float for humanize is the explicit cap in seconds."""
|
||||
prefs = get_default_stealth_prefs(seed=42, humanize=3.0)
|
||||
assert prefs["invisible_playwright.humanize"] is True
|
||||
assert prefs["invisible_playwright.humanize.maxTime"] == "3.0"
|
||||
assert prefs["stealthfox.humanize"] is True
|
||||
assert prefs["stealthfox.humanize.maxTime"] == "3.0"
|
||||
|
||||
|
||||
def test_extra_prefs_overlay_takes_precedence():
|
||||
|
||||
Vendored
+10
-4
@@ -6,13 +6,19 @@ on a dev box and on a GitHub runner (no external CDN at test time — Firefox
|
||||
tracking-protection blocks the openfpcdn.io CDN anyway, and we want CI offline).
|
||||
|
||||
They are served from a localhost HTTP server and loaded into the patched Firefox;
|
||||
the tests assert the REAL detectors don't flag the stealth build (BotD: `bot===false`)
|
||||
and that the fingerprint is stable (FingerprintJS: same `visitorId` across launches).
|
||||
the tests assert the REAL detectors don't flag the stealth build (BotD: `bot===false`;
|
||||
fpscanner: engine-agnostic rules clean; CreepJS: `headlessRating===0` + no JS-proxy
|
||||
tells) and that the fingerprint is stable (FingerprintJS: same `visitorId` across
|
||||
launches). CreepJS runs fully offline — the tests abort every non-loopback request,
|
||||
so its optional crowd-comparison POST never fires and the verdict is computed locally.
|
||||
|
||||
| File | Package | Version | Source | License |
|
||||
|---|---|---|---|---|
|
||||
| `botd-2.0.0.esm.js` | `@fingerprintjs/botd` | 2.0.0 | https://cdn.jsdelivr.net/npm/@fingerprintjs/botd@2.0.0/dist/botd.esm.js | MIT |
|
||||
| `fingerprintjs-5.2.0.umd.min.js` | `@fingerprintjs/fingerprintjs` | 5.2.0 | https://cdn.jsdelivr.net/npm/@fingerprintjs/fingerprintjs@5.2.0/dist/fp.umd.min.js | MIT |
|
||||
| `fpscanner-1.0.6.es.js` | `fpscanner` | 1.0.6 | https://cdn.jsdelivr.net/npm/fpscanner@1.0.6/dist/fpScanner.es.js | MIT |
|
||||
| `creepjs-10aa672.js` | `abrahamjuliot/creepjs` | git `10aa6724` | https://raw.githubusercontent.com/abrahamjuliot/creepjs/10aa6724cd33a1015db1574211890518cd04f0cc/docs/creep.js | MIT |
|
||||
|
||||
Both are MIT (Copyright © FingerprintJS, Inc.). To update: download the pinned
|
||||
dist from jsdelivr, drop it here, and bump the version in the filename + this table.
|
||||
All MIT (FingerprintJS Inc. / Antoine Vastel / Abraham Juliot). To update: download
|
||||
the pinned dist (jsdelivr for npm packages, raw.githubusercontent for CreepJS at a
|
||||
commit SHA), drop it here, and bump the version in the filename + this table.
|
||||
|
||||
Vendored
+9710
File diff suppressed because one or more lines are too long
Vendored
+1253
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user