# playwright_iframe_extract_debug.py from playwright.sync_api import sync_playwright from bs4 import BeautifulSoup import re, time, os, sys TEST_URL = "https://striphub.cam/play/68f731ea62f66877cc80e54f" # replace if needed MX_PATTERN = re.compile(r"https?://(?:www\.)?mxdrop\.to/e/[^\s\"'<>]+", re.I) def extract_from_html(html: str): """Fallback regex scan over raw HTML (catches inline JS/JSON).""" return set(MX_PATTERN.findall(html)) def run(): found = set() with sync_playwright() as p: # Use headless=False while debugging to *see* what's happening browser = p.chromium.launch(headless=False, args=["--disable-blink-features=AutomationControlled"]) context = browser.new_context( user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", locale="en-US", viewport={"width": 1280, "height": 900} ) page = context.new_page() # 1) Capture ANY network response hitting mxdrop def on_response(resp): url = resp.url if "mxdrop.to/e/" in url: print("[NET] mxdrop response:", url) found.add(url) page.on("response", on_response) print("Navigating to", TEST_URL) page.goto(TEST_URL, wait_until="domcontentloaded", timeout=60000) # 2) Give the page time to settle network/XHR try: page.wait_for_load_state("networkidle", timeout=20000) except Exception: print("⚠️ networkidle timed out—continuing") # 3) Try a few generic clicks that often reveal the player/iframe # (No-ops if not present; they just fail silently) for sel in [ 'button:has-text("Play")', 'button:has-text("I understand")', 'button:has-text("Continue")', 'button:has-text("Accept")', "#player, .video-player, .plyr__control", ]: try: el = page.locator(sel) if el.count() > 0: el.first.click(timeout=2000) except Exception: pass # 4) Scroll to bottom to trigger lazy-load iframes try: page.evaluate(""" const delay = ms => new Promise(r => setTimeout(r, ms)); (async () => { for (let y = 0; y < document.body.scrollHeight; y += 800) { window.scrollTo(0, y); await delay(200); } window.scrollTo(0, document.body.scrollHeight); })(); """) time.sleep(1.0) except Exception: pass # 5) Dump a screenshot & HTML so you can inspect what loaded try: page.screenshot(path="debug_page.png", full_page=True) print("Saved screenshot -> debug_page.png") except Exception: pass html = page.content() with open("debug_page.html", "w", encoding="utf-8") as f: f.write(html) print("Saved HTML -> debug_page.html") # 6) Parse DOM for iframes (src and data-src) soup = BeautifulSoup(html, "html.parser") iframes = soup.find_all("iframe") print(f"Found {len(iframes)}