import requests
from bs4 import BeautifulSoup
import time


def fetch_video_links(page_url):
    """Fetch one listing page and return the full URLs of every /play/ video link on it."""
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
    try:
        response = requests.get(page_url, headers=headers, timeout=10)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"❌ Failed to fetch {page_url}: {e}")
        return []

    soup = BeautifulSoup(response.text, "html.parser")
    base_url = "https://striphub.cam"
    video_links = []
    for a_tag in soup.find_all("a", href=True):
        if a_tag["href"].startswith("/play/"):
            # Build the full URL from the relative /play/... path
            video_links.append(base_url + a_tag["href"])

    print(f"✅ Found {len(video_links)} videos on {page_url}")
    return video_links


def crawl_all_pages(base_url, total_pages, output_file="video_links.txt"):
    """Crawl /page/1 through /page/total_pages, deduplicate the links and save them to a file."""
    all_links = []
    for page in range(1, total_pages + 1):
        page_url = f"{base_url}/page/{page}"
        print(f"\n🌐 Crawling page {page_url}...")
        links = fetch_video_links(page_url)
        all_links.extend(links)
        time.sleep(1)  # polite delay so you don't hammer the server

    # Remove duplicates
    all_links = list(set(all_links))

    # Save all links to file, one per line
    with open(output_file, "w", encoding="utf-8") as f:
        for link in all_links:
            f.write(link + "\n")

    print(f"\n✅ Done! Saved {len(all_links)} unique video links to {output_file}")
    return all_links
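

# --- Optional hardening (a sketch, not part of the original script) ---
# Instead of bare requests.get calls, the functions above could share a single
# requests.Session configured with automatic retries and backoff. Retry and
# HTTPAdapter are standard requests/urllib3 APIs; the retry counts and status
# codes below are illustrative assumptions, not values taken from the original.
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry


def make_session():
    """Return a requests.Session that retries transient failures with backoff."""
    session = requests.Session()
    retries = Retry(
        total=3,
        backoff_factor=1,
        status_forcelist=[429, 500, 502, 503, 504],
    )
    session.mount("https://", HTTPAdapter(max_retries=retries))
    session.headers.update({"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"})
    return session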


# Example usage: crawl the first 5 listing pages, then fetch each collected video page
if __name__ == "__main__":
    all_links = crawl_all_pages("https://striphub.cam", total_pages=5)
    for link in all_links:
        r = requests.get(link, timeout=10)  # fetch each video page (response currently unused)