import requests
from bs4 import BeautifulSoup
import time

def fetch_video_links(page_url):
    """Return all /play/ video URLs found on a single listing page."""
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}

    try:
        response = requests.get(page_url, headers=headers, timeout=10)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"❌ Failed to fetch {page_url}: {e}")
        return []

    soup = BeautifulSoup(response.text, "html.parser")

    video_links = []
    for a_tag in soup.find_all("a", href=True):
        if a_tag["href"].startswith("/play/"):
            # Build full URL from the site root and the relative href
            base_url = "https://striphub.cam"
            full_link = base_url + a_tag["href"]
            video_links.append(full_link)

    print(f"✅ Found {len(video_links)} videos on {page_url}")
    return video_links
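
# Sketch (hypothetical helper, not part of the script above): fetch_video_links
# hardcodes base_url even though crawl_all_pages takes one as a parameter. If
# the prefix should instead follow whatever site page_url points at, it could
# be derived from the URL itself:
from urllib.parse import urlsplit


def site_root(page_url):
    """Return scheme://netloc for page_url, e.g. 'https://striphub.cam'."""
    parts = urlsplit(page_url)
    return f"{parts.scheme}://{parts.netloc}"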

def crawl_all_pages(base_url, total_pages, output_file="video_links.txt"):
    """Crawl pages 1..total_pages, collect video links, and save them to a file."""
    all_links = []

    for page in range(1, total_pages + 1):
        page_url = f"{base_url}/page/{page}"
        print(f"\n🌐 Crawling page {page_url}...")
        links = fetch_video_links(page_url)
        all_links.extend(links)
        time.sleep(1)  # polite delay so you don't hammer the server

    # Remove duplicates
    all_links = list(set(all_links))

    # Save all to file
    with open(output_file, "w", encoding="utf-8") as f:
        for link in all_links:
            f.write(link + "\n")

    print(f"\n✅ Done! Saved {len(all_links)} unique video links to {output_file}")
    return all_links
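
# Optional follow-up step (a minimal sketch, not wired into crawl_all_pages):
# re-request each saved link to confirm it still responds. The helper name
# check_links, its delay parameter, and its return shape are illustrative
# assumptions rather than part of the crawler itself.
def check_links(links, delay=1):
    """Fetch each link and return the subset that answered with HTTP 200."""
    live = []
    for link in links:
        try:
            r = requests.get(link, timeout=10)
            if r.status_code == 200:
                live.append(link)
        except requests.RequestException as e:
            print(f"❌ {link}: {e}")
        time.sleep(delay)  # same polite delay as the crawler
    return live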

# Example usage:
if __name__ == "__main__":
    crawl_all_pages("https://striphub.cam", total_pages=5)