WIP: crawler still broken after threading refactor

main
Your Name 12 hours ago
parent c51464d68e
commit f90cbd7b53

@@ -3,18 +3,16 @@ import requests, time, mysql.connector
 from bs4 import BeautifulSoup
 from urllib.parse import urljoin
 from datetime import datetime
+from concurrent.futures import ThreadPoolExecutor, as_completed

-def insert_video_to_db(data):
+def insert_video_to_db(data, db):
     try:
         cursor = db.cursor()
         sql = """
             INSERT IGNORE INTO videos (username, date, embed_link, source_url, created_at)
             VALUES (%s, %s, %s, %s, NOW())
         """
         values = (
             data['username'],
             data['date'],
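Note on the new signature: the connection is now passed in, so the caller owns its lifecycle. A minimal usage sketch (the data values are hypothetical; get_db_connection is the helper this file defines outside these hunks), assuming the INSERT IGNORE is backed by a unique key on source_url, which is also what lets the rowcount check below detect duplicates:

db = get_db_connection()  # helper defined elsewhere in this file, not in this hunk
insert_video_to_db({
    "username": "alice",  # illustrative values only
    "date": "2024-01-01",
    "embed_link": "https://xpornium.net/embed/abc",
    "source_url": "https://webcamrips.to/v/abc",
}, db)
db.close()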
@@ -27,29 +25,31 @@ def insert_video_to_db(data):
             print("❌ Video already exists in DB")
         else:
             print("✅ Inserted into DB!")
     except mysql.connector.Error as err:
         print(f"❌ Failed to insert: {err}")
     finally:
         cursor.close()

 def crawl_user_page(full_url):
-    response = requests.get(full_url)
-    if response.status_code != 200:
-        print(f"❌ Failed to load {full_url}")
-        return None
-    soup = BeautifulSoup(response.text, "html.parser")
-    data = parse_data(soup)
-    data["source_url"] = full_url
-    return data
+    try:
+        response = requests.get(full_url)
+        if response.status_code != 200:
+            print(f"❌ Failed to load {full_url}")
+            return None
+        soup = BeautifulSoup(response.text, "html.parser")
+        data = parse_data(soup)
+        data["source_url"] = full_url
+        return data
+    except Exception as e:
+        print(f"❌ Exception while crawling {full_url}: {e}")
+        return None
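One gap the new try/except does not cover: requests.get has no default timeout, so a stalled response can hang a worker thread indefinitely once the crawl is threaded. A hedged tweak (the 10-second value is arbitrary):

response = requests.get(full_url, timeout=10)  # seconds; a stalled host now raises requests.exceptions.Timeout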
 def parse_data(soup):
     title_tag = soup.find("h1", class_="entry-title")
-    username = title_tag.contents[0].strip()
+    username = title_tag.contents[0].strip() if title_tag else "unknown"
     date_tag = soup.find("span", class_="entry-date")
     date = date_tag.text.strip() if date_tag else None
@@ -61,28 +61,49 @@ def parse_data(soup):
         print(f"⚠️ Failed to parse date: {date}")
         date = None
-    embed_link = None
-    for iframe in soup.find_all("iframe", src=True):
-        src = iframe["src"]
-        if "xpornium.net" in src:
-            embed_link = src  # no urljoin needed!
-            break  # stop after finding the first match
+    embed_link = None
+    for iframe in soup.find_all("iframe", src=True):
+        src = iframe["src"]
+        if "xpornium.net" in src:
+            embed_link = src
+            break
     print(f"\n✅ Scraped {username}: — {date}")
     return {
         "username": username,
         "date": date,
         "embed_link": embed_link,
     }
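The hunk shows only the failure branch of the date parse; the step above it is presumably something like the sketch below (the input format string is a guess, not taken from the diff):

try:
    # hypothetical reconstruction, e.g. "January 5, 2024" -> "2024-01-05"
    date = datetime.strptime(date, "%B %d, %Y").date().isoformat()
except (ValueError, TypeError):
    print(f"⚠️ Failed to parse date: {date}")
    date = None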
+def process_link(link, seen_urls, init_url):
+    full_url = init_url + link
+    if full_url in seen_urls:
+        print(f"⚠️ Skipping {link} - already seen.")
+        return
+    user_data = crawl_user_page(full_url)
+    if not user_data:
+        print("⚠️ Skipping empty user_data.")
+        return
+    # --- print info after crawling this user ---
+    print(f"\n✅ Scraped {user_data['username']}: — {user_data['date']}")
+    if not user_data["embed_link"]:
+        print(f"⚠️ Skipping {user_data['username']} - no embed link found.")
+        return
+    # -------------------------------------------
+    # each worker opens its own connection: mysql.connector connections
+    # must not be shared across threads
+    local_db = get_db_connection()
+    insert_video_to_db(user_data, local_db)
+    local_db.close()
+    return user_data
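get_db_connection() and preload_source_urls() are called throughout but defined outside these hunks. A minimal sketch of what they plausibly look like, assuming mysql.connector (imported at the top of the file) and the videos.source_url column used above; credentials are placeholders:

def get_db_connection():
    # one connection per caller; process_link opens its own because
    # mysql.connector connections must not be shared across threads
    return mysql.connector.connect(
        host="localhost", user="scraper", password="...", database="videos"
    )

def preload_source_urls(cursor):
    # load every already-crawled URL once so workers can dedupe with a
    # set lookup instead of a query per link
    cursor.execute("SELECT source_url FROM videos")
    return {row[0] for row in cursor.fetchall()}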
 def crawl_all(init_url):
     """Crawl page by page and extract user data as we go."""
     page = 1
     all_data = []
+    db = get_db_connection()
+    cursor = db.cursor()
+    seen_urls = preload_source_urls(cursor)
+    cursor.close()
+    db.close()
     while True:
         url = f"{init_url}?p={page}"
@@ -95,44 +116,21 @@ def crawl_all(init_url):
         soup = BeautifulSoup(response.text, "html.parser")
         video_pages = soup.find_all("a", class_="thumbnail-link", href=True)
         video_pages = [link['href'] for link in video_pages]
         if not video_pages:
             print("⚠️ No user links found — reached end of site.")
             break
-        cursor = db.cursor()
-        seen_urls = preload_source_urls(cursor)
-        for link in video_pages:
-            full_url = init_url + link
-            if full_url in seen_urls:
-                print(f"⚠️ Skipping {link} - already seen.")
-                continue
-            user_data = crawl_user_page(full_url)  # slow: fetches one page at a time
-            if not user_data:
-                print("⚠️ Skipping empty user_data.")
-                continue
-            if not user_data["embed_link"]:
-                print(f"⚠️ Skipping {user_data['username']} - no embed link found.")
-                continue
-            insert_video_to_db(user_data)
+        with ThreadPoolExecutor(max_workers=50) as executor:
+            futures = [executor.submit(process_link, link, seen_urls, init_url) for link in video_pages]
+            for future in as_completed(futures):
+                result = future.result()  # re-raises any exception from the worker thread
+                if result:
+                    all_data.append(result)
         page += 1
-    print("\n✅ Finished crawling all pages.")
+    print(f"\n✅ Finished crawling all pages. Total users: {len(all_data)}")
     return all_data
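With max_workers=50, each page can open and close up to 50 MySQL connections. If that becomes a bottleneck, mysql.connector ships a connection pool that get_db_connection could wrap; note the connector caps pool_size at 32, so the worker count would have to come down to match. A sketch with placeholder credentials:

from mysql.connector import pooling

POOL = pooling.MySQLConnectionPool(
    pool_name="crawler", pool_size=32,  # 32 is mysql.connector's maximum
    host="localhost", user="scraper", password="...", database="videos",
)

def get_db_connection():
    return POOL.get_connection()  # .close() returns the connection to the pool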
 if __name__ == "__main__":
-    db = get_db_connection()
     BASE_URL = "https://webcamrips.to"
-    results = crawl_all(BASE_URL)
-    print("💾 All data saved to users_data.json")
-    cursor = db.cursor()
-    cursor.close()
-    db.close()
+    crawl_all(BASE_URL)
