import os
import time
import json

import requests
import mysql.connector
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from datetime import datetime
from dotenv import load_dotenv
from xpornium import get_file_info, remote_upload

load_dotenv()


def insert_video_to_db(data):
    """Insert one scraped video row; INSERT IGNORE skips duplicates."""
    try:
        db_host = os.getenv("DB_HOST")
        db_user = os.getenv("DB_USER")
        db_pass = os.getenv("DB_PASS")
        db_name = os.getenv("DB_NAME")
        db = mysql.connector.connect(host=db_host, user=db_user, password=db_pass, database=db_name)
    except mysql.connector.Error as err:
        print(f"❌ Failed to connect to DB: {err}")
        return  # don't continue if the DB connection failed

    cursor = None
    try:
        cursor = db.cursor()
        sql = """
            INSERT IGNORE INTO videos
                (username, url, title, date, embed_link, source_url, created_at)
            VALUES (%s, %s, %s, %s, %s, %s, NOW())
        """
        values = (
            data['username'],
            data['url'],
            data['title'],
            data['date'],
            data['embed_link'],
            data['source_url'],
        )
        cursor.execute(sql, values)
        db.commit()
        print("✅ Inserted into DB!")
    except mysql.connector.Error as err:
        print(f"❌ Failed to insert: {err}")
    finally:
        if cursor:
            cursor.close()
        db.close()


def save_xpornium_upload(embed_link, fileid, xpornium_url, title, cat_id, duration, thumbnail):
    """Record the Xpornium re-upload (file id, embed link, duration, thumbnail)."""
    db = None
    cursor = None
    try:
        db = mysql.connector.connect(
            host=os.getenv("DB_HOST"),
            user=os.getenv("DB_USER"),
            password=os.getenv("DB_PASS"),
            database=os.getenv("DB_NAME"),
        )
        cursor = db.cursor()
        new_embed_link = f"https://xpornium.net/embed/{fileid}"
        sql = """
            INSERT INTO xpornium_uploads
                (original_embed_link, xpornium_fileid, xpornium_url, new_embed_link,
                 title, category_id, uploaded_at, duration, thumbnail)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
        """
        values = (
            embed_link, fileid, xpornium_url, new_embed_link,
            title, cat_id, datetime.now(), duration, thumbnail,
        )
        cursor.execute(sql, values)
        db.commit()
        print(f"✅ Saved to DB: {fileid} | duration: {duration}s | thumb: {thumbnail}")
    except Exception as e:
        print(f"❌ DB Save Failed: {e}")
    finally:
        # db/cursor stay None if the connection itself failed
        if cursor:
            cursor.close()
        if db and db.is_connected():
            db.close()


def crawl_user_page(base_url, user_path):
    """Visit one 'url/username' page and extract its info."""
    full_url = urljoin(base_url, user_path)
    response = requests.get(full_url, timeout=30)
    if response.status_code != 200:
        print(f"❌ Failed to load {full_url}")
        return None

    soup = BeautifulSoup(response.text, "html.parser")
    username = user_path.strip("/")
    title_tag = soup.find("h1", class_="entry-title")
    title = title_tag.text.strip() if title_tag else "(no title)"
    source_url = full_url

    date_tag = soup.find("span", class_="entry-date")
    date = date_tag.text.strip() if date_tag else None
    # Convert DD/MM/YYYY → YYYY-MM-DD
    if date:
        try:
            date_obj = datetime.strptime(date, "%d/%m/%Y")
            date = date_obj.strftime("%Y-%m-%d")
        except ValueError:
            print(f"⚠️ Failed to parse date: {date}")
            date = None

    embed_link = None
    for iframe in soup.find_all("iframe", src=True):
        src = iframe["src"]
        if "xpornium.net" in src:
            embed_link = src  # already absolute, no urljoin needed
            break  # stop after the first match

    # --- print info after crawling this user ---
    print(f"\n✅ Scraped {username}: {title} ({date})")
    # --------------------------------------------

    return {
        "username": username,
        "url": full_url,
        "title": title,
        "date": date,
        "embed_link": embed_link,
        "source_url": source_url,
    }


def crawl_all(init_url):
    """Crawl index pages one by one and process each user as we go."""
    page = 1
    all_data = []

    while True:
        url = f"{init_url}?p={page}"
        print(f"\n🕷️ Crawling index page {page}: {url}")
        response = requests.get(url, timeout=30)
        if response.status_code != 200:
            print(f"❌ Page {page} returned {response.status_code}, stopping.")
            break

        soup = BeautifulSoup(response.text, "html.parser")
        user_links = soup.find_all("a", class_="thumbnail-link", href=True)
        if not user_links:
            print("⚠️ No user links found, reached end of site.")
            break

        for link in user_links:
            user_path = link["href"]
            user_data = crawl_user_page(init_url, user_path)
            if not user_data:
                print("⚠️ Skipping empty user_data.")
                continue
            if not user_data["embed_link"]:
                print(f"⚠️ Skipping {user_data['username']} - no embed link found.")
                continue

            insert_video_to_db(user_data)

            # Derive the file id and canonical embed URL, then trigger
            # Xpornium's remote upload for it
            fileid = user_data["embed_link"].split("/")[-1]
            xpornium_url = f"https://xpornium.net/embed/{fileid}"
            remote_upload(xpornium_url)

            # Fetch file info (duration, thumbnail)
            info_response = get_file_info(fileid)
            info_json = info_response.json()
            if info_json.get("status") != 200 or not info_json.get("result"):
                print(f"❌ Failed to get file info for {fileid}")
                continue

            info = info_json["result"][0]
            duration = info.get("duration") or 0
            thumbnail = info.get("thumbnail") or ""

            # Save the upload record (127 = hardcoded Xpornium category id)
            save_xpornium_upload(
                user_data["embed_link"], fileid, xpornium_url,
                user_data["title"], 127, duration, thumbnail,
            )

            all_data.append(user_data)
            time.sleep(0.5)  # be polite between user pages

        page += 1
        time.sleep(1)  # be polite between index pages

    print(f"\n✅ Finished crawling all pages. Total users: {len(all_data)}")
    return all_data


if __name__ == "__main__":
    BASE_URL = "https://webcamrips.to"
    results = crawl_all(BASE_URL)

    # Persist the scraped data so the final message is accurate
    with open("users_data.json", "w", encoding="utf-8") as f:
        json.dump(results, f, ensure_ascii=False, indent=2)
    print("💾 All data saved to users_data.json")
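
# ---------------------------------------------------------------------------
# Expected .env file (a sketch; values are placeholders). load_dotenv() reads
# these four variables, whose names come from the os.getenv() calls above:
#
#   DB_HOST=localhost
#   DB_USER=crawler
#   DB_PASS=change-me
#   DB_NAME=videos_db
# ---------------------------------------------------------------------------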
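
# ---------------------------------------------------------------------------
# Schema sketch, not part of the original script: the two INSERT statements
# above imply these tables. Column names are taken verbatim from the queries;
# the types, lengths, and keys are assumptions chosen to fit the values the
# script binds. Note that INSERT IGNORE only deduplicates rows if some unique
# constraint exists, so the UNIQUE KEY on videos.url is an assumed choice.
#
#   CREATE TABLE IF NOT EXISTS videos (
#       id          INT AUTO_INCREMENT PRIMARY KEY,   -- assumed surrogate key
#       username    VARCHAR(255),
#       url         VARCHAR(512),
#       title       VARCHAR(512),
#       date        DATE,
#       embed_link  VARCHAR(512),
#       source_url  VARCHAR(512),
#       created_at  DATETIME,
#       UNIQUE KEY uq_videos_url (url)                -- assumed dedup key
#   );
#
#   CREATE TABLE IF NOT EXISTS xpornium_uploads (
#       id                  INT AUTO_INCREMENT PRIMARY KEY,
#       original_embed_link VARCHAR(512),
#       xpornium_fileid     VARCHAR(64),
#       xpornium_url        VARCHAR(512),
#       new_embed_link      VARCHAR(512),
#       title               VARCHAR(512),
#       category_id         INT,
#       uploaded_at         DATETIME,
#       duration            INT,
#       thumbnail           VARCHAR(512)
#   );
# ---------------------------------------------------------------------------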