Skip already-crawled videos: preload stored source_urls and dedupe before inserting

main
Your Name 7 hours ago
parent c79a86eddb
commit c51464d68e

Binary file not shown.

@@ -14,4 +14,8 @@ def get_db_connection():
except mysql.connector.Error as err:
print(f"❌ Failed to connect to DB: {err}")
return  # don't continue if the DB connection failed
return db
return db
def preload_source_urls(cursor):
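# load every source_url already stored so the crawler can skip pages it has processed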
cursor.execute("SELECT source_url FROM videos")
return set(row[0] for row in cursor.fetchall())

@@ -1,4 +1,4 @@
from config import get_db_connection
from config import get_db_connection, preload_source_urls
import requests, time, mysql.connector
from bs4 import BeautifulSoup
from urllib.parse import urljoin
@@ -6,36 +6,34 @@ from datetime import datetime
def insert_video_to_db(data):
db = get_db_connection()
try:
cursor = db.cursor()
sql = """
INSERT IGNORE INTO videos (username, url, title, date, embed_link, source_url, created_at)
VALUES (%s, %s, %s, %s, %s, %s, NOW())
INSERT IGNORE INTO videos (username, date, embed_link, source_url, created_at)
VALUES (%s, %s, %s, %s, NOW())
"""
values = (
data['username'],
data['url'],
data['title'],
data['date'],
data['embed_link'],
data['source_url']
)
cursor.execute(sql, values)
db.commit()
print("✅ Inserted into DB!")
if cursor.rowcount == 0:
print("❌ Video already exists in DB")
else:
print("✅ Inserted into DB!")
except mysql.connector.Error as err:
print(f"❌ Failed to insert: {err}")
finally:
cursor.close()
db.close()
def crawl_user_page(base_url, user_path):
full_url = urljoin(base_url, user_path)
def crawl_user_page(full_url):
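# the absolute URL is now built by the caller (crawl_all), so this function no longer joins paths itself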
response = requests.get(full_url)
if response.status_code != 200:
print(f"❌ Failed to load {full_url}")
@@ -43,13 +41,15 @@ def crawl_user_page(base_url, user_path):
soup = BeautifulSoup(response.text, "html.parser")
data = parse_data(soup)
data["source_url"] = full_url
return data
def parse_data(soup):
username = user_path.strip("/")
title_tag = soup.find("h1", class_="entry-title")
title = title_tag.text.strip() if title_tag else "(no title)"
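# assumes the first text node of the <h1> title holds the username (replaces deriving it from the URL path)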
username = title_tag.contents[0].strip() if title_tag and title_tag.contents else "(no username)"
date_tag = soup.find("span", class_="entry-date")
date = date_tag.text.strip() if date_tag else None
@@ -75,7 +75,6 @@ def parse_data(soup):
return {
"username": username,
"title": title,
"date": date,
"embed_link": embed_link,
}
@@ -94,14 +93,24 @@ def crawl_all(init_url):
break
soup = BeautifulSoup(response.text, "html.parser")
user_links = soup.find_all("a", class_="thumbnail-link", href=True)
if not user_links:
video_pages = soup.find_all("a", class_="thumbnail-link", href=True)
video_pages = [link['href'] for link in video_pages]
if not video_pages:
print("⚠️ No user links found — reached end of site.")
break
for link in user_links:
user_path = link["href"]
user_data = crawl_user_page(init_url, user_path)
cursor = db.cursor()
seen_urls = preload_source_urls(cursor)
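# rebuilds the seen-URL set from the DB on every listing page; it could be loaded once before the loop instead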
for link in video_pages:
full_url = init_url + link
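# assumes each href is a site-relative path (e.g. "/some-video/") that can be appended directly to init_url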
if full_url in seen_urls:
print(f"⚠️ Skipping {link} - already seen.")
continue
user_data = crawl_user_page(full_url)  # fetches and parses the video page; the slowest step in this loop
if not user_data:
print("⚠️ Skipping empty user_data.")
continue
@@ -111,25 +120,19 @@ def crawl_all(init_url):
continue
insert_video_to_db(user_data)
time.sleep(0.5)
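# throttle: 0.5 s between video pages, 1 s between listing pages (below)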
page += 1
time.sleep(1)
print(f"\n✅ Finished crawling all pages. Total users: {len(all_data)}")
return all_data
if __name__ == "__main__":
db = get_db_connection()
BASE_URL = "https://webcamrips.to"
results = crawl_all(BASE_URL)
print("💾 All data saved to users_data.json")
cursor = db.cursor()
cursor.close()
db.close()