Compare commits

...

3 Commits

Author SHA1 Message Date
Your Name f5d91117d8 Still broken after latest fix 11 hours ago
Your Name f90cbd7b53 Still broken, continuing to debug 12 hours ago
Your Name c51464d68e Crawler broken, start debugging 13 hours ago

Binary file not shown.

@@ -14,4 +14,8 @@ def get_db_connection():
    except mysql.connector.Error as err:
        print(f"❌ Failed to connect to DB: {err}")
        return  # don't continue if DB failed
    return db

def preload_source_urls(cursor):
    cursor.execute("SELECT source_url FROM videos")
    return set(row[0] for row in cursor.fetchall())
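Usage sketch (not part of the diff): the two helpers above are meant to be used together, mirroring what crawl_all() does in the crawler file below.

db = get_db_connection()
if db:  # get_db_connection() returns nothing on failure
    cursor = db.cursor()
    seen = preload_source_urls(cursor)  # set of source_url values already stored
    print(f"{len(seen)} URLs already in the videos table")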

@@ -1,135 +1,118 @@
import requests, mysql.connector, time
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed
from config import get_db_connection, preload_source_urls
# ────────────────────────────────────────────────────────────────
def insert_video_to_db(data, cursor, db):
    # cursor/db are shared and passed in by the caller (see crawl_index_page)
    try:
        cursor.execute("""
            INSERT IGNORE INTO videos (username, date, embed_link, source_url, created_at)
            VALUES (%s, %s, %s, %s, NOW())
        """, (data['username'], data['date'], data['embed_link'], data['source_url']))
        db.commit()
        if cursor.rowcount > 0:
            print(f"✅ New: {data['username']} ({data['date']})")
    except mysql.connector.Error as err:
        print(f"❌ DB insert error: {err}")
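# Note on the rowcount check above: INSERT IGNORE silently drops rows that collide
# with a UNIQUE key, leaving cursor.rowcount at 0, so rowcount > 0 really does mean
# "new row". This assumes the videos table has a unique index on source_url (or an
# equivalent column) — the index itself is not shown anywhere in this diff.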
# ────────────────────────────────────────────────────────────────
def parse_data(soup):
    # username: first text node of the entry title
    username = soup.select_one("h1.entry-title")
    username = username.contents[0].strip() if username and username.contents else None
    # date: shown as DD/MM/YYYY on the page, stored as YYYY-MM-DD
    date_tag = soup.select_one("span.entry-date")
    date = date_tag.text.strip() if date_tag else None
    if date:
        try:
            date = datetime.strptime(date, "%d/%m/%Y").strftime("%Y-%m-%d")
        except ValueError:
            print(f"⚠️ Failed to parse date: {date}")
            date = None
    # embed link: first iframe pointing at xpornium.net, if any
    embed_link = next((i["src"] for i in soup.find_all("iframe", src=True) if "xpornium.net" in i["src"]), None)
    return {"username": username, "date": date, "embed_link": embed_link}
# ────────────────────────────────────────────────────────────────
def crawl_user_page(url):
    """Fetch one user page and return its parsed data, or None on failure."""
    try:
        res = requests.get(url, timeout=15)
        if res.ok:
            soup = BeautifulSoup(res.text, "html.parser")
            data = parse_data(soup)
            data["source_url"] = url
            # only keep pages that actually have an embed link
            return data if data["embed_link"] else None
    except Exception:
        pass
    return None
# ────────────────────────────────────────────────────────────────
def crawl_index_page(base_url, page_num, seen, cursor, db):
    """Fetch one index page, crawl its unseen user links in parallel, insert new videos."""
    url = f"{base_url}?p={page_num}"
    print(f"📄 Page {page_num}")
    try:
        res = requests.get(url, timeout=15)
        if not res.ok:
            return 0
        soup = BeautifulSoup(res.text, "html.parser")
        links = [urljoin(base_url, a["href"]) for a in soup.select("a.thumbnail-link[href]")]
        links = [link for link in links if link not in seen]
        if not links:
            return 0
        new_count = 0
        with ThreadPoolExecutor(max_workers=50) as pool:
            for f in as_completed(pool.submit(crawl_user_page, link) for link in links):
                data = f.result()
                if data:
                    insert_video_to_db(data, cursor, db)
                    new_count += 1
        return new_count
    except Exception:
        return 0
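# Caveat: crawl_index_page fans out to 50 worker threads but funnels every insert
# through the single cursor/connection it was given, and mysql.connector connections
# are not thread-safe. One possible variation (a sketch, not what this diff does) is
# to open a short-lived connection per insert:
def insert_video_threadsafe(data):
    db = get_db_connection()  # slower, but safe to call from any worker thread
    if not db:
        return
    try:
        cursor = db.cursor()
        cursor.execute("""
            INSERT IGNORE INTO videos (username, date, embed_link, source_url, created_at)
            VALUES (%s, %s, %s, %s, NOW())
        """, (data['username'], data['date'], data['embed_link'], data['source_url']))
        db.commit()
        cursor.close()
    finally:
        db.close()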
# ────────────────────────────────────────────────────────────────
def crawl_all(base_url):
    """Crawl index pages in batches of 10 and insert new videos as we go."""
    db = get_db_connection()
    cursor = db.cursor()
    seen = preload_source_urls(cursor)
    QUIT_LOGIC = True  # stop after repeated empty batches
    page, total, empty_batches = 1000, 0, 0  # hard-coded starting page
    while True:
        batch = range(page, page + 10)
        print(f"\n🚀 Batch {page}-{page + 9}")
        with ThreadPoolExecutor(max_workers=10) as pool:
            futures = [pool.submit(crawl_index_page, base_url, p, seen, cursor, db) for p in batch]
            results = [f.result() for f in as_completed(futures)]
        batch_total = sum(results)
        total += batch_total
        print(f"📦 Batch complete — {batch_total} new videos (total: {total})")
        if QUIT_LOGIC:
            if batch_total == 0:
                empty_batches += 1
                if empty_batches >= 10:
                    print("\n🛑 No new videos for 10 consecutive batches (100 pages). Stopping.")
                    break
            else:
                empty_batches = 0
        page += 10
    cursor.close()
    db.close()
    print(f"\n✅ Done! Total new videos: {total}")
# ────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    crawl_all("https://webcamrips.to")
