Compare commits

...

3 Commits

Author SHA1 Message Date
Your Name f5d91117d8 Still broken after more fixes 9 hours ago
Your Name f90cbd7b53 Still broken, continuing to debug 10 hours ago
Your Name c51464d68e Crawler broken, start debugging 10 hours ago

Binary file not shown.

@@ -14,4 +14,8 @@ def get_db_connection():
     except mysql.connector.Error as err:
         print(f"❌ Failed to connect to DB: {err}")
         return # dont continue if DB failed
-        return db
+    return db
+
+def preload_source_urls(cursor):
+    cursor.execute("SELECT source_url FROM videos")
+    return set(row[0] for row in cursor.fetchall())
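
Only the tail of get_db_connection appears in this hunk. For context, a minimal sketch of what config.py presumably looks like after the change; the connection parameters below are placeholders, not values taken from the repo:

import mysql.connector

def get_db_connection():
    try:
        # Placeholder credentials: the real values are not part of this diff.
        db = mysql.connector.connect(
            host="localhost",
            user="crawler",
            password="changeme",
            database="videos",
        )
    except mysql.connector.Error as err:
        print(f"❌ Failed to connect to DB: {err}")
        return  # bail out: callers receive None on a failed connection
    return db

def preload_source_urls(cursor):
    # Load every source_url already stored so the crawler can skip known pages.
    cursor.execute("SELECT source_url FROM videos")
    return set(row[0] for row in cursor.fetchall())

Note that callers still have to handle the None returned on a failed connection; the new crawl_all in the second file calls db.cursor() on the result without checking it.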

@@ -1,135 +1,118 @@
-from config import get_db_connection
-import requests, time, mysql.connector
+import requests, mysql.connector, time
 from bs4 import BeautifulSoup
 from urllib.parse import urljoin
 from datetime import datetime
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from config import get_db_connection, preload_source_urls
+# ────────────────────────────────────────────────────────────────
 def insert_video_to_db(data):
+    db = get_db_connection()
     try:
-        db = get_db_connection()
         cursor = db.cursor()
-        sql = """
-            INSERT IGNORE INTO videos (username, url, title, date, embed_link, source_url, created_at)
-            VALUES (%s, %s, %s, %s, %s, %s, NOW())
-        """
-        values = (
-            data['username'],
-            data['url'],
-            data['title'],
-            data['date'],
-            data['embed_link'],
-            data['source_url']
-        )
-        cursor.execute(sql, values)
+        cursor.execute("""
+            INSERT IGNORE INTO videos (username, date, embed_link, source_url, created_at)
+            VALUES (%s, %s, %s, %s, NOW())
+        """, (data['username'], data['date'], data['embed_link'], data['source_url']))
         db.commit()
-        print("✅ Inserted into DB!")
+        if cursor.rowcount > 0:
+            print(f"✅ New: {data['username']}{data['date']}")
     except mysql.connector.Error as err:
-        print(f"❌ Failed to insert: {err}")
+        print(f"❌ DB insert error: {err}")
     finally:
         cursor.close()
         db.close()
-def crawl_user_page(base_url, user_path):
-    full_url = urljoin(base_url, user_path)
-    response = requests.get(full_url)
-    if response.status_code != 200:
-        print(f"❌ Failed to load {full_url}")
-        return None
-    soup = BeautifulSoup(response.text, "html.parser")
+# ────────────────────────────────────────────────────────────────
+def parse_data(soup):
-    username = user_path.strip("/")
-    title_tag = soup.find("h1", class_="entry-title")
-    title = title_tag.text.strip() if title_tag else "(no title)"
+    username = soup.select_one("h1.entry-title")
+    username = username.contents[0].strip() if username and username.contents else None
-    date_tag = soup.find("span", class_="entry-date")
+    date_tag = soup.select_one("span.entry-date")
     date = date_tag.text.strip() if date_tag else None
     if date:
         try:
-            date_obj = datetime.strptime(date, "%d/%m/%Y")
-            date = date_obj.strftime("%Y-%m-%d")
+            date = datetime.strptime(date, "%d/%m/%Y").strftime("%Y-%m-%d")
         except ValueError:
             print(f"⚠️ Failed to parse date: {date}")
            date = None
-    embed_link = None
-    for iframe in soup.find_all("iframe", src=True):
-        src = iframe["src"]
-        if "xpornium.net" in src:
-            embed_link = src # no urljoin needed!
-            break # stop after finding the first match
-    # --- print info after crawling this user ---
-    print(f"\n✅ Scraped {username}: — {date}")
+    embed_link = next((i["src"] for i in soup.find_all("iframe", src=True) if "xpornium.net" in i["src"]), None)
+    return {"username": username, "date": date, "embed_link": embed_link}
-    # -------------------------------------------
-    return {
-        "username": username,
-        "title": title,
-        "date": date,
-        "embed_link": embed_link,
-    }
-def crawl_all(init_url):
-    """Crawl page by page and extract user data as we go."""
-    page = 1
-    all_data = []
+# ────────────────────────────────────────────────────────────────
+def crawl_user_page(url):
+    try:
+        res = requests.get(url, timeout=15)
+        if res.ok:
+            soup = BeautifulSoup(res.text, "html.parser")
+            data = parse_data(soup)
+            data["source_url"] = url
+            return data if data["embed_link"] else None
+    except Exception:
+        pass
+    return None
+# ────────────────────────────────────────────────────────────────
+def crawl_index_page(base_url, page_num, seen, cursor, db):
+    url = f"{base_url}?p={page_num}"
+    print(f"📄 Page {page_num}")
+    try:
+        res = requests.get(url, timeout=15)
+        if not res.ok:
+            return 0
+        soup = BeautifulSoup(res.text, "html.parser")
+        links = [urljoin(base_url, a["href"]) for a in soup.select("a.thumbnail-link[href]")]
+        links = [link for link in links if link not in seen]
+        if not links:
+            return 0
+        new_count = 0
+        with ThreadPoolExecutor(max_workers=50) as pool:
+            for f in as_completed(pool.submit(crawl_user_page, l) for l in links):
+                data = f.result()
+                if data:
+                    insert_video_to_db(data, cursor, db)
+                    new_count += 1
+        return new_count
+    except Exception:
+        return 0
+# ────────────────────────────────────────────────────────────────
+def crawl_all(base_url):
+    db = get_db_connection()
+    cursor = db.cursor()
+    seen = preload_source_urls(cursor)
+    QUIT_LOGIC = True
+    page, total, empty_results = 1000, 0, 0
     while True:
-        url = f"{init_url}?p={page}"
-        print(f"\n🕷️ Crawling index page {page}: {url}")
-        response = requests.get(url)
-        if response.status_code != 200:
-            print(f"❌ Page {page} returned {response.status_code}, stopping.")
-            break
-        soup = BeautifulSoup(response.text, "html.parser")
-        user_links = soup.find_all("a", class_="thumbnail-link", href=True)
-        if not user_links:
-            print("⚠️ No user links found — reached end of site.")
-            break
-        for link in user_links:
-            user_path = link["href"]
-            user_data = crawl_user_page(init_url, user_path)
-            if not user_data:
-                print("⚠️ Skipping empty user_data.")
-                continue
-            if not user_data["embed_link"]:
-                print(f"⚠️ Skipping {user_data['username']} - no embed link found.")
-                continue
-            insert_video_to_db(user_data)
-            time.sleep(0.5)
-        page += 1
-        time.sleep(1)
-    print(f"\n✅ Finished crawling all pages. Total users: {len(all_data)}")
-    return all_data
+        batch = range(page, page + 10)
+        print(f"\n🚀 Batch {page}{page + 9}")
-if __name__ == "__main__":
-    BASE_URL = "https://webcamrips.to"
-    results = crawl_all(BASE_URL)
-    print("💾 All data saved to users_data.json")
+        with ThreadPoolExecutor(max_workers=10) as pool:
+            results = [f.result() for f in as_completed(pool.submit(crawl_index_page, base_url, p, seen, cursor, db) for p in batch)]
+        batch_total = sum(results)
+        total += batch_total
+        print(f"📦 Batch complete — {batch_total} new videos (total: {total})")
+        if not QUIT_LOGIC:
+            if batch_total == 0:
+                empty_results += 1
+                if empty_results >= 10:
+                    print("\n🛑 No new videos found for 30 consecutive pages. Stopping.")
+                    break
+        page += 10
+    cursor.close()
+    db.close()
+    print(f"\n✅ Done! Total new videos: {total}")
+# ────────────────────────────────────────────────────────────────
+if __name__ == "__main__":
+    crawl_all("https://webcamrips.to")
