massive update

main
oscar 3 months ago
parent 55484ebf11
commit a65cc43999

BIN
.DS_Store vendored

Binary file not shown.

1
.gitignore vendored

@@ -32,3 +32,4 @@ uploadlater
snapchat.json
/add_to_liked
/.profiles
/.vscode

@@ -1,17 +1,3 @@
import os
MEDIA_DIRECTORY = "media"
SNAPCHAT_DIRECTORY = "snapchat"
INSTAGRAM_DIRECTORY = "instagram"
@property
def get_instagram_directory():
return os.path.join(MEDIA_DIRECTORY, INSTAGRAM_DIRECTORY)
@property
def snapchat_output_dir():
return os.path.join(MEDIA_DIRECTORY, SNAPCHAT_DIRECTORY)
username = "doadmin"
password = "AVNS_2qeFJuiGRpBQXkJjlA6"
host = "storysave-do-user-13308724-0.c.db.ondigitalocean.com"

@@ -1,5 +1,5 @@
import os
from funcs import calculate_file_hash, get_media_dimensions, get_media_type, generate_phash
from funcs import calculate_file_hash, get_media_dimensions, generate_phash
import config
# --- Configuration & Constants ---
@@ -54,8 +54,11 @@ def update_dimensions(cursor, db, obj_storage):
obj_storage.DownloadFile(storage_path=server_path, download_path=CACHE_DIR)
# Optionally, you could get the media type if needed:
media_type = get_media_type(local_file)
width, height = get_media_dimensions(local_file)
if width == 0 or height == 0:
print(f"Error getting dimensions for {media_url}")
continue
cursor.execute("UPDATE media SET width = %s, height = %s WHERE id = %s;", (width, height, record_id))
db.commit()
@@ -103,6 +106,31 @@ def update_phash(cursor, db, obj_storage):
db.commit()
print(f"[{idx}/{total}] Processed record {record_id} with pHash: {phash}")
def update_user_ids(cursor, db):
cursor.execute("SELECT DISTINCT username FROM media WHERE user_id IS NULL AND platform = 'instagram';")
usernames = [username[0] for username in cursor.fetchall()]
total = len(usernames)
print(f"Found {total} usernames to process for user_id updating.")
for idx, username in enumerate(usernames, start=1):
print(f"[{idx}/{total}] Username: {username}")
cursor.execute("SELECT DISTINCT user_id FROM media WHERE username = %s AND user_id IS NOT NULL;", [username])
possible_user_ids = [user_id for user_id, in cursor.fetchall()]
if len(possible_user_ids) == 0:
print(f"No user_id found for {username}")
continue
if len(possible_user_ids) > 1:
print(f"Multiple user_ids found for {username}: {possible_user_ids}")
continue
user_id = possible_user_ids[0]
cursor.execute("UPDATE media SET user_id = %s WHERE username = %s AND user_id IS NULL;", [user_id, username])
db.commit()
print(f"[{idx}/{total}] Updated user_id for {username}, Rows affected: {cursor.rowcount}")
def main():
obj_storage = config.get_storage()
db, cursor = config.gen_connection()
@@ -111,6 +139,7 @@ def main():
update_dimensions(cursor, db, obj_storage)
update_file_size(cursor, db, obj_storage)
update_phash(cursor, db, obj_storage)
update_user_ids(cursor, db)
if __name__ == '__main__':
main()

@@ -107,6 +107,7 @@ def compare_images(image_path1, image_path2):
def download_file(url, filePath):
try:
if os.path.exists(filePath):
print(f"File already exists: {filePath}")
return filePath
if not url:
@@ -198,4 +199,15 @@ def calculate_file_hash(file_path, hash_func='sha256'):
while chunk:
h.update(chunk)
chunk = file.read(8192)
return h.hexdigest()
return h.hexdigest()
def files_are_identical(file1, file2):
"""Compare two files byte-by-byte."""
with open(file1, "rb") as f1, open(file2, "rb") as f2:
while True:
chunk1 = f1.read(4096)
chunk2 = f2.read(4096)
if chunk1 != chunk2:
return False
if not chunk1: # End of file
return True
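# Usage sketch (hypothetical paths): files_are_identical('a.jpg', 'a_copy.jpg')
# is True only when both byte streams match exactly, read 4 KB at a time.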

@@ -1 +0,0 @@
DH3ucOuYLbJ2Va3lfJPEYQq_6mk_v3R9dnrAYSQHr-Q=

File diff suppressed because one or more lines are too long

@@ -1 +0,0 @@
gAAAAABmRUff7c9t9gngWj_2cwvaTBrUDJ_JUyYVUfG-p3SvDV7qOSHddJ4eHADiJeRtJNtY9UxkohSB5I1MmLahAb_hxxwIVA==

@@ -1,20 +1,41 @@
from storysave_api import get_hd_profile_picture
import config, funcs, os
import config, funcs, os, time
known_phashes = {'e7c51a904b69d366': 'default empty profile picture',
'cb3ce46194c335dc': 'default empty profile picture',
}
known_hashes = {
'09c3cf34d4f117d99fa6285f4bfd3a0d888d7ab2cbca665b16097f6b93ca0de6' : 'default empty profile picture',
'2b9c0914d8f3f0aa6cf86705df70b7b21e9ca2f9013a346463788e7cebd0158f' : 'default empty profile picture',
}
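# Downloads whose perceptual hash (or file hash) matches an entry above are the
# stock blank avatar and are deleted instead of kept (see the phash check below).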
db, cursor = config.gen_connection()
cursor.execute(f"SELECT DISTINCT username, user_id FROM media WHERE user_id IS NOT NULL AND username IN (SELECT username FROM following WHERE platform = 'instagram');")
cursor.execute("SELECT DISTINCT username, user_id, favorite FROM following WHERE user_id IS NOT NULL AND platform = 'instagram' ORDER BY favorite DESC;")
usernames = cursor.fetchall()
for username, user_id in usernames:
for username, user_id, favorite in usernames:
profilepicurl = get_hd_profile_picture(user_id=user_id)
if not profilepicurl:
print(f'Failed for {username}')
continue
filename = os.path.basename(profilepicurl).split('?')[0]
user_dir = os.path.join('media', 'instagram', 'profile', username)
filepath = os.path.join(user_dir, filename)
funcs.download_file(profilepicurl, filepath)
print(f"Downloaded profile picture for {username}.")
filepath = funcs.download_file(profilepicurl, filepath)
if not filepath:
continue
phash = funcs.generate_phash(filepath)
if phash in known_phashes:
print(f"Profile picture for {username} is the default empty profile picture.")
os.remove(filepath)
continue
print(f"Downloaded profile picture for {username}.")
time.sleep(1)

@@ -18,4 +18,5 @@ tqdm
webdriver-manager
moviepy==1.0.3
instagrapi
ImageHash
ImageHash
watchdog

@@ -1,153 +0,0 @@
from concurrent.futures import ThreadPoolExecutor, as_completed
from bs4 import BeautifulSoup
import requests
import json
headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36"}
snap_types = {
27 : ['spotlight', 'video'],
256 : ['thumbnail', 'image'],
400 : ['idk', 'image'],
1023 : ['idk', 'image'],
1034 : ['downscaled_video', 'video'],
1322 : ['idk', 'video'],
1325 : ['idk', 'video'],
}
def get_data(username):
url = f"https://www.snapchat.com/add/{username}"
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.text, "html.parser")
data_script = soup.find("script", id="__NEXT_DATA__")
if not data_script:
print(f"No data found for {username}.")
return None
data = json.loads(data_script.string)
return data
def get_social_medias(data):
website_url = None
try:
website_url = data['props']['pageProps']['userProfile']['publicProfileInfo']['websiteUrl']
except KeyError:
pass
return website_url
def get_related_profiles(data):
related_profiles = []
try:
related_profiles_data = data['props']['pageProps']['userProfile']['relatedProfiles']
for profile in related_profiles_data:
related_profiles.append(profile['username'])
except KeyError:
pass
return related_profiles
def get_all_users_data(usernames):
all_data = {}
# Define a helper function for threading
def fetch_data(username):
return username, get_data(username)
# Use ThreadPoolExecutor for concurrent fetching
with ThreadPoolExecutor() as executor:
futures = {executor.submit(fetch_data, username): username for username in usernames}
for future in as_completed(futures):
username = futures[future]
try:
username, data = future.result()
all_data[username] = data
except Exception as e:
print(f"Error fetching data for {username}: {e}")
all_data[username] = None
return all_data
def parse_stories(stories):
parsed_stories = []
for story in stories:
parsed_story = parse_story(story)
parsed_stories.append(parsed_story)
return parsed_stories
def get_stories(data):
"""Extract story list from the JSON data."""
try:
stories = data['props']['pageProps']['story']['snapList']
if not type(stories) == list:
return []
stories.sort(key=lambda x: x.get('snapIndex'), reverse=True)
return stories
except:
return []
def get_highlights(data):
"""Extract highlights from possible highlight keys in JSON data."""
highlights = []
page_props = data.get('props', {}).get('pageProps', {})
possible_highlight_keys = ['curatedHighlights', 'savedHighlights', 'highlights']
for key in possible_highlight_keys:
highlight_data = page_props.get(key, [])
if highlight_data:
highlights.extend(highlight_data)
return highlights
def parse_story(story):
original_snap_id = story.get('snapId', {}).get('value', '')
snap_url = story.get('snapUrls', {}).get('mediaUrl', '')
timestamp = story.get('timestampInSec', {}).get('value', '')
media_type = story.get('snapMediaType')
media_type = 'image' if media_type == 0 else 'video'
return {
"original_snap_id": original_snap_id,
"snap_id": get_snap_id(snap_url),
"url": snap_url,
"timestamp": timestamp,
"platform": "snapchat",
"type": "story",
"username": story.get('username', ''),
"media_type": media_type,
}
def get_snap_id(url):
return url.split('?')[0].split('/')[-1].split('.')[0]
def get_highlight_stories(data):
stories = []
highlights = get_highlights(data)
for highlight in highlights:
snap_list = highlight.get('snapList', [])
for snap in snap_list:
story = parse_story(snap)
stories.append(story)
return stories
def get_spotlight_metadata(data):
"""Extract spotlight metadata from JSON data."""
try:
return data['props']['pageProps']['spotlightStoryMetadata']
except KeyError:
return []
def get_username(data):
"""Extract username from JSON data."""
try:
return data['props']['pageProps']['userProfile']['publicProfileInfo']['username']
except KeyError:
return None

@@ -1,126 +0,0 @@
import os
import json
from tqdm import tqdm
from funcs import get_files
from snapchat import get_stories, get_highlights, get_spotlight_metadata, get_username
# import config as altpinsConfig
import altpinsConfig
def get_data(filepath):
try:
with open(filepath, 'r', encoding='utf-8') as f:
return json.load(f)
except:
print(f"Error reading {filepath}")
return None
def process_story(story, username, story_type, db, cursor):
snap_urls = story.get('snapUrls', {})
media_url = snap_urls.get('mediaUrl', '').split('?')[0]
media_id = media_url.split('/')[-1].split('.')[0].split('?')[-1]
if media_id in existing_media_ids:
return False
media_url = f"https://cf-st.sc-cdn.net/d/{media_url.split('/')[-1]}"
media_preview_url = snap_urls.get('mediaPreviewUrl', '').get('value', '').split('?')[0]
media_preview_url = f"https://cf-st.sc-cdn.net/d/{media_preview_url.split('/')[-1]}"
timestamp = story.get('timestampInSec', {}).get('value', '')
media_type = story.get('snapMediaType')
snap_id = story.get('snapId', {}).get('value', '')
query = "INSERT IGNORE INTO snapchat_stories (snapId, mediaUrl, mediaPreviewUrl, timestampInSec, snapMediaType, storyType, username, media_id) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
cursor.execute(query, (snap_id, media_url, media_preview_url, timestamp, media_type, story_type, username, media_id))
db.commit()
existing_media_ids.add(media_id)
print_emoji = '✅' if cursor.rowcount else '🔁'
print(f"{print_emoji} Inserted story {media_id}")
def process_json(json_path, db, cursor):
"""
Given a path to a JSON file, parse it and insert relevant data
into the database.
"""
# Load JSON data
data = get_data(json_path)
username = get_username(data)
ready_stories = []
# Insert stories (regular)
stories = get_stories(data)
for story in stories:
story['storyType'] = 'story'
ready_stories.append(story)
# Insert stories (highlights)
highlights = get_highlights(data)
highlight_stories = [story for highlight in highlights for story in highlight.get('snapList', [])]
highlight_stories.sort(key=lambda x: x.get('snapIndex'), reverse=True)
for story in highlight_stories:
story['storyType'] = 'highlight'
ready_stories.append(story)
for story in ready_stories:
story_type = story.get('storyType')
process_story(story, username, story_type, db, cursor)
# Insert spotlight metadata
spotlight_metadata = get_spotlight_metadata(data)
for story in spotlight_metadata:
try:
media_id = story['videoMetadata']['contentUrl'].split('/')[-1].split('.')[0].split('?')[-1]
deepLinkUrl = story['oneLinkParams']['deepLinkUrl'].split('?')[0]
except:
continue
if not all((media_id, deepLinkUrl)):
continue
if deepLinkUrl in existing_spotlights:
continue
deepLinkId = deepLinkUrl.split('/')[-1]
description = story['description']
insert_query = "INSERT IGNORE INTO snapchat_metadata (media_id, deepLinkUrl, description, username, deepLinkId) VALUES (%s, %s, %s, %s, %s)"
cursor.execute(insert_query, (media_id, deepLinkUrl, description, username, deepLinkId))
db.commit()
existing_spotlights.add(deepLinkUrl)
print_emoji = '✅' if cursor.rowcount else '🔁'
print(f"{print_emoji} Inserted spotlight {media_id}")
os.remove(json_path)
db, cursor = altpinsConfig.gen_connection()
existing_media_ids = []
cursor.execute("SELECT media_id FROM snapchat_stories WHERE media_id != '';")
existing_media_ids = {row[0] for row in cursor.fetchall()}
existing_spotlights = []
cursor.execute("SELECT deepLinkUrl FROM snapchat_metadata;")
existing_spotlights = {row[0] for row in cursor.fetchall()}
data_dir = 'data'
files = [f for f in get_files(data_dir) if f.endswith('.json')]
# Wrap the file list with tqdm to show a progress bar
for filepath in tqdm(files, desc="Processing files", unit="file"):
process_json(filepath, db, cursor)
db.close()

@@ -1,66 +0,0 @@
from snapchat import get_all_users_data, get_stories, get_highlight_stories, get_social_medias, get_related_profiles
import os, config
snapchat_directory = "snapchat"
media_directory = "media"
temp_directory = ".temp"
data_directory = "data"
directory = os.path.join(media_directory, snapchat_directory)
def get_snapchat_stories(usernames):
usernames = usernames[:5]
snapchat_users_data = get_all_users_data(usernames)
snapchat_users_data = dict(sorted(snapchat_users_data.items()))
ready_stories = []
for username, data in snapchat_users_data.items():
print(f"Getting stories for {username}...")
data = snapchat_users_data.get(username)
if not data:
print(f"Failed to get data for {username}. Skipping.")
continue
website_url = get_social_medias(data)
related_profiles = get_related_profiles(data)
stories = get_stories(data)
stories.extend(get_highlight_stories(data))
for story in stories:
snap_id = story['snap_id']
url = story['url']
timestamp = story['timestamp']
# Determine file extension
extension = '.jpg' if story['media_type'] == 'image' else '.mp4'
filename = f"{username}~{timestamp}~{snap_id}{extension}"
filepath = os.path.join(directory, filename)
story['media_url'] = url
story['snap_id'] = snap_id
story['filepath'] = filepath
story['username'] = username
story['timestamp'] = timestamp
story['original_snap_id'] = story['original_snap_id']
ready_stories.append(story)
# sort ready_stories by timestamp from oldest to newest
ready_stories.sort(key=lambda x: x['timestamp'])
return ready_stories
db, cursor = config.gen_connection()
cursor.execute("SELECT username FROM following WHERE platform = 'snapchat' ORDER BY id DESC")
usernames = [row[0] for row in cursor.fetchall()]
stories = get_snapchat_stories(usernames)

@@ -1,243 +0,0 @@
from snapchat import get_stories, get_highlight_stories, get_all_users_data, parse_stories
from datetime import datetime
from uuid import uuid4
import config
import funcs
import cv2
import os
import json
UPLOAD_MODE = True
media_directory = "media"
snapchat_directory = "snapchat"
temp_directory = ".temp"
data_directory = "data"
directory = os.path.join(media_directory, snapchat_directory)
os.makedirs(media_directory, exist_ok=True)
os.makedirs(directory, exist_ok=True)
os.makedirs(temp_directory, exist_ok=True)
os.makedirs(data_directory, exist_ok=True)
def find_duplicate_snap(existing_snap_ids, snap_id):
return snap_id in existing_snap_ids
def archive_data(data, username):
try:
current_timestamp = int(datetime.now().timestamp())
data_filename = f"{username}~{current_timestamp}.json"
data_filepath = os.path.join(data_directory, data_filename)
with open(data_filepath, 'w') as f:
f.write(json.dumps(data, indent=4))
except:
print(f"Failed to archive data for {username}.")
return False
def get_snapchat_stories(usernames):
snapchat_users_data = get_all_users_data(usernames)
snapchat_users_data = dict(sorted(snapchat_users_data.items()))
ready_stories = []
for username, data in snapchat_users_data.items():
print(f"Getting stories for {username}...")
if not data:
print(f"Failed to get data for {username}. Skipping.")
continue
archive_data(data, username)
stories = get_stories(data)
stories = parse_stories(stories)
stories.extend(get_highlight_stories(data))
for story in stories:
snap_id = story['snap_id']
url = story['url']
timestamp = story['timestamp']
# Determine file extension
file_exts = {'image': '.jpg', 'video': '.mp4'}
extension = file_exts.get(story['media_type'])
if not extension:
print(f"Failed to determine file extension for {url}. Skipping.")
continue
filename = f"{username}~{timestamp}~{snap_id}{extension}"
filepath = os.path.join(directory, filename)
story['media_url'] = url
story['snap_id'] = snap_id
story['filepath'] = filepath
story['username'] = username
story['timestamp'] = timestamp
story['original_snap_id'] = story['original_snap_id']
ready_stories.append(story)
ready_stories.sort(key=lambda x: x['timestamp'])
return ready_stories
def get_snapchat_files():
stories = funcs.get_files(directory)
stories = [get_media_data(filepath) for filepath in stories]
stories = [story for story in stories if story]
return stories
def main():
print('Initializing snappy...')
ready_stories = []
stories_from_files = get_snapchat_files()
cursor.execute("SELECT username FROM following WHERE platform = 'snapchat' ORDER BY id DESC")
usernames = [row[0] for row in cursor.fetchall()]
print(f"Getting stories for {len(usernames)} users...")
new_stories = get_snapchat_stories(usernames)
cleaned_stories = []
print("Checking for duplicates...")
for story in new_stories:
duplicate_snap = find_duplicate_snap(existing_snap_ids, story['snap_id'])
if duplicate_snap:
print(f"Snap {story['filepath']} already exists in the database. Removing...")
continue
cleaned_stories.append(story)
cleaned_stories = download_stories(cleaned_stories)
ready_stories.extend(cleaned_stories)
ready_stories.extend(stories_from_files)
for story in ready_stories:
UploadMedia(story)
def download_stories(stories):
downloaded_stories = []
for story in stories:
filepath = story['filepath']
url = story['media_url']
filepath = funcs.download_file(url, filepath)
print(f"Downloaded {os.path.basename(filepath)}")
if not filepath:
continue
story['hash'] = funcs.calculate_file_hash(filepath)
story['size'] = os.path.getsize(filepath)
downloaded_stories.append(story)
return downloaded_stories
def UploadMedia(media):
file_size = media['size']
file_hash = media['hash']
filepath = media['filepath']
filename = os.path.basename(filepath)
username = media['username']
timestamp = media['timestamp']
media_type = media['media_type']
snap_id = media['snap_id']
original_snap_id = media['original_snap_id']
thumbnail_url = None
phash = None
duplicate_snap = find_duplicate_snap(existing_snap_ids, media['snap_id'])
if duplicate_snap:
print(f"Snap {filename} already exists in the database. Removing...")
os.remove(filepath)
return False
post_date = datetime.fromtimestamp(int(timestamp))
width, height = funcs.get_media_dimensions(filepath)
duration = funcs.get_video_duration(filepath)
if media_type == 'image':
phash = funcs.generate_phash(filepath)
elif media_type == 'video':
try:
thumb_path = generate_thumbnail(filepath)
obj_storage.PutFile(thumb_path, f'thumbnails/{filename}')
thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{filename}"
phash = funcs.generate_phash(thumb_path)
os.remove(thumb_path)
except:
print('Error generating thumbnail. Skipping...')
return False
server_path = f'media/snaps/{username}/{filename}'
file_url = f"https://storysave.b-cdn.net/{server_path}"
obj_storage.PutFile(filepath, server_path)
query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, date, hash, filename, duration, thumbnail, phash, platform, snap_id, original_snap_id, file_size) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
values = (username, media_type, file_url, width, height, 'story', post_date, file_hash, filename, duration, thumbnail_url, phash, 'snapchat', snap_id, original_snap_id, file_size)
cursor.execute(query, values)
db.commit()
print(f'[{cursor.rowcount}] records updated. File {filename} uploaded to {file_url}')
os.remove(filepath)
return True
def generate_thumbnail(filepath):
thumb_path = os.path.join(temp_directory, f'{uuid4()}.jpg')
cap = cv2.VideoCapture(filepath)
ret, frame = cap.read()
cv2.imwrite(thumb_path, frame)
cap.release()
return thumb_path
def get_media_data(filepath):
filename = os.path.basename(filepath)
parts = filename.split('~')
if len(parts) < 3:
return False
username = parts[0]
timestamp = parts[1]
snap_id = parts[2]
snap_id = os.path.splitext(snap_id)[0]
file_size = os.path.getsize(filepath)
file_hash = funcs.calculate_file_hash(filepath)
data = {
"username": username,
"timestamp": timestamp,
"filepath": filepath,
"snap_id": snap_id,
"original_snap_id": None,
"media_url": None,
"size": file_size,
"hash": file_hash
}
return data
if __name__ == '__main__':
print('Starting snappy...')
db, cursor = config.gen_connection()
obj_storage = config.get_storage()
cursor.execute("SELECT snap_id FROM media WHERE filename IS NOT NULL AND platform = 'snapchat' ORDER BY id DESC")
existing_snap_ids = cursor.fetchall()
existing_snap_ids = {row[0] for row in existing_snap_ids}
main()
print("Processing completed.")

@@ -2,20 +2,50 @@ from bs4 import BeautifulSoup
import requests
import json
doc_ids = [7663723823674585, 9539110062771438]
doc_ids = [7663723823674585, 9539110062771438, 8964418863643891, 9066276850131169]
active_doc_id = doc_ids[3]
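# doc_ids are Instagram GraphQL persisted-query identifiers; active_doc_id picks
# which query document the requests below are sent against.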
def get_posts():
data = {
"variables": '{"id":"57771591453","render_surface":"PROFILE"}',
"doc_id": "7663723823674585",
headers = {
'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36',
}
def get_posts(username):
url = 'https://www.instagram.com/graphql/query/'
variables = {
"data": {
"count": 12,
"include_reel_media_seen_timestamp": True,
"include_relationship_info": True,
"latest_besties_reel_media": True,
"latest_reel_media": True
},
"username": username,
"__relay_internal__pv__PolarisIsLoggedInrelayprovider": True,
"__relay_internal__pv__PolarisShareSheetV3relayprovider": False
}
params = {
'variables': json.dumps(variables),
'doc_id': active_doc_id
}
data = requests.get('https://www.instagram.com/graphql/query', params=data).json()
posts = data['data']
posts = [post['node'] for post in posts]
response = requests.get(url, headers=headers, params=params)
return max(posts, key=lambda post: max(c['width'] * c['height'] for c in post['image_versions2']['candidates']))
if response.status_code == 200:
try:
data = response.json()
posts = data['data']['xdt_api__v1__feed__user_timeline_graphql_connection']['edges']
end_cursor = data['data']['xdt_api__v1__feed__user_timeline_graphql_connection']['page_info']['end_cursor']
return posts
except (KeyError, TypeError) as e:
print(f"Error parsing JSON response: {e}")
return None
else:
print(f"Failed to fetch data. Status code: {response.status_code}")
return None
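# Note: end_cursor is read above but not returned yet, so paging past the first
# batch of posts would require exposing it to the caller.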
def get_username_by_user_id(user_id):
url = 'https://www.instagram.com/graphql/query/'
@@ -35,10 +65,6 @@ def get_username_by_user_id(user_id):
'variables': json.dumps(variables)
}
headers = {
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36",
}
response = requests.get(url, headers=headers, params=params)
if response.status_code == 200:
@@ -57,7 +83,7 @@ def extract_script_tags(username):
url = f"https://www.instagram.com/{username}/"
try:
# Fetch the HTML content of the page
response = requests.get(url)
response = requests.get(url, headers=headers)
response.raise_for_status()
# Parse the HTML content with BeautifulSoup
@@ -122,13 +148,14 @@ def get_profile_data(username):
user_id = get_user_id(username)
data = {
'variables': '{"id":"' + user_id + '","render_surface":"PROFILE"}',
'doc_id': 9539110062771438
variables = {
"id": user_id,
"render_surface": "PROFILE"
}
headers = {
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
data = {
'variables': json.dumps(variables),
'doc_id': active_doc_id
}
response = requests.post(url, headers=headers, data=data)
@@ -148,9 +175,45 @@ def get_hd_profile_picture(username = None, user_id = None):
if not user_id:
return None
variables = {
"id": user_id,
"render_surface": "PROFILE"
}
data = {
'variables': json.dumps(variables),
'doc_id': '9539110062771438'
}
data = {
'variables': '{"id":"' + user_id +' ","render_surface":"PROFILE"}',
'doc_id': 9539110062771438
'av': '17841401225494803',
'__d': 'www',
'__user': 0,
'__a': 1,
'__req': 4,
'__hs': '20231.HYP%3Ainstagram_web_pkg.2.1...1',
'dpr': 2,
'__ccg': 'GOOD',
'__rev': 1023131892,
'__s': 'g7nwhv%3Ad6c29x%3Aaag0uk',
'__hsi': 7507576467274562470,
'__dyn': '7xe5WwlEnwn8K2Wmm1twpUnwgU7S6EdF8aUco38w5ux609vCwjE1EE2Cw8G11wBw5Zx62G3i1ywOwa90Fw4Hw9O0Lbwae4UaEW2G0AEco5G0zEnwhE3Mw51wLyES1Twoob82ZwrUdUbGwmk0KU6O1FwlE6PhA6bwg8rAwHxW1oxe6UaU3cyUrw4rxO2C',
'__csr': 'gg84YIJgSyn2Ob7oDs-h7qhmToSsDl_8uAAaBigC8yQiaKJuumUkyybh4i9qBFaiayqBAVKczV4cBjhHUbqxeq3q9Suuum9zkEjAy9Ua8ymi45DUG7EgzoeUfKm2ym6UblG00kXK0jUE3Ug3dwh24DgAi1mo0AyaDw4WwiU1Y80bCm12g2Jwww5OCkE18Wc0mmqA4pU22wCw1Ucw06TW0csw7Gw',
'__hsdp': 'l2DMCyPBdbclSEgBiHWhqWiRV5kKKyoFtoYABrqafK699onQtK1fg96qiK5EZcIk0A5bwau0xVEhwAyQElwik0qi1cwam0m20ou06L82Ew56w4-w8O1Xw75wnoc85i',
'__hblp': '08K19xO0V89815oaEtwUCwhoOq4opxG5o8oS4Vk4U9o9o7C0zof82Nwg8uG0jV0Hweu1OwsE13o1ZU11UlwVwko2wwfy0G89E17U11EdU2cwuU5C0Yp8660Eo5idz8vxucw',
'__comet_req': 7,
'fb_dtsg': 'NAfvHXND-ELXKZFgyrogJIig1C4j6gRiNUaBBBomMZ1mNa-FvpKl6bw%3A17854231342124680%3A1731941013',
'jazoest': 26187,
'lsd': 'NFD0t4uLm10VsaniLLl9nv',
'__spin_r': 1023131892,
'__spin_b': 'trunk',
'__spin_t': 1747993861,
'__crn': 'comet.igweb.PolarisProfilePostsTabRoute',
'fb_api_caller_class': 'RelayModern',
'fb_api_req_friendly_name': 'PolarisProfileNoteBubbleQuery',
'variables': '%7B%22user_id%22%3A%228309584937%22%7D',
'server_timestamps': True,
'doc_id': 8698637896906070
}
try:
@@ -166,4 +229,83 @@ def get_hd_profile_picture(username = None, user_id = None):
except:
hd_profile_pic = None
return hd_profile_pic
return hd_profile_pic
def get_user_id_by_username(username):
url = 'https://www.instagram.com/graphql/query'
variables = {
"data": {
"context": "blended",
"include_reel": True,
"query": username,
"rank_token": "",
"search_surface": "web_top_search"
},
"hasQuery": True
}
data = {
'variables': json.dumps(variables),
'doc_id': active_doc_id
}
response = requests.post(url, headers=headers, data=data)
if response.status_code == 200:
json_data = response.json()
users = json_data['data']['xdt_api__v1__fbsearch__topsearch_connection']['users']
for user in users:
user_data = user['user']
if user_data['username'] == username:
return user_data['pk']
else:
print(f"Failed to fetch data. Status code: {response.status_code}")
return None
def get_user_id_api(username):
url = f"https://www.instagram.com/api/v1/users/web_profile_info/?username={username}"
headers['referer'] = f"https://www.instagram.com/{username}/"
headers['x-ig-app-id'] = '936619743392459'
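# Reuses the module-level headers, adding the referer and the Instagram web
# app id that this private web_profile_info endpoint expects.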
response = requests.get(url, headers=headers)
if response.status_code == 200:
try:
data = response.json()
user_id = data['data']['user']['id']
return user_id
except (KeyError, TypeError) as e:
print(f"Error parsing JSON response: {e}")
return None
else:
print(f"Failed to fetch data. Status code: {response.status_code}")
return None
def get_highest_quality_image(image_versions):
max_res = 0
max_res_url = None
for image in image_versions:
if image['width'] > max_res:
max_res = image['width']
max_res_url = image['url']
return max_res_url
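# parse_post flattens a carousel post into one item per slide, keeping only the
# highest-resolution image candidate; posts without 'carousel_media' are not
# handled here.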
def parse_post(post):
medias = post['node']['carousel_media']
media_items = []
for media in medias:
media_item = {}
image_versions = media['image_versions2']['candidates']
media_item['image_url'] = get_highest_quality_image(image_versions)
media_item['pk'] = media['pk']
media_item['media_type'] = media['media_type']
media_items.append(media_item)
return media_items

@@ -1,13 +1,15 @@
from datetime import datetime
from datetime import datetime, timedelta
from uuid import uuid4
import funcs
import config
import funcs
import json
import cv2
import os
import re
temp_directory = ".temp"
directory = 'media/instagram/'
directory = 'media'
os.makedirs(temp_directory, exist_ok=True)
media_types = {
'stories' : 'story',
@@ -15,39 +17,42 @@ media_types = {
'profile' : 'profile'
}
os.makedirs(temp_directory, exist_ok=True)
UPLOAD_CUSTOM = False
CACHE_FILE = os.path.join(temp_directory, 'existing_media_ids.json')
CACHE_TTL = timedelta(hours=48)
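# Known media ids and username -> user_id pairs are cached on disk and reused for
# up to CACHE_TTL (48h) before the full tables are re-queried; see get_cached_data
# and get_existing_medias below.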
def UploadMedia(media):
username = media['username']
user_id = media['user_id']
filepath = media['filepath']
platform = media['platform']
media_id = media['media_id']
timestamp = media['timestamp']
highlight_id = media['highlight_id']
post_type = media['post_type']
file_size = os.path.getsize(filepath)
thumbnail_url = None
phash = None
if media_id and media_id in existing_files:
if media_id and media_id in existing_media_ids:
print('Duplicate file detected. Removing...')
os.remove(filepath)
return True
file_size = os.path.getsize(filepath)
filename = os.path.basename(filepath)
file_extension = os.path.splitext(filename)[1].lower()
file_hash = funcs.calculate_file_hash(filepath)
if not user_id:
user_id = get_user_id(username)
media_type = funcs.get_media_type(filename)
if not media_type:
print(f'Error determining media type for {filename}. Skipping...')
return False
file_hash = funcs.calculate_file_hash(filepath)
post_date = datetime.fromtimestamp(int(timestamp)) if timestamp else datetime.now()
try:post_date = datetime.fromtimestamp(int(timestamp))
except:post_date = datetime.fromtimestamp(os.path.getctime(filepath))
width, height = funcs.get_media_dimensions(filepath)
if 0 in (width, height):
@@ -62,21 +67,19 @@ def UploadMedia(media):
try:
thumb_path = generate_thumbnail(filepath)
obj_storage.PutFile(thumb_path, f'thumbnails/{file_hash}.jpg') # this might be a problem in case of duplicate hashes
thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{file_hash}.jpg"
phash = funcs.generate_phash(thumb_path)
thumbnail_url = f"https://cdn.altpins.com/thumbnails/{file_hash}.jpg"
phash = funcs.generate_phash(thumb_path)
os.remove(thumb_path)
except Exception as e:
print(f'Error generating thumbnail: {e}. Skipping...')
return False
if media_id:
newFilename = f'{media_id}{file_extension}'
else:
newFilename = f'{file_hash}{file_extension}'
custom_filename = media_id if media_id else file_hash
newFilename = f'{custom_filename}{file_extension}'
server_path = f'media/{post_type}/{username}/{newFilename}'
file_url = f"https://storysave.b-cdn.net/{server_path}"
file_url = f"https://cdn.altpins.com/{server_path}"
obj_storage.PutFile(filepath, server_path)
if highlight_id:
@@ -84,18 +87,25 @@ def UploadMedia(media):
newDB.commit()
print(f'[{newCursor.rowcount}] added highlight {highlight_id} to user {user_id}')
query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, media_id, post_type, date, user_id, hash, filename, duration, thumbnail, phash, platform, file_size) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
values = (username, media_type, file_url, width, height, media_id, post_type, post_date, user_id, file_hash, filename, duration, thumbnail_url, phash, platform, file_size)
newCursor.execute(query, values)
newDB.commit()
print(f'[{newCursor.rowcount}] records updated.\nFile: {filename}\nURL: {file_url}')
correct_emoji = '✅' if newCursor.rowcount > 0 else '❌'
print(f'{correct_emoji} added {filename} to database')
print(f'File: {filename}')
print(f'URL: {file_url}')
print(f'Pin URL: https://altpins.com/pin/{newCursor.lastrowid}')
print("="*100)
os.remove(filepath)
return True
existing_media_ids.add(media_id)
return newCursor.lastrowid
def generate_thumbnail(filepath):
thumb_path = os.path.join(temp_directory, f'{uuid4()}.jpg')
@@ -114,8 +124,9 @@ def get_user_id(username):
def get_media_data(filepath):
filename = os.path.basename(filepath)
parts = filename.split('~')
if len(parts) < 4:
if len(parts) != 4:
return False
username = parts[0]
@@ -126,7 +137,9 @@ def get_media_data(filepath):
highlight_id = user_id.replace('highlight', '') if 'highlight' in user_id else None
if not user_id.isdigit():
if user_id.isdigit():
user_id = int(user_id)
else:
user_id = get_user_id(username)
if media_id.isdigit():
@@ -138,52 +151,46 @@ def get_media_data(filepath):
return data
def get_media(folder_path):
def get_media():
medias = []
failed_medias = []
for media_type, post_type in media_types.items():
folder_path = os.path.join(directory, media_type)
media_folder_path = os.path.join(directory, media_type)
if not os.path.exists(folder_path):
if not os.path.exists(media_folder_path):
continue
all_files = funcs.get_files(folder_path)
all_files = funcs.get_files(media_folder_path)
for filepath in all_files:
data = get_media_data(filepath)
if not data:
failed_medias.append(filepath)
continue
data['post_type'] = post_type
medias.append(data)
return medias
return medias, failed_medias
def get_custom_media():
def get_custom_media(failed_medias):
medias = []
folder_path = 'media/instagram'
platform = 'instagram'
for media_type, post_type in media_types.items():
folder_path = os.path.join(directory, media_type)
user_dirs = [d for d in os.listdir(folder_path) if os.path.isdir(os.path.join(folder_path, d))]
for user_dir in user_dirs:
user_folder_path = os.path.join(folder_path, user_dir)
for username in user_dirs:
user_folder_path = os.path.join(folder_path, username)
if not os.path.exists(user_folder_path):
continue
username = user_dir
for filename in os.listdir(user_folder_path):
if filename.startswith('.'):
continue
files = os.listdir(user_folder_path)
for filename in files:
filepath = os.path.join(user_folder_path, filename)
if filename.startswith('.'):
if not filepath in failed_medias:
continue
user_id = get_user_id(username)
timestamp = int(os.path.getctime(filepath))
media_id = os.path.splitext(filename)[0]
@@ -201,7 +208,7 @@ def get_custom_media():
"media_id": media_id,
"user_id": user_id,
"filepath": filepath,
"platform": platform,
"platform": 'instagram',
"highlight_id": None,
"post_type": post_type
}
@@ -209,20 +216,40 @@ def get_custom_media():
medias.append(data)
return medias
def dump_instagram():
medias, failed_medias = get_media()
medias = clean_dupes(medias)
failed_medias = get_custom_media(failed_medias)
def dump_instagram(folder_path):
medias = get_media(folder_path)
# medias.extend(get_custom_media())
if cleanup_dupe_stories(medias):
medias = get_media(folder_path)
medias.sort(key=lambda x: (x['username'].lower(), x['timestamp']))
new_user_ids = {}
for media in medias:
if media['user_id']:
user_id = media['user_id']
username = media['username']
if username not in existing_users:
existing_users[username] = user_id
new_user_ids[username] = user_id
for media in medias:
UploadMedia(media)
existing_files.append(media['media_id'])
user_id = media['user_id']
username = media['username']
if user_id is None and username in new_user_ids:
media['user_id'] = new_user_ids[username]
def cleanup_dupe_stories(medias):
for media in medias:
pinid = UploadMedia(media)
existing_media_ids.add(media['media_id'])
if UPLOAD_CUSTOM:
for media in failed_medias:
pinid = UploadMedia(media)
def clean_dupes(medias):
removed_count = 0
new_medias = []
for media in medias:
media_id = media['media_id']
filepath = media['filepath']
@@ -231,16 +258,70 @@ def cleanup_dupe_stories(medias):
print(f'Invalid media_id for file {filepath}. Skipping...')
continue
# Check if media_id is in existing_files OR if filepath contains any '(number)'
if media_id in existing_files or re.search(r'\(\d+\)', filepath):
# Check if media_id is in existing_media_ids OR if filepath contains any '(number)'
if media_id in existing_media_ids or re.search(r'\(\d+\)', filepath):
removed_count += 1
print(f'Found duplicate file {filepath}. Removing...')
os.remove(filepath)
continue
new_medias.append(media)
print(f'Removed {removed_count} duplicate files.')
return removed_count
return new_medias
def get_cached_data():
if not os.path.exists(CACHE_FILE):
print('No cache file found. Generating new cache…')
return None, None
try:
with open(CACHE_FILE, 'r') as f:
cache_data = json.load(f)
timestamp = datetime.fromisoformat(cache_data.get('timestamp', ''))
if datetime.now() - timestamp < CACHE_TTL:
print('Using cached data…')
return set(tuple(x) for x in cache_data.get('existing_media_ids', [])), cache_data.get('existing_users', {})
except Exception as e:
print(f"Cache read error: {e}")
return None, None
def save_cached_data(existing_media_ids, existing_users):
with open(CACHE_FILE, 'w') as f:
json.dump({'timestamp': datetime.now().isoformat(), 'existing_media_ids': list(existing_media_ids), 'existing_users': existing_users}, f)
def get_existing_medias(newCursor):
existing_media_ids, existing_users = get_cached_data()
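# Warm-cache path: only media rows newer than the highest cached id are fetched
# from the database and merged into the cached set.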
if existing_media_ids and existing_users:
newest_id = max(existing_media_ids, key=lambda x: x[0])[0]
existing_media_ids = {image[1] for image in existing_media_ids}
newCursor.execute("SELECT id, media_id FROM media WHERE media_id IS NOT NULL AND platform = 'instagram' AND status = 'public' AND id > %s ORDER BY id DESC", (newest_id,))
new_media_ids = {image[1] for image in newCursor.fetchall()}
for media_id in new_media_ids:
existing_media_ids.add(media_id)
return existing_media_ids, existing_users
print('Getting existing files and users...')
newCursor.execute("SELECT id, media_id FROM media WHERE media_id IS NOT NULL AND platform = 'instagram' AND status = 'public';")
existing_media_ids = {image for image in newCursor.fetchall()}
print('Getting existing users...')
newCursor.execute("SELECT DISTINCT username, user_id FROM media WHERE user_id IS NOT NULL AND platform = 'instagram'")
existing_users = {user[0].lower(): user[1].lower() for user in newCursor.fetchall()}
cache_file = os.path.join(temp_directory, 'existing_media_ids.json')
with open(cache_file, 'w') as f:
json.dump({'timestamp': datetime.now().isoformat(), 'existing_media_ids': list(existing_media_ids), 'existing_users': existing_users}, f)
return existing_media_ids, existing_users
if __name__ == '__main__':
print('Starting processing...')
@@ -252,19 +333,11 @@ if __name__ == '__main__':
obj_storage = config.get_storage()
print('Getting existing files and users...')
newCursor.execute("SELECT media_id FROM media WHERE media_id IS NOT NULL AND platform = 'instagram'")
existing_files = [image[0] for image in newCursor.fetchall()]
print('Getting existing users...')
newCursor.execute("SELECT DISTINCT username, user_id FROM media WHERE user_id IS NOT NULL AND platform = 'instagram'")
existing_users = {user[0].lower(): user[1].lower() for user in newCursor.fetchall()}
existing_media_ids, existing_users = get_existing_medias(newCursor)
dump_instagram(directory)
dump_instagram()
print("Processing completed.")
newDB.close()
for mediatype, _ in media_types.items():
funcs.clean_empty_folders(os.path.join(directory, mediatype))

@@ -1,147 +0,0 @@
from datetime import datetime
from uuid import uuid4
import funcs
import config
import cv2
import os
media_directory = "media/ready_for_upload"
platform = "instagram"
working_directory = os.path.join(media_directory, platform)
def UploadMedia(media):
username = media['username']
user_id = media['user_id']
filepath = media['filepath']
platform = media['platform']
media_id = media['media_id']
thumbnail_url = None
phash = None
filename = os.path.basename(filepath)
file_extension = os.path.splitext(filename)[1].lower()
media_type = funcs.get_media_type(filename)
if not media_type:
print(f'Error determining media type for {filename}. Skipping...')
return False
post_type = funcs.determine_post_type(filepath)
if not post_type:
print(f'Error determining post type for {filename}. Skipping...')
return False
file_hash = funcs.calculate_file_hash(filepath)
post_date = datetime.now()
width, height = funcs.get_media_dimensions(filepath)
duration = funcs.get_video_duration(filepath)
if media_type == 'image':
phash = funcs.generate_phash(filepath)
elif media_type == 'video':
try:
thumb_path = generate_thumbnail(filepath)
obj_storage.PutFile(thumb_path, f'thumbnails/{file_hash}.jpg') # this might be a problem in case of duplicate hashes
thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{file_hash}.jpg"
phash = funcs.generate_phash(thumb_path)
os.remove(thumb_path)
except Exception as e:
print(f'Error generating thumbnail. Skipping... {e}')
return False
newFilename = f'{file_hash}{file_extension}'
server_path = f'media/{post_type}/{username}/{newFilename}'
file_url = f"https://storysave.b-cdn.net/{server_path}"
obj_storage.PutFile(filepath, server_path) # slow as fuck
post_type = 'story' if post_type == 'stories' else 'post'
query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, date, user_id, hash, filename, duration, thumbnail, phash, platform, media_id) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
values = (username, media_type, file_url, width, height, post_type, post_date, user_id, file_hash, filename, duration, thumbnail_url, phash, platform, media_id)
newCursor.execute(query, values) # slower
newDB.commit()
print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')
os.remove(filepath)
return True
def generate_thumbnail(filepath):
thumb_path = f'.temp/{uuid4()}.jpg'
cap = cv2.VideoCapture(filepath)
ret, frame = cap.read()
cv2.imwrite(thumb_path, frame)
cap.release()
return thumb_path
def get_user_id(username):
username = username.lower()
if username in existing_users:
return existing_users[username]
return None
def get_media(folder_path):
medias = []
user_folders = os.listdir(folder_path)
for user_folder in user_folders:
user_folder_path = os.path.join(folder_path, user_folder)
if not os.path.isdir(user_folder_path):
continue
files = os.listdir(user_folder_path)
for filename in files:
filepath = os.path.join(folder_path, user_folder, filename)
# skip file if its hidden
if filename.startswith('.'):
continue
try:
media_id = filename.split('.')[0]
media_id = int(media_id)
except:
media_id = None
media = {
'username': user_folder,
'filepath': filepath,
'user_id': get_user_id(user_folder),
'media_id': media_id,
'platform': platform
}
medias.append(media)
return medias
def dump_instagram(folder_path):
medias = get_media(folder_path)
for media in medias:
UploadMedia(media)
if __name__ == '__main__':
print('Starting processing...')
newDB, newCursor = config.gen_connection()
obj_storage = config.get_storage()
newCursor.execute("SELECT DISTINCT username, user_id FROM media WHERE user_id IS NOT NULL")
existing_users = {user[0].lower(): user[1].lower() for user in newCursor.fetchall()}
dump_instagram(working_directory)
print("Processing completed.")

@@ -6,7 +6,6 @@ import os
from funcs import get_media_dimensions
media_dir = 'media'
output_dir = 'instagram'
stories_dir = 'stories'
posts_dir = 'posts'
@@ -75,8 +74,6 @@ class DownloadHandler(FileSystemEventHandler):
if not os.path.exists(file_path):
return
print(f'Moving {file}...')
post_type = determine_post_type(file_path)
if post_type == 'posts':
media_type_dir = posts_dir
@@ -86,9 +83,15 @@ class DownloadHandler(FileSystemEventHandler):
print(f"Could not determine post type for {file}. Skipping...")
return
outputPath = os.path.join(media_dir, output_dir, media_type_dir, file)
outputPath = os.path.join(media_dir, media_type_dir, file)
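# The destination no longer nests under the old 'instagram' output folder; if the
# target file already exists, the freshly downloaded copy is discarded instead of
# overwriting it.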
if os.path.exists(outputPath):
print(f"File already exists {outputPath}. Removing...")
os.remove(file_path)
return
shutil.move(file_path, outputPath)
print(f"Moved {file_path} to {outputPath}")
def on_created(self, event):
if not event.is_directory and 'crdownload' not in event.src_path:
@@ -110,4 +113,4 @@ if __name__ == "__main__":
time.sleep(1) # Add a 1-second sleep to reduce CPU usage
except KeyboardInterrupt:
observer.stop()
observer.join()
observer.join()

@@ -1,140 +0,0 @@
from datetime import datetime
from uuid import uuid4
import funcs
import config
import cv2
import os
directory = 'processed_tiktoks'
def UploadMedia(media):
platform = 'TikTok'
username = media['username']
filepath = media['filepath']
file_size = os.path.getsize(filepath)
thumbnail_url = None
phash = None
filename = os.path.basename(filepath)
file_extension = os.path.splitext(filename)[1].lower()
media_type = funcs.get_media_type(filename)
if not media_type:
print(f'Error determining media type for {filename}. Skipping...')
return False
post_type = funcs.determine_post_type(filepath)
if not post_type:
print(f'Error determining post type for {filename}. Skipping...')
return False
file_hash = funcs.calculate_file_hash(filepath)
if file_hash in existing_hashes:
print(f'File {filename} already exists. Skipping...')
return False
post_date = datetime.now()
width, height = funcs.get_media_dimensions(filepath)
duration = funcs.get_video_duration(filepath)
if media_type == 'image':
phash = funcs.generate_phash(filepath)
elif media_type == 'video':
try:
thumb_path = generate_thumbnail(filepath)
obj_storage.PutFile(thumb_path, f'thumbnails/{file_hash}.jpg') # this might be a problem in case of duplicate hashes
thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{file_hash}.jpg"
phash = funcs.generate_phash(thumb_path)
os.remove(thumb_path)
except:
print('Error generating thumbnail. Skipping...')
return False
newFilename = f'{file_hash}{file_extension}'
server_path = f'media/tiktoks/{username}/{newFilename}'
file_url = f"https://storysave.b-cdn.net/{server_path}"
obj_storage.PutFile(filepath, server_path) # slow as fuck
post_type = 'story' if post_type == 'stories' else 'post'
query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, date, hash, filename, duration, thumbnail, phash, platform, file_size) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
values = (username, media_type, file_url, width, height, post_type, post_date, file_hash, filename, duration, thumbnail_url, phash, platform, file_size)
newCursor.execute(query, values) # slower
newDB.commit()
print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')
os.remove(filepath)
return True
def generate_thumbnail(filepath):
thumb_path = f'temp/{uuid4()}.jpg'
cap = cv2.VideoCapture(filepath)
ret, frame = cap.read()
cv2.imwrite(thumb_path, frame)
cap.release()
return thumb_path
def get_media_data(filepath):
filename = os.path.basename(filepath)
parts = filename.split('~')
if len(parts) == 3:
username, title, tiktok_id = parts
elif len(parts) == 2:
username, title = parts
tiktok_id = None
else:
return False
data = {'username': username, 'filepath': filepath, 'tiktok_id': tiktok_id, 'title': title}
return data
def get_media(folder_path):
medias = []
users = os.listdir(folder_path)
for user in users:
user_folder = os.path.join(folder_path, user)
if not os.path.isdir(user_folder):
print(f"Skipping {user}")
continue
files = os.listdir(user_folder)
for filename in files:
filepath = os.path.join(user_folder, filename)
data = get_media_data(filepath)
if data:
medias.append(data)
return medias
def dump_instagram(folder_path):
medias = get_media(folder_path)
for media in medias:
UploadMedia(media)
if __name__ == '__main__':
print('Starting processing...')
if not os.listdir(directory):
print('No files to process. Exiting...')
exit()
newDB, newCursor = config.gen_connection()
obj_storage = config.get_storage()
newCursor.execute("SELECT hash FROM media WHERE hash IS NOT NULL AND platform = 'TikTok'")
existing_hashes = [row[0] for row in newCursor.fetchall()]
dump_instagram(directory)
print("Processing completed.")

@@ -1,123 +0,0 @@
from selenium.webdriver.common.by import By
import undetected_chromedriver as uc
import requests
import base64
import re
import os
def format_url(url):
clean_url = re.sub(r'%[0-9A-F]{2}', '', url)
return clean_url
def encode_offset(offset_num):
offset_base64 = str(offset_num).encode('utf-8')
offset_base64 = base64.b64encode(offset_base64).decode('utf-8')
return offset_base64
def get_clips(username):
url = 'https://gql.twitch.tv/gql'
offset_num = 20
offset_base64 = encode_offset(offset_num)
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'
headers = {
'client-id': 'kimne78kx3ncx6brgo4mv6wki5h1ko',
'Content-Type': 'text/plain;charset=UTF-8',
'User-Agent': user_agent
}
data = {
"operationName":"ClipsCards__User",
"variables":{"login":username,"limit":100,},
"extensions":{"persistedQuery":{"version":1,"sha256Hash":"4eb8f85fc41a36c481d809e8e99b2a32127fdb7647c336d27743ec4a88c4ea44"}}
}
response = requests.post(url, headers=headers, json=data)
clips = response.json()
clips = clips['data']['user']['clips']['edges']
cleaned_clips = parse_clips(clips)
return cleaned_clips
def parse_clips(clips):
"""
clips is a list of dictionaries
"""
cleaned_clips = []
for clip in clips:
clip = clip['node']
clip_id = clip['id']
clip_url = clip['url']
clip_title = clip['title']
clip_view_count = clip['viewCount']
clip_duration = clip['durationSeconds']
cleaned_clip = {
'id': clip_id,
'url': clip_url,
'title': clip_title,
'views': clip_view_count,
'duration': clip_duration
}
cleaned_clips.append(cleaned_clip)
return cleaned_clips
def get_video_url(video_url, driver):
driver.get(video_url)
# Get the video element
video = driver.find_element(By.TAG_NAME, 'video')
# Get the video source
video_src = video.get_attribute('src')
return video_src
def download_video(video_url, filepath):
if os.path.exists(filepath):
return filepath
video = requests.get(video_url)
# Download in chunks
with open(filepath, 'wb') as f:
for chunk in video.iter_content(chunk_size=1024):
f.write(chunk)
return filepath
# Set up an undetected Chrome driver in headless mode
opts = uc.ChromeOptions()
opts.add_argument("--headless")
opts.add_argument("--window-size=1920,1080")
driver = uc.Chrome(use_subprocess=True, options=opts)
username = 'didicandy666'
clips = get_clips(username)
for clip in clips:
clip_url = clip['clip_url']
filename = f"{clip['id']}.mp4"
filepath = os.path.join('clips', filename)
if os.path.exists(filepath):
print(f"Already downloaded {filename}")
continue
video_url = get_video_url(clip_url, driver)
download_video(video_url, filepath)
print(f"Downloaded {filename}")

@@ -0,0 +1,143 @@
import os
import time
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
# --- Configuration ---
USERNAME = "maorshabakov" # your Instagram username
PASSWORD = "PeyxCU%MD*Zq9p" # your Instagram password
TARGET_USER = "cata.leyah" # the username of the profile to scrape
DOWNLOAD_DIR = "downloads" # directory to save media
SCROLL_PAUSE_TIME = 2 # seconds to wait after each scroll
# --- Helper functions ---
def login_instagram(driver, username, password):
driver.get("https://www.instagram.com/accounts/login/")
time.sleep(3) # wait for the login page to load
# Accept cookies if prompted (may need to adjust for your region)
try:
accept_button = driver.find_element(By.XPATH, "//button[text()='Allow all cookies']")
accept_button.click()
time.sleep(2)
except Exception:
pass
# check if already logged in by checking if the current url has been redirected to the home page
if driver.current_url == "https://www.instagram.com/":
print("Already logged in.")
return
# Enter username and password
username_input = driver.find_element(By.NAME, "username")
password_input = driver.find_element(By.NAME, "password")
username_input.send_keys(username)
password_input.send_keys(password)
password_input.send_keys(Keys.RETURN)
time.sleep(5) # wait for login to complete
def scroll_to_load_posts(driver, post_count=12):
post_links = dict()
last_height = driver.execute_script("return document.body.scrollHeight")
while True:
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
time.sleep(SCROLL_PAUSE_TIME)
new_height = driver.execute_script("return document.body.scrollHeight")
new_posts = get_post_links(driver)
for link in new_posts:
if link not in post_links:
post_links[link] = True
if len(post_links) >= post_count:
break
if new_height == last_height:
break
last_height = new_height
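# Scrolling stops once post_count links have been collected or the page height
# stops growing; main() gathers the final link list via get_post_links() afterwards.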
def get_post_links(driver):
# Find all post links on the profile page.
# Instagram posts are links with hrefs that contain '/p/'
post_elements = driver.find_elements(By.XPATH, "//a[contains(@href, '/p/')]")
links = [elem.get_attribute("href") for elem in post_elements]
# Remove duplicates
return list(set(links))
def download_media(url, download_folder, filename):
response = requests.get(url, stream=True)
if response.status_code == 200:
filepath = os.path.join(download_folder, filename)
with open(filepath, 'wb') as f:
for chunk in response.iter_content(1024):
f.write(chunk)
print(f"Downloaded: {filename}")
else:
print(f"Failed to download: {url}")
def extract_media_url(driver):
# Try to get video first
try:
video = driver.find_element(By.TAG_NAME, "video")
media_url = video.get_attribute("src")
if media_url:
return media_url, "mp4"
except Exception:
pass
# Fallback to image extraction
try:
# Sometimes the post image is inside a div with role="button"
image = driver.find_element(By.XPATH, "//img[contains(@src, 'scontent')]")
media_url = image.get_attribute("src")
if media_url:
return media_url, "jpg"
except Exception:
pass
return None, None
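# extract_media_url prefers a <video> source and falls back to the main post
# image; main() below currently relies on clicking the page's download button
# element instead of calling this helper.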
# --- Main script ---
def main():
os.makedirs(DOWNLOAD_DIR, exist_ok=True)
chrome_options = Options()
chrome_options.add_argument("--user-data-dir=.profiles/thenigga")
driver = webdriver.Chrome(options=chrome_options)
driver.maximize_window()
try:
# Log in to Instagram
login_instagram(driver, USERNAME, PASSWORD)
# Navigate to the target user's profile
driver.get(f"https://www.instagram.com/{TARGET_USER}/")
time.sleep(5) # let the page load
# Scroll down to load all posts
scroll_to_load_posts(driver)
# Gather all post links from the profile page
post_links = get_post_links(driver)
print(f"Found {len(post_links)} posts.")
# Process each post
for idx, post_link in enumerate(post_links):
driver.get(post_link)
time.sleep(3) # wait for post to load
# click download button where div class post-download-all-button
download_button = driver.find_element(By.XPATH, "//div[@class='post-download-all-button']")
driver.execute_script("arguments[0].click();", download_button)
time.sleep(1)
finally:
driver.quit()
if __name__ == "__main__":
main()