cleanup

9 months ago · 445b0ad9f0
parent ad39eeaed1
commit 445b0ad9f0
5 changed files with 0 additions and 2639 deletions
--- a/session_data.json
+++ b/session_data.json
@ -1,37 +0,0 @@
-{
-    "uuids": {
-        "phone_id": "53c03380-c7b9-44ab-b10e-1b585e8e428b",
-        "uuid": "2a9c7a37-c902-4332-8a32-1fd903acd991",
-        "client_session_id": "2b0a28f0-86c4-4cd4-b044-c4effd953cc9",
-        "advertising_id": "d330f041-56f1-4f45-906d-d3740717f0b1",
-        "android_device_id": "android-df5a2572f9762ff7",
-        "request_id": "35de6403-02e2-46b4-a02c-403cea1fe9c6",
-        "tray_session_id": "ed1874f7-cb8d-4ed6-bea8-13c53b9c3d67"
-    },
-    "mid": "ZwOR_QABAAGgkEbeoytBO3EL-dgC",
-    "ig_u_rur": null,
-    "ig_www_claim": null,
-    "authorization_data": {
-        "ds_user_id": "1587432849",
-        "sessionid": "1587432849%3Ak5q9QqmHia2WWq%3A18%3AAYcDFsLKMiFCtVhCcqYl7KZrFLw5IOSgf1pNfQZYLA"
-    },
-    "cookies": {},
-    "last_login": 1728287241.130515,
-    "device_settings": {
-        "app_version": "269.0.0.18.75",
-        "android_version": 26,
-        "android_release": "8.0.0",
-        "dpi": "480dpi",
-        "resolution": "1080x1920",
-        "manufacturer": "OnePlus",
-        "device": "devitron",
-        "model": "6T Dev",
-        "cpu": "qcom",
-        "version_code": "314665256"
-    },
-    "user_agent": "Instagram 269.0.0.18.75 Android (26/8.0.0; 480dpi; 1080x1920; OnePlus; 6T Dev; devitron; qcom; en_US; 314665256)",
-    "country": "US",
-    "country_code": 1,
-    "locale": "en_US",
-    "timezone_offset": -14400
-}
--- a/snapchat.json
+++ b/snapchat.json
--- a/storysave_dump_media.py
+++ b/storysave_dump_media.py
@ -1,142 +0,0 @@
-from datetime import datetime
-import config
-import funcs
-import cv2
-import os
-
-directory = 'storysaver'
-
-def UploadMedia(media):
-    media_id = media['media_id']
-    username = media['username']
-    post_date = media['timestamp']
-    user_id = media['user_id']
-    filepath = media['filepath']
-    highlight_id = media['highlight_id']
-    post_type = media['post_type']
-    thumbnail_url = None
-    phash = None
-    
-    if media_id and int(media_id) in existing_files:
-        print('Duplicate file detected. Removing...')
-        os.remove(filepath)
-        return True
-        
-    filename = os.path.basename(filepath)
-    file_extension = os.path.splitext(filename)[1].lower()
-
-    media_type = funcs.get_media_type(filename)
-
-    file_hash = funcs.calculate_file_hash(filepath)
-
-    width, height = funcs.get_media_dimensions(filepath)
-    
-    duration = funcs.get_video_duration(filepath)
-
-    if media_type == 'video':
-        try:
-            thumbPath = f'temp/{media_id}.jpg'
-            cap = cv2.VideoCapture(filepath)
-            ret, frame = cap.read()
-            cv2.imwrite(thumbPath, frame)
-            cap.release()
-            obj_storage.PutFile(thumbPath, f'thumbnails/{media_id}.jpg') # slower
-            thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{media_id}.jpg"
-            phash = funcs.generate_phash(thumbPath)
-            os.remove(thumbPath)
-        except:
-            print('Error generating thumbnail. Skipping...')
-            return False
-    elif media_type == 'image':
-        phash = funcs.generate_phash(filepath)
-
-    if media_id:
-        newFilename = f'{media_id}{file_extension}'
-    else:
-        newFilename = f'{file_hash}{file_extension}'
-
-    server_path = f'media/{post_type}/{username}/{newFilename}'
-
-    file_url = f"https://storysave.b-cdn.net/{server_path}"
-
-    obj_storage.PutFile(filepath, server_path) # slow as fuck
-
-    if highlight_id:
-        newCursor.execute("INSERT IGNORE INTO highlights (highlight_id, user_id, media_id) VALUES (%s, %s, %s)", (highlight_id, user_id, media_id))
-        newDB.commit()
-        print(f'[{newCursor.rowcount}] added highlight {highlight_id} to user {user_id}')
-
-    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, media_id, post_type, date, user_id, hash, filename, duration, thumbnail, phash, platform) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
-    values = (username, media_type, file_url, width, height, media_id, post_type, post_date, user_id, file_hash, filename, duration, thumbnail_url, phash, 'instagram')
-
-    newCursor.execute(query, values) # slower
-    newDB.commit()
-    print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')
-
-    os.remove(filepath)
-
-    return True
-
-def get_user_id(username):
-    username = username.lower()
-    if username in existing_users:
-        return existing_users[username]
-    
-    return None
-
-def get_media():
-    medias = []
-    post_types = {
-        'posts': 'post',
-        'stories': 'story',
-        'profile': 'profile',
-    }
-    
-    for post_type in os.listdir('media'):
-        users = os.listdir(f'media/{post_type}')
-        for user in users:
-            user_path = f'media/{post_type}/{user}'
-            for filename in os.listdir(user_path):
-                data = {}
-                filepath = os.path.join(user_path, filename)
-                
-                data['post_type'] = post_types[post_type]
-                data['username'] = user
-                data['timestamp'] = filename.split('__')[-1].split('.')[0] if 'com.instagram.android__' in filename else datetime.now()
-                if 'com.instagram.android__' in filename:
-                    data['timestamp'] = datetime.strptime(data, '%Y%m%d%H%M%S%f')
-                data['filepath'] = filepath
-                data['media_id'] = None
-                data['user_id'] = get_user_id(data['username'])
-                data['highlight_id'] = None
-                medias.append(data)
-    
-    return medias
-
-def dump_instagram():
-    medias = get_media()
-
-    for media in medias:
-        UploadMedia(media)
-        existing_files.append(media['media_id'])
-
-if __name__ == '__main__':
-    print('Starting processing...')
-
-    if not os.listdir(directory):
-        print('No files to process. Exiting...')
-        exit()
-
-    newDB, newCursor = config.gen_connection()
-
-    obj_storage = config.get_storage()
-
-    newCursor.execute("SELECT media_id FROM media WHERE media_id IS NOT NULL")
-    existing_files = [image[0] for image in newCursor.fetchall()]
-
-    newCursor.execute("SELECT DISTINCT username, user_id FROM media WHERE user_id IS NOT NULL")
-    existing_users = {user[0].lower(): user[1].lower() for user in newCursor.fetchall()}
-    
-    dump_instagram(directory)
-
-    print("Processing completed.")
--- a/storysave_dump_tiktok.py
+++ b/storysave_dump_tiktok.py
@ -1,140 +0,0 @@
-from datetime import datetime
-from uuid import uuid4
-import funcs
-import config
-import cv2
-import os
-
-directory = 'processed_tiktoks'
-
-def UploadMedia(media):
-    platform = 'TikTok'
-    username = media['username']
-    filepath = media['filepath']
-    file_size = os.path.getsize(filepath)
-    thumbnail_url = None
-    phash = None
-    
-    filename = os.path.basename(filepath)
-    file_extension = os.path.splitext(filename)[1].lower()
-
-    media_type = funcs.get_media_type(filename)
-    if not media_type:
-        print(f'Error determining media type for {filename}. Skipping...')
-        return False
-
-    post_type = funcs.determine_post_type(filepath)
-    if not post_type:
-        print(f'Error determining post type for {filename}. Skipping...')
-        return False
-
-    file_hash = funcs.calculate_file_hash(filepath)
-    if file_hash in existing_hashes:
-        print(f'File {filename} already exists. Skipping...')
-        return False
-
-    post_date = datetime.now()
-
-    width, height = funcs.get_media_dimensions(filepath)
-    
-    duration = funcs.get_video_duration(filepath)
-
-    if media_type == 'image':
-        phash = funcs.generate_phash(filepath)
-    elif media_type == 'video':
-        try:
-            thumb_path = generate_thumbnail(filepath)
-            obj_storage.PutFile(thumb_path, f'thumbnails/{file_hash}.jpg') # this might be a problem in case of duplicate hashes
-            thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{file_hash}.jpg"
-            phash = funcs.generate_phash(thumb_path)
-            os.remove(thumb_path)
-        except:
-            print('Error generating thumbnail. Skipping...')
-            return False
-
-    newFilename = f'{file_hash}{file_extension}'
-    server_path = f'media/tiktoks/{username}/{newFilename}'
-
-    file_url = f"https://storysave.b-cdn.net/{server_path}"
-
-    obj_storage.PutFile(filepath, server_path) # slow as fuck
-
-    post_type = 'story' if post_type == 'stories' else 'post'
-    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, date, hash, filename, duration, thumbnail, phash, platform, file_size) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
-    values = (username, media_type, file_url, width, height, post_type, post_date, file_hash, filename, duration, thumbnail_url, phash, platform, file_size)
-
-    newCursor.execute(query, values) # slower
-    newDB.commit()
-    print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')
-
-    os.remove(filepath)
-
-    return True
-
-def generate_thumbnail(filepath):
-    thumb_path = f'temp/{uuid4()}.jpg'
-    cap = cv2.VideoCapture(filepath)
-    ret, frame = cap.read()
-    cv2.imwrite(thumb_path, frame)
-    cap.release()
-    return thumb_path
-
-def get_media_data(filepath):
-    filename = os.path.basename(filepath)
-    parts = filename.split('~')
-
-    if len(parts) == 3:
-        username, title, tiktok_id = parts
-    elif len(parts) == 2:
-        username, title = parts
-        tiktok_id = None
-    else:
-        return False
-
-    data = {'username': username, 'filepath': filepath, 'tiktok_id': tiktok_id, 'title': title}
-
-    return data
-
-def get_media(folder_path):
-    medias = []
-    
-    users = os.listdir(folder_path)
-    for user in users:
-        user_folder = os.path.join(folder_path, user)
-        if not os.path.isdir(user_folder):
-            print(f"Skipping {user}")
-            continue
-
-        files = os.listdir(user_folder)
-        for filename in files:
-            filepath = os.path.join(user_folder, filename)
-            
-            data = get_media_data(filepath)
-            if data:
-                medias.append(data)
-    
-    return medias
-
-def dump_instagram(folder_path):
-    medias = get_media(folder_path)
-
-    for media in medias:
-        UploadMedia(media)
-
-if __name__ == '__main__':
-    print('Starting processing...')
-    
-    if not os.listdir(directory):
-        print('No files to process. Exiting...')
-        exit()
-
-    newDB, newCursor = config.gen_connection()
-
-    obj_storage = config.get_storage()
-
-    newCursor.execute("SELECT hash FROM media WHERE hash IS NOT NULL AND platform = 'TikTok'")
-    existing_hashes = [row[0] for row in newCursor.fetchall()]
-
-    dump_instagram(directory)
-
-    print("Processing completed.")
--- a/storysave_dump_tiktok_process.py
+++ b/storysave_dump_tiktok_process.py
@ -1,58 +0,0 @@
-from uuid import uuid4
-import uuid
-import os
-
-def is_valid_uuid(uuid_to_test, version=4):
-    try:
-        uuid_obj = uuid.UUID(uuid_to_test, version=version)
-    except ValueError:
-        return False
-
-    return str(uuid_obj) == uuid_to_test
-
-source_dir = 'tiktoks/'
-processed_dir = 'processed_tiktoks'
-
-os.makedirs(processed_dir, exist_ok=True)
-
-users = os.listdir(source_dir)
-
-for user in users:
-    user_dir = os.path.join(source_dir, user)
-    if not os.path.isdir(user_dir):
-        print(f"Skipping {user}")
-        continue
-
-    for file in os.listdir(user_dir):
-        filename = os.path.splitext(file)[0]
-        filepath = os.path.join(user_dir, file)
-        file_ext = os.path.splitext(file)[1]
-
-        tiktok_id = str(uuid4())
-        username = user
-
-        if is_valid_uuid(filename):
-            title = ''
-            tiktok_id = filename
-        elif 'masstik' in file or 'masstiktok' in file:
-            data = file.split('_')
-            title = filename.split('_')[-1]
-        else:
-            title = filename
-        
-        
-        print("="*100)
-        title = title.encode('utf-8', 'ignore').decode('utf-8')
-        print(f"Username: {username}\nTitle: {title}")
-        
-        new_filename = f"{username}~{title}~{tiktok_id}{file_ext}"
-        new_filepath = os.path.join(processed_dir, username, new_filename)
-        
-        os.makedirs(os.path.dirname(new_filepath), exist_ok=True)
-        if not os.path.exists(new_filepath):
-            os.rename(filepath, new_filepath)
-            print(f"Renamed {file} to {new_filepath}")
-        else:
-            print("File with the same name already exists. Renaming aborted.")
-
-        print("="*100)