from BunnyCDN.Storage import Storage
from PIL import Image
import hashlib
import os
import uuid
import cv2
import config
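
# StorySave uploader: walks a local dump of saved Instagram media, parses
# usernames and media IDs out of the filenames, uploads each file to BunnyCDN
# storage, and records the metadata in the `media` database table.
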
def clean_empty_folders(directory):
    """Remove empty subfolders, walking bottom-up so parents can empty out too."""
    for foldername, subfolders, filenames in os.walk(directory, topdown=False):
        for subfolder in subfolders:
            folder_path = os.path.join(foldername, subfolder)
            if not os.listdir(folder_path):
                os.rmdir(folder_path)
                print(f"Removed empty folder: {folder_path}")

def calculate_file_hash(file_path, hash_func='sha256'):
    """Return the hex digest of a file, read in 8 KiB chunks."""
    h = hashlib.new(hash_func)
    with open(file_path, 'rb') as file:
        for chunk in iter(lambda: file.read(8192), b''):
            h.update(chunk)
    return h.hexdigest()

def extract_file_info(filename):
    """Parse "username~timestamp~mediaid_userid.ext" style filenames."""
    try:
        username, timestamp, rest = filename.split("~")[:3]
        media_id, user_id = rest.split("_")
        user_id = user_id.split(".")[0]
        return username, media_id, user_id, timestamp
    except (ValueError, IndexError):
        return None, None, None, None

def extract_file_info2(filename):
    """Parse "username~mediaid_userid.ext" style filenames (no timestamp)."""
    try:
        username = filename.split("~")[0]
        elements = filename.split("~")[1].split("_")
        media_id, user_id = elements[0], elements[1].split(".")[0]
        return username, media_id, user_id
    except (ValueError, IndexError):
        return None, None, None

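# Illustrative filename shapes handled by the two parsers above
# (hypothetical example values, inferred from the parsing logic):
#   extract_file_info  -> "someuser~1700000000~123456789_987654321.jpg"
#   extract_file_info2 -> "someuser~123456789_987654321.jpg"
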
def upload_file(filepath, username, media_id=None, media_type='image', post_type='story', user_id=None, date=None):
    """Upload a single file to BunnyCDN storage and record its metadata in the database."""
    filename = os.path.basename(filepath)
    file_extension = filename.split('.')[-1]
    dirtype = 'stories' if post_type == 'story' else 'posts'
    server_path = f'users/{dirtype}/{username}/{media_id if media_id else uuid.uuid4().hex}.{file_extension}'
    file_url = f"https://storysave.b-cdn.net/{server_path}"

    file_hash = calculate_file_hash(filepath)

    # Determine media dimensions before inserting the row.
    if media_type == 'image':
        with Image.open(filepath) as img:
            width, height = img.size
    else:
        width, height = get_video_dimensions(filepath)

    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, media_id, post_type, user_id, hash, date) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    values = (username, media_type, file_url, width, height, media_id, post_type, user_id, file_hash, date)
    newCursor.execute(query, values)
    newDB.commit()
    existing_files.append(media_id)

    if newCursor.rowcount == 0:
        print(f'Row for {filename} was not inserted (media_id may already exist)')

    obj_storage.PutFile(filepath, server_path)
    os.remove(filepath)
    print(f'[{newCursor.rowcount}] {filename} {file_url}')

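# Hypothetical example call (illustrative values only):
#   upload_file('/tmp/someuser~1700000000~123_456.jpg', 'someuser',
#               media_id=123, media_type='image', user_id=456)
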
def get_video_dimensions(video_path):
    """Read a video's frame width and height via OpenCV."""
    cap = cv2.VideoCapture(video_path)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    cap.release()
    return width, height

def get_media_type(filename):
    """Classify a file as 'image' or 'video' by extension; None if unrecognized."""
    lowered = filename.lower()
    if lowered.endswith(('.jpg', '.jpeg', '.png', '.webp', '.gif')):
        return 'image'
    if lowered.endswith(('.mp4', '.mov')):
        return 'video'
    return None

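# Expected on-disk layout (inferred from dump_instagram below):
#   StorySave/<username>/<media files named as in the examples above>
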
def dump_instagram(folder_path):
    """Walk per-username subfolders of a dump directory and upload every media file."""
    for root, dirs, files in os.walk(folder_path):
        for folder in dirs:
            username = folder
            user_folder = os.path.join(root, folder)
            for filename in os.listdir(user_folder):
                if "~" not in filename:
                    continue

                username, media_id, user_id, timestamp = extract_file_info(filename)
                if None in [username, media_id, user_id, timestamp]:
                    # Fall back to the timestamp-less filename format.
                    username, media_id, user_id = extract_file_info2(filename)
                    if None in [username, media_id, user_id]:
                        print(f"Failed to extract info from {filename}")
                        continue

                media_id = int(media_id) if media_id else None

                # Skip (and delete) files whose media_id is already in the database.
                if media_id in existing_files:
                    print(f'Duplicate, {filename}')
                    os.remove(os.path.join(user_folder, filename))
                    continue

                filepath = os.path.join(user_folder, filename)
                mediatype = get_media_type(filename)
                upload_file(username=username, media_type=mediatype, filepath=filepath, media_id=media_id, user_id=user_id)

if __name__ == '__main__':
    print('Starting processing...')

    newDB, newCursor = config.gen_connection()
    obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')

    # Cache the media_ids already stored so duplicates can be skipped.
    newCursor.execute("SELECT media_id FROM media")
    existing_files = [image[0] for image in newCursor.fetchall()]

    dump_instagram('StorySave/')
    print("Processing completed.")