Altpins-Instagram/archived/weirdump.py

from BunnyCDN.Storage import Storage
from PIL import Image
import os, uuid, cv2, config
import hashlib
from moviepy.editor import VideoFileClip

def scan_dupes(folder_path):
    """Delete local files whose content hash is already stored in ``media``.

    Every sub-directory found while walking *folder_path* is listed, each
    regular file in it is hashed with ``calculate_file_hash``, and the file is
    removed when that hash is already present in the ``hash`` column.

    Relies on the module-level ``newCursor`` database cursor.

    :param folder_path: Root directory whose sub-directories are scanned.
    """
    newCursor.execute("SELECT hash FROM media")
    # A set keeps the per-file membership test constant-time.
    known_hashes = {row[0] for row in newCursor.fetchall()}

    for root, dirs, _files in os.walk(folder_path):
        for folder in dirs:
            subfolder = os.path.join(root, folder)
            for filename in os.listdir(subfolder):
                filepath = os.path.join(subfolder, filename)
                # listdir also returns nested directories; they cannot be
                # hashed and os.walk visits them on its own.
                if not os.path.isfile(filepath):
                    continue
                media_id = filename.replace('.mp4', '').replace('.jpg', '')
                if not media_id:
                    continue
                if calculate_file_hash(filepath) in known_hashes:
                    print('Duplicate')
                    os.remove(filepath)

def clean_empty_folders(directory):
    """Remove empty sub-directories beneath *directory*, deepest first.

    *directory* itself is never removed.

    :param directory: Root directory to clean.
    """
    # Bottom-up traversal: children are handled before their parent, so a
    # folder that only contained empty folders is itself empty by the time
    # the level above inspects it.
    for parent, child_names, _ in os.walk(directory, topdown=False):
        candidates = (os.path.join(parent, name) for name in child_names)
        for candidate in candidates:
            if os.listdir(candidate):
                continue
            os.rmdir(candidate)
            print(f"Removed empty folder: {candidate}")

def upload_file(filepath, username, media_type='image', post_type = 'story'):
    """Upload one media file to CDN storage and record it in the ``media`` table.

    The local file is deleted once it has been recorded, when it is a
    duplicate, or when it cannot be opened as an image.

    Relies on the module-level ``newDB``, ``newCursor`` and ``obj_storage``.

    :param filepath: Path of the local file to upload.
    :param username: Account name; used in the storage path and stored in the row.
    :param media_type: ``'image'`` is probed with PIL; any other value is
        probed as a video.
    :param post_type: ``'story'`` selects the ``stories`` directory; any other
        value selects ``posts``.
    :return: ``True`` when the file was skipped as a duplicate, otherwise ``None``.
    """
    filename = os.path.basename(filepath)
    # Suffix includes the leading dot, or is empty for extension-less names.
    suffix = os.path.splitext(filename)[1]
    dirtype = 'stories' if post_type == 'story' else 'posts'

    fileHash = calculate_file_hash(filepath)

    # The earlier duplicate check read ``media_id`` and ``existing_files``
    # before either was defined, so it always raised and a bare ``except``
    # hid the error. Ask the database whether this content hash is stored.
    newCursor.execute("SELECT 1 FROM media WHERE hash = %s LIMIT 1", (fileHash,))
    if newCursor.fetchall():
        print('Duplicate')
        os.remove(filepath)
        return True

    # Probe the media before uploading so an unreadable file does not leave
    # an object in storage with no matching database row.
    duration = 0
    if media_type == 'image':
        try:
            with Image.open(filepath) as img:
                width, height = img.size
        except Exception:
            # Best effort, as before: discard files PIL cannot open.
            os.remove(filepath)
            return
    else:
        width, height = get_video_dimensions(filepath)
        duration = get_video_duration(filepath)

    media_id = uuid.uuid4().hex
    server_path = f'users/{dirtype}/{username}/{media_id}{suffix}'

    obj_storage.PutFile(filepath, server_path)

    file_url = f"https://storysave.b-cdn.net/{server_path}"

    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, hash, filename, media_id, duration) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    values = (username, media_type, file_url, width, height, post_type, fileHash, filename, media_id, duration)
    newCursor.execute(query, values)
    newDB.commit()

    os.remove(filepath)
    print(f'[{newCursor.rowcount}]{os.path.basename(filepath)} {file_url}')


def get_video_dimensions(video_path):
    """Return ``(width, height)`` of the video as reported by OpenCV.

    :param video_path: Path to the video file.
    :return: Tuple of two ints, frame width then frame height.
    """
    capture = cv2.VideoCapture(video_path)
    dimensions = tuple(
        int(capture.get(prop))
        for prop in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT)
    )
    capture.release()
    return dimensions

def get_video_duration(file_path):
    """
    Report how long the video file lasts, in seconds.

    :param file_path: Path to the video file
    :return: The ``duration`` attribute of the opened clip
    """
    # The clip is closed by the context manager before the value is returned.
    with VideoFileClip(file_path) as clip:
        seconds = clip.duration
    return seconds

def get_media_type(filename):
    """Classify *filename* as ``'image'`` or ``'video'`` by its extension.

    The comparison is case-insensitive.

    :param filename: File name or path to classify.
    :return: ``'image'``, ``'video'``, or ``None`` for any other extension.
    """
    lowered = filename.lower()
    kinds = (
        ('image', ('.jpg', '.webp', '.jpeg', '.png', '.gif')),
        ('video', ('.mp4', '.mov')),
    )
    for label, suffixes in kinds:
        if lowered.endswith(suffixes):
            return label
    return None


def dump_instagram(folder_path):
    """Upload every file found in the sub-directories beneath *folder_path*.

    Each sub-directory name is used as the username. The post type is
    ``'post'`` when the sub-directory path contains "post" (case-insensitive)
    and ``'story'`` otherwise.

    :param folder_path: Root directory to walk.
    """
    for root, dirs, _files in os.walk(folder_path):
        for folder in dirs:
            username = folder
            user_dir = os.path.join(root, folder)

            post_type = 'post' if 'post' in user_dir.lower() else 'story'

            for filename in os.listdir(user_dir):
                filepath = os.path.join(user_dir, filename)
                # listdir also returns nested directories; passing one on
                # would fail when it is hashed, and os.walk reaches it
                # separately anyway.
                if not os.path.isfile(filepath):
                    continue
                mediatype = get_media_type(filename)
                upload_file(username=username, media_type=mediatype, filepath=filepath, post_type=post_type)

def calculate_file_hash(file_path, hash_func='sha256'):
    """Return the hex digest of the file at *file_path*.

    :param file_path: Path of the file to digest.
    :param hash_func: Algorithm name handed to ``hashlib.new``.
    :return: Hexadecimal digest string.
    """
    digest = hashlib.new(hash_func)

    with open(file_path, 'rb') as stream:
        # Feed the file in 8192-byte pieces until read() returns b''.
        for piece in iter(lambda: stream.read(8192), b''):
            digest.update(piece)

    return digest.hexdigest()

if __name__ == '__main__':
    print('Starting processing...')

    # These stay module-level names: the functions above read newDB,
    # newCursor and obj_storage as globals.
    newDB, newCursor = config.gen_connection()

    # NOTE(review): the storage key is committed to source. It can now be
    # supplied through the BUNNYCDN_STORAGE_KEY environment variable; the
    # literal remains only as a backward-compatible fallback and should be
    # rotated and removed.
    storage_key = os.environ.get(
        'BUNNYCDN_STORAGE_KEY',
        '345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e',
    )
    obj_storage = Storage(storage_key, 'storysave')

    storiesPath = 'StorySave/'

    dump_instagram(storiesPath)

    print("Processing completed.")
cleanup structure 11 months ago			`from BunnyCDN.Storage import Storage`
			`from PIL import Image`
			`import os, uuid, cv2, config`
			`import hashlib`
			`from moviepy.editor import VideoFileClip`

			`def scan_dupes(folder_path):`
			`newCursor.execute("SELECT hash FROM media")`
			`existing_files = [image[0] for image in newCursor.fetchall()]`

			`for root, dirs, files in os.walk(folder_path):`
			`for folder in dirs:`
			`folder_path = os.path.join(root, folder)`
			`for filename in os.listdir(folder_path):`
			`media_id = filename.replace('.mp4', '').replace('.jpg', '')`
			`filepath = os.path.join(folder_path, filename)`
			`if media_id:`
			`fileHash = calculate_file_hash(filepath)`
			`if fileHash in existing_files:`
			`print(f'Duplicate')`
			`os.remove(filepath)`

			`def clean_empty_folders(directory):`
			`for foldername, subfolders, filenames in os.walk(directory, topdown=False):`
			`for subfolder in subfolders:`
			`folder_path = os.path.join(foldername, subfolder)`
			`if not os.listdir(folder_path):`
			`os.rmdir(folder_path)`
			`print(f"Removed empty folder: {folder_path}")`

			`def upload_file(filepath, username, media_type='image', post_type = 'story'):`
			`filename = os.path.basename(filepath)`
			`file_extension = filename.split('.')[-1]`
			`dirtype = 'stories' if post_type == 'story' else 'posts'`

			`#dirtype = 'profile'`

			`fileHash = calculate_file_hash(filepath)`

			`try:`
			`if int(media_id) in existing_files:`
			`print(f'Duplicate')`
			`os.remove(filepath)`
			`return True`
			`except: media_id = uuid.uuid4().hex`

			`server_path = f'users/{dirtype}/{username}/{media_id}.{file_extension}'`

			`obj_storage.PutFile(filepath, server_path)`

			`file_url = f"https://storysave.b-cdn.net/{server_path}"`

			`duration = 0`
			`if media_type == 'image':`
			`try:`
			`with Image.open(filepath) as img:`
			`width, height = img.size`
			`except:`
			`os.remove(filepath)`
			`return`
			`else:`
			`width, height = get_video_dimensions(filepath)`
			`duration = get_video_duration(filepath)`

			`query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, hash, filename, media_id, duration) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"`
			`values = (username, media_type, file_url, width, height, post_type, fileHash, filename, media_id, duration)`
			`newCursor.execute(query, values)`
			`newDB.commit()`

			`os.remove(filepath)`
			`print(f'[{newCursor.rowcount}]{os.path.basename(filepath)} {file_url}')`


			`def get_video_dimensions(video_path):`
			`cap = cv2.VideoCapture(video_path)`
			`width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))`
			`height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))`
			`cap.release()`
			`return width, height`

			`def get_video_duration(file_path):`
			`"""`
			`Returns the duration of the video file in seconds.`

			`:param file_path: Path to the video file`
			`:return: Duration in seconds`
			`"""`
			`with VideoFileClip(file_path) as video:`
			`return video.duration`

			`def get_media_type(filename):`
			`if filename.lower().endswith(".jpg") or filename.lower().endswith(".webp") or filename.lower().endswith(".jpeg") or filename.lower().endswith(".png") or filename.lower().endswith(".gif"):`
			`return 'image'`
			`if filename.lower().endswith(".mp4") or filename.lower().endswith(".mov"):`
			`return 'video'`


			`def dump_instagram(folder_path):`
			`for root, dirs, files in os.walk(folder_path):`
			`for folder in dirs:`
			`username = folder`
			`folder_path = os.path.join(root, folder)`

			`post_type = 'post' if 'post' in folder_path.lower() else 'story'`

			`for filename in os.listdir(folder_path):`
			`filepath = os.path.join(folder_path, filename)`
			`mediatype = get_media_type(filename)`
			`upload_file(username=username, media_type=mediatype, filepath=filepath, post_type=post_type)`

			`def calculate_file_hash(file_path, hash_func='sha256'):`
			`h = hashlib.new(hash_func)`

			`with open(file_path, 'rb') as file:`
			`chunk = 0`
			`while chunk != b'':`
			`chunk = file.read(8192)`
			`h.update(chunk)`

			`return h.hexdigest()`

			`if __name__ == '__main__':`
			`print('Starting processing...')`

			`newDB, newCursor = config.gen_connection()`

			`obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')`

			`storiesPath = 'StorySave/'`

			`dump_instagram(storiesPath)`

			`print("Processing completed.")`