"""Upload downloaded Instagram media to BunnyCDN storage and record it in the database.

Files are read from a local folder, pushed to the storage zone, registered
in the ``media`` (and, when applicable, ``highlights``) tables and then
deleted locally.

The functions below rely on the module-level globals ``newDB``,
``newCursor``, ``obj_storage`` and ``existing_files`` that are created in
the ``__main__`` section at the bottom of the file.
"""
import os
from datetime import datetime

import cv2
from BunnyCDN.Storage import Storage
from PIL import Image

import config
import funcs

# Public base URL under which uploaded objects are referenced.
CDN_BASE_URL = "https://storysave.b-cdn.net"

# Local directory used for thumbnails extracted from videos.
THUMBNAIL_TEMP_DIR = 'temp'


def _upload_video_thumbnail(media_id, filepath):
    """Save the first frame of a video as a JPEG, upload it, and return its URL.

    Raises an exception when the frame cannot be read or written, or when
    the upload itself fails; the caller decides how to react.
    """
    # The temp directory may not exist on a fresh checkout.
    os.makedirs(THUMBNAIL_TEMP_DIR, exist_ok=True)
    thumb_path = f'{THUMBNAIL_TEMP_DIR}/{media_id}.jpg'

    cap = cv2.VideoCapture(filepath)
    try:
        ret, frame = cap.read()
    finally:
        # Always release the capture, even when reading the frame fails.
        cap.release()

    if not ret:
        raise ValueError(f'Could not read a frame from {filepath}')
    if not cv2.imwrite(thumb_path, frame):
        raise OSError(f'Could not write thumbnail to {thumb_path}')

    obj_storage.PutFile(thumb_path, f'thumbnails/{media_id}.jpg')
    return f"{CDN_BASE_URL}/thumbnails/{media_id}.jpg"


def _lookup_user_id(username):
    """Return a user_id already stored for ``username`` in ``media``, or None."""
    try:
        newCursor.execute("SELECT user_id FROM media WHERE username=%s", (username,))
        return newCursor.fetchall()[0][0]
    except Exception:
        # Best effort: no matching row (IndexError) or a database error.
        print(f'User {username} not found in database. Skipping...')
        return None


def UploadMedia(media):
    """Upload one media file and insert its metadata into the database.

    Args:
        media: dict with the keys ``media_id``, ``username``, ``timestamp``,
            ``user_id`` and ``filepath`` (as produced by ``getMedias``).

    Returns:
        True when the file was uploaded and recorded; False when thumbnail
        generation for a video failed and the media was skipped.

    Side effects:
        Uploads to ``obj_storage``, writes through ``newCursor``/``newDB``
        and deletes the local file on success.
    """
    media_id = media['media_id']
    username = media['username']
    timestamp = media['timestamp']
    user_id = media['user_id']
    filepath = media['filepath']

    filename = os.path.basename(filepath)
    file_extension = os.path.splitext(filename)[1].lower()

    media_type = funcs.get_media_type(filename)
    is_video = media_type == 'video'
    post_type = funcs.determine_post_type(filepath, media_type)
    file_hash = funcs.calculate_file_hash(filepath)
    duration = funcs.get_video_duration(filepath) if is_video else 0
    post_date = datetime.fromtimestamp(int(timestamp)) if timestamp else datetime.now()

    if is_video:
        width, height = funcs.get_video_dimensions(filepath)
    else:
        # Close the image explicitly so no file handle is left open when
        # the file is deleted at the end of this function.
        with Image.open(filepath) as image:
            width, height = image.size

    thumbnail_url = None
    if is_video:
        try:
            thumbnail_url = _upload_video_thumbnail(media_id, filepath)
        except Exception:
            print('Error generating thumbnail. Skipping...')
            return False

    server_path = f'media/{post_type}/{username}/{media_id}{file_extension}'
    file_url = f"{CDN_BASE_URL}/{server_path}"

    # A user_id containing 'highlight' actually carries a highlight id; the
    # real user id is then looked up from previously stored media.
    if user_id and 'highlight' in user_id:
        highlight_id = user_id.replace('highlight', '')
        user_id = _lookup_user_id(username)

        newCursor.execute(
            "INSERT IGNORE INTO highlights (highlight_id, user_id, media_id) VALUES (%s, %s, %s)",
            (highlight_id, user_id, media_id),
        )
        newDB.commit()
        print(f'[{newCursor.rowcount}] added highlight {highlight_id} to user {user_id}')

    obj_storage.PutFile(filepath, server_path)

    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, media_id, post_type, date, user_id, hash, filename, duration, thumbnail) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    values = (username, media_type, file_url, width, height, media_id, post_type, post_date, user_id, file_hash, filename, duration, thumbnail_url)

    newCursor.execute(query, values)
    newDB.commit()
    print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')

    os.remove(filepath)

    return True


def getMedias(folder_path):
    """Parse the file names in ``folder_path`` into media descriptors.

    File names are split on ``~`` and must yield at least four parts:
    username, timestamp, media id, and a trailing part whose last
    ``_``-separated token (up to the first ``.``) is used as the user id.
    Files with fewer parts or a non-integer media id are skipped.

    Returns:
        A list of dicts with the keys ``username``, ``timestamp``,
        ``media_id`` (int), ``user_id`` and ``filepath``.
    """
    medias = []

    for filename in os.listdir(folder_path):
        parts = filename.split('~')
        if len(parts) < 4:
            continue

        username, timestamp, raw_media_id = parts[0], parts[1], parts[2]
        user_id = parts[3].split('_')[-1].split('.')[0]

        # An empty string also raises ValueError, so one check covers both
        # the missing and the malformed media id.
        try:
            media_id = int(raw_media_id)
        except ValueError:
            print(f'Invalid media_id for file {filename}. Skipping...')
            continue

        medias.append({
            'username': username,
            'timestamp': timestamp,
            'media_id': media_id,
            'user_id': user_id,
            'filepath': os.path.join(folder_path, filename),
        })

    return medias


def dump_instagram(folder_path):
    """Upload every new media file in ``folder_path`` and delete known duplicates.

    A file whose media id is already listed in the global ``existing_files``
    is removed from disk and not uploaded.
    """
    # Build the lookup set once instead of scanning the list per file.
    known_ids = set(existing_files)

    for media in getMedias(folder_path):
        if media['media_id'] in known_ids:
            print('Duplicate file detected. Removing...')
            os.remove(media['filepath'])
            # The file is gone; uploading it would fail on the missing path.
            continue

        UploadMedia(media)


if __name__ == '__main__':
    print('Starting processing...')

    newDB, newCursor = config.gen_connection()

    # NOTE(review): the storage API key is committed in source. It can be
    # overridden via the BUNNY_STORAGE_API_KEY environment variable; the
    # literal fallback should be removed and the key rotated.
    storage_api_key = os.environ.get(
        'BUNNY_STORAGE_API_KEY',
        '345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e',
    )
    obj_storage = Storage(storage_api_key, 'storysave')

    newCursor.execute("SELECT media_id FROM media WHERE platform='instagram' AND media_id IS NOT NULL")
    existing_files = [image[0] for image in newCursor.fetchall()]

    dump_instagram('storysaver/')

    print("Processing completed.")