# Reconstructed from the deleted-file diff of snappy_dump.py (blob 9f8a786).
"""Upload Snapchat media from a local folder to object storage and the DB.

Files are expected to be named ``<username>~<timestamp>~<snap_id>.<ext>``.
When run as a script, every file under ``directory`` is uploaded through the
storage client returned by ``config.get_storage()`` and recorded in the
``media`` table through the connection returned by ``config.gen_connection()``.
"""
from datetime import datetime
import os
from uuid import uuid4

import config
import cv2
import funcs

directory = 'snapchat'


def UploadMedia(media):
    """Upload one media file and insert its row into the ``media`` table.

    Relies on the module-level globals ``existing_files``, ``obj_storage``,
    ``newCursor`` and ``newDB`` that the ``__main__`` section creates.

    Args:
        media: dict with the keys ``username``, ``timestamp``, ``filepath``
            and ``media_id`` (the shape produced by ``get_media_data``).

    Returns:
        True when the file was uploaded or recognised as a duplicate,
        False when a video thumbnail could not be produced.
    """
    username = media['username']
    timestamp = media['timestamp']
    filepath = media['filepath']
    filename = os.path.basename(filepath)
    media_id = media['media_id']
    thumbnail_url = None
    phash = None

    # NOTE(review): the __main__ section fills existing_files with snap IDs,
    # not filenames, so this check only matches when a filename equals a
    # snap ID -- confirm whether it is still needed.
    if filename in existing_files:
        print('Duplicate file detected. Removing...')
        os.remove(filepath)
        return True

    # NOTE(review): the message says "Removing..." but this path leaves the
    # file on disk; behaviour kept as-is -- confirm the intent.
    if media_id in existing_files:
        print('Duplicate file detected. Removing...')
        return True

    media_type = funcs.get_media_type(filename)
    file_hash = funcs.calculate_file_hash(filepath)

    # Only the part before the first '-' is used as the epoch timestamp.
    if '-' in timestamp:
        timestamp = timestamp.split('-')[0]
    post_date = datetime.fromtimestamp(int(timestamp)) if timestamp else datetime.now()

    width, height = funcs.get_media_dimensions(filepath)
    duration = funcs.get_video_duration(filepath)

    if media_type == 'image':
        phash = funcs.generate_phash(filepath)
    elif media_type == 'video':
        thumb_path = None
        try:
            thumb_path = generate_thumbnail(filepath)
            # this might be a problem in case of duplicate hashes
            obj_storage.PutFile(thumb_path, f'thumbnails/{file_hash}.jpg')
            thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{file_hash}.jpg"
            phash = funcs.generate_phash(thumb_path)
        except Exception as exc:
            # Best-effort: skip this file, but report why and let
            # KeyboardInterrupt / SystemExit propagate.
            print(f'Error generating thumbnail. Skipping... ({exc!r})')
            return False
        finally:
            # Remove the temporary thumbnail even when the upload failed.
            if thumb_path is not None and os.path.exists(thumb_path):
                os.remove(thumb_path)

    server_path = f'media/snaps/{username}/{filename}'
    file_url = f"https://storysave.b-cdn.net/{server_path}"

    obj_storage.PutFile(filepath, server_path)  # slow

    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, date, hash, filename, duration, thumbnail, phash, platform) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    values = (username, media_type, file_url, width, height, 'story', post_date, file_hash, filename, duration, thumbnail_url, phash, 'snapchat')

    newCursor.execute(query, values)  # slower
    newDB.commit()
    print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')

    # The local copy is no longer needed once it is uploaded and recorded.
    os.remove(filepath)

    return True


def generate_thumbnail(filepath):
    """Write the first frame of the video at *filepath* to a JPEG in ``temp/``.

    Returns:
        Path of the thumbnail that was written.

    Raises:
        ValueError: no frame could be read from the video.
        OSError: the thumbnail could not be written.
    """
    os.makedirs('temp', exist_ok=True)
    thumb_path = f'temp/{uuid4()}.jpg'

    cap = cv2.VideoCapture(filepath)
    try:
        ret, frame = cap.read()
    finally:
        # Release the capture handle even if reading raised.
        cap.release()

    if not ret:
        raise ValueError(f'Could not read a frame from {filepath}')
    if not cv2.imwrite(thumb_path, frame):
        raise OSError(f'Could not write thumbnail to {thumb_path}')
    return thumb_path


def get_media_data(filepath):
    """Parse ``<username>~<timestamp>~<snap_id>.<ext>`` out of *filepath*.

    Returns:
        A dict with ``username``, ``timestamp``, ``filepath`` and
        ``media_id``, or False when the basename has fewer than three
        ``~``-separated parts.
    """
    filename = os.path.basename(filepath)
    parts = filename.split('~')
    if len(parts) < 3:
        return False

    username = parts[0]
    timestamp = parts[1]
    snap_id = os.path.splitext(parts[2])[0]

    return {'username': username, 'timestamp': timestamp, 'filepath': filepath, 'media_id': snap_id}


def get_media(folder_path):
    """Walk *folder_path* recursively and return the parsed media dicts.

    Files whose names ``get_media_data`` cannot parse are skipped.
    """
    medias = []
    for root, _dirs, files in os.walk(folder_path):
        for filename in files:
            data = get_media_data(os.path.join(root, filename))
            if data:
                medias.append(data)
    return medias


def dump(folder_path):
    """Upload every parseable media file found under *folder_path*."""
    for media in get_media(folder_path):
        UploadMedia(media)


def process_snap_ids(filenames):
    """Return the unique snap IDs found in *filenames*, in first-seen order.

    The snap ID is the third ``~``-separated field with its extension
    stripped. Filenames with fewer than three fields are skipped rather
    than raising ``IndexError``.
    """
    snap_ids = {}  # dict keys de-duplicate in O(1) and keep insertion order
    for filename in filenames:
        parts = filename.split('~')
        if len(parts) < 3:
            continue
        snap_ids.setdefault(os.path.splitext(parts[2])[0], None)
    return list(snap_ids)


if __name__ == '__main__':
    print('Starting processing...')

    if not os.path.isdir(directory) or not os.listdir(directory):
        print('No files to process. Exiting...')
        raise SystemExit

    newDB, newCursor = config.gen_connection()
    obj_storage = config.get_storage()

    try:
        newCursor.execute("SELECT filename FROM media WHERE filename IS NOT NULL AND platform = 'snapchat'")
        # A set makes the per-file duplicate checks in UploadMedia O(1).
        existing_files = set(process_snap_ids([row[0] for row in newCursor.fetchall()]))

        dump(directory)
    finally:
        # NOTE(review): assumes config.gen_connection() returns DB-API 2.0
        # objects, which provide close() -- confirm.
        newCursor.close()
        newDB.close()

    print("Processing completed.")