"""Backfill the ``duration`` column for video rows in the ``media`` table.

Selects every non-deleted video whose stored duration is 0, downloads the file
from the storage zone into a local cache directory (unless it is already
there), measures its duration and writes the value back to the database.
"""

import hashlib
import os

import requests
from BunnyCDN.Storage import Storage
from moviepy.editor import VideoFileClip

import config


def file_hash_from_url(url: str, hash_algo: str = 'sha256', timeout: float = 30) -> str:
    """
    Compute the hash of a remote file by streaming it.

    :param url: URL of the file to download.
    :param hash_algo: Hashing algorithm to use (e.g., 'sha256', 'md5').
    :param timeout: Seconds to wait for the connection and for each read
        before ``requests`` gives up.
    :return: Hexadecimal hash string.
    :raises Exception: If the server answers with a status other than 200.
    """
    h = hashlib.new(hash_algo)

    # The context manager releases the streamed connection on every path,
    # including the error path below.
    with requests.get(url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            raise Exception(f"Failed to download file: Status code {response.status_code}")

        for chunk in response.iter_content(8192):
            h.update(chunk)

    return h.hexdigest()


def get_video_duration(file_path: str) -> float:
    """
    Returns the duration of the video file in seconds.

    :param file_path: Path to the video file
    :return: Duration in seconds, or 0 if the file could not be read or
        reports no duration
    """
    try:
        with VideoFileClip(file_path) as video:
            # Treat a missing duration like a failed read so callers only
            # ever have to check for 0.
            return video.duration or 0
    except Exception:
        # Deliberately best-effort: any read/decode failure is reported as 0.
        # Not a bare ``except`` so Ctrl+C and SystemExit still propagate.
        return 0


def file_hash(filename: str, hash_algo: str = 'sha256') -> str:
    """
    Compute the hash of a file.

    :param filename: Path to the file.
    :param hash_algo: Hashing algorithm to use (e.g., 'sha256', 'md5').
    :return: Hexadecimal hash string.
    """
    # Create a hash object
    h = hashlib.new(hash_algo)

    # Open the file in binary mode and read in chunks
    with open(filename, 'rb') as file:
        while chunk := file.read(8192):
            h.update(chunk)

    # Return the hexadecimal digest of the hash
    return h.hexdigest()


# the hash of the images are different due to optimizer

# NOTE(review): storage API keys are hard-coded in this file; consider loading
# them from ``config`` or the environment instead of committing them.
#obj_storage = Storage('577cb82d-8176-4ccf-935ce0a574bf-fe4c-4012', 'altpins')
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')

db, cursor = config.gen_connection()

cursor.execute("SELECT id, media_id, media_url FROM media WHERE duration = 0 AND media_type = 'video' AND status != 'deleted';")
results = cursor.fetchall()

count = 0
total = len(results)
print(f"Found {total} files to process.")

cacheDir = 'cache'
# Make sure the download target exists before the first download.
os.makedirs(cacheDir, exist_ok=True)

for count, (videoID, mediaID, mediaURL) in enumerate(results, start=1):
    # Turn the public CDN URL into a path relative to the storage zone root,
    # collapsing doubled slashes and normalising backslashes.
    serverPath = mediaURL.replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')

    localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))

    if os.path.exists(localFilePath):
        print(f"File already exists: {localFilePath}")
    else:
        obj_storage.DownloadFile(storage_path=serverPath, download_path=cacheDir)

    duration = get_video_duration(localFilePath)

    if duration == 0:
        print(f"Failed to get duration for {localFilePath}")
        continue

    # Clamp sub-second clips to 1; presumably so the stored value never ends
    # up as 0 and gets re-selected by the query above -- TODO confirm.
    if duration < 1:
        duration = 1

    cursor.execute("UPDATE media SET duration = %s WHERE id = %s;", (duration, videoID))
    db.commit()
    print(f"[{count}/{total}] {mediaID}: {duration}, {cursor.rowcount}")