Altpins-Instagram/fixduration.py

from BunnyCDN.Storage import Storage
from moviepy.editor import VideoFileClip
import config
import hashlib
import requests
import os

def file_hash_from_url(url, hash_algo='sha256', timeout=30):
    """
    Compute the hash of a remote file by streaming it from a URL.

    :param url: URL of the file to download.
    :param hash_algo: Hashing algorithm to use (e.g., 'sha256', 'md5').
    :param timeout: Seconds to wait for the server before giving up.
    :return: Hexadecimal hash string.
    :raises Exception: If the server answers with a status code other than 200.
    """
    h = hashlib.new(hash_algo)

    # Without a timeout a stalled server would block this call forever.
    # The context manager releases the streamed connection on every exit
    # path, including the error raised for a non-200 response.
    with requests.get(url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            raise Exception(f"Failed to download file: Status code {response.status_code}")

        # Feed the body to the hash in chunks so the file is never held
        # in memory as a whole.
        for chunk in response.iter_content(8192):
            h.update(chunk)

    return h.hexdigest()

def get_video_duration(file_path):
    """
    Returns the duration of the video file in seconds.

    Any error raised while opening or probing the file is treated as
    "duration unknown" and reported as 0, so the caller can skip the file.

    :param file_path: Path to the video file
    :return: Duration in seconds, or 0 if it could not be determined
    """
    try:
        with VideoFileClip(file_path) as video:
            # Normalise a missing (None) duration to 0 so callers can
            # compare the result numerically.
            return video.duration or 0
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt and
        # SystemExit must still be able to stop a long batch run.
        return 0

def file_hash(filename, hash_algo='sha256'):
    """
    Hash the contents of a file on disk.

    :param filename: Path to the file.
    :param hash_algo: Hashing algorithm to use (e.g., 'sha256', 'md5').
    :return: Hexadecimal hash string.
    """
    digest = hashlib.new(hash_algo)
    block_size = 8192

    # Binary mode; pull fixed-size blocks until read() returns b'' at EOF
    with open(filename, 'rb') as stream:
        for block in iter(lambda: stream.read(block_size), b''):
            digest.update(block)

    return digest.hexdigest()

# Note: the hashes of the images are different due to the optimizer.

#obj_storage = Storage('577cb82d-8176-4ccf-935ce0a574bf-fe4c-4012', 'altpins')
# NOTE(review): the first argument looks like a storage credential that is
# hard-coded in source; consider loading it from config or the environment.
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')

db, cursor = config.gen_connection()

# Pick up every non-deleted video row whose duration is still recorded as 0.
cursor.execute("SELECT id, media_id, media_url FROM media WHERE duration = 0 AND media_type = 'video' AND status != 'deleted';")
results = cursor.fetchall()

total = len(results)
print(f"Found {total} files to process.")

cacheDir = 'cache'
# Make sure the download target exists before the first download is attempted.
os.makedirs(cacheDir, exist_ok=True)

for count, (videoID, mediaID, mediaURL) in enumerate(results, start=1):
    # Turn the public CDN URL into a path relative to the storage zone root,
    # normalising doubled and backward slashes along the way.
    serverPath = mediaURL.replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')

    localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))

    # Reuse a previously downloaded copy instead of fetching it again.
    if os.path.exists(localFilePath):
        print(f"File already exists: {localFilePath}")
    else:
        obj_storage.DownloadFile(storage_path=serverPath, download_path=cacheDir)

    duration = get_video_duration(localFilePath)

    # A falsy duration (0 or None) means the file could not be read;
    # leave the row untouched so it is picked up again on the next run.
    if not duration:
        print(f"Failed to get duration for {localFilePath}")
        continue

    # Clamp sub-second clips to 1; a stored 0 would make the SELECT above
    # match the row again on the next run.
    if duration < 1:
        duration = 1

    cursor.execute("UPDATE media SET duration = %s WHERE id = %s;", (duration, videoID))
    db.commit()

    print(f"[{count}/{total}] {mediaID}: {duration}, {cursor.rowcount}")
update 11 months ago			`from BunnyCDN.Storage import Storage`
			`from moviepy.editor import VideoFileClip`
			`import config`
			`import hashlib`
			`import requests`
			`import os`

			`def file_hash_from_url(url, hash_algo='sha256'):`
			`h = hashlib.new(hash_algo)`

			`response = requests.get(url, stream=True)`

			`if response.status_code == 200:`
			`for chunk in response.iter_content(8192):`
			`h.update(chunk)`
			`return h.hexdigest()`
			`else:`
			`raise Exception(f"Failed to download file: Status code {response.status_code}")`

			`def get_video_duration(file_path):`
			`"""`
			`Returns the duration of the video file in seconds.`

			`:param file_path: Path to the video file`
			`:return: Duration in seconds`
			`"""`
			`try:`
			`with VideoFileClip(file_path) as video:`
			`return video.duration`
			`except:`
			`return 0`

			`def file_hash(filename, hash_algo='sha256'):`
			`"""`
			`Compute the hash of a file.`

			`:param filename: Path to the file.`
			`:param hash_algo: Hashing algorithm to use (e.g., 'sha256', 'md5').`
			`:return: Hexadecimal hash string.`
			`"""`
			`# Create a hash object`
			`h = hashlib.new(hash_algo)`

			`# Open the file in binary mode and read in chunks`
			`with open(filename, 'rb') as file:`
			`while chunk := file.read(8192):`
			`h.update(chunk)`

			`# Return the hexadecimal digest of the hash`
			`return h.hexdigest()`

			`# the hash of the images are different due to optimizer`

			`#obj_storage = Storage('577cb82d-8176-4ccf-935ce0a574bf-fe4c-4012', 'altpins')`
			`obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')`

			`db, cursor = config.gen_connection()`

			`cursor.execute("SELECT id, media_id, media_url FROM media WHERE duration = 0 AND media_type = 'video' AND status != 'deleted';")`
			`results = cursor.fetchall()`

			`count = 0`
			`print(f"Found {len(results)} files to process.")`

			`cacheDir = 'cache'`
			`for result in results:`
			`count += 1`
			`videoID = result[0]`
			`mediaID = result[1]`
			`mediaURL = result[2]`
			`extension = mediaURL.split('.')[-1]`

			`serverPath = result[2].replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')`

			`localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))`

			`if os.path.exists(localFilePath):`
			`print(f"File already exists: {localFilePath}")`
			`else:`
			`obj_storage.DownloadFile(storage_path=serverPath, download_path=cacheDir)`

			`duration = get_video_duration(localFilePath)`

			`if duration == 0:`
			`print(f"Failed to get duration for {localFilePath}")`
			`continue`

			`if duration < 1:`
			`duration = 1`

			`cursor.execute("UPDATE media SET duration = %s WHERE id = %s;", (duration, result[0]))`
			`db.commit()`

			`print(f"[{count}/{len(results)}] {result[1]}: {duration}, {cursor.rowcount}")`