You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
94 lines
2.7 KiB
Python
94 lines
2.7 KiB
Python
|
11 months ago
|
from BunnyCDN.Storage import Storage
|
||
|
|
from moviepy.editor import VideoFileClip
|
||
|
|
import config
|
||
|
|
import hashlib
|
||
|
|
import requests
|
||
|
|
import os
|
||
|
|
|
||
|
|
def file_hash_from_url(url, hash_algo='sha256', timeout=30):
    """
    Compute the hash of a remote file by streaming it over HTTP.

    :param url: URL of the file to download.
    :param hash_algo: Hashing algorithm to use (e.g., 'sha256', 'md5').
    :param timeout: Seconds to wait on the server (connect / between reads)
                    before giving up; passed straight to ``requests.get``.
    :return: Hexadecimal hash string.
    :raises Exception: If the server answers with a status code other than 200.
    """
    h = hashlib.new(hash_algo)

    # A streamed response keeps its connection open until it is closed, so use
    # the response as a context manager to release it on every exit path
    # (including the error branch below).
    with requests.get(url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            raise Exception(f"Failed to download file: Status code {response.status_code}")

        # Feed the body to the hash in 8 KiB chunks instead of buffering it all.
        for chunk in response.iter_content(8192):
            h.update(chunk)

    return h.hexdigest()
|
||
|
|
|
||
|
|
def get_video_duration(file_path):
    """
    Returns the duration of the video file in seconds.

    :param file_path: Path to the video file
    :return: Duration in seconds, or 0 when the file cannot be opened/read or
             reports no duration
    """
    try:
        with VideoFileClip(file_path) as video:
            # Normalise a missing (None) duration to 0 so callers can rely on
            # plain numeric comparisons.
            return video.duration or 0
    except Exception:
        # Best effort: any failure to open or parse the clip is reported as 0.
        # Unlike a bare ``except``, this lets KeyboardInterrupt/SystemExit
        # propagate so the script can still be interrupted.
        return 0
|
||
|
|
|
||
|
|
def file_hash(filename, hash_algo='sha256'):
    """
    Hash the contents of a local file without loading it fully into memory.

    :param filename: Path to the file.
    :param hash_algo: Name of the hashlib algorithm (e.g., 'sha256', 'md5').
    :return: Hexadecimal digest string.
    """
    digest = hashlib.new(hash_algo)
    block_size = 8192

    with open(filename, 'rb') as stream:
        # iter() with a sentinel keeps calling read() until it returns b'' (EOF).
        for block in iter(lambda: stream.read(block_size), b''):
            digest.update(block)

    return digest.hexdigest()
|
||
|
|
|
||
|
|
# Backfill script: for every non-deleted video whose stored duration is 0,
# fetch the file from storage (or reuse the cached copy), measure its duration
# and write it back to the `media` table.

# Original author's note: the hashes of the images are different due to the
# optimizer.

# NOTE(review): the storage API key is hard-coded below and therefore lives in
# source control; it should be loaded from config/environment and rotated.
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')

db, cursor = config.gen_connection()

cursor.execute("SELECT id, media_id, media_url FROM media WHERE duration = 0 AND media_type = 'video' AND status != 'deleted';")
results = cursor.fetchall()

total = len(results)
print(f"Found {total} files to process.")

cacheDir = 'cache'
# The cache directory is handed to DownloadFile as its download_path, so make
# sure it exists before the first download.
os.makedirs(cacheDir, exist_ok=True)

for count, (video_id, media_id, media_url) in enumerate(results, start=1):
    # Strip the CDN host to get the path inside the storage zone, and
    # normalise doubled or backward slashes.
    serverPath = media_url.replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')

    localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))

    if os.path.exists(localFilePath):
        print(f"File already exists: {localFilePath}")
    else:
        obj_storage.DownloadFile(storage_path=serverPath, download_path=cacheDir)

    duration = get_video_duration(localFilePath)

    # 0 (or a missing value) means the clip could not be read; leave the row
    # untouched.
    if not duration:
        print(f"Failed to get duration for {localFilePath}")
        continue

    # Clamp sub-second clips to 1 — presumably so the stored value never looks
    # like the "not yet processed" marker 0 (TODO confirm column type).
    if duration < 1:
        duration = 1

    cursor.execute("UPDATE media SET duration = %s WHERE id = %s;", (duration, video_id))
    db.commit()

    print(f"[{count}/{total}] {media_id}: {duration}, {cursor.rowcount}")
|