You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

133 lines
4.5 KiB
Python

11 months ago
from BunnyCDN.Storage import Storage
from PIL import Image
import os, uuid, cv2, config
import hashlib
from moviepy.editor import VideoFileClip
def scan_dupes(folder_path):
    """Delete local files whose hash already exists in the ``media`` table.

    Every sub-directory below ``folder_path`` (at any depth) is inspected.
    Files that sit directly in ``folder_path`` itself are not examined.

    :param folder_path: Root directory whose sub-directories are scanned.
    """
    newCursor.execute("SELECT hash FROM media")
    # A set gives constant-time membership tests; a list would be scanned
    # linearly for every file on disk.
    known_hashes = {row[0] for row in newCursor.fetchall()}

    for root, dirs, _files in os.walk(folder_path):
        for folder in dirs:
            # Use a separate name so the ``folder_path`` argument is not clobbered.
            current_dir = os.path.join(root, folder)
            for filename in os.listdir(current_dir):
                filepath = os.path.join(current_dir, filename)
                # os.listdir also yields sub-directories, which cannot be
                # opened for hashing; they are visited by os.walk on their own.
                if not os.path.isfile(filepath):
                    continue
                media_id = filename.replace('.mp4', '').replace('.jpg', '')
                if not media_id:
                    # Name consists only of an extension; leave it alone.
                    continue
                if calculate_file_hash(filepath) in known_hashes:
                    print(f'Duplicate')
                    os.remove(filepath)
def clean_empty_folders(directory):
    """Remove empty sub-directories found below ``directory``.

    The tree is walked bottom-up, so a folder that becomes empty once its own
    empty children are removed is removed as well. ``directory`` itself is
    never deleted.

    :param directory: Root of the tree to clean.
    """
    for parent, child_dirs, _files in os.walk(directory, topdown=False):
        for child in child_dirs:
            candidate = os.path.join(parent, child)
            if os.listdir(candidate):
                # Still has content; keep it.
                continue
            os.rmdir(candidate)
            print(f"Removed empty folder: {candidate}")
def upload_file(filepath, username, media_type='image', post_type = 'story'):
    """Upload one local media file to CDN storage and record it in ``media``.

    The local file is deleted once it has been processed. An image that cannot
    be opened is deleted without being uploaded or recorded.

    :param filepath: Path of the local file to upload.
    :param username: Used as a path component on the server and stored in the row.
    :param media_type: ``'image'`` reads the dimensions with PIL; any other
        value is handled as a video.
    :param post_type: ``'story'`` uploads under ``stories``; any other value
        uploads under ``posts``.
    :return: ``None``.
    """
    filename = os.path.basename(filepath)
    file_extension = filename.split('.')[-1]
    dirtype = 'stories' if post_type == 'story' else 'posts'
    fileHash = calculate_file_hash(filepath)

    # NOTE(review): a duplicate check used to sit here, but it read
    # ``media_id`` before assignment and an ``existing_files`` name that this
    # function never defines, so it always raised and a bare ``except`` fell
    # back to a random id. That effective behaviour is kept, minus the dead
    # code. Database-side de-duplication is left to ``INSERT IGNORE`` below.
    media_id = uuid.uuid4().hex

    # Inspect the media BEFORE uploading so an unreadable file does not leave
    # an orphaned object on the CDN with no database row pointing at it.
    duration = 0
    if media_type == 'image':
        try:
            with Image.open(filepath) as img:
                width, height = img.size
        except Exception:
            # Best effort: discard files PIL cannot read.
            os.remove(filepath)
            return
    else:
        width, height = get_video_dimensions(filepath)
        duration = get_video_duration(filepath)

    server_path = f'users/{dirtype}/{username}/{media_id}.{file_extension}'
    obj_storage.PutFile(filepath, server_path)
    file_url = f"https://storysave.b-cdn.net/{server_path}"

    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, hash, filename, media_id, duration) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    values = (username, media_type, file_url, width, height, post_type, fileHash, filename, media_id, duration)
    newCursor.execute(query, values)
    newDB.commit()

    os.remove(filepath)
    print(f'[{newCursor.rowcount}]{filename} {file_url}')
def get_video_dimensions(video_path):
    """Return the ``(width, height)`` of a video as integers, read via OpenCV.

    :param video_path: Path to the video file.
    :return: Tuple ``(width, height)``.
    """
    capture = cv2.VideoCapture(video_path)
    frame_width = capture.get(cv2.CAP_PROP_FRAME_WIDTH)
    frame_height = capture.get(cv2.CAP_PROP_FRAME_HEIGHT)
    frame_size = (int(frame_width), int(frame_height))
    capture.release()
    return frame_size
def get_video_duration(file_path):
    """
    Open the video with moviepy and report how long it runs.

    :param file_path: Path to the video file
    :return: The clip's ``duration`` attribute (seconds)
    """
    clip = VideoFileClip(file_path)
    # The context manager closes the clip's readers on exit.
    with clip as opened:
        seconds = opened.duration
    return seconds
def get_media_type(filename):
    """Classify a file name as ``'image'`` or ``'video'`` by its extension.

    :param filename: File name or path; matching is case-insensitive.
    :return: ``'image'``, ``'video'``, or ``None`` when the extension is not
        one of the recognised ones.
    """
    # Lower-case once; str.endswith accepts a tuple of suffixes.
    lowered = filename.lower()
    if lowered.endswith((".jpg", ".webp", ".jpeg", ".png", ".gif")):
        return 'image'
    if lowered.endswith((".mp4", ".mov")):
        return 'video'
    return None
def dump_instagram(folder_path):
    """Upload every supported media file in the sub-directories of ``folder_path``.

    Each sub-directory (at any depth) is processed with its own name as the
    username. The post type is ``'post'`` when the sub-directory's path
    contains "post" (case-insensitive) and ``'story'`` otherwise. Files that
    sit directly in ``folder_path`` are not uploaded.

    :param folder_path: Root directory holding the per-user folders.
    """
    for root, dirs, _files in os.walk(folder_path):
        for folder in dirs:
            username = folder
            # Use a separate name so the ``folder_path`` argument is not clobbered.
            user_dir = os.path.join(root, folder)
            post_type = 'post' if 'post' in user_dir.lower() else 'story'

            for filename in os.listdir(user_dir):
                filepath = os.path.join(user_dir, filename)
                # os.listdir also yields nested folders; they cannot be hashed
                # or uploaded and are visited by os.walk on their own.
                if not os.path.isfile(filepath):
                    continue
                mediatype = get_media_type(filename)
                if mediatype is None:
                    # Unknown extension: uploading would treat it as a video.
                    print(f'Skipping unsupported file: {filepath}')
                    continue
                upload_file(username=username, media_type=mediatype, filepath=filepath, post_type=post_type)
def calculate_file_hash(file_path, hash_func='sha256'):
    """Return the hexadecimal digest of a file's contents.

    The file is read in 8 KiB chunks so large files are not loaded into
    memory at once.

    :param file_path: Path of the file to hash.
    :param hash_func: Algorithm name accepted by ``hashlib.new``.
    :return: Hex digest string.
    """
    digest = hashlib.new(hash_func)
    with open(file_path, 'rb') as stream:
        # iter(callable, sentinel) stops as soon as read() returns b'' (EOF).
        for chunk in iter(lambda: stream.read(8192), b''):
            digest.update(chunk)
    return digest.hexdigest()
if __name__ == '__main__':
    # Script entry point: connect to the database and CDN storage, then upload
    # everything found under the StorySave folder.
    print('Starting processing...')
    newDB, newCursor = config.gen_connection()
    try:
        # SECURITY: the storage key should not live in source control. Set
        # BUNNY_STORAGE_API_KEY in the environment; the literal is kept only
        # as a fallback so existing setups keep working. Rotate it and remove
        # the fallback once the environment variable is in place.
        storage_key = os.environ.get(
            'BUNNY_STORAGE_API_KEY',
            '345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e',
        )
        obj_storage = Storage(storage_key, 'storysave')
        storiesPath = 'StorySave/'
        dump_instagram(storiesPath)
    finally:
        # Assumes config.gen_connection() returns DB-API 2.0 objects, which
        # expose close() — TODO confirm against config.py.
        newCursor.close()
        newDB.close()
    print("Processing completed.")