You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

105 lines
3.7 KiB
Python

from BunnyCDN.Storage import Storage
import os, uuid, config, funcs
from datetime import datetime
from PIL import Image
def _upload_facebook_file(filepath, filename, username):
    """Classify one local file via ``funcs`` and hand it to ``upload_file``."""
    mediatype = funcs.get_media_type(filename)
    post_type = funcs.determine_post_type(filepath, mediatype)
    upload_file(username=username, media_type=mediatype, filepath=filepath, post_type=post_type)


def dump_facebook(folder_path):
    """Upload every media file found in ``folder_path`` and its sub-folders.

    Two layouts are handled:

    * Files directly inside ``folder_path``: the username is the part of
      the file name before the first apostrophe.
    * Files inside a first-level sub-folder: the sub-folder name is the
      username.

    Top-level files are processed before any sub-folder. Directories
    nested inside a user sub-folder are skipped rather than being passed
    to ``upload_file`` as if they were media files.

    Args:
        folder_path: Directory to scan.
    """
    # List the directory once and split it into loose files and user folders.
    loose_files = []
    user_folders = []
    for entry in os.listdir(folder_path):
        if os.path.isdir(os.path.join(folder_path, entry)):
            user_folders.append(entry)
        else:
            loose_files.append(entry)

    for filename in loose_files:
        username = filename.split("'")[0]
        _upload_facebook_file(os.path.join(folder_path, filename), filename, username)

    for folder in user_folders:
        user_dir = os.path.join(folder_path, folder)
        for filename in os.listdir(user_dir):
            filepath = os.path.join(user_dir, filename)
            if os.path.isdir(filepath):
                # Only one level of user folders is supported.
                continue
            _upload_facebook_file(filepath, filename, folder)
def _facebook_media_id(filename):
    """Return the id embedded in an ``FB_IMG_<id>.<ext>`` name, else a random hex id."""
    if "FB_IMG" in filename:
        parts = filename.split("_")
        # Guard against names such as "FB_IMG.jpg" that carry no id segment.
        if len(parts) > 2:
            candidate = parts[2].split(".")[0]
            if candidate:
                return candidate
    return uuid.uuid4().hex


def upload_file(filepath, username, media_type='image', post_type='story', timestamp=None, user_id=None):
    """Upload one local media file to storage and record it in the ``media`` table.

    Relies on the module-level globals ``existing_files``, ``obj_storage``,
    ``newCursor`` and ``newDB`` being initialised before the call.

    Args:
        filepath: Path of the local file.
        username: Account name; used in the storage path and the DB row.
        media_type: ``'image'`` or ``'video'``; any value other than
            ``'image'`` is measured with ``funcs.get_video_dimensions``.
        post_type: ``'story'`` or ``'stories'`` is stored as ``'story'``;
            anything else is stored as ``'post'``.
        timestamp: Optional Unix timestamp of the post; defaults to now.
        user_id: Optional user id stored alongside the row.

    Returns:
        ``False`` if the file is a known duplicate (the local file is
        deleted), if the storage upload fails, or if the DB insert raises.
        ``True`` otherwise, including when ``INSERT IGNORE`` inserted no row
        or the local file could not be removed afterwards.
    """
    # Function-scope import: the object path must use "/" on every platform.
    import posixpath

    filename = os.path.basename(filepath)
    file_extension = os.path.splitext(filename)[1].lower()

    file_hash = funcs.calculate_file_hash(filepath)
    if file_hash in existing_files:
        print('Duplicate file detected. Removing...')
        os.remove(filepath)
        return False

    duration = funcs.get_video_duration(filepath) if media_type == 'video' else 0
    media_id = _facebook_media_id(filename)

    # The storage directory is derived from the file itself, not from the
    # ``post_type`` argument.
    dirtype = funcs.determine_post_type(filepath, media_type)
    # posixpath (not os.path) so the object key and URL never contain
    # backslashes when the script runs on Windows.
    server_path = posixpath.join('media', dirtype, username, f'{media_id}{file_extension}')

    try:
        obj_storage.PutFile(filepath, server_path)
    except Exception as e:
        print(f"Failed to upload {filepath} to storage: {e}")
        return False

    file_url = f"https://storysave.b-cdn.net/{server_path}"

    if media_type == 'image':
        with Image.open(filepath) as img:
            width, height = img.size
    else:
        width, height = funcs.get_video_dimensions(filepath)

    post_date = datetime.fromtimestamp(int(timestamp)) if timestamp else datetime.now()

    # Accept the singular form too: it is this function's own default and
    # was previously stored as 'post'.
    post_type = 'story' if post_type in ('story', 'stories') else 'post'

    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, date, user_id, platform, hash, filename, duration) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    values = (username, media_type, file_url, width, height, post_type, post_date, user_id, 'facebook', file_hash, filename, duration)

    try:
        newCursor.execute(query, values)
        newDB.commit()
        print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')
    except Exception as e:
        print(f"Database error: {e}")
        return False

    try:
        # Delete the local copy only when a row was actually inserted.
        if newCursor.rowcount > 0:
            os.remove(filepath)
    except Exception as e:
        print(f"Failed to remove local file {filepath}: {e}")

    return True
if __name__ == '__main__':
    print('Starting processing...')

    # These names are module-level globals that upload_file() reads.
    newDB, newCursor = config.gen_connection()

    # NOTE(review): the storage credential is hard-coded in source; consider
    # moving it into config or the environment and rotating the key.
    obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')

    # Hashes of already-recorded Facebook media. A set keeps the per-file
    # duplicate check O(1) instead of a list scan.
    newCursor.execute("SELECT hash FROM media WHERE platform='facebook' AND hash IS NOT NULL")
    existing_files = {row[0] for row in newCursor.fetchall()}

    dump_facebook('facebook/')

    print("Processing completed.")