from BunnyCDN.Storage import Storage
from PIL import Image
import hashlib
import os
import uuid
import cv2
import config
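
# StorySave uploader: walks a local dump of saved Instagram media, parses
# usernames and media IDs out of the filenames, uploads each file to BunnyCDN
# storage, and records the metadata in the `media` database table.
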
def clean_empty_folders(directory):
    """Remove empty subfolders, walking bottom-up so parents can empty out too."""
    for foldername, subfolders, filenames in os.walk(directory, topdown=False):
        for subfolder in subfolders:
            folder_path = os.path.join(foldername, subfolder)
            if not os.listdir(folder_path):
                os.rmdir(folder_path)
                print(f"Removed empty folder: {folder_path}")

def calculate_file_hash(file_path, hash_func='sha256'):
    """Return the hex digest of a file, read in 8 KiB chunks."""
    h = hashlib.new(hash_func)
    with open(file_path, 'rb') as file:
        for chunk in iter(lambda: file.read(8192), b''):
            h.update(chunk)
    return h.hexdigest()

def extract_file_info(filename):
    """Parse "username~timestamp~mediaid_userid.ext" style filenames."""
    try:
        username, timestamp, rest = filename.split("~")[:3]
        media_id, user_id = rest.split("_")
        user_id = user_id.split(".")[0]
        return username, media_id, user_id, timestamp
    except (ValueError, IndexError):
        return None, None, None, None

def extract_file_info2(filename):
    """Parse "username~mediaid_userid.ext" style filenames (no timestamp)."""
    try:
        username = filename.split("~")[0]
        elements = filename.split("~")[1].split("_")
        media_id, user_id = elements[0], elements[1].split(".")[0]
        return username, media_id, user_id
    except (ValueError, IndexError):
        return None, None, None

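# Illustrative filename shapes handled by the two parsers above
# (hypothetical example values, inferred from the parsing logic):
#   extract_file_info  -> "someuser~1700000000~123456789_987654321.jpg"
#   extract_file_info2 -> "someuser~123456789_987654321.jpg"
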
def upload_file(filepath, username, media_id=None, media_type='image', post_type='story', user_id=None, date=None):
    """Upload a single file to BunnyCDN storage and record its metadata in the database."""
    filename = os.path.basename(filepath)
    file_extension = filename.split('.')[-1]
    dirtype = 'stories' if post_type == 'story' else 'posts'
    server_path = f'users/{dirtype}/{username}/{media_id if media_id else uuid.uuid4().hex}.{file_extension}'
    file_url = f"https://storysave.b-cdn.net/{server_path}"

    file_hash = calculate_file_hash(filepath)

    # Determine media dimensions before inserting the row.
    if media_type == 'image':
        with Image.open(filepath) as img:
            width, height = img.size
    else:
        width, height = get_video_dimensions(filepath)

    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, media_id, post_type, user_id, hash, date) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    values = (username, media_type, file_url, width, height, media_id, post_type, user_id, file_hash, date)
    newCursor.execute(query, values)
    newDB.commit()
    existing_files.append(media_id)

    if newCursor.rowcount == 0:
        print(f'Row for {filename} was not inserted (media_id may already exist)')

    obj_storage.PutFile(filepath, server_path)
    os.remove(filepath)
    print(f'[{newCursor.rowcount}] {filename} {file_url}')

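# Hypothetical example call (illustrative values only):
#   upload_file('/tmp/someuser~1700000000~123_456.jpg', 'someuser',
#               media_id=123, media_type='image', user_id=456)
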
def get_video_dimensions(video_path):
    """Read a video's frame width and height via OpenCV."""
    cap = cv2.VideoCapture(video_path)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    cap.release()
    return width, height

def get_media_type(filename):
    """Classify a file as 'image' or 'video' by extension; None if unrecognized."""
    lowered = filename.lower()
    if lowered.endswith(('.jpg', '.jpeg', '.png', '.webp', '.gif')):
        return 'image'
    if lowered.endswith(('.mp4', '.mov')):
        return 'video'
    return None

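# Expected on-disk layout (inferred from dump_instagram below):
#   StorySave/<username>/<media files named as in the examples above>
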
def dump_instagram(folder_path):
    """Walk per-username subfolders of a dump directory and upload every media file."""
    for root, dirs, files in os.walk(folder_path):
        for folder in dirs:
            username = folder
            user_folder = os.path.join(root, folder)
            for filename in os.listdir(user_folder):
                if "~" not in filename:
                    continue

                username, media_id, user_id, timestamp = extract_file_info(filename)
                if None in [username, media_id, user_id, timestamp]:
                    # Fall back to the timestamp-less filename format.
                    username, media_id, user_id = extract_file_info2(filename)
                    if None in [username, media_id, user_id]:
                        print(f"Failed to extract info from {filename}")
                        continue

                media_id = int(media_id) if media_id else None

                # Skip (and delete) files whose media_id is already in the database.
                if media_id in existing_files:
                    print(f'Duplicate, {filename}')
                    os.remove(os.path.join(user_folder, filename))
                    continue

                filepath = os.path.join(user_folder, filename)
                mediatype = get_media_type(filename)
                upload_file(username=username, media_type=mediatype, filepath=filepath, media_id=media_id, user_id=user_id)

if __name__ == '__main__':
    print('Starting processing...')

    newDB, newCursor = config.gen_connection()
    obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')

    # Cache the media_ids already stored so duplicates can be skipped.
    newCursor.execute("SELECT media_id FROM media")
    existing_files = [image[0] for image in newCursor.fetchall()]

    dump_instagram('StorySave/')
    print("Processing completed.")