cleanup structure

main
oscar 11 months ago
parent 2e7b6a7ced
commit 96ebf0daac

BIN
old/.DS_Store vendored

Binary file not shown.

@ -1,141 +0,0 @@
import requests, hashlib, os, json

access_key = "471cd2e1-a943-4c61-ae69ddc6c2c2-c36d-4737"
video_library_id = 125094

def create_video(title):
    url = f"https://video.bunnycdn.com/library/{video_library_id}/videos"
    payload = json.dumps({"title": title})  # json.dumps keeps the body valid JSON even if the title contains quotes
    headers = {
        "accept": "application/json",
        "content-type": "application/*+json",
        "AccessKey": access_key
    }
    response = requests.post(url, data=payload, headers=headers)
    return response

def generate_signature(library_id, api_key, expiration_time, video_id):
    signature = hashlib.sha256((library_id + api_key + str(expiration_time) + video_id).encode()).hexdigest()
    return signature
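# Hedged usage sketch (not part of the original file): Bunny Stream's presigned
# TUS uploads are documented to take this sha256(library_id + api_key +
# expiration + video_id) digest in an AuthorizationSignature header, roughly:
#
#   expires = int(time.time()) + 3600
#   headers = {
#       "AuthorizationSignature": generate_signature(str(video_library_id), access_key, expires, video_id),
#       "AuthorizationExpire": str(expires),
#       "VideoId": video_id,
#       "LibraryId": str(video_library_id),
#   }
#
# Header names are from Bunny's public docs; verify them before relying on this.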
def upload_video_process(file_path, video_id):
    url = f"https://video.bunnycdn.com/library/{video_library_id}/videos/{video_id}"
    headers = {"accept": "application/json", "AccessKey": access_key}
    with open(file_path, "rb") as file:
        file_data = file.read()
    response = requests.put(url, headers=headers, data=file_data)
    return response.status_code

def upload_video(file_path, title=None):
    video_item = create_video(title)
    if video_item.status_code != 200:
        return False
    video_id = video_item.json()['guid']
    upload_video_process(file_path, video_id)
    return {
        "embed_link": f"https://vz-58ca89f1-986.b-cdn.net/{video_id}/playlist.m3u8",
        "animated_thumbnail": f"https://vz-58ca89f1-986.b-cdn.net/{video_id}/preview.webp",
        "default_thumbnail": f"https://vz-58ca89f1-986.b-cdn.net/{video_id}/thumbnail.jpg",
    }

def upload_video_recurbate(videoInfo):
    title = f"{videoInfo['username']} {videoInfo['platform']}"
    video_item = create_video(title)
    if video_item.status_code != 200:
        return False
    video_id = video_item.json()['guid']
    upload_video_process(videoInfo['filename'], video_id)
    videoInfo["embed_link"] = f"https://vz-58ca89f1-986.b-cdn.net/{video_id}/playlist.m3u8"
    videoInfo["animated_thumbnail"] = f"https://vz-58ca89f1-986.b-cdn.net/{video_id}/preview.webp"
    videoInfo["default_thumbnail"] = f"https://vz-58ca89f1-986.b-cdn.net/{video_id}/thumbnail.jpg"
    return True

def delete_video(video_id):
    # accepts either a bare guid or a full embed link
    video_id = video_id.replace('https://vz-58ca89f1-986.b-cdn.net/', '').replace('/playlist.m3u8', '')
    url = f"https://video.bunnycdn.com/library/{video_library_id}/videos/{video_id}"
    headers = {"accept": "application/json", "AccessKey": access_key}
    response = requests.delete(url, headers=headers)
    return response.status_code

def list_videos():
    url = f"https://video.bunnycdn.com/library/{video_library_id}/videos"
    params = {
        "page": 1,
        "itemsPerPage": 1000,
        "orderBy": "date"
    }
    headers = {"accept": "application/json", "AccessKey": access_key}
    videos = []
    while True:
        response = requests.get(url, headers=headers, params=params)
        data = response.json()
        videos += data['items']
        if len(videos) == data['totalItems']:
            return videos
        params['page'] += 1

def get_heatmap(video_id):
    url = "https://video.bunnycdn.com/library/libraryId/videos/videoId/heatmap"
    url = url.replace('libraryId', str(video_library_id)).replace('videoId', str(video_id))
    headers = {"accept": "application/json", "AccessKey": access_key}
    response = requests.get(url, headers=headers).json()
    return response

def get_video(video_id):
    url = "https://video.bunnycdn.com/library/libraryId/videos/videoId"
    url = url.replace('libraryId', str(video_library_id)).replace('videoId', str(video_id))
    headers = {"accept": "application/json", "AccessKey": access_key}
    response = requests.get(url, headers=headers).json()
    return response

def download_video(video_id, directory):
    download_url = f'https://storage.bunnycdn.com/vz-dd4ea005-7c2/{video_id}/'
    params = {'download': '', 'accessKey': '5b1766f7-c1ab-463f-b05cce6f1f2e-1190-4c09'}
    video_response = requests.get(download_url, params=params)
    if video_response.status_code == 200:
        filename = f'{video_id}.mp4'  # assumed fallback name when no Content-Disposition header is returned
        content_disposition = video_response.headers.get('Content-Disposition')
        if content_disposition:
            served_name = content_disposition.split('filename=')[1].strip('"')
            ext = served_name.split('.')[-1]
            filename = f'{video_id}.{ext}'
        filePath = os.path.join(directory, filename)
        with open(filePath, 'wb') as video_file:
            video_file.write(video_response.content)
        print(f'Video downloaded successfully as {filePath}')
    else:
        print('Failed to download video', video_response.status_code, video_response.text)

@ -1,137 +0,0 @@
from BunnyCDN.Storage import Storage
from PIL import Image
import os, uuid, cv2, config
import hashlib

def clean_empty_folders(directory):
    for foldername, subfolders, filenames in os.walk(directory, topdown=False):
        for subfolder in subfolders:
            folder_path = os.path.join(foldername, subfolder)
            if not os.listdir(folder_path):
                os.rmdir(folder_path)
                print(f"Removed empty folder: {folder_path}")

def calculate_file_hash(file_path, hash_func='sha256'):
    h = hashlib.new(hash_func)
    with open(file_path, 'rb') as file:
        while chunk := file.read(8192):
            h.update(chunk)
    return h.hexdigest()

def extract_file_info(filename):
    # expected shape: "username~timestamp~mediaid_userid.ext"
    try:
        username = filename.split("~")[0]
        timestamp = filename.split("~")[1]
        user_id = filename.split("~")[2]
        media_id, some2 = user_id.split("_")
        user_id = some2.split(".")[0]
        return username, media_id, user_id, timestamp
    except:
        return None, None, None, None

def extract_file_info2(filename):
    # fallback shape: "username~mediaid_userid.ext"
    try:
        username = filename.split("~")[0]
        elements = filename.split("~")[1].split("_")
        media_id, user_id = elements[0], elements[1].split(".")[0]
        return username, media_id, user_id
    except:
        return None, None, None

def upload_file(filepath, username, media_id=None, media_type='image', post_type='story', user_id=None, date=None):
    filename = os.path.basename(filepath)
    file_extension = filename.split('.')[-1]
    dirtype = 'stories' if post_type == 'story' else 'posts'
    server_path = f'users/{dirtype}/{username}/{media_id if media_id else uuid.uuid4().hex}.{file_extension}'
    file_url = f"https://storysave.b-cdn.net/{server_path}"
    fileHash = calculate_file_hash(filepath)
    if media_type == 'image':
        with Image.open(filepath) as img:
            width, height = img.size
    else:
        width, height = get_video_dimensions(filepath)
    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, media_id, post_type, user_id, hash, date) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    values = (username, media_type, file_url, width, height, media_id, post_type, user_id, fileHash, date)
    newCursor.execute(query, values)
    newDB.commit()
    existing_files.append(media_id)
    if newCursor.rowcount == 0:
        print('What the fuck just happened?')
    obj_storage.PutFile(filepath, server_path)
    os.remove(filepath)
    print(f'[{newCursor.rowcount}]{os.path.basename(filepath)} {file_url}')

def get_video_dimensions(video_path):
    cap = cv2.VideoCapture(video_path)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    cap.release()
    return width, height

def get_media_type(filename):
    if filename.lower().endswith((".jpg", ".webp", ".jpeg", ".png", ".gif")):
        return 'image'
    if filename.lower().endswith((".mp4", ".mov")):
        return 'video'

def dump_instagram(folder_path):
    for root, dirs, files in os.walk(folder_path):
        for folder in dirs:
            username = folder
            folder_path = os.path.join(root, folder)
            for filename in os.listdir(folder_path):
                if "~" not in filename:
                    continue
                username, media_id, user_id, timestamp = extract_file_info(filename)
                if None in [username, media_id, user_id, timestamp]:
                    username, media_id, user_id = extract_file_info2(filename)
                    if None in [username, media_id, user_id]:
                        print(f"Failed to extract info from {filename}")
                        continue
                media_id = int(media_id) if media_id else None
                if media_id in existing_files:
                    print(f'Duplicate, {filename}')
                    os.remove(os.path.join(folder_path, filename))
                    continue
                filepath = os.path.join(folder_path, filename)
                mediatype = get_media_type(filename)
                upload_file(username=username, media_type=mediatype, filepath=filepath, media_id=media_id, user_id=user_id)

if __name__ == '__main__':
    print('Starting processing...')
    newDB, newCursor = config.gen_connection()
    obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
    newCursor.execute("SELECT media_id FROM media")
    existing_files = [image[0] for image in newCursor.fetchall()]
    dump_instagram('StorySave/')
    print("Processing completed.")

@ -1,110 +0,0 @@
from BunnyCDN.Storage import Storage
from PIL import Image
import os, uuid, cv2, config

def scan_dupes(folder_path):
    for root, dirs, files in os.walk(folder_path):
        for folder in dirs:
            folder_path = os.path.join(root, folder)
            for filename in os.listdir(folder_path):
                media_id = filename.replace('.mp4', '').replace('.jpg', '')
                filepath = os.path.join(folder_path, filename)
                if media_id:
                    try:
                        if int(media_id) in existing_files:
                            print('Duplicate')
                            os.remove(filepath)
                    except:
                        print(f'Error: {filepath}')

def clean_empty_folders(directory):
    for foldername, subfolders, filenames in os.walk(directory, topdown=False):
        for subfolder in subfolders:
            folder_path = os.path.join(foldername, subfolder)
            if not os.listdir(folder_path):
                os.rmdir(folder_path)
                print(f"Removed empty folder: {folder_path}")

def upload_file(filepath, username, media_id=None, media_type='image', post_type='story'):
    filename = os.path.basename(filepath)
    file_extension = filename.split('.')[-1]
    try:
        if int(media_id) in existing_files:
            print('Duplicate')
            os.remove(filepath)
            return True
    except:
        # fall back to a random id when the filename does not parse as a numeric media id
        media_id = uuid.uuid4().hex
    dirtype = 'stories' if post_type == 'story' else 'posts'
    server_path = f'users/{dirtype}/{username}/{media_id}.{file_extension}'
    obj_storage.PutFile(filepath, server_path)
    file_url = f"https://storysave.b-cdn.net/{server_path}"
    if media_type == 'image':
        with Image.open(filepath) as img:
            width, height = img.size
    else:
        width, height = get_video_dimensions(filepath)
    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, media_id, post_type) VALUES (%s, %s, %s, %s, %s, %s, %s)"
    values = (username, media_type, file_url, width, height, media_id, post_type)
    newCursor.execute(query, values)
    newDB.commit()
    os.remove(filepath)
    print(f'[{newCursor.rowcount}]{os.path.basename(filepath)} {file_url}')

def get_video_dimensions(video_path):
    cap = cv2.VideoCapture(video_path)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    cap.release()
    return width, height

def get_media_type(filename):
    if filename.lower().endswith((".jpg", ".webp", ".jpeg", ".png", ".gif")):
        return 'image'
    if filename.lower().endswith((".mp4", ".mov")):
        return 'video'

def dump_instagram(folder_path):
    for root, dirs, files in os.walk(folder_path):
        for folder in dirs:
            username = folder
            folder_path = os.path.join(root, folder)
            post_type = 'story' if folder_path.split('\\')[0] == 'stories' else 'post'
            for filename in os.listdir(folder_path):
                media_id = filename.replace('.mp4', '').replace('.jpg', '')
                filepath = os.path.join(folder_path, filename)
                mediatype = get_media_type(filename)
                upload_file(username=username, media_type=mediatype, filepath=filepath, media_id=media_id, post_type=post_type)

if __name__ == '__main__':
    print('Starting processing...')
    newDB, newCursor = config.gen_connection()
    obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
    newCursor.execute("SELECT media_id FROM media")
    existing_files = [image[0] for image in newCursor.fetchall()]
    dump_instagram('media/posts')
    dump_instagram('media/stories')
    scan_dupes('media/posts')
    scan_dupes('media/stories')
    clean_empty_folders('media/posts')
    clean_empty_folders('media/stories')
    print("Processing completed.")

@ -1,110 +0,0 @@
from BunnyCDN.Storage import Storage
import os, uuid, config, funcs, cv2
from datetime import datetime
from PIL import Image

def dump_facebook(folder_path):
    # loose files at the top level: the username is everything before the apostrophe
    for filename in os.listdir(folder_path):
        if os.path.isdir(os.path.join(folder_path, filename)):
            continue
        username = filename.split("'")[0]
        filepath = os.path.join(folder_path, filename)
        mediatype = funcs.get_media_type(filename)
        post_type = funcs.determine_post_type(filepath, mediatype)
        upload_file(username=username, media_type=mediatype, filepath=filepath, post_type=post_type)
    # subfolders: the folder name is the username
    for folder in os.listdir(folder_path):
        if os.path.isdir(os.path.join(folder_path, folder)):
            username = folder
            for filename in os.listdir(os.path.join(folder_path, folder)):
                filepath = os.path.join(folder_path, folder, filename)
                mediatype = funcs.get_media_type(filename)
                post_type = funcs.determine_post_type(filepath, mediatype)
                upload_file(username=username, media_type=mediatype, filepath=filepath, post_type=post_type)

def upload_file(filepath, username, media_type='image', post_type='story', timestamp=None, user_id=None):
    filename = os.path.basename(filepath)
    file_extension = os.path.splitext(filename)[1].lower()
    file_hash = funcs.calculate_file_hash(filepath)
    if file_hash in existing_files:
        print('Duplicate file detected. Removing...')
        os.remove(filepath)
        return False
    duration = funcs.get_video_duration(filepath) if media_type == 'video' else 0
    if "FB_IMG" in filename:
        media_id = filename.split("_")[2].split(".")[0]
    else:
        media_id = uuid.uuid4().hex
    dirtype = funcs.determine_post_type(filepath, media_type)
    server_path = os.path.join('media', dirtype, username, f'{media_id}{file_extension}')
    obj_storage.PutFile(filepath, server_path)
    file_url = f"https://storysave.b-cdn.net/{server_path}"
    if media_type == 'image':
        with Image.open(filepath) as img:
            width, height = img.size
    else:
        width, height = funcs.get_video_dimensions(filepath)
    thumbnail_url = None
    if media_type == 'video':
        # grab the first frame as the thumbnail
        thumbPath = f'temp/{media_id}.jpg'
        cap = cv2.VideoCapture(filepath)
        ret, frame = cap.read()
        cv2.imwrite(thumbPath, frame)
        cap.release()
        obj_storage.PutFile(thumbPath, f'thumbnails/{media_id}.jpg')
        thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{media_id}.jpg"
    post_date = datetime.fromtimestamp(int(timestamp)) if timestamp else datetime.now()
    if post_type == 'stories':
        post_type = 'story'
    else:
        post_type = 'post'
    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, date, user_id, platform, hash, filename, duration, thumbnail) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    values = (username, media_type, file_url, width, height, post_type, post_date, user_id, 'facebook', file_hash, filename, duration, thumbnail_url)
    try:
        newCursor.execute(query, values)
        newDB.commit()
        print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')
    except Exception as e:
        print(f"Database error: {e}")
        return False
    try:
        if newCursor.rowcount > 0:
            os.remove(filepath)
    except Exception as e:
        print(f"Failed to remove local file {filepath}: {e}")
    return True

if __name__ == '__main__':
    print('Starting processing...')
    newDB, newCursor = config.gen_connection()
    obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
    newCursor.execute("SELECT hash FROM media WHERE platform='facebook' AND hash IS NOT NULL")
    existing_files = [image[0] for image in newCursor.fetchall()]
    dump_facebook('facebook/')
    print("Processing completed.")

@ -1,82 +0,0 @@
from BunnyCDN.Storage import Storage
from datetime import datetime
import os, config, funcs
from PIL import Image

def dump_instagram(folder_path):
    for filename in os.listdir(folder_path):
        parts = filename.split('_')
        try:
            username = '_'.join(parts[:-2])         # join all parts except the last two
            timestamp = int(parts[-2])              # second to last part is the timestamp
            user_id = int(parts[-1].split('.')[0])  # last part before the extension is the user id
        except Exception as e:
            print(f"Invalid filename: {filename}. Error: {e}")
            continue
        filepath = os.path.join(folder_path, filename)
        mediatype = funcs.get_media_type(filename)
        post_type = funcs.determine_post_type(filepath, mediatype)
        UploadMedia(username=username, media_type=mediatype, filepath=filepath, post_type=post_type, timestamp=timestamp, user_id=user_id)

def UploadMedia(filepath, username, media_type='image', post_type='story', timestamp=None, user_id=None):
    if 'tero' in username:  # leftover debugging hook; has no effect
        pass
    filename = os.path.basename(filepath)
    file_extension = os.path.splitext(filename)[1].lower()
    file_hash = funcs.calculate_file_hash(filepath)
    duration = funcs.get_video_duration(filepath) if media_type == 'video' else 0
    post_date = datetime.fromtimestamp(int(timestamp)) if timestamp else datetime.now()
    dirtype = funcs.determine_post_type(filepath, media_type)
    server_path = f'media/{dirtype}/{username}/{file_hash}{file_extension}'
    file_url = f"https://storysave.b-cdn.net/{server_path}"
    if file_hash in existing_files:
        print('Duplicate file detected. Removing...')
        os.remove(filepath)
        return True
    obj_storage.PutFile(filepath, server_path)
    if media_type == 'image':
        with Image.open(filepath) as img:
            width, height = img.size
    else:
        width, height = funcs.get_video_dimensions(filepath)
    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, date, user_id, hash, filename, duration) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    values = (username, media_type, file_url, width, height, post_type, post_date, user_id, file_hash, filename, duration)
    newCursor.execute(query, values)
    newDB.commit()
    print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')
    os.remove(filepath)
    return True

if __name__ == '__main__':
    print('Starting processing...')
    newDB, newCursor = config.gen_connection()
    obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
    newCursor.execute("SELECT hash FROM media WHERE platform='instagram' AND hash IS NOT NULL")
    existing_files = [image[0] for image in newCursor.fetchall()]
    dump_instagram('storysaver/missingdata/')
    print("Processing completed.")

@ -1,67 +0,0 @@
from BunnyCDN.Storage import Storage
import os, uuid, config, funcs
from datetime import datetime
from PIL import Image

def dump_facebook(folder_path):
    # despite the name, this walks the tiktok/ dump below; folder name is the username
    for folder in os.listdir(folder_path):
        if os.path.isdir(os.path.join(folder_path, folder)):
            username = folder
            for filename in os.listdir(os.path.join(folder_path, folder)):
                filepath = os.path.join(folder_path, folder, filename)
                upload_file(username=username, filepath=filepath)

def upload_file(filepath, username):
    filename = os.path.basename(filepath)
    media_id = filename.split('.')[0]
    file_extension = os.path.splitext(filename)[1].lower()
    media_type = funcs.get_media_type(filename)
    file_hash = funcs.calculate_file_hash(filepath)
    duration = funcs.get_video_duration(filepath) if media_type == 'video' else 0
    width, height = funcs.get_video_dimensions(filepath) if media_type == 'video' else Image.open(filepath).size
    # check for duplicates before uploading, so duplicate files never reach storage
    if file_hash in existing_files:
        print('Duplicate file detected. Removing...')
        os.remove(filepath)
        return False
    dirtype = funcs.determine_post_type(filepath, media_type)
    server_path = os.path.join('media', dirtype, username, f'{media_id}{file_extension}')
    obj_storage.PutFile(filepath, server_path)
    file_url = f"https://storysave.b-cdn.net/{server_path}"
    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, platform, hash, filename, duration, media_id) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    values = (username, media_type, file_url, width, height, 'tiktok', file_hash, filename, duration, media_id)
    newCursor.execute(query, values)
    newDB.commit()
    print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')
    if newCursor.rowcount > 0:
        os.remove(filepath)
    return True

if __name__ == '__main__':
    print('Starting processing...')
    newDB, newCursor = config.gen_connection()
    obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
    newCursor.execute("SELECT hash FROM media WHERE platform='tiktok' AND hash IS NOT NULL")
    existing_files = [image[0] for image in newCursor.fetchall()]
    dump_facebook('tiktok/')
    print("Processing completed.")

@ -1,32 +0,0 @@
import os, funcs
from funcs import generate_phash

def get_username(image, ready_images):
    for ready_image in ready_images:
        if os.path.basename(image) in ready_image:
            ready_image = ready_image.replace('\\', '/')
            return ready_image.split('/')[1]
    return None

ready_images = funcs.get_files('ready_to_upload')
ready_images = [image for image in ready_images if not image.endswith('.mp4')]
sorted_images = funcs.get_files('sorted')
sorted_images = [image for image in sorted_images if not image.endswith('.mp4')]

os.makedirs('already_processed', exist_ok=True)

for image in sorted_images:
    image = image.replace('\\', '/')
    username = image.split('/')[1]
    filename = os.path.basename(image)
    for ready_image in ready_images:
        if filename in ready_image:
            username = get_username(image, ready_images)
            newpath = ready_image.replace('ready_to_upload', 'already_processed')
            os.makedirs(os.path.dirname(newpath), exist_ok=True)
            print(f'Moving {image} which is a match for {ready_image} to already_processed')
            os.rename(image, newpath)
            print(f'Moved {ready_image} to already_processed')
            break

@ -1,56 +0,0 @@
from BunnyCDN.Storage import Storage
import os, config, requests
from moviepy.editor import VideoFileClip

def get_media_type(filename):
    image_extensions = {".jpg", ".jpeg", ".png", ".gif", ".webp"}
    video_extensions = {".mp4", ".mov"}
    extension = os.path.splitext(filename.lower())[1]
    if extension in image_extensions:
        return 'image'
    elif extension in video_extensions:
        return 'video'
    else:
        return 'unknown'

def determine_post_type(media_type):
    # Assuming the post type is directly based on media type.
    return media_type

def get_video_dimensions(filepath):
    with VideoFileClip(filepath) as clip:
        width, height = clip.size
    return width, height

def download_file(url):
    local_filename = url.split('/')[-1]
    # Note: stream=True to avoid loading the whole file into memory
    with requests.get(url, stream=True) as r:
        r.raise_for_status()
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
    return local_filename

if __name__ == '__main__':
    newDB, newCursor = config.gen_connection()
    obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
    posts = open('fucked', 'r')
    for item in posts:
        username, url = item.strip().split('~')
        media_id = url.split('/')[-1].split('.')[0]
        media_type = get_media_type(url)
        query = "INSERT IGNORE INTO media (username, media_type, platform, media_url) VALUES (%s, %s, %s, %s)"
        values = (username, media_type, 'facebook', url)
        try:
            newCursor.execute(query, values)
            newDB.commit()
            print(f'[{newCursor.rowcount}] records updated. {url}')
        except Exception as e:
            print(f"Database error: {e}")
    posts.close()

@ -1,40 +0,0 @@
import config, os, json
from PIL import Image
import imagehash

def find_file(filename, directory):
    filename = filename.lower().split('.')[0]
    for root, dirs, files in os.walk(directory):
        for file in files:
            if filename in file:
                return os.path.join(root, file)
    return None

def generate_phash(image_path):
    image = Image.open(image_path)
    return str(imagehash.phash(image))

count = 0
cacheDir = 'sorted'
dataPath = 'pins.json'
os.makedirs(cacheDir, exist_ok=True)
medias = json.load(open(dataPath))

for item in medias:
    count += 1
    filepath = item['filepath']
    if os.path.exists(filepath):
        continue
    newfilepath = find_file(os.path.basename(filepath), cacheDir)
    if newfilepath:
        print(f"Found file {newfilepath} for {filepath}")
        item['filepath'] = newfilepath

with open(dataPath, 'w') as f:
    json.dump(medias, f)

@ -1,28 +0,0 @@
import os, json
from funcs import generate_phash

count = 0
cacheDir = '_sort'
dataPath = 'pins.json'
os.makedirs(cacheDir, exist_ok=True)
medias = json.load(open(dataPath))

for item in medias:
    count += 1
    if item['type'] == 'image':
        filepath = item['filepath']
        if 'phash' in item:
            print(f"Skipping {count}/{len(medias)}: already processed.")
            continue
        if not os.path.exists(filepath):
            print(f"File {filepath} does not exist, skipping.")
            continue
        phash = generate_phash(filepath)
        item['phash'] = phash
        print(f"Processed {count}/{len(medias)}: with pHash {phash}")

with open(dataPath, 'w') as f:
    json.dump(medias, f)

@ -1,94 +0,0 @@
from BunnyCDN.Storage import Storage
from moviepy.editor import VideoFileClip
import config
import hashlib
import requests
import os

def file_hash_from_url(url, hash_algo='sha256'):
    h = hashlib.new(hash_algo)
    response = requests.get(url, stream=True)
    if response.status_code == 200:
        for chunk in response.iter_content(8192):
            h.update(chunk)
        return h.hexdigest()
    else:
        raise Exception(f"Failed to download file: Status code {response.status_code}")

def get_video_duration(file_path):
    """
    Returns the duration of the video file in seconds.

    :param file_path: Path to the video file
    :return: Duration in seconds
    """
    try:
        with VideoFileClip(file_path) as video:
            return video.duration
    except:
        return 0

def file_hash(filename, hash_algo='sha256'):
    """
    Compute the hash of a file.

    :param filename: Path to the file.
    :param hash_algo: Hashing algorithm to use (e.g., 'sha256', 'md5').
    :return: Hexadecimal hash string.
    """
    h = hashlib.new(hash_algo)
    # Open the file in binary mode and read in chunks
    with open(filename, 'rb') as file:
        while chunk := file.read(8192):
            h.update(chunk)
    return h.hexdigest()

# the hashes of the images differ because of the CDN optimizer
#obj_storage = Storage('577cb82d-8176-4ccf-935ce0a574bf-fe4c-4012', 'altpins')
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
db, cursor = config.gen_connection()
cursor.execute("SELECT id, media_id, media_url FROM media WHERE duration = 0 AND media_type = 'video' AND status != 'deleted';")
results = cursor.fetchall()
count = 0
print(f"Found {len(results)} files to process.")
cacheDir = 'cache'

for result in results:
    count += 1
    videoID = result[0]
    mediaID = result[1]
    mediaURL = result[2]
    extension = mediaURL.split('.')[-1]
    serverPath = result[2].replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
    localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))
    if os.path.exists(localFilePath):
        print(f"File already exists: {localFilePath}")
    else:
        obj_storage.DownloadFile(storage_path=serverPath, download_path=cacheDir)
    duration = get_video_duration(localFilePath)
    if duration == 0:
        print(f"Failed to get duration for {localFilePath}")
        continue
    if duration < 1:
        duration = 1
    cursor.execute("UPDATE media SET duration = %s WHERE id = %s;", (duration, result[0]))
    db.commit()
    print(f"[{count}/{len(results)}] {result[1]}: {duration}, {cursor.rowcount}")

@ -1,47 +0,0 @@
from BunnyCDN.Storage import Storage
import config
import hashlib
import os

def file_hash(filename, hash_algo='sha256'):
    """
    Compute the hash of a file.

    :param filename: Path to the file.
    :param hash_algo: Hashing algorithm to use (e.g., 'sha256', 'md5').
    :return: Hexadecimal hash string.
    """
    h = hashlib.new(hash_algo)
    with open(filename, 'rb') as file:
        while chunk := file.read(8192):
            h.update(chunk)
    return h.hexdigest()

#obj_storage = Storage('577cb82d-8176-4ccf-935ce0a574bf-fe4c-4012', 'altpins')
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
db, cursor = config.gen_connection()
cursor.execute("SELECT id, media_id, media_url FROM media WHERE hash IS NULL;")
results = cursor.fetchall()
count = 0
print(f"Found {len(results)} files to process.")

for result in results:
    count += 1
    serverPath = result[2].replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
    localFilePath = os.path.join(os.getcwd(), 'temp', os.path.basename(serverPath))
    if not os.path.exists(localFilePath):
        obj_storage.DownloadFile(storage_path=serverPath, download_path=os.path.join(os.getcwd(), 'temp'))
    filehash = file_hash(localFilePath)
    cursor.execute("UPDATE media SET hash = %s WHERE id = %s;", (filehash, result[0]))
    db.commit()
    print(f"[{count}/{len(results)}] {result[1]}: {filehash}, {cursor.rowcount}")

@ -1,47 +0,0 @@
from BunnyCDN.Storage import Storage
import config, os, funcs
from PIL import Image

# the hashes of the images differ because of the CDN optimizer
#obj_storage = Storage('577cb82d-8176-4ccf-935ce0a574bf-fe4c-4012', 'altpins')
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
db, cursor = config.gen_connection()
cursor.execute("SELECT id, media_id, media_url FROM media WHERE width = 0;")
results = cursor.fetchall()
count = 0
print(f"Found {len(results)} files to process.")
cacheDir = 'cache'

for result in results:
    count += 1
    videoID = result[0]
    mediaID = result[1]
    mediaURL = result[2]
    extension = mediaURL.split('.')[-1]
    serverPath = result[2].replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
    localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))
    if os.path.exists(localFilePath):
        print(f"File already exists: {localFilePath}")
    else:
        obj_storage.DownloadFile(storage_path=serverPath, download_path=cacheDir)
    mediaType = funcs.get_media_type(localFilePath)
    if mediaType == 'image':
        with Image.open(localFilePath) as img:
            width, height = img.size
    elif mediaType == 'video':
        width, height = funcs.get_video_dimensions(localFilePath)
    cursor.execute("UPDATE media SET width = %s, height=%s WHERE id = %s;", (width, height, videoID))
    db.commit()
    print(f"[{count}/{len(results)}] width: {width}, height: {height} {cursor.rowcount}")

@ -1,63 +0,0 @@
from BunnyCDN.Storage import Storage
import config, os, cv2
from concurrent.futures import ThreadPoolExecutor

# this script takes a screenshot of the first frame of each video and uploads it as a thumbnail to BunnyCDN
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
db, cursor = config.gen_connection()
cursor.execute("SELECT id, media_id, media_url FROM media WHERE media_type = 'video' AND thumbnail IS NULL and status = 'public';")
results = cursor.fetchall()
count = 0
print(f"Found {len(results)} files to process.")
cacheDir = 'cache'

def DownloadFile(serverPath, cacheDir):
    localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))
    if os.path.exists(localFilePath):
        print(f"File already exists: {localFilePath}")
        return localFilePath
    obj_storage.DownloadFile(storage_path=serverPath, download_path=cacheDir)
    print(f"Downloaded {serverPath} to {localFilePath}")
    return localFilePath

def ImportMedias():
    # pre-warms the local cache in parallel; never actually called in this script
    with ThreadPoolExecutor(max_workers=10) as executor:
        for video in results:
            serverPath = video[2].replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
            executor.submit(DownloadFile, serverPath, cacheDir)

for result in results:
    count += 1
    itemID = result[0]
    mediaID = result[1]
    mediaURL = result[2]
    extension = mediaURL.split('.')[-1]
    serverPath = result[2].replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
    localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))
    filePath = DownloadFile(serverPath, cacheDir)
    cap = cv2.VideoCapture(localFilePath)
    ret, frame = cap.read()
    cv2.imwrite('thumbnail.jpg', frame)
    cap.release()
    thumbnailURL = f"https://storysave.b-cdn.net/thumbnails/{itemID}.jpg"
    obj_storage.PutFile('thumbnail.jpg', f'thumbnails/{itemID}.jpg')
    cursor.execute("UPDATE media SET thumbnail = %s WHERE id = %s;", (thumbnailURL, itemID))
    db.commit()
    print(f"[{count}/{len(results)}] thumbnail: {thumbnailURL} {cursor.rowcount}")

@ -1,36 +0,0 @@
import config
from funcs import generate_phash

count = 0
storage = config.get_storage()
db, cursor = config.gen_connection()
generate_for = 'media_url'
media_type = 'image'
cursor.execute(f"SELECT id, {generate_for} FROM media WHERE media_type = %s AND phash IS NULL;", [media_type])
medias = cursor.fetchall()

for item in medias:
    count += 1
    itemID = item[0]
    media_url = item[1]
    server_path = media_url.replace('https://storysave.b-cdn.net/', '').replace('\\', '/')
    filepath = storage.DownloadFile(server_path, 'temp')
    if not filepath:
        print(f"Error downloading {server_path}")
        continue
    phash = generate_phash(filepath)
    if not phash:
        print(f"Error generating pHash for {filepath}")
        continue
    cursor.execute("UPDATE media SET phash = %s WHERE id = %s", [phash, itemID])
    db.commit()
    print(f"[{cursor.rowcount}] Processed {count}/{len(medias)}: with pHash {phash}")

@ -1,39 +0,0 @@
import config, os
from funcs import generate_phash

db, cursor = config.gen_connection()
cursor.execute("SELECT id, media_id, media_url FROM media WHERE media_type = 'image' AND phash = 0;")
results = cursor.fetchall()
count = 0
cacheDir = 'cache'
os.makedirs(cacheDir, exist_ok=True)
print(f"Found {len(results)} files to process.")

for result in results:
    count += 1
    itemID = result[0]
    mediaID = result[1]
    if not mediaID:
        print("Media ID is null, skipping.")
        continue
    mediaURL = result[2]
    serverPath = mediaURL.replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
    localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))
    if not os.path.exists(localFilePath):
        print(f"File {localFilePath} does not exist, skipping.")
        continue
    phash = generate_phash(localFilePath)
    if not phash:
        print(f"Error generating pHash for {localFilePath}, skipping.")
        continue
    cursor.execute("UPDATE media SET phash = %s WHERE id = %s", (phash, itemID))
    db.commit()
    print(f"Processed {count}/{len(results)}: {mediaID} with pHash {phash}")

@ -1,74 +0,0 @@
import config, os, threading, queue
from funcs import generate_phash

# Initialize database connection
db, cursor = config.gen_connection()

# Query the media table for unprocessed images
cursor.execute("SELECT id, media_id, media_url FROM media WHERE media_type = 'image' AND phash = '0';")
results = cursor.fetchall()

# Setup cache directory
cacheDir = 'cache'
os.makedirs(cacheDir, exist_ok=True)
print(f"Found {len(results)} files to process.")

# Thread-safe queue for processed media: hashing runs on the main thread while
# a single writer thread drains the queue, so only one thread issues UPDATEs
processed_media_queue = queue.Queue()

def process_media():
    """Thread function to update database with processed pHash values."""
    while True:
        try:
            item = processed_media_queue.get(timeout=10)  # Timeout prevents infinite blocking
            if item is None:  # Sentinel value to exit the loop
                break
            itemID, phash = item
            cursor.execute("UPDATE media SET phash = %s WHERE id = %s", (phash, itemID))
            db.commit()
            print(f"Updated database for ID {itemID} with pHash {phash}.")
        except queue.Empty:
            continue

# Start the database update thread
update_thread = threading.Thread(target=process_media, daemon=True)
update_thread.start()

# Main processing loop for generating pHash
count = 0
for result in results:
    count += 1
    itemID = result[0]
    mediaID = result[1]
    if not mediaID:
        print("Media ID is null, skipping.")
        continue
    mediaURL = result[2]
    serverPath = mediaURL.replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
    localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))
    if not os.path.exists(localFilePath):
        print(f"File {localFilePath} does not exist, skipping.")
        continue
    phash = generate_phash(localFilePath)
    if not phash:
        print(f"Error generating pHash for {localFilePath}, skipping.")
        continue
    # Add the processed media to the queue
    processed_media_queue.put((itemID, phash))
    print(f"Processed {count}/{len(results)}: {mediaID} with pHash {phash}")

# Signal the update thread to stop
processed_media_queue.put(None)
# Wait for the update thread to finish
update_thread.join()
print("Processing completed.")

@ -1,51 +0,0 @@
import os
import config
import cv2
from funcs import generate_phash
from BunnyCDN.Storage import Storage

db, cursor = config.gen_connection()
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
cursor.execute("SELECT id, media_id, media_url FROM media WHERE media_type = 'video' AND phash = '0';")
results = cursor.fetchall()
count = 0
cacheDir = 'cache'
os.makedirs(cacheDir, exist_ok=True)
os.makedirs('temp', exist_ok=True)  # added: thumbnails below are written to temp/, which may not exist yet
print(f"Found {len(results)} files to process.")

for result in results:
    count += 1
    itemID = result[0]
    media_id = result[1]
    if not media_id:
        print("Media ID is null, skipping.")
        continue
    mediaURL = result[2]
    serverPath = mediaURL.replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
    localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))
    if not os.path.exists(localFilePath):
        print(f"File {localFilePath} does not exist, skipping.")
        continue
    # hash the first frame of the video, since pHash works on still images
    thumbPath = f'temp/{media_id}.jpg'
    cap = cv2.VideoCapture(localFilePath)
    ret, frame = cap.read()
    cv2.imwrite(thumbPath, frame)
    cap.release()
    phash = generate_phash(thumbPath)
    os.remove(thumbPath)
    if not phash:
        print(f"Error generating pHash for {localFilePath}, skipping.")
        continue
    cursor.execute("UPDATE media SET phash = %s WHERE id = %s", (phash, itemID))
    db.commit()
    print(f"Processed {count}/{len(results)}: {media_id} with pHash {phash}")

@ -1,43 +0,0 @@
import os
import json
import config

# Establish database connection
db, cursor = config.gen_connection()

# Fetch rows with file_size = 0
cursor.execute("SELECT id, media_id, media_url FROM media WHERE file_size = 0;")
results = cursor.fetchall()

cacheDir = 'cache'
os.makedirs(cacheDir, exist_ok=True)
print(f"Found {len(results)} files to process.")

update_data = []
for result in results:
    itemID = result[0]
    media_id = result[1]
    if not media_id:
        print(f"Media ID is null for ID {itemID}, skipping.")
        continue
    mediaURL = result[2]
    serverPath = mediaURL.replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
    localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))
    if not os.path.exists(localFilePath):
        print(f"File {localFilePath} does not exist for ID {itemID}, skipping.")
        continue
    file_size = os.path.getsize(localFilePath)
    update_data.append({"id": itemID, "file_size": file_size})

# Save the results to a JSON file
output_file = "update_data.json"
with open(output_file, 'w') as f:
    json.dump(update_data, f, indent=4)

print(f"Saved {len(update_data)} updates to {output_file}.")
cursor.close()
db.close()

File diff suppressed because it is too large.

@ -1,29 +0,0 @@
import json
import config

# Establish database connection
db, cursor = config.gen_connection()

# Load update data from the JSON file
input_file = "update_data.json"
with open(input_file, 'r') as f:
    update_data = json.load(f)
print(f"Loaded {len(update_data)} records to update.")

# Process each record one by one
for count, item in enumerate(update_data, start=1):
    item_id = item["id"]
    file_size = item["file_size"]
    try:
        cursor.execute("UPDATE media SET file_size = %s WHERE id = %s", (file_size, item_id))
        db.commit()
        print(f"Processed {count}/{len(update_data)}: ID {item_id} updated with file size {file_size}.")
    except Exception as e:
        print(f"Error updating ID {item_id}: {e}")
        db.rollback()

print("All updates completed.")
cursor.close()
db.close()

@ -1,31 +0,0 @@
from BunnyCDN.Storage import Storage
import config, os

db, cursor = config.gen_connection()
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
cursor.execute("SELECT id, media_id, media_url FROM media WHERE file_size = 0;")
results = cursor.fetchall()
print(f"Found {len(results)} files to process.")
cacheDir = 'cache'

for result in results:
    itemID = result[0]
    mediaURL = result[2]
    serverPath = mediaURL.replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
    localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))
    if not os.path.exists(localFilePath):
        continue
    file_size = os.path.getsize(localFilePath)
    cursor.execute("UPDATE media SET file_size = %s WHERE id = %s;", (file_size, itemID))
    db.commit()
    print(f"Processed ID {itemID}: updated with file size {file_size}.")

cursor.close()
db.close()

@ -1,181 +0,0 @@
import os
import json
import config
import imagehash
from PIL import Image
from funcs import get_files, calculate_file_hash, remove_empty_folders  # Assuming this is defined elsewhere

def generate_image_phash(filepath, hash_size=8):
    try:
        # Open the image using PIL
        pil_image = Image.open(filepath)
        # Compute pHash using the imagehash library
        phash = imagehash.phash(pil_image, hash_size=hash_size)
        return phash
    except Exception as e:
        print(f"Error processing image {filepath}: {e}")
        return None

def are_phashes_duplicates(phash1, phash2, threshold=5):
    try:
        # Compute the Hamming distance between the pHashes
        distance = phash1 - phash2
        return distance <= threshold
    except TypeError as e:
        print(f"Error comparing pHashes: {e}")
        return False
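# Hedged illustration (not in the original): imagehash objects subtract to a
# Hamming distance, so with hash_size=8 (a 64-bit pHash) a threshold of 5
# means "at most 5 of 64 bits differ", e.g.:
#
#   h1 = imagehash.hex_to_hash("8f373714acfcf4d0")
#   h2 = imagehash.hex_to_hash("8f373714acfcf4d1")
#   h1 - h2  # -> 1 (only the last bit differs), so the pair counts as duplicates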
def get_media_by_phash(phash, username, existing_medias, threshold=5):
    for media in existing_medias:
        existing_phash_str = media[1]
        existing_username = media[2]
        if existing_username != username:
            continue
        # Convert stored pHash string to ImageHash object
        existing_phash = imagehash.hex_to_hash(existing_phash_str)
        # Check if the current pHash is a duplicate
        if are_phashes_duplicates(phash, existing_phash, threshold=threshold):
            return media
    return None

def get_media_by_hash(hash, existing_medias):
    for media in existing_medias:
        existing_hash = media[1]
        if hash == existing_hash:
            return media
    return None

def get_media_by_id(media_id, existing_medias):
    for media in existing_medias:
        existing_media_id = media[1]
        if media_id == existing_media_id:
            return media
    return None

def get_data_by_filename(filename, data):
    for item in data:
        if filename in item['filepath']:
            return item
    return None

directory = 'images'
data = json.load(open('pins.json'))
files = get_files(directory)
knownExtensions = ['jpg', 'png', 'jpeg', 'gif', 'webp']

# Rename files with unknown extensions to their content hash
for file in files:
    fileExt = file.split('.')[-1].lower()
    if fileExt not in knownExtensions:
        print(f"Unknown extension for {file}, renaming to content hash")
        filehash = calculate_file_hash(file)
        newfilename = f"{filehash}.jpg"
        currentDir = os.path.dirname(file)
        newfilepath = os.path.join(currentDir, newfilename)
        os.rename(file, newfilepath)

files = get_files(directory)

# Sort files by username and move them into the directory folder where each subfolder is a username
for file in files:
    item_data = get_data_by_filename(os.path.basename(file).split('.')[0], data)
    if not item_data:
        print(f"Data not found for {file}")
        continue
    username = item_data['username']
    newpath = os.path.join(directory, username, os.path.basename(file))
    if newpath == file:
        continue
    os.makedirs(os.path.dirname(newpath), exist_ok=True)
    os.rename(file, newpath)

# Database connection
db, cursor = config.gen_connection()

# now find dupes by media_id
cursor.execute("SELECT id, media_id, username FROM media WHERE media_type = %s AND media_id IS NOT NULL", ['image'])
items = cursor.fetchall()
media_ids = [item[1] for item in items]
files = get_files(directory)
for file in files:
    try:
        media_id = int(os.path.basename(file).split('.')[0])
    except:
        print(f"Error parsing media_id from {file}")
        continue
    if media_id in media_ids:
        media_item = get_media_by_id(media_id, items)
        print(f"Duplicate found: https://altpins.com/pin/{media_item[0]}")
        print(f"Duplicate file: {file}")
        newpath = os.path.join('duplicates', media_item[2], os.path.basename(file))
        os.makedirs(os.path.dirname(newpath), exist_ok=True)
        os.rename(file, newpath)
    else:
        print(f"Unique file: {file}")

# now find dupes by file hash
cursor.execute("SELECT id, hash, username FROM media WHERE media_type = %s AND hash IS NOT NULL", ['image'])
items = cursor.fetchall()
hashes = [item[1] for item in items]
files = get_files(directory)
for file in files:
    hash = calculate_file_hash(file)
    if hash in hashes:
        media_item = get_media_by_hash(hash, items)
        print(f"Duplicate found: https://altpins.com/pin/{media_item[0]}")
        print(f"Duplicate file: {file}")
        newpath = os.path.join('duplicates', media_item[2], os.path.basename(file))
        os.makedirs(os.path.dirname(newpath), exist_ok=True)
        os.rename(file, newpath)
    else:
        print(f"Unique file: {file}")

# Fetch existing media with pHashes (assuming media are images, adjust media_type if needed)
cursor.execute("SELECT id, phash, username FROM media WHERE media_type = %s AND phash IS NOT NULL", ['image'])
existing_medias = cursor.fetchall()

# Go through the directory folder where each subfolder is a username
files = get_files(directory)
for filepath in files:
    image_filename = os.path.basename(filepath)
    print(f'Processing {image_filename}...')
    # Generate pHash for the image
    phash = generate_image_phash(filepath, hash_size=8)
    if phash is None:
        continue  # Skip this image if there's an issue
    phash_str = str(phash)
    item_data = get_data_by_filename(image_filename, data)
    if not item_data:
        print(f"Data not found for {image_filename}")
        continue
    username = item_data['username']
    # Check if the image is a duplicate of any in the database
    duplicate_media = get_media_by_phash(phash, username, existing_medias, threshold=5)
    if duplicate_media:
        print(f'Duplicate found: https://altpins.com/pin/{duplicate_media[0]}')
        print(f'Duplicate image path: {filepath}')
        newpath = os.path.join('duplicates', duplicate_media[2], image_filename)
        os.makedirs(os.path.dirname(newpath), exist_ok=True)
        os.rename(filepath, newpath)
        print(f'Moved {image_filename} to duplicates/')

@ -1,112 +0,0 @@
from BunnyCDN.Storage import Storage
from PIL import Image
import os, uuid, cv2, config

def scan_dupes(folder_path):
    for root, dirs, files in os.walk(folder_path):
        for folder in dirs:
            folder_path = os.path.join(root, folder)
            for filename in os.listdir(folder_path):
                media_id = filename.replace('.mp4', '').replace('.jpg', '')
                filepath = os.path.join(folder_path, filename)
                if media_id:
                    try:
                        if int(media_id) in existing_files:
                            print('Duplicate')
                            os.remove(filepath)
                    except:
                        pass

def clean_empty_folders(directory):
    for foldername, subfolders, filenames in os.walk(directory, topdown=False):
        for subfolder in subfolders:
            folder_path = os.path.join(foldername, subfolder)
            if not os.listdir(folder_path):
                os.rmdir(folder_path)
                print(f"Removed empty folder: {folder_path}")

def upload_file(filepath, username, media_id=None, media_type='image', post_type='story'):
    filename = os.path.basename(filepath)
    file_extension = filename.split('.')[-1]
    try:
        if int(media_id) in existing_files:
            print('Duplicate')
            os.remove(filepath)
            return True
    except:
        # fall back to a random id when the filename does not parse as a numeric media id
        media_id = uuid.uuid4().hex
    dirtype = 'stories' if post_type == 'story' else 'posts'
    server_path = f'users/{dirtype}/{username}/{media_id}.{file_extension}'
    obj_storage.PutFile(filepath, server_path)
    file_url = f"https://storysave.b-cdn.net/{server_path}"
    if media_type == 'image':
        with Image.open(filepath) as img:
            width, height = img.size
    else:
        width, height = get_video_dimensions(filepath)
    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, media_id, post_type) VALUES (%s, %s, %s, %s, %s, %s, %s)"
    values = (username, media_type, file_url, width, height, media_id, post_type)
    newCursor.execute(query, values)
    newDB.commit()
    os.remove(filepath)
    print(f'[{newCursor.rowcount}]{os.path.basename(filepath)} {file_url}')

def get_video_dimensions(video_path):
    cap = cv2.VideoCapture(video_path)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    cap.release()
    return width, height

def get_media_type(filename):
    if filename.lower().endswith((".jpg", ".webp", ".jpeg", ".png", ".gif")):
        return 'image'
    if filename.lower().endswith((".mp4", ".mov")):
        return 'video'

def dump_instagram(folder_path):
    for root, dirs, files in os.walk(folder_path):
        for folder in dirs:
            username = folder
            folder_path = os.path.join(root, folder)
            post_type = 'story' if folder_path.split('\\')[0] == 'stories' else 'post'
            for filename in os.listdir(folder_path):
                media_id = filename.replace('.mp4', '').replace('.jpg', '')
                filepath = os.path.join(folder_path, filename)
                mediatype = get_media_type(filename)
                upload_file(username=username, media_type=mediatype, filepath=filepath, media_id=media_id, post_type=post_type)

if __name__ == '__main__':
    print('Starting processing...')
    newDB, newCursor = config.gen_connection()
    obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
    newCursor.execute("SELECT media_id FROM media")
    existing_files = [image[0] for image in newCursor.fetchall()]
    scan_dupes('media/posts')
    scan_dupes('media/stories')
    scan_dupes('StorySave/')
    dump_instagram('media/posts')
    dump_instagram('media/stories')
    dump_instagram('StorySave/')
    clean_empty_folders('media/posts')
    clean_empty_folders('media/stories')
    clean_empty_folders('StorySave/')
    print("Processing completed.")

@ -1,33 +0,0 @@
import bunny, json

medias = json.load(open('videos.json', 'r'))
videoIDS = [media['url'].split('/')[-1] for media in medias]
videos = bunny.list_videos()

with open('allVideos.json', 'w') as f:
    json.dump(videos, f, indent=4)

missingVideos = []
for video in videos:
    if video['guid'] in videoIDS:
        continue
    missingVideos.append(video)

datas = []
for video in missingVideos:
    data = {
        'guid': video['guid'],
        'title': video['title'],
        'length': video['length'],
        'width': video['width'],
        'height': video['height'],
        'availableResolutions': video['availableResolutions'],
        'storageSize': video['storageSize'],
        'hasMP4Fallback': video['hasMP4Fallback'],
        'category': video['category'],
    }
    datas.append(data)

with open('missing_videos.json', 'w') as f:
    json.dump(datas, f, indent=4)

@ -1,27 +0,0 @@
from BunnyCDN.Storage import Storage
import os, json

altpins_obj_storage = Storage('577cb82d-8176-4ccf-935ce0a574bf-fe4c-4012', 'altpins')
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')

medias = json.load(open('db_pins.json', 'r'))
count = 0
print(f"Found {len(medias)} files to process.")
cacheDir = 'old_altpins_cache'

for media in medias:
    count += 1
    username = media['title']
    mediaID = media['photo_id']
    mediaURL = media['url']
    extension = mediaURL.split('.')[-1]
    serverPath = mediaURL.replace("https://altpins.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
    localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))
    if os.path.exists(localFilePath):
        continue
    altpins_obj_storage.DownloadFile(storage_path=serverPath, download_path=cacheDir)
    print(f"Downloaded {count}/{len(medias)}: {localFilePath}")

@ -1,16 +0,0 @@
import json, bunny, os
from concurrent.futures import ThreadPoolExecutor

medias = json.load(open('missing_videos.json', 'r'))
#videoIDS = [media['url'].split('/')[-1] for media in medias]
videoIDS = [media['guid'] for media in medias]

with ThreadPoolExecutor(max_workers=10) as executor:
    for id in videoIDS:
        filePath = f"MISSING_STREAM_VIDEOS/{id}.zip"
        if os.path.exists(filePath):
            print(f'Video already exists as {filePath}. Skipping...')
            continue
        executor.submit(bunny.download_video, id)

@ -1,29 +0,0 @@
import os, json, config

# Load the data
pins = json.load(open('db_pins.json', 'r'))
files = os.listdir('STORAGE_IMPORTED/')

db, cursor = config.gen_connection()
cursor.execute('SELECT hash FROM media WHERE hash IS NOT NULL;')
existing_hashes = [hash[0] for hash in cursor.fetchall()]

# Drop pins whose hash is already in the database.
# Rebuilt as a filter: calling list.remove() while iterating the same list skips elements.
remaining = []
for pin in pins:
    if pin['hash'] in existing_hashes:
        print(f"Found {pin['hash']} in the imported folder.")
    else:
        remaining.append(pin)
pins = remaining

# Split off pins whose file already sits in the imported folder (same fix as above).
alreadyImported = []
still_pending = []
for pin in pins:
    filepath = pin['filepath']
    username = pin['title']
    filename = os.path.basename(filepath)
    if filename in files:
        print(f"Found {filename} in the imported folder.")
        alreadyImported.append(pin)
    else:
        still_pending.append(pin)
pins = still_pending

# Save to the file
json.dump(pins, open('db_pins.json', 'w'))
json.dump(alreadyImported, open('db_pins_imported.json', 'w'))

@ -1,14 +0,0 @@
import os, json, bunny

medias = json.load(open('allVideos.json', 'r'))
mp4Medias = [media for media in medias if media['hasMP4Fallback'] == True]
missing = json.load(open('missing_videos.json', 'r'))
count = 0
cacheDir = 'old_mp4fallback_cache'
print(f"Found {len(medias)} files to process.")

for media in mp4Medias:
    count += 1
    filePath = os.path.join(cacheDir, media['guid'] + '.mp4')

@ -1,36 +0,0 @@
import os, json, bunny, config

db, cursor = config.gen_connection()
cursor.execute('SELECT media_id FROM media WHERE media_id IS NOT NULL;')
mediaIDS = cursor.fetchall()

pins = json.load(open('pins.json', 'r'))
videos = json.load(open('db_videos.json', 'r'))
pins = json.load(open('db_pins.json', 'r'))
ids = [video['id'] for video in videos]

# Drop pins that already have a matching video id.
# Rebuilt as a filter: list.remove() inside a loop over the same list skips elements.
pins = [pin for pin in pins if pin['id'] not in ids]

# save to the file
json.dump(pins, open('db_pins.json', 'w'))

medias = json.load(open('allVideos.json', 'r'))
mp4Medias = [media for media in medias if media['hasMP4Fallback'] == True]
missing = json.load(open('missing_videos.json', 'r'))
count = 0
cacheDir = 'old_mp4fallback_cache'
print(f"Found {len(medias)} files to process.")

for media in mp4Medias:
    count += 1
    filePath = os.path.join(cacheDir, media['guid'] + '.mp4')

@ -1,53 +0,0 @@
import os, json, funcs

STORAGE_IMPORTED = 'STORAGE_IMPORTED'
pins = json.load(open('db_pins.json', 'r'))

for pin in pins:
    filename = pin['url'].split('/')[-1]
    filepath = os.path.join(STORAGE_IMPORTED, filename)
    pin['filename'] = filename
    if not pin['hash']:
        pin['hash'] = funcs.calculate_file_hash(filepath)

json.dump(pins, open('db_pins.json', 'w'), indent=4)

files = os.listdir(STORAGE_IMPORTED)
for file in files:
    filepath = os.path.join(STORAGE_IMPORTED, file)
    fileHash = funcs.calculate_file_hash(filepath)
    if fileHash not in file:
        # keep the extension when renaming; the move step below looks for "{hash}.{ext}"
        newname = fileHash + os.path.splitext(file)[1]
        print(f'Renaming {file} to {newname}')
        os.rename(filepath, os.path.join(STORAGE_IMPORTED, newname))

pins_by_username = {}
for pin in pins:
    username = pin['title']
    if username not in pins_by_username:
        pins_by_username[username] = []
    pins_by_username[username].append(pin)

for username, username_pins in pins_by_username.items():
    username_folder = os.path.join(STORAGE_IMPORTED, username)
    os.makedirs(username_folder, exist_ok=True)
    for pin in username_pins:
        photo_id = pin['photo_id']
        photo_url = pin['url']
        fileHash = pin['hash']
        if not fileHash:
            continue
        extension = photo_url.split('.')[-1]
        filename = f'{fileHash}.{extension}'
        filePath = os.path.join(STORAGE_IMPORTED, filename)
        outputPath = os.path.join(STORAGE_IMPORTED, username, filename)
        if os.path.exists(outputPath):
            print(f'File {outputPath} already exists. Skipping...')
            continue
        print(f'Moving {photo_url} to {outputPath}')
        os.rename(filePath, outputPath)

@ -1,57 +0,0 @@
import os
import hashlib

# Directories
fucked_dir = 'tiktoks/fucked/aleksandra'
source_dir = 'tiktoks/waiting_for_process/aleksandraverse'

def hash_file(filepath):
    """Generate MD5 hash of a file."""
    hash_md5 = hashlib.md5()
    with open(filepath, "rb") as f:
        for chunk in iter(lambda: f.read(4096), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()

def get_file_hashes(directory):
    """Generate a dictionary of file hashes for all files in a directory."""
    file_hashes = {}
    for root, _, files in os.walk(directory):
        for file in files:
            file_path = os.path.join(root, file)
            file_hashes[file_path] = hash_file(file_path)
    return file_hashes

def files_are_identical(file1, file2):
    """Compare two files byte-by-byte."""
    with open(file1, "rb") as f1, open(file2, "rb") as f2:
        while True:
            chunk1 = f1.read(4096)
            chunk2 = f2.read(4096)
            if chunk1 != chunk2:
                return False
            if not chunk1:  # End of file
                return True

def remove_duplicates(fucked_dir, source_files):
    """Remove files in 'fucked' that are identical to those in 'source_files'.

    source_files is the dict returned by get_file_hashes; only its keys (paths)
    are iterated here, so every comparison is a full byte-by-byte pass.
    """
    for root, _, files in os.walk(fucked_dir):
        for file in files:
            file_path = os.path.join(root, file)
            for source_file in source_files:
                if files_are_identical(file_path, source_file):
                    print(f"Duplicate found. Removing: {file_path}")
                    os.remove(file_path)
                    break

def main():
    print("Scanning source directory for hashes...")
    source_hashes = get_file_hashes(source_dir)
    print("Scanning 'fucked' directory for duplicates...")
    remove_duplicates(fucked_dir, source_hashes)
    print("Cleanup complete.")

if __name__ == "__main__":
    main()

@ -1,49 +0,0 @@
import json, os
from videohash import VideoHash
from moviepy.editor import VideoFileClip

def is_valid_video(file_path):
    try:
        with VideoFileClip(file_path) as video:
            return True
    except Exception as e:
        print(f"Invalid video {file_path}: {str(e)}")
        return False

def load_hashes(file_path):
    try:
        with open(file_path, 'r') as file:
            return json.load(file)
    except FileNotFoundError:
        return {}

def save_hashes(hashes, file_path):
    with open(file_path, 'w') as file:
        json.dump(hashes, file, indent=4)

hashes = load_hashes('video_hashes.json')
video_directory = 'STORAGE'

for username in os.listdir(video_directory):
    user_dir = os.path.join(video_directory, username)
    if not os.path.isdir(user_dir):
        continue
    for video_file in os.listdir(user_dir):
        video_path = os.path.join(user_dir, video_file)
        if not video_file.endswith(('.mp4', '.mkv', '.avi')) or not is_valid_video(video_path):
            continue
        if username in hashes and any(v[0] == video_file for v in hashes[username]):
            continue
        try:
            video_hash = VideoHash(path=video_path)
            if username in hashes:
                hashes[username].append((video_file, video_hash.hash))
            else:
                hashes[username] = [(video_file, video_hash.hash)]
        except Exception as e:
            print(f"Error processing {video_file}: {str(e)}")

save_hashes(hashes, 'video_hashes.json')

@ -1,17 +0,0 @@
import os, config, funcs
db, cursor = config.gen_connection()
cursor.execute("SELECT phash FROM media WHERE phash IS NOT NULL")
phashes = set([x[0] for x in cursor.fetchall()])
files = funcs.get_files("check_if_exists")
for file in files:
image_phash = funcs.generate_phash(file)
if image_phash in phashes:
print(f"File {file} exists in the database")
os.remove(file)
funcs.cleanEmptyFolders("check_if_exists")

@ -1,159 +0,0 @@
from snapchat import get_data, get_stories, get_highlight_stories
from datetime import datetime
import requests
import config
import json
import os
"""
media_url_filename = url.split('/')[-1].split('?')[0]
etag = response.headers.get('ETag', '').replace('"', '')
filename = f"{username}~{timestamp}-{media_url_filename}~{etag}{extension}"
filepath = os.path.join(directory, 'highlights', filename)
"""
directory = "snapchat"
data_directory = "data"
def get_existing_snap_ids(directory):
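    # Filenames follow username~timestamp~snap_id.ext; the third '~' field is the snap ID.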
existing_snap_ids = set()
for root, _, files in os.walk(directory):
for file in files:
if '~' not in file:
continue
filename, _ = os.path.splitext(file)
snap_id = filename.split('~')[2]
existing_snap_ids.add(snap_id)
return existing_snap_ids
def find_duplicate_snap(existing_snaps, snap_id, username):
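    # existing_snaps rows are (id, filename, username): match the username, then look for the snap ID in the stored filename.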
for snap in existing_snaps:
if username == snap[2]:
if snap_id in snap[1]:
return snap
return False
def archive_data(data, username):
data_filename = f"{username}~{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.json"
data_filepath = os.path.join(data_directory, data_filename)
with open(data_filepath, 'w') as f:
f.write(json.dumps(data))
print(f"Archived data for {username} at {data_filepath}")
def get_file_extension(url):
response = requests.head(url)
if response.status_code != 200:
print(f"Failed to access media {url}")
return None
content_type = response.headers.get('Content-Type', '')
if 'image' in content_type:
return '.jpg'
elif 'video' in content_type:
return '.mp4'
else:
print(f"Unknown content type for media {url}")
return None
def extract_file_type(url):
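    # Snapchat URLs embed a numeric media-type code in the filename; map the known codes to extensions.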
file_types = {
'400': '.jpg',
'1322': '.mp4',
'1325': '.mp4',
'1034': '.mp4',
'1023': '.jpg'
}
base_url = url.split("?")[0] # Remove query string
snap_data = base_url.split('/')[-1]
# Extract the file type number
    data_parts = snap_data.split('.')
    if len(data_parts) > 1:
        file_type_number = data_parts[1]
        if file_type_number in file_types:
            return file_types[file_type_number]
        print(f"Unknown media type code {file_type_number} in URL: {base_url}")
    else:
        print(f"Unexpected URL format: {base_url}")
    return None
def download_media(url, filepath):
if os.path.exists(filepath):
print(f"File {filepath} already exists. Skipping download.")
return filepath
response = requests.get(url)
if response.status_code != 200:
print(f"Failed to download media {url}")
return None
with open(filepath, 'wb') as f:
f.write(response.content)
return filepath
def main():
if not os.path.exists(directory):
os.makedirs(directory)
db, cursor = config.gen_connection()
cursor.execute("SELECT username FROM following WHERE platform = 'snapchat'")
usernames = [row[0] for row in cursor.fetchall()]
cursor.execute("SELECT id, filename, username FROM media WHERE filename IS NOT NULL AND platform = 'snapchat'")
existing_medias = cursor.fetchall()
existing_snap_ids = get_existing_snap_ids(directory)
for username in usernames:
print(f"Getting stories for {username}...")
data = get_data(username)
if not data:
continue
archive_data(data, username)
print("Getting stories...")
stories = get_stories(data)
print("Getting highlights...")
stories.extend(get_highlight_stories(data))
for story in stories:
snap_id = story['snap_id']
url = story['url']
timestamp = story['timestamp']
duplicate_snap = find_duplicate_snap(existing_medias, snap_id, username)
if duplicate_snap:
print(f"Media {snap_id} already exists. Skipping download.")
continue
# Check if media already exists
if snap_id in existing_snap_ids:
print(f"Media {snap_id} already exists. Skipping download.")
continue
            # Determine the file extension from the numeric type code in the URL,
            # which avoids a per-snap HEAD request (see get_file_extension) or downloading the file first.
extension = extract_file_type(url)
if not extension:
continue
filename = f"{username}~{timestamp}~{snap_id}{extension}"
filepath = os.path.join(directory, filename)
# Check if file already exists
if os.path.exists(filepath):
print(f"File {filename} already exists. Skipping download.")
continue
# Download the media
            filepath = download_media(url, filepath)
            if filepath:
                print(f"Downloaded {filename} at {timestamp}")
if __name__ == "__main__":
main()

@ -1,257 +0,0 @@
from concurrent.futures import ThreadPoolExecutor
from BunnyCDN.Storage import Storage
from instagrapi import Client
import requests
import config
import json
import os
from PIL import Image
import cv2
import getpass
import time
import hashlib
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36"}
proxies={
"http": "http://yehyuxsl-rotate:4tl5bvrwkz5e@p.webshare.io:80/",
"https": "http://yehyuxsl-rotate:4tl5bvrwkz5e@p.webshare.io:80/"
}
def login():
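    # Reuse a cached session when available; otherwise log in interactively (with optional 2FA) and cache it.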
client = Client()
try:
client.load_settings("session_data.json")
except (FileNotFoundError, json.JSONDecodeError):
username = input("Enter your Instagram username: ")
password = getpass.getpass("Enter your Instagram password: ")
auth = input("Enter your 2FA code (leave blank if not enabled): ")
if auth:
client.login(username=username, password=password, verification_code=auth)
else:
client.login(username, password)
client.dump_settings("session_data.json")
return client
def get_media_details(media_item):
mediaTypes = {1: 'image', 2: 'video', 8: 'album'}
    taken_at = getattr(media_item, 'taken_at', None)
    post_type = getattr(media_item, 'product_type', None)
    mediaInfo = {'taken_at': taken_at, 'post_type': post_type, 'media_type': mediaTypes[media_item.media_type]}
if media_item.media_type == 1: # Image
mediaInfo['media_id'] = int(media_item.pk)
mediaInfo['media_url'] = media_item.thumbnail_url
mediaInfo['filename'] = f"{media_item.pk}.jpg"
elif media_item.media_type == 2: # Video
mediaInfo['media_id'] = int(media_item.pk)
mediaInfo['media_url'] = media_item.video_url
        mediaInfo['duration'] = getattr(media_item, 'video_duration', 0)
mediaInfo['filename'] = f"{media_item.pk}.mp4"
else:
print(f"Unsupported media type with ID {media_item.pk}")
return None
return mediaInfo
def download_media(mediaInfo, save_dir, attempts=5):
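    # Download the file, read its dimensions, mirror it to Bunny storage, record it in the DB, then delete the local copy.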
try:
save_path = os.path.join(save_dir, mediaInfo['filename'])
if not os.path.exists(os.path.dirname(save_path)):
os.makedirs(os.path.dirname(save_path))
response = requests.get(mediaInfo['media_url'], stream=True, proxies=proxies)
response.raise_for_status()
with open(save_path, 'wb') as out_file:
for chunk in response.iter_content(chunk_size=8192):
out_file.write(chunk)
print(f"Downloaded {save_path}")
if mediaInfo['media_type'] == 'image':
with Image.open(save_path) as img:
mediaInfo['width'], mediaInfo['height'] = img.size
else:
mediaInfo['width'], mediaInfo['height'] = get_video_dimensions(save_path)
server_path = os.path.join('users', save_dir, mediaInfo['filename'])
upload_to_storage(save_path, server_path)
mediaInfo['server_url'] = f"https://storysave.b-cdn.net/{server_path}"
add_media_to_db(mediaInfo)
os.remove(save_path)
except Exception as e:
if attempts > 0:
print(f"Error when processing {mediaInfo['media_url']}. Error: {e}. Retrying...")
            download_media(mediaInfo, save_dir, attempts-1)
else:
print(f"Unexpected error when processing {mediaInfo['media_url']}. Error: {e}")
def upload_to_storage(local_path, server_path):
try:
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
obj_storage.PutFile(local_path, server_path)
print(f"Uploaded {local_path} to https://storysave.b-cdn.net/{server_path}")
except Exception as e:
print(f"Failed to upload {local_path} to {server_path}. Error: {e}")
def add_media_to_db(mediaInfo):
media_id = mediaInfo['media_id']
user_id = mediaInfo['user_id']
username = mediaInfo['username']
date = mediaInfo['taken_at']
media_type = mediaInfo['media_type']
post_type = mediaInfo['post_type']
duration = mediaInfo.get('duration', 0)
media_url = mediaInfo['server_url']
width = mediaInfo['width']
height = mediaInfo['height']
try:
db, cursor = config.gen_connection()
query = """
INSERT INTO media (user_id, username, date, media_type, post_type, media_url, duration, width, height, media_id)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
"""
cursor.execute(query, (user_id, username, date, media_type, post_type, media_url, duration, width, height, media_id))
db.commit()
print(f"Added media for {username} to the database.")
except Exception as e:
print(f"Failed to add media for {username} to the database. Error: {e}")
def insert_highlight_items(media_ids, highlight_id, title, user_id):
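    # INSERT IGNORE skips rows that would violate the table's unique key, so re-runs are safe.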
try:
db, cursor = config.gen_connection()
query = "INSERT IGNORE INTO highlights (media_id, highlight_id, title, user_id) VALUES (%s, %s, %s, %s)"
values = [(media_id, highlight_id, title, user_id) for media_id in media_ids]
cursor.executemany(query, values)
db.commit()
if cursor.rowcount > 0:
print(f"Added {cursor.rowcount} highlight items to the database.")
except Exception as e:
print(f"Failed to add highlight items to the database. Error: {e}")
def get_video_dimensions(video_path):
cap = cv2.VideoCapture(video_path)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
cap.release()
return width, height
if __name__ == '__main__':
client = login()
client.set_proxy(proxies['https'])
db, cursor = config.gen_connection()
cursor.execute("SELECT instagram_username, instagram_user_id, last_checked FROM following ORDER BY id DESC;")
following = cursor.fetchall()
cursor.execute("SELECT media_id FROM media;")
existing_files = [media[0] for media in cursor.fetchall()]
with ThreadPoolExecutor(max_workers=10) as executor:
for user in following:
try:
firstImport = False
username, user_id, lastchecked = user
                # Seconds since the user was last checked; never-checked users count as stale.
                lastchecked = (time.time() - lastchecked.timestamp()) if lastchecked else float('inf')
if not user_id:
firstImport = True
user_id = client.user_id_from_username(username)
cursor.execute("UPDATE following SET instagram_user_id = %s WHERE instagram_username = %s;", (user_id, username))
db.commit()
print(f"Updated user ID for {username} to {user_id}")
                profile_pic = client.user_info(user_id).profile_pic_url_hd
                # Profile picture: give download_media the full mediaInfo shape it expects
                # (reusing the numeric user ID as media_id and 'profile' as post_type is a local convention here).
                download_media({'media_url': profile_pic, 'filename': f"{user_id}.jpg", 'media_type': 'image',
                                'media_id': int(user_id), 'user_id': user_id, 'username': username,
                                'taken_at': None, 'post_type': 'profile', 'duration': 0},
                               os.path.join('profile_pics', username))
print(f"[{username}]\nChecking: Stories")
if lastchecked > 3600:
stories = client.user_stories(user_id)
else:
stories = []
if firstImport:
highlights = client.user_highlights(user_id) # API request
for highlight in highlights:
try:
highlight_items = client.highlight_info_v1(highlight.pk).items # API request
                        except Exception as e:
                            print(f"Failed to get highlight items for {highlight.pk}: {e}")
                            time.sleep(5)
                            continue
media_ids = [item.pk for item in highlight_items]
executor.submit(insert_highlight_items, media_ids, highlight.pk, highlight.title, user_id)
stories.extend(highlight_items)
newStoryCount = 0
for story in stories:
                    mediaInfo = get_media_details(story)
                    if not mediaInfo or mediaInfo['media_id'] in existing_files:
                        continue
newStoryCount += 1
mediaInfo['user_id'] = user_id
mediaInfo['username'] = username
mediaInfo['post_type'] = 'story'
if mediaInfo['media_url'] and mediaInfo['filename']:
filePath = os.path.join('media', 'stories', username)
                        download_media(mediaInfo, filePath)
print("Checking: Posts")
if lastchecked > 3600:
medias = client.user_medias(user_id, 9) # API request
else:
medias = []
posts = []
for post in medias:
if post.media_type == 8:
for item in post.resources:
posts.append(item)
continue
posts.append(post)
newPostsCount = 0
for post in posts:
                    mediaInfo = get_media_details(post)
                    if not mediaInfo or mediaInfo['media_id'] in existing_files:
                        continue
newPostsCount += 1
mediaInfo['user_id'] = user_id
mediaInfo['username'] = username
mediaInfo['post_type'] = 'post'
if mediaInfo['media_url'] and mediaInfo['filename']:
filePath = os.path.join('media', 'posts', username)
                        download_media(mediaInfo, filePath)
if newStoryCount > 0 or newPostsCount > 0:
cursor.execute("UPDATE following SET last_checked = NOW() WHERE instagram_username = %s;", (username,))
db.commit()
print(f"New stories: {newStoryCount}\tNew Posts: {newPostsCount}")
print("=====================================")
time.sleep(5)
            except Exception as e:
                print(f"Failed to get stories for {username}: {e}")
                time.sleep(5)

@ -1,34 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Image Gallery</title>
<style>
.gallery {
display: flex;
flex-wrap: wrap;
}
.gallery img {
margin: 10px;
max-width: 200px;
height: auto;
}
.gallery div {
text-align: center;
margin: 10px;
}
</style>
</head>
<body>
<h1>Image Gallery</h1>
<div class="gallery">
{% for image in images %}
<div>
<h3>{{ image['username'] }}</h3>
<img src="{{ image['media_url'] }}" alt="Image for {{ image['username'] }}">
</div>
{% endfor %}
</div>
</body>
</html>

@ -1,84 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Media Gallery</title>
<style>
body {
display: flex;
justify-content: center;
}
.container {
max-width: 1600px;
width: 100%;
padding: 20px;
}
.media-container {
column-count: 4;
column-gap: 10px;
}
.media-item {
break-inside: avoid;
margin-bottom: 10px;
}
img, video {
width: 100%;
height: auto;
display: block;
}
</style>
</head>
<body>
<div class="container">
<h1>Media Gallery</h1>
<div class="media-container" id="media-container"></div>
</div>
<script>
let page = 0;
async function loadMore() {
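            // Fetch the next page of file paths and append each one as an image or video tile.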
const response = await fetch(`/load-more?page=${page}`);
const mediaFiles = await response.json();
const container = document.getElementById('media-container');
mediaFiles.forEach(file => {
const mediaItem = document.createElement('div');
mediaItem.className = 'media-item';
if (file.endsWith('.png') || file.endsWith('.jpg') || file.endsWith('.jpeg') || file.endsWith('.gif')) {
const img = document.createElement('img');
img.src = `/media/${file}`;
img.alt = file;
mediaItem.appendChild(img);
} else if (file.endsWith('.mp4') || file.endsWith('.mkv') || file.endsWith('.mov')) {
const video = document.createElement('video');
video.controls = false;
video.autoplay = true;
video.muted = true;
video.loop = true;
const source = document.createElement('source');
source.src = `/media/${file}`;
source.type = 'video/mp4';
video.appendChild(source);
mediaItem.appendChild(video);
}
container.appendChild(mediaItem);
});
page += 1;
}
        // Guard against duplicate fetches while a page is still loading.
        let loading = false;
        window.addEventListener('scroll', async () => {
            if (!loading && window.innerHeight + window.scrollY >= document.body.offsetHeight) {
                loading = true;
                await loadMore();
                loading = false;
            }
        });
// Initial load
loadMore();
</script>
</body>
</html>

@ -1,70 +0,0 @@
import os, requests, config
from snapchat import get_data, get_stories, get_highlight_stories
headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36"}
directory = "snapchat"
def get_existing_media_ids(directory):
    # Collect media IDs from existing filenames (username~timestamp~media_id.ext): the third '~' field.
existing_media_ids = set()
for root, _, files in os.walk(directory):
for file in files:
if '~' not in file:
continue
filename, _ = os.path.splitext(file)
media_id = filename.split('~')[2]
existing_media_ids.add(media_id)
return existing_media_ids
def get_media_id(url):
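    # '.../<snap_id>.<type_code>.<ext>?query' -> '<snap_id>'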
return url.split('/')[-1].split('?')[0].split('.')[0]
def find_duplicate_snap(existing_snaps, snap_id):
for snap in existing_snaps:
if snap_id in snap[1]:
return snap
return False
def main():
if not os.path.exists(directory):
os.makedirs(directory)
db, cursor = config.gen_connection()
cursor.execute("SELECT username FROM following WHERE platform = 'snapchat'")
usernames = [row[0] for row in cursor.fetchall()]
cursor.execute("SELECT id, filename FROM media WHERE filename IS NOT NULL AND platform = 'snapchat' AND snap_id IS NULL")
existing_medias = cursor.fetchall()
existing_media_ids = get_existing_media_ids(directory)
for username in usernames:
print(f"Getting stories for {username}...")
data = get_data(username)
if not data:
continue
print("Getting stories...")
stories = get_stories(data)
print("Getting highlights...")
stories.extend(get_highlight_stories(data))
for story in stories:
media_id = story['media_id']
url = story['url']
timestamp = story['timestamp']
snap_id = get_media_id(url)
duplicate_snap = find_duplicate_snap(existing_medias, snap_id)
if duplicate_snap:
cursor.execute("UPDATE media SET snap_id = %s WHERE id = %s", (snap_id, duplicate_snap[0]))
db.commit()
print(f"{cursor.rowcount} Media {snap_id} updated.")
continue
if __name__ == "__main__":
main()

@ -1,32 +0,0 @@
from flask import Flask, render_template, send_from_directory, jsonify, request
import os
app = Flask(__name__)
media_dir = 'storysaver'
MEDIA_PER_PAGE = 20
def get_media_files(start, count):
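    # Walks the whole tree on every request and slices out one page; fine for a small library.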
media_files = []
for root, dirs, files in os.walk(media_dir):
for filename in files:
if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.mp4', '.mkv', '.mov')):
file_path = os.path.relpath(os.path.join(root, filename), media_dir)
media_files.append(file_path)
return media_files[start:start + count]
@app.route('/')
def index():
return render_template('index.html')
@app.route('/media/<path:filename>')
def media(filename):
return send_from_directory(media_dir, filename)
@app.route('/load-more')
def load_more():
page = int(request.args.get('page', 0))
media_files = get_media_files(page * MEDIA_PER_PAGE, MEDIA_PER_PAGE)
return jsonify(media_files)
if __name__ == '__main__':
app.run(host='0.0.0.0', port=5000, debug=True)

@ -1,133 +0,0 @@
from BunnyCDN.Storage import Storage
from PIL import Image
import os, uuid, cv2, config
import hashlib
from moviepy.editor import VideoFileClip
def scan_dupes(folder_path):
    """Delete local files whose hash already exists in the media table."""
    newCursor.execute("SELECT hash FROM media")
    existing_hashes = {row[0] for row in newCursor.fetchall()}
    for root, dirs, files in os.walk(folder_path):
        for filename in files:
            filepath = os.path.join(root, filename)
            fileHash = calculate_file_hash(filepath)
            if fileHash in existing_hashes:
                print(f'Duplicate: {filepath}')
                os.remove(filepath)
def clean_empty_folders(directory):
for foldername, subfolders, filenames in os.walk(directory, topdown=False):
for subfolder in subfolders:
folder_path = os.path.join(foldername, subfolder)
if not os.listdir(folder_path):
os.rmdir(folder_path)
print(f"Removed empty folder: {folder_path}")
def upload_file(filepath, username, media_type='image', post_type = 'story'):
filename = os.path.basename(filepath)
file_extension = filename.split('.')[-1]
dirtype = 'stories' if post_type == 'story' else 'posts'
#dirtype = 'profile'
    fileHash = calculate_file_hash(filepath)
    # Use the numeric filename stem as the media ID; otherwise generate one.
    # Duplicate rows are skipped by the INSERT IGNORE below, keyed on the table's unique index.
    media_id = os.path.splitext(filename)[0]
    if not media_id.isdigit():
        media_id = uuid.uuid4().hex
    server_path = f'users/{dirtype}/{username}/{media_id}.{file_extension}'
obj_storage.PutFile(filepath, server_path)
file_url = f"https://storysave.b-cdn.net/{server_path}"
duration = 0
if media_type == 'image':
try:
with Image.open(filepath) as img:
width, height = img.size
        except Exception as e:
            print(f'Unreadable image {filepath}: {e}')
            os.remove(filepath)
            return
else:
width, height = get_video_dimensions(filepath)
duration = get_video_duration(filepath)
query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, hash, filename, media_id, duration) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
values = (username, media_type, file_url, width, height, post_type, fileHash, filename, media_id, duration)
newCursor.execute(query, values)
newDB.commit()
os.remove(filepath)
print(f'[{newCursor.rowcount}]{os.path.basename(filepath)} {file_url}')
def get_video_dimensions(video_path):
cap = cv2.VideoCapture(video_path)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
cap.release()
return width, height
def get_video_duration(file_path):
"""
Returns the duration of the video file in seconds.
:param file_path: Path to the video file
:return: Duration in seconds
"""
with VideoFileClip(file_path) as video:
return video.duration
def get_media_type(filename):
if filename.lower().endswith(".jpg") or filename.lower().endswith(".webp") or filename.lower().endswith(".jpeg") or filename.lower().endswith(".png") or filename.lower().endswith(".gif"):
return 'image'
if filename.lower().endswith(".mp4") or filename.lower().endswith(".mov"):
return 'video'
def dump_instagram(folder_path):
for root, dirs, files in os.walk(folder_path):
for folder in dirs:
username = folder
folder_path = os.path.join(root, folder)
post_type = 'post' if 'post' in folder_path.lower() else 'story'
for filename in os.listdir(folder_path):
filepath = os.path.join(folder_path, filename)
mediatype = get_media_type(filename)
upload_file(username=username, media_type=mediatype, filepath=filepath, post_type=post_type)
def calculate_file_hash(file_path, hash_func='sha256'):
    h = hashlib.new(hash_func)
    with open(file_path, 'rb') as file:
        for chunk in iter(lambda: file.read(8192), b''):
            h.update(chunk)
    return h.hexdigest()
if __name__ == '__main__':
print('Starting processing...')
newDB, newCursor = config.gen_connection()
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
storiesPath = 'StorySave/'
dump_instagram(storiesPath)
print("Processing completed.")