main
oscar 11 months ago
parent 73889be10e
commit 89c8e35e3b

.gitignore vendored

@@ -4,4 +4,19 @@ facebook/
media/
cache/
temp/
*.pyc
*.pyc
/old_altpins_cache
/__pycache__
/STORAGE_IMPORTED
/STREAM_VIDEOS
/STREAM_VIDEOS_IMPORTED
/STORAGE
/other
/Sort
*.pyc
/images
/sortlater
/videos
/duplicates
/ready_to_upload
/archive

@@ -1,20 +1,8 @@
import mysql.connector

altpins_username = "xantorn"
altpins_password = "AVNS_lGiLOVTTyGMtoOoRn5Q"
altpins_host = "archivebate-db-do-user-13308724-0.b.db.ondigitalocean.com"
altpins_port = 25060
altpins_database = "altpins"
altpins_sslmode = "REQUIRED"

def altpins_gen_connection():
    print("Connecting to database")
    newDB = mysql.connector.connect(host=altpins_host, user=altpins_username, password=altpins_password, database=altpins_database, port=altpins_port)
    print("Connected to database")
    return newDB, newDB.cursor()

from BunnyCDN.Storage import Storage

username = "doadmin"
password = "AVNS_KNXK1IjScgTCe09gI9F"
password = "AVNS_2qeFJuiGRpBQXkJjlA6"
host = "storysave-do-user-13308724-0.c.db.ondigitalocean.com"
port = 25060
database = "storysave"
@@ -25,3 +13,6 @@ def gen_connection():
    newDB = mysql.connector.connect(host=host, user=username, password=password, database=database, port=port)
    print("Connected to database")
    return newDB, newDB.cursor()

def get_storage():
    return Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
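A minimal usage sketch for these helpers (assuming config.py is on the import path and the credentials above are live; the query is hypothetical):

import config

db, cursor = config.gen_connection()
cursor.execute("SELECT COUNT(*) FROM media")  # hypothetical query against the media table
print(cursor.fetchone()[0])
storage = config.get_storage()  # BunnyCDN 'storysave' storage zone
db.close()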

@@ -0,0 +1,12 @@
import os

def remove_empty_folders(folder):
    for root, dirs, files in os.walk(folder):
        for dir in dirs:
            dirpath = os.path.join(root, dir)
            if not os.listdir(dirpath):
                print(f"Removing empty folder {dirpath}")
                os.rmdir(dirpath)

folder = 'media'
remove_empty_folders(folder)
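One caveat worth noting: the top-down walk above only removes folders that are already empty when visited, so a parent that becomes empty after its children are removed survives until a later run. A bottom-up sketch (not part of the commit) that handles nesting in one pass:

import os

def remove_empty_folders_bottom_up(folder):
    # topdown=False yields leaf directories first, so parents emptied
    # by this loop are themselves removed on the same pass
    for root, dirs, files in os.walk(folder, topdown=False):
        if root != folder and not os.listdir(root):
            os.rmdir(root)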

@@ -0,0 +1,85 @@
import os
import config
import cv2
from funcs import get_files # Assuming this is defined elsewhere
import imagehash
from PIL import Image

def generate_thumbnail_phash(filepath, hash_size=8): # Set hash_size to 8
    cap = cv2.VideoCapture(filepath)
    ret, frame = cap.read()
    cap.release()
    if not ret:
        print(f"Error reading frame from {filepath}")
        return None
    # Resize frame to a standard size
    standard_size = (320, 240)
    resized_frame = cv2.resize(frame, standard_size, interpolation=cv2.INTER_AREA)
    # Convert OpenCV image (BGR) to PIL Image (RGB)
    image_rgb = cv2.cvtColor(resized_frame, cv2.COLOR_BGR2RGB)
    pil_image = Image.fromarray(image_rgb)
    # Compute pHash
    phash = imagehash.phash(pil_image, hash_size=hash_size)
    return phash

def are_phashes_duplicates(phash1, phash2, threshold=5):
    # Compute Hamming distance between the pHashes
    try:
        distance = phash1 - phash2
    except TypeError as e:
        print(f"Error comparing pHashes: {e}")
        return False
    return distance <= threshold

def get_media_by_phash(phash, username, existing_medias, threshold=5):
    for media in existing_medias:
        existing_phash_str = media[1]
        existing_username = media[2]
        if existing_username != username:
            continue
        # Convert stored phash string to ImageHash object
        existing_phash = imagehash.hex_to_hash(existing_phash_str)
        if are_phashes_duplicates(phash, existing_phash, threshold=threshold):
            return media
    return None

# Database connection
db, cursor = config.gen_connection()

# Fetch existing videos with pHashes
cursor.execute("SELECT id, phash, username FROM media WHERE media_type = %s AND phash IS NOT NULL", ['video'])
existing_medias = cursor.fetchall()

users = os.listdir('videos')
for username in users:
    user_videos_path = os.path.join('videos', username)
    if not os.path.isdir(user_videos_path):
        continue
    videos = os.listdir(user_videos_path)
    for video in videos:
        print(f'Processing {video}...')
        filepath = os.path.join(user_videos_path, video)
        phash = generate_thumbnail_phash(filepath, hash_size=8) # Use hash_size=8
        if phash is None:
            continue
        phash_str = str(phash)
        duplicate_media = get_media_by_phash(phash, username, existing_medias, threshold=5)
        if duplicate_media:
            print(f'Duplicate url found: https://altpins.com/pin/{duplicate_media[0]}')
            print(f'Duplicate video path: {filepath}')
            newpath = filepath.replace('videos', 'duplicates')
            os.makedirs(os.path.dirname(newpath), exist_ok=True)
            os.rename(filepath, newpath)
            print(f'Moved {video} to duplicates/')
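For reference, a self-contained sketch of the pHash arithmetic these scripts rely on (file names are hypothetical; subtracting two imagehash.ImageHash objects yields their Hamming distance):

import imagehash
from PIL import Image

ha = imagehash.phash(Image.open('a.jpg'), hash_size=8)  # hypothetical files
hb = imagehash.phash(Image.open('b.jpg'), hash_size=8)
print(ha - hb)   # Hamming distance; 0 means identical hashes
print(str(ha))   # hex string, the format stored in the phash column
assert imagehash.hex_to_hash(str(ha)) == ha  # round-trips through hex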

@@ -0,0 +1,81 @@
import os
import config
import imagehash
from PIL import Image
from funcs import get_files # Assuming this is defined elsewhere

def generate_image_phash(filepath, hash_size=8):
    try:
        # Open the image using PIL
        pil_image = Image.open(filepath)
        # Compute pHash using the imagehash library
        phash = imagehash.phash(pil_image, hash_size=hash_size)
        return phash
    except Exception as e:
        print(f"Error processing image {filepath}: {e}")
        return None

def are_phashes_duplicates(phash1, phash2, threshold=5):
    try:
        # Compute the Hamming distance between the pHashes
        distance = phash1 - phash2
        return distance <= threshold
    except TypeError as e:
        print(f"Error comparing pHashes: {e}")
        return False

def get_media_by_phash(phash, username, existing_medias, threshold=6):
    for media in existing_medias:
        existing_phash_str = media[1]
        existing_username = media[2]
        if existing_username != username:
            continue # Only compare with the same user's media
        # Convert stored pHash string to ImageHash object
        existing_phash = imagehash.hex_to_hash(existing_phash_str)
        # Check if the current pHash is a duplicate
        if are_phashes_duplicates(phash, existing_phash, threshold=threshold):
            return media
    return None

# Database connection
db, cursor = config.gen_connection()

# Fetch existing media with pHashes (assuming media are images, adjust media_type if needed)
cursor.execute("SELECT id, phash, username FROM media WHERE media_type = %s AND phash IS NOT NULL", ['image'])
existing_medias = cursor.fetchall()

# Go through the 'sorted' folder where each subfolder is a username
users = os.listdir('sorted')
for username in users:
    user_images_path = os.path.join('sorted', username)
    if not os.path.isdir(user_images_path):
        continue # Skip non-directory files
    # Get all images for the current user
    images = get_files(user_images_path) # Assuming this gets all image files
    for filepath in images:
        image_filename = os.path.basename(filepath)
        print(f'Processing {image_filename}...')
        # Generate pHash for the image
        phash = generate_image_phash(filepath, hash_size=8)
        if phash is None:
            continue # Skip this image if there's an issue
        phash_str = str(phash)
        # Check if the image is a duplicate of any in the database
        duplicate_media = get_media_by_phash(phash, username, existing_medias, threshold=5)
        if duplicate_media:
            print(f'Duplicate found: https://altpins.com/pin/{duplicate_media[0]}')
            print(f'Duplicate image path: {filepath}')
            newpath = filepath.replace('sorted', 'duplicates')
            os.makedirs(os.path.dirname(newpath), exist_ok=True)
            os.rename(filepath, newpath)
            print(f'Moved {image_filename} to duplicates/')

@@ -0,0 +1,76 @@
import cv2, os
import imagehash
from PIL import Image
from funcs import get_files

def is_static_video_phash_optimized(video_path, frame_sample_rate=30, hash_size=16, hamming_threshold=1):
    """
    Determines if a video is static using perceptual hashing (pHash) by comparing sampled frames.

    Parameters:
    - video_path: Path to the video file.
    - frame_sample_rate: Number of frames to skip between comparisons.
    - hash_size: Size of the hash; larger values increase sensitivity.
    - hamming_threshold: Maximum Hamming distance between sampled frames for the video to still count as static.

    Returns:
    - True if the video is static, False otherwise.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print("Error: Cannot open video file.")
        return False
    ret, frame = cap.read()
    if not ret:
        print("Error: Cannot read video frames.")
        cap.release()
        return False
    # Convert first frame to PIL Image and compute hash
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    pil_image = Image.fromarray(frame_rgb)
    previous_hash = imagehash.phash(pil_image, hash_size=hash_size)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    is_static = True
    current_frame_number = 1
    while True:
        # Skip frames according to the sample rate
        cap.set(cv2.CAP_PROP_POS_FRAMES, current_frame_number)
        ret, frame = cap.read()
        if not ret:
            break
        # Convert frame to PIL Image and compute hash
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        pil_image = Image.fromarray(frame_rgb)
        current_hash = imagehash.phash(pil_image, hash_size=hash_size)
        # Compute Hamming distance between hashes
        hamming_distance = previous_hash - current_hash
        if hamming_distance > hamming_threshold:
            is_static = False
            break
        # Update the previous hash
        previous_hash = current_hash
        # Move to the next frame according to the sample rate
        current_frame_number += frame_sample_rate
    cap.release()
    return is_static

directory = 'videos'
files = get_files(directory)
for video_file in files:
    if video_file.endswith('.mp4'):
        if is_static_video_phash_optimized(video_file):
            print("The video is static: " + video_file)

@@ -0,0 +1,40 @@
import config, os, json
from PIL import Image
import imagehash

def find_file(filename, directory):
    filename = filename.lower().split('.')[0]
    for root, dirs, files in os.walk(directory):
        for file in files:
            if filename in file:
                return os.path.join(root, file)
    return None

def generate_phash(image_path):
    image = Image.open(image_path)
    return str(imagehash.phash(image))

count = 0
cacheDir = 'sorted'
dataPath = 'pins.json'

os.makedirs(cacheDir, exist_ok=True)
medias = json.load(open(dataPath))
for item in medias:
    count += 1
    filepath = item['filepath']
    if os.path.exists(filepath):
        continue
    newfilepath = find_file(os.path.basename(filepath), cacheDir)
    if newfilepath:
        print(f"Found file {newfilepath} for {filepath}")
        item['filepath'] = newfilepath

with open(dataPath, 'w') as f:
    json.dump(medias, f)

@@ -1,22 +1,124 @@
from moviepy.editor import VideoFileClip
import os, cv2, hashlib
import os, cv2, hashlib, requests
from PIL import Image
import numpy as np
import imagehash

headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36"}
proxies = {"http": "http://yehyuxsl-rotate:4tl5bvrwkz5e@p.webshare.io:80/", "https": "http://yehyuxsl-rotate:4tl5bvrwkz5e@p.webshare.io:80/"}

def get_video_dimensions(video_path):
    cap = cv2.VideoCapture(video_path)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    cap.release()
    return width, height

def generate_phash(image_path):
    try:
        image = Image.open(image_path)
        return str(imagehash.phash(image))
    except:
        return False

def cleanEmptyFolders(path):
    for root, dirs, fs in os.walk(path):
        for d in dirs:
            cleanEmptyFolders(os.path.join(root, d))
        if not os.listdir(root):
            os.rmdir(root)

def get_files(directory):
    files = []
    for root, dirs, filenames in os.walk(directory):
        for filename in filenames:
            files.append(os.path.join(root, filename))
    return files

import cv2
import numpy as np

def compare_images(image_path1, image_path2):
    # Load the images in grayscale
    img1 = cv2.imread(image_path1, cv2.IMREAD_GRAYSCALE)
    img2 = cv2.imread(image_path2, cv2.IMREAD_GRAYSCALE)
    if img1 is None or img2 is None:
        print("Error loading images!")
        return False # Or you could raise an exception
    # Initialize SIFT detector
    sift = cv2.SIFT_create()
    # Find keypoints and descriptors with SIFT
    kp1, des1 = sift.detectAndCompute(img1, None)
    kp2, des2 = sift.detectAndCompute(img2, None)
    # Check if descriptors are None
    if des1 is None or des2 is None:
        return False
    # FLANN parameters
    index_params = dict(algorithm=1, trees=5)
    search_params = dict(checks=50)
    # FLANN based matcher
    flann = cv2.FlannBasedMatcher(index_params, search_params)
    # Matching descriptor vectors using KNN algorithm
    matches = flann.knnMatch(des1, des2, k=2)
    # Apply ratio test
    good = []
    for m, n in matches:
        if m.distance < 0.6 * n.distance: # More stringent ratio
            good.append(m)
    # Minimum number of matches
    MIN_MATCH_COUNT = 15 # Adjust this threshold as needed
    if len(good) > MIN_MATCH_COUNT:
        # Extract location of good matches
        src_pts = np.float32([kp1[m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
        dst_pts = np.float32([kp2[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
        # Find homography
        M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
        matchesMask = mask.ravel().tolist()
        if np.sum(matchesMask) > 10: # Check if enough points agree on homography
            return True
        else:
            return False
    else:
        return False

def download_file(url, filePath):
    try:
        response = requests.get(url, stream=True, headers=headers)
        response.raise_for_status()
        directory = os.path.dirname(filePath)
        if not os.path.exists(directory):
            os.makedirs(directory)
        with open(filePath, "wb") as out_file:
            for chunk in response.iter_content(chunk_size=8192):
                out_file.write(chunk)
        print(f"Downloaded {filePath}")
    except Exception as e:
        print(f"Failed to download {url}. Error: {e}")

def determine_post_type(filepath, mediatype):
    if mediatype == 'image':
        with Image.open(filepath) as img:
            width, height = img.size
        try:
            with Image.open(filepath) as img:
                width, height = img.size
        except:
            print(f"Error opening image {filepath}")
            return False
    elif mediatype == 'video':
        width, height = get_video_dimensions(filepath)
    else:
        return False
    if 0 in (width, height):
        return False
    aspect_ratio = width / height
    if aspect_ratio > 0.5 and aspect_ratio < 0.6:
        return 'stories'
@@ -43,6 +145,24 @@ def get_video_duration(file_path):
    except Exception as e:
        print(f"Error getting duration for {file_path}: {e}")
        return 0

def get_video_dimensions(video_path):
    cap = cv2.VideoCapture(video_path)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    cap.release()
    return width, height

def get_video_data(video_path):
    data = {'duration': 0, 'width': 0, 'height': 0}
    try:
        with VideoFileClip(video_path) as video:
            data['duration'] = video.duration
            data['width'] = video.size[0]
            data['height'] = video.size[1]
    except Exception as e:
        print(f"Error getting video data for {video_path}: {e}")
    return data

def calculate_file_hash(file_path, hash_func='sha256'):
    h = hashlib.new(hash_func)
@@ -51,4 +171,4 @@ def calculate_file_hash(file_path, hash_func='sha256'):
        while chunk:
            h.update(chunk)
            chunk = file.read(8192)
    return h.hexdigest()
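A short usage note for compare_images: cv2.SIFT_create is only in mainline OpenCV from 4.4 onward (earlier builds shipped SIFT in opencv-contrib). A hedged sketch with hypothetical file names:

from funcs import compare_images

if compare_images('frame_a.jpg', 'frame_b.jpg'):
    # >= 15 ratio-test matches agreed on a RANSAC homography
    print('Likely the same scene')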

@@ -0,0 +1,141 @@
import requests, hashlib, os

access_key = "471cd2e1-a943-4c61-ae69ddc6c2c2-c36d-4737"
video_library_id = 125094

def create_video(title):
    url = f"https://video.bunnycdn.com/library/{video_library_id}/videos"
    payload = f"{{\"title\":\"{title}\"}}"
    headers = {
        "accept": "application/json",
        "content-type": "application/*+json",
        "AccessKey": access_key
    }
    response = requests.post(url, data=payload, headers=headers)
    return response

def generate_signature(library_id, api_key, expiration_time, video_id):
    signature = hashlib.sha256((library_id + api_key + str(expiration_time) + video_id).encode()).hexdigest()
    return signature

def upload_video_process(file_path, video_id):
    url = f"https://video.bunnycdn.com/library/{video_library_id}/videos/{video_id}"
    headers = {"accept": "application/json", "AccessKey": access_key}
    with open(file_path, "rb") as file:
        file_data = file.read()
    response = requests.put(url, headers=headers, data=file_data)
    return response.status_code

def upload_video(file_path, title=None):
    video_item = create_video(title)
    if video_item.status_code != 200:
        return False
    video_id = video_item.json()['guid']
    upload_video_process(file_path, video_id)
    return {
        "embed_link": f"https://vz-58ca89f1-986.b-cdn.net/{video_id}/playlist.m3u8",
        "animated_thumbnail": f"https://vz-58ca89f1-986.b-cdn.net/{video_id}/preview.webp",
        "default_thumbnail": f"https://vz-58ca89f1-986.b-cdn.net/{video_id}/thumbnail.jpg",
    }

def upload_video_recurbate(videoInfo):
    title = f"{videoInfo['username']} {videoInfo['platform']}"
    video_item = create_video(title)
    if video_item.status_code != 200:
        return False
    video_id = video_item.json()['guid']
    upload_video_process(videoInfo['filename'], video_id)
    videoInfo["embed_link"] = f"https://vz-58ca89f1-986.b-cdn.net/{video_id}/playlist.m3u8"
    videoInfo["animated_thumbnail"] = f"https://vz-58ca89f1-986.b-cdn.net/{video_id}/preview.webp"
    videoInfo["default_thumbnail"] = f"https://vz-58ca89f1-986.b-cdn.net/{video_id}/thumbnail.jpg"
    return True

def delete_video(video_id):
    video_id = video_id.replace('https://vz-58ca89f1-986.b-cdn.net/', '').replace('/playlist.m3u8', '')
    url = f"https://video.bunnycdn.com/library/{video_library_id}/videos/{video_id}"
    headers = {"accept": "application/json", "AccessKey": access_key}
    response = requests.delete(url, headers=headers)
    return response.status_code

def list_videos():
    url = f"https://video.bunnycdn.com/library/{video_library_id}/videos"
    params = {
        "page": 1,
        "itemsPerPage": 1000,
        "orderBy": "date"
    }
    headers = {"accept": "application/json", "AccessKey": access_key}
    videos = []
    while True:
        response = requests.get(url, headers=headers, params=params)
        data = response.json()
        videos += data['items']
        if len(videos) == data['totalItems']:
            return videos
        params['page'] += 1

def get_heatmap(video_id):
    url = "https://video.bunnycdn.com/library/libraryId/videos/videoId/heatmap"
    url = url.replace('libraryId', str(video_library_id)).replace('videoId', str(video_id))
    headers = {"accept": "application/json", "AccessKey": access_key}
    response = requests.get(url, headers=headers).json()
    return response

def get_video(video_id):
    url = "https://video.bunnycdn.com/library/libraryId/videos/videoId"
    url = url.replace('libraryId', str(video_library_id)).replace('videoId', str(video_id))
    headers = {"accept": "application/json", "AccessKey": access_key}
    response = requests.get(url, headers=headers).json()
    return response

def download_video(video_id, directory):
    download_url = f'https://storage.bunnycdn.com/vz-dd4ea005-7c2/{video_id}/'
    params = {'download': '', 'accessKey': '5b1766f7-c1ab-463f-b05cce6f1f2e-1190-4c09'}
    video_response = requests.get(download_url, params=params)
    if video_response.status_code == 200:
        content_disposition = video_response.headers.get('Content-Disposition')
        if content_disposition:
            filename = content_disposition.split('filename=')[1].strip('"')
            ext = filename.split('.')[-1]
            filename = f'{video_id}.{ext}'
            filePath = os.path.join(directory, filename)
            with open(filePath, 'wb') as video_file:
                video_file.write(video_response.content)
            print(f'Video downloaded successfully as {filePath}')
    else:
        print('Failed to download video', video_response.status_code, video_response.text)
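A hedged usage sketch for this module (the local file and GUID are hypothetical; the concatenation in generate_signature matches Bunny Stream's documented presigned TUS scheme of sha256(library_id + api_key + expiration + video_id), but verify against current docs):

import time
import bunny

result = bunny.upload_video('clip.mp4', title='demo clip')  # hypothetical local file
if result:
    print(result['embed_link'])

expires = int(time.time()) + 3600
sig = bunny.generate_signature(str(bunny.video_library_id), bunny.access_key, expires, 'hypothetical-guid')
print(sig)  # would accompany a TUS upload request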

@@ -0,0 +1,23 @@
import json

with open('bunny_data/missing_videos.json', 'r') as f:
    missing_videos = json.load(f)

with open('bunny_data/allVideos.json', 'r') as f:
    all_videos = json.load(f)

all_videos_guids = {video['guid'] for video in all_videos}
for video in missing_videos:
    if video['guid'] in all_videos_guids:
        video['imported'] = True

combined_data = {
    "missing_videos": missing_videos,
    "all_videos": all_videos
}

with open('bunny_data/combined_videos.json', 'w') as f:
    json.dump(combined_data, f, indent=4)

print("Combined data has been written to bunny_data/combined_videos.json")

@@ -0,0 +1,16 @@
import os, json

pins = open('db_pins.json', 'r')
pins = json.load(pins)

importedPins = open('db_pins_imported.json', 'r')
importedPins = json.load(importedPins)

allPins = pins + importedPins
print(len(allPins))

finalPins = open('allPins.json', 'r')
finalPins = json.load(finalPins)
print(len(finalPins))

@@ -0,0 +1,110 @@
from BunnyCDN.Storage import Storage
import os, uuid, config, funcs, cv2
from datetime import datetime
from PIL import Image

def dump_facebook(folder_path):
    for filename in os.listdir(folder_path):
        if os.path.isdir(os.path.join(folder_path, filename)):
            continue
        username = filename.split("'")[0]
        filepath = os.path.join(folder_path, filename)
        mediatype = funcs.get_media_type(filename)
        post_type = funcs.determine_post_type(filepath, mediatype)
        upload_file(username=username, media_type=mediatype, filepath=filepath, post_type=post_type)
    for folder in os.listdir(folder_path):
        if os.path.isdir(os.path.join(folder_path, folder)):
            username = folder
            for filename in os.listdir(os.path.join(folder_path, folder)):
                filepath = os.path.join(folder_path, folder, filename)
                mediatype = funcs.get_media_type(filename)
                post_type = funcs.determine_post_type(filepath, mediatype)
                upload_file(username=username, media_type=mediatype, filepath=filepath, post_type=post_type)

def upload_file(filepath, username, media_type='image', post_type='story', timestamp=None, user_id=None):
    filename = os.path.basename(filepath)
    file_extension = os.path.splitext(filename)[1].lower()
    file_hash = funcs.calculate_file_hash(filepath)
    if file_hash in existing_files:
        print('Duplicate file detected. Removing...')
        os.remove(filepath)
        return False
    duration = funcs.get_video_duration(filepath) if media_type == 'video' else 0
    if "FB_IMG" in filename:
        media_id = filename.split("_")[2].split(".")[0]
    else:
        media_id = uuid.uuid4().hex
    dirtype = funcs.determine_post_type(filepath, media_type)
    server_path = os.path.join('media', dirtype, username, f'{media_id}{file_extension}')
    obj_storage.PutFile(filepath, server_path)
    file_url = f"https://storysave.b-cdn.net/{server_path}"
    if media_type == 'image':
        with Image.open(filepath) as img:
            width, height = img.size
    else:
        width, height = funcs.get_video_dimensions(filepath)
    thumbnail_url = None
    if media_type == 'video':
        thumbPath = f'temp/{media_id}.jpg'
        cap = cv2.VideoCapture(filepath)
        ret, frame = cap.read()
        cv2.imwrite(thumbPath, frame)
        cap.release()
        obj_storage.PutFile(thumbPath, f'thumbnails/{media_id}.jpg')
        thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{media_id}.jpg"
    post_date = datetime.fromtimestamp(int(timestamp)) if timestamp else datetime.now()
    if post_type == 'stories':
        post_type = 'story'
    else:
        post_type = 'post'
    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, date, user_id, platform, hash, filename, duration, thumbnail) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    values = (username, media_type, file_url, width, height, post_type, post_date, user_id, 'facebook', file_hash, filename, duration, thumbnail_url)
    try:
        newCursor.execute(query, values)
        newDB.commit()
        print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')
    except Exception as e:
        print(f"Database error: {e}")
        return False
    try:
        if newCursor.rowcount > 0:
            os.remove(filepath)
    except Exception as e:
        print(f"Failed to remove local file {filepath}: {e}")
    return True

if __name__ == '__main__':
    print('Starting processing...')
    newDB, newCursor = config.gen_connection()
    obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
    newCursor.execute("SELECT hash FROM media WHERE platform='facebook' AND hash IS NOT NULL")
    existing_files = [image[0] for image in newCursor.fetchall()]
    dump_facebook('facebook/')
    print("Processing completed.")

@@ -0,0 +1,67 @@
from BunnyCDN.Storage import Storage
import os, uuid, config, funcs
from datetime import datetime
from PIL import Image

def dump_facebook(folder_path):
    for folder in os.listdir(folder_path):
        if os.path.isdir(os.path.join(folder_path, folder)):
            username = folder
            for filename in os.listdir(os.path.join(folder_path, folder)):
                filepath = os.path.join(folder_path, folder, filename)
                upload_file(username=username, filepath=filepath)

def upload_file(filepath, username):
    filename = os.path.basename(filepath)
    media_id = filename.split('.')[0]
    file_extension = os.path.splitext(filename)[1].lower()
    media_type = funcs.get_media_type(filename)
    file_hash = funcs.calculate_file_hash(filepath)
    # Check for duplicates before uploading so duplicates never reach the CDN
    if file_hash in existing_files:
        print('Duplicate file detected. Removing...')
        os.remove(filepath)
        return False
    duration = funcs.get_video_duration(filepath) if media_type == 'video' else 0
    width, height = funcs.get_video_dimensions(filepath) if media_type == 'video' else Image.open(filepath).size
    dirtype = funcs.determine_post_type(filepath, media_type)
    server_path = os.path.join('media', dirtype, username, f'{media_id}{file_extension}')
    obj_storage.PutFile(filepath, server_path)
    file_url = f"https://storysave.b-cdn.net/{server_path}"
    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, platform, hash, filename, duration, media_id) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    values = (username, media_type, file_url, width, height, 'tiktok', file_hash, filename, duration, media_id)
    newCursor.execute(query, values)
    newDB.commit()
    print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')
    if newCursor.rowcount > 0:
        os.remove(filepath)
    return True

if __name__ == '__main__':
    print('Starting processing...')
    newDB, newCursor = config.gen_connection()
    obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
    newCursor.execute("SELECT hash FROM media WHERE platform='tiktok' AND hash IS NOT NULL")
    existing_files = [image[0] for image in newCursor.fetchall()]
    dump_facebook('tiktok/')
    print("Processing completed.")

@@ -0,0 +1,38 @@
import os, json

def getMedia(filename, list):
    for item in list:
        if filename.split('.')[0] in item['filepath']:
            return item
    return None

data = json.loads(open('oldpins.json').read())
files = os.listdir('STORAGE')
count = 0

for file in files:
    filepath = f'STORAGE/{file}'
    if os.path.isdir(filepath):
        continue
    media = getMedia(file, data)
    if not media:
        continue
    username = media['title']
    filetype = media['type']
    filetype = 'jpg' if filetype == 'image' else 'mp4'
    filename = media['filepath'].split('/')[-1] + '.' + filetype
    output = os.path.join('STORAGE', username, filename)
    os.makedirs(os.path.dirname(output), exist_ok=True)
    if os.path.exists(output):
        os.remove(output)
    output = os.path.join('STORAGE', username, file)
    os.rename(filepath, output)
    count += 1
    print(f'File: {file}')

print(f'Total: {count}')

@@ -0,0 +1,45 @@
import funcs, json, os, config

db, newCursor = config.gen_connection()
newCursor.execute("SELECT hash FROM media")
hashes = [hash[0] for hash in newCursor.fetchall()]

file = 'bunnyVideos.json'
data = json.loads(open(file).read())

for media in data:
    if media['imported'] == True:
        if not os.path.exists(media['filepath']):
            print(f'File {media["filepath"]} does not exist. Skipping...')
            continue

countImported = 0
countSkipped = 0
for media in data:
    filepath = os.path.join('STREAM_VIDEOS_IMPORTED', media['guid'] + '.mp4')
    if media['imported'] == True:
        countImported += 1
        print('File already imported. Skipping...')
        continue
    countSkipped += 1
    if not os.path.exists(filepath):
        print(f'File {filepath} does not exist. Skipping...')
        continue
    hash = funcs.calculate_file_hash(filepath)
    if '67caa15e-390c-4223-b7b9-4d7842f3b443' in filepath:
        print(f'Skipping known-bad file {filepath}...')
        continue
    if hash in hashes:
        print('Duplicate file detected. Removing...')

print(f'Imported: {countImported}')
print(f'Skipped: {countSkipped}')

@@ -0,0 +1,17 @@
from funcs import get_files, generate_phash
import os, config

db, cursor = config.gen_connection()
cursor.execute("SELECT phash FROM media WHERE phash IS NOT NULL;")
phashes = [x[0] for x in cursor.fetchall()]

files = get_files('images')
for item in files:
    phash = generate_phash(item)
    if phash in phashes:
        print(item)
        newpath = item.replace('images', 'duplicates')
        newdir = os.path.dirname(newpath)
        os.makedirs(newdir, exist_ok=True)
        os.rename(item, newpath)
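Note that this script matches pHash strings for exact equality, so only bit-identical hashes are caught; the Hamming-distance scripts above are more forgiving. A tolerant variant might look like this (a sketch, not part of the commit):

import imagehash

def is_near_duplicate(phash_hex, known_hex_hashes, threshold=5):
    # a Hamming distance <= threshold counts as a near-duplicate
    h = imagehash.hex_to_hash(phash_hex)
    return any(h - imagehash.hex_to_hash(k) <= threshold for k in known_hex_hashes)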

@@ -0,0 +1,56 @@
from BunnyCDN.Storage import Storage
import os, config, requests
from moviepy.editor import VideoFileClip

def get_media_type(filename):
    image_extensions = {".jpg", ".jpeg", ".png", ".gif", ".webp"}
    video_extensions = {".mp4", ".mov"}
    extension = os.path.splitext(filename.lower())[1]
    if extension in image_extensions:
        return 'image'
    elif extension in video_extensions:
        return 'video'
    else:
        return 'unknown'

def determine_post_type(media_type):
    # Assuming the post type is directly based on media type.
    return media_type

def get_video_dimensions(filepath):
    with VideoFileClip(filepath) as clip:
        width, height = clip.size
    return width, height

def download_file(url):
    local_filename = url.split('/')[-1]
    # Note: Stream=True to avoid loading the whole file into memory
    with requests.get(url, stream=True) as r:
        r.raise_for_status()
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
    return local_filename

if __name__ == '__main__':
    newDB, newCursor = config.gen_connection()
    obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
    posts = open('fucked', 'r')
    for item in posts:
        username, url = item.strip().split('~')
        media_id = url.split('/')[-1].split('.')[0]
        media_type = get_media_type(url)
        query = "INSERT IGNORE INTO media (username, media_type, platform, media_url) VALUES (%s, %s, %s, %s)"
        values = (username, media_type, 'facebook', url)
        try:
            newCursor.execute(query, values)
            newDB.commit()
            print(f'[{newCursor.rowcount}] records updated. {url}')
        except Exception as e:
            print(f"Database error: {e}")
    posts.close()

@@ -0,0 +1,94 @@
from BunnyCDN.Storage import Storage
from moviepy.editor import VideoFileClip
import config
import hashlib
import requests
import os

def file_hash_from_url(url, hash_algo='sha256'):
    h = hashlib.new(hash_algo)
    response = requests.get(url, stream=True)
    if response.status_code == 200:
        for chunk in response.iter_content(8192):
            h.update(chunk)
        return h.hexdigest()
    else:
        raise Exception(f"Failed to download file: Status code {response.status_code}")

def get_video_duration(file_path):
    """
    Returns the duration of the video file in seconds.

    :param file_path: Path to the video file
    :return: Duration in seconds
    """
    try:
        with VideoFileClip(file_path) as video:
            return video.duration
    except:
        return 0

def file_hash(filename, hash_algo='sha256'):
    """
    Compute the hash of a file.

    :param filename: Path to the file.
    :param hash_algo: Hashing algorithm to use (e.g., 'sha256', 'md5').
    :return: Hexadecimal hash string.
    """
    # Create a hash object
    h = hashlib.new(hash_algo)
    # Open the file in binary mode and read in chunks
    with open(filename, 'rb') as file:
        while chunk := file.read(8192):
            h.update(chunk)
    # Return the hexadecimal digest of the hash
    return h.hexdigest()

# the hash of the images are different due to optimizer
#obj_storage = Storage('577cb82d-8176-4ccf-935ce0a574bf-fe4c-4012', 'altpins')
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')

db, cursor = config.gen_connection()
cursor.execute("SELECT id, media_id, media_url FROM media WHERE duration = 0 AND media_type = 'video' AND status != 'deleted';")
results = cursor.fetchall()

count = 0
print(f"Found {len(results)} files to process.")
cacheDir = 'cache'

for result in results:
    count += 1
    videoID = result[0]
    mediaID = result[1]
    mediaURL = result[2]
    extension = mediaURL.split('.')[-1]
    serverPath = result[2].replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
    localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))
    if os.path.exists(localFilePath):
        print(f"File already exists: {localFilePath}")
    else:
        obj_storage.DownloadFile(storage_path=serverPath, download_path=cacheDir)
    duration = get_video_duration(localFilePath)
    if duration == 0:
        print(f"Failed to get duration for {localFilePath}")
        continue
    if duration < 1:
        duration = 1
    cursor.execute("UPDATE media SET duration = %s WHERE id = %s;", (duration, result[0]))
    db.commit()
    print(f"[{count}/{len(results)}] {result[1]}: {duration}, {cursor.rowcount}")

@@ -0,0 +1,47 @@
from BunnyCDN.Storage import Storage
import config
import hashlib
import os

def file_hash(filename, hash_algo='sha256'):
    """
    Compute the hash of a file.

    :param filename: Path to the file.
    :param hash_algo: Hashing algorithm to use (e.g., 'sha256', 'md5').
    :return: Hexadecimal hash string.
    """
    h = hashlib.new(hash_algo)
    with open(filename, 'rb') as file:
        while chunk := file.read(8192):
            h.update(chunk)
    return h.hexdigest()

#obj_storage = Storage('577cb82d-8176-4ccf-935ce0a574bf-fe4c-4012', 'altpins')
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')

db, cursor = config.gen_connection()
cursor.execute("SELECT id, media_id, media_url FROM media WHERE hash IS NULL;")
results = cursor.fetchall()

count = 0
print(f"Found {len(results)} files to process.")

for result in results:
    count += 1
    serverPath = result[2].replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
    localFilePath = os.path.join(os.getcwd(), 'temp', os.path.basename(serverPath))
    if not os.path.exists(localFilePath):
        obj_storage.DownloadFile(storage_path=serverPath, download_path=os.path.join(os.getcwd(), 'temp'))
    filehash = file_hash(localFilePath)
    cursor.execute("UPDATE media SET hash = %s WHERE id = %s;", (filehash, result[0]))
    db.commit()
    print(f"[{count}/{len(results)}] {result[1]}: {filehash}, {cursor.rowcount}")

@@ -0,0 +1,41 @@
import config, os
from PIL import Image
import imagehash

def generate_phash(image_path):
    image = Image.open(image_path)
    return str(imagehash.phash(image))

db, cursor = config.gen_connection()
cursor.execute("SELECT id, media_id, media_url FROM media WHERE media_type = 'image' AND phash IS NULL;")
results = cursor.fetchall()

count = 0
cacheDir = 'cache'
os.makedirs(cacheDir, exist_ok=True)
print(f"Found {len(results)} files to process.")

for result in results:
    count += 1
    itemID = result[0]
    mediaID = result[1]
    mediaURL = result[2]
    serverPath = mediaURL.replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
    localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))
    if not os.path.exists(localFilePath):
        print(f"File {localFilePath} does not exist, skipping.")
        continue
    try:
        phash = generate_phash(localFilePath)
        cursor.execute("UPDATE media SET phash = %s WHERE id = %s", (phash, itemID))
        db.commit()
        print(f"Processed {count}/{len(results)}: {mediaID} with pHash {phash}")
    except Exception as e:
        print(f"Error processing {mediaID}: {e}")

@@ -0,0 +1,47 @@
from BunnyCDN.Storage import Storage
import config, os, funcs
from PIL import Image

# the hash of the images are different due to optimizer
#obj_storage = Storage('577cb82d-8176-4ccf-935ce0a574bf-fe4c-4012', 'altpins')
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')

db, cursor = config.gen_connection()
cursor.execute("SELECT id, media_id, media_url FROM media WHERE width = 0;")
results = cursor.fetchall()

count = 0
print(f"Found {len(results)} files to process.")
cacheDir = 'cache'

for result in results:
    count += 1
    videoID = result[0]
    mediaID = result[1]
    mediaURL = result[2]
    extension = mediaURL.split('.')[-1]
    serverPath = result[2].replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
    localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))
    if os.path.exists(localFilePath):
        print(f"File already exists: {localFilePath}")
    else:
        obj_storage.DownloadFile(storage_path=serverPath, download_path=cacheDir)
    mediaType = funcs.get_media_type(localFilePath)
    if mediaType == 'image':
        with Image.open(localFilePath) as img:
            width, height = img.size
    elif mediaType == 'video':
        width, height = funcs.get_video_dimensions(localFilePath)
    cursor.execute("UPDATE media SET width = %s, height=%s WHERE id = %s;", (width, height, videoID))
    db.commit()
    print(f"[{count}/{len(results)}] width: {width}, height: {height} {cursor.rowcount}")

@@ -0,0 +1,63 @@
from BunnyCDN.Storage import Storage
import config, os, cv2
from concurrent.futures import ThreadPoolExecutor

# this script will take a screenshot of the first frame of each video and upload it as a thumbnail to BunnyCDN
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
db, cursor = config.gen_connection()

cursor.execute("SELECT id, media_id, media_url FROM media WHERE media_type = 'video' AND thumbnail IS NULL and status = 'public';")
results = cursor.fetchall()

count = 0
print(f"Found {len(results)} files to process.")
cacheDir = 'cache'

def DownloadFile(serverPath, cacheDir):
    localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))
    if os.path.exists(localFilePath):
        print(f"File already exists: {localFilePath}")
        return localFilePath
    obj_storage.DownloadFile(storage_path=serverPath, download_path=cacheDir)
    print(f"Downloaded {serverPath} to {localFilePath}")
    return localFilePath

def ImportMedias():
    with ThreadPoolExecutor(max_workers=10) as executor:
        for video in results:
            serverPath = video[2].replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
            executor.submit(DownloadFile, serverPath, cacheDir)

for result in results:
    count += 1
    itemID = result[0]
    mediaID = result[1]
    mediaURL = result[2]
    extension = mediaURL.split('.')[-1]
    serverPath = result[2].replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
    localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))
    filePath = DownloadFile(serverPath, cacheDir)
    cap = cv2.VideoCapture(localFilePath)
    ret, frame = cap.read()
    cv2.imwrite('thumbnail.jpg', frame)
    cap.release()
    thumbnailURL = f"https://storysave.b-cdn.net/thumbnails/{itemID}.jpg"
    obj_storage.PutFile('thumbnail.jpg', f'thumbnails/{itemID}.jpg')
    cursor.execute("UPDATE media SET thumbnail = %s WHERE id = %s;", (thumbnailURL, itemID))
    db.commit()
    print(f"[{count}/{len(results)}] thumbnail: {thumbnailURL} {cursor.rowcount}")

@@ -0,0 +1,35 @@
from concurrent.futures import ThreadPoolExecutor
from BunnyCDN.Storage import Storage
import config, os

def DownloadFile(serverPath, cacheDir):
    localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))
    if os.path.exists(localFilePath):
        print(f"File already exists: {localFilePath}")
        return localFilePath
    obj_storage.DownloadFile(storage_path=serverPath, download_path=cacheDir)
    print(f"Downloaded {serverPath} to {localFilePath}")
    return localFilePath

def ImportMedias(results):
    with ThreadPoolExecutor(max_workers=10) as executor:
        for video in results:
            serverPath = video[2].replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
            executor.submit(DownloadFile, serverPath, cacheDir)

obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
db, cursor = config.gen_connection()

cursor.execute("SELECT id, media_id, media_url FROM media WHERE media_type = 'image' AND phash IS NULL;")
results = cursor.fetchall()

count = 0
cacheDir = 'cache'
print(f"Found {len(results)} files to process.")
ImportMedias(results)

@@ -0,0 +1,24 @@
import os, json
from funcs import generate_phash

count = 0
cacheDir = 'cache'
dataPath = 'pins.json'

os.makedirs(cacheDir, exist_ok=True)
medias = json.load(open(dataPath))
for item in medias:
    count += 1
    if item['type'] == 'image':
        filepath = item['filepath']
        if not os.path.exists(filepath):
            print(f"File {filepath} does not exist, skipping.")
            continue
        phash = generate_phash(filepath)
        item['phash'] = phash
        print(f"Processed {count}/{len(medias)}: with pHash {phash}")

with open(dataPath, 'w') as f:
    json.dump(medias, f)

@@ -0,0 +1,33 @@
import config
from funcs import generate_phash

count = 0
storage = config.get_storage()
db, cursor = config.gen_connection()

cursor.execute("SELECT id, media_url FROM media WHERE media_type = %s AND phash IS NULL;", ['image'])
medias = cursor.fetchall()

for item in medias:
    count += 1
    itemID = item[0]
    media_url = item[1]
    server_path = media_url.replace('https://storysave.b-cdn.net/', '').replace('\\', '/')
    filepath = storage.DownloadFile(server_path, 'temp')
    if not filepath:
        print(f"Error downloading {server_path}")
        continue
    phash = generate_phash(filepath)
    if not phash:
        print(f"Error generating pHash for {filepath}")
        continue
    cursor.execute("UPDATE media SET phash = %s WHERE id = %s", [phash, itemID])
    db.commit()
    print(f"[{cursor.rowcount}] Processed {count}/{len(medias)}: with pHash {phash}")

@@ -0,0 +1,33 @@
import config
from funcs import generate_phash

count = 0
storage = config.get_storage()
db, cursor = config.gen_connection()

cursor.execute("SELECT id, thumbnail FROM media WHERE media_type = %s AND phash IS NULL AND thumbnail IS NOT NULL;", ['video'])
medias = cursor.fetchall()

for item in medias:
    count += 1
    itemID = item[0]
    media_url = item[1]
    server_path = media_url.replace('https://storysave.b-cdn.net/', '').replace('\\', '/')
    filepath = storage.DownloadFile(server_path, 'temp')
    if not filepath:
        print(f"Error downloading {server_path}")
        continue
    phash = generate_phash(filepath)
    if not phash:
        print(f"Error generating pHash for {filepath}")
        continue
    cursor.execute("UPDATE media SET phash = %s WHERE id = %s", [phash, itemID])
    db.commit()
    print(f"[{cursor.rowcount}] Processed {count}/{len(medias)}: with pHash {phash}")

@@ -0,0 +1,24 @@
import config

altpins_db, altpins_cursor = config.altpins_gen_connection()
db, cursor = config.gen_connection()

altpins_cursor.execute("SELECT id, title, hash, url FROM pins WHERE hash IS NOT NULL;")
altpins_results = { (row[1], row[2]): (row[0], row[3]) for row in altpins_cursor.fetchall() }

cursor.execute("SELECT id, username, hash, media_url FROM media WHERE hash IS NOT NULL;")
media_results = { (row[1], row[2]): (row[0], row[3]) for row in cursor.fetchall() }

common_items = set(altpins_results.keys()) & set(media_results.keys())
for title, hash_value in common_items:
    altpins_id, altpins_url = altpins_results[(title, hash_value)]
    media_id, media_url = media_results[(title, hash_value)]
    print(f"Found a match for hash {hash_value} with title {title}")
    print(f"Altpins URL: {altpins_url}")
    print(f"Media URL: {media_url}")
    altpins_cursor.execute("DELETE FROM pins WHERE id = %s;", [altpins_id])
    altpins_db.commit()
    print(f"Deleted pin {altpins_id}. {altpins_cursor.rowcount} rows affected")

@@ -0,0 +1,33 @@
import bunny, json

medias = json.load(open('videos.json', 'r'))
videoIDS = [media['url'].split('/')[-1] for media in medias]

videos = bunny.list_videos()
with open('allVideos.json', 'w') as f:
    json.dump(videos, f, indent=4)

missingVideos = []
for video in videos:
    if video['guid'] in videoIDS:
        continue
    missingVideos.append(video)

datas = []
for video in missingVideos:
    data = {
        'guid': video['guid'],
        'title': video['title'],
        'length': video['length'],
        'width': video['width'],
        'height': video['height'],
        'availableResolutions': video['availableResolutions'],
        'storageSize': video['storageSize'],
        'hasMP4Fallback': video['hasMP4Fallback'],
        'category': video['category'],
    }
    datas.append(data)

with open('missing_videos.json', 'w') as f:
    json.dump(datas, f, indent=4)

@@ -0,0 +1,27 @@
from BunnyCDN.Storage import Storage
import os, json

altpins_obj_storage = Storage('577cb82d-8176-4ccf-935ce0a574bf-fe4c-4012', 'altpins')
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')

medias = json.load(open('db_pins.json', 'r'))
count = 0
print(f"Found {len(medias)} files to process.")
cacheDir = 'old_altpins_cache'

for media in medias:
    count += 1
    username = media['title']
    mediaID = media['photo_id']
    mediaURL = media['url']
    extension = mediaURL.split('.')[-1]
    serverPath = mediaURL.replace("https://altpins.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
    localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))
    if os.path.exists(localFilePath):
        continue
    altpins_obj_storage.DownloadFile(storage_path=serverPath, download_path=cacheDir)
    print(f"Downloaded {count}/{len(medias)}: {localFilePath}")

@@ -0,0 +1,16 @@
import json, bunny, os
from concurrent.futures import ThreadPoolExecutor

medias = json.load(open('missing_videos.json', 'r'))
#videoIDS = [media['url'].split('/')[-1] for media in medias]
videoIDS = [media['guid'] for media in medias]

with ThreadPoolExecutor(max_workers=10) as executor:
    for id in videoIDS:
        filePath = f"MISSING_STREAM_VIDEOS/{id}.zip"
        if os.path.exists(filePath):
            print(f'Video already exists as {filePath}. Skipping...')
            continue
        # bunny.download_video takes (video_id, directory)
        executor.submit(bunny.download_video, id, 'MISSING_STREAM_VIDEOS')

@@ -0,0 +1,29 @@
import os, json, config

# Load the data
pins = json.load(open('db_pins.json', 'r'))
files = os.listdir('STORAGE_IMPORTED/')

db, cursor = config.gen_connection()
cursor.execute('SELECT hash FROM media WHERE hash IS NOT NULL;')
existing_hashes = [hash[0] for hash in cursor.fetchall()]

# Iterate over a copy: removing from a list while iterating it skips elements
for pin in pins[:]:
    if pin['hash'] in existing_hashes:
        print(f"Found {pin['hash']} already in the database.")
        pins.remove(pin)

alreadyImported = []
for pin in pins[:]:
    filepath = pin['filepath']
    username = pin['title']
    filename = os.path.basename(filepath)
    if filename in files:
        print(f"Found {filename} in the imported folder.")
        pins.remove(pin)
        alreadyImported.append(pin)

# Save to the file
json.dump(pins, open('db_pins.json', 'w'))
json.dump(alreadyImported, open('db_pins_imported.json', 'w'))

@@ -0,0 +1,14 @@
import os, json, bunny

medias = json.load(open('allVideos.json', 'r'))
mp4Medias = [media for media in medias if media['hasMP4Fallback'] == True]
missing = json.load(open('missing_videos.json', 'r'))

count = 0
cacheDir = 'old_mp4fallback_cache'
print(f"Found {len(medias)} files to process.")
for media in mp4Medias:
    count += 1
    filePath = os.path.join(cacheDir, media['guid'] + '.mp4')

@@ -0,0 +1,36 @@
import os, json, bunny, config

db, cursor = config.gen_connection()
cursor.execute('SELECT media_id FROM media WHERE media_id IS NOT NULL;')
mediaIDS = cursor.fetchall()

pins = json.load(open('pins.json', 'r'))
videos = json.load(open('db_videos.json', 'r'))
pins = json.load(open('db_pins.json', 'r'))
ids = [video['id'] for video in videos]

# Iterate over a copy: removing from a list while iterating it skips elements
for pin in pins[:]:
    if pin['id'] in ids:
        pins.remove(pin)

# save to the file
json.dump(pins, open('db_pins.json', 'w'))

medias = json.load(open('allVideos.json', 'r'))
mp4Medias = [media for media in medias if media['hasMP4Fallback'] == True]
missing = json.load(open('missing_videos.json', 'r'))

count = 0
cacheDir = 'old_mp4fallback_cache'
print(f"Found {len(medias)} files to process.")
for media in mp4Medias:
    count += 1
    filePath = os.path.join(cacheDir, media['guid'] + '.mp4')

@@ -0,0 +1,53 @@
import os, json, funcs

STORAGE_IMPORTED = 'STORAGE_IMPORTED'
pins = json.load(open('db_pins.json', 'r'))

for pin in pins:
    filename = pin['url'].split('/')[-1]
    filepath = os.path.join(STORAGE_IMPORTED, filename)
    pin['filename'] = filename
    if not pin['hash']:
        pin['hash'] = funcs.calculate_file_hash(filepath)

json.dump(pins, open('db_pins.json', 'w'), indent=4)

files = os.listdir(STORAGE_IMPORTED)
for file in files:
    filepath = os.path.join(STORAGE_IMPORTED, file)
    fileHash = funcs.calculate_file_hash(filepath)
    if fileHash not in file:
        print(f'Renaming {file} to {fileHash}')
        # Keep the extension so the move below can find '{hash}.{ext}'
        os.rename(filepath, os.path.join(STORAGE_IMPORTED, fileHash + os.path.splitext(file)[1]))

pins_by_username = {}
for pin in pins:
    username = pin['title']
    if username not in pins_by_username:
        pins_by_username[username] = []
    pins_by_username[username].append(pin)

for username, username_pins in pins_by_username.items():
    username_folder = os.path.join(STORAGE_IMPORTED, username)
    os.makedirs(username_folder, exist_ok=True)
    for pin in username_pins:
        photo_id = pin['photo_id']
        photo_url = pin['url']
        fileHash = pin['hash']
        if not fileHash:
            continue
        extension = photo_url.split('.')[-1]
        filename = f'{fileHash}.{extension}'
        filePath = os.path.join(STORAGE_IMPORTED, filename)
        outputPath = os.path.join(STORAGE_IMPORTED, username, filename)
        if os.path.exists(outputPath):
            print(f'File {outputPath} already exists. Skipping...')
            continue
        print(f'Moving {photo_url} to {outputPath}')
        os.rename(filePath, outputPath)

@@ -0,0 +1,27 @@
import os, json

folderPath = 'STREAM_IMPORTED'
jsonFile = 'bunnyVideos.json'
data = json.load(open(jsonFile))

for item in data:
    username = item['title']
    filepath = os.path.join(folderPath, item['guid'] + '.mp4')
    if username in filepath:
        continue
    username = item['title']
    output = os.path.join(folderPath, username, os.path.basename(filepath))
    os.makedirs(os.path.dirname(output), exist_ok=True)
    if os.path.exists(filepath):
        os.rename(filepath, output)
        item['filepath'] = output

# save to file
with open(jsonFile, 'w') as f:
    json.dump(data, f, indent=4)

@@ -0,0 +1,49 @@
import json, os
from videohash import VideoHash
from moviepy.editor import VideoFileClip

def is_valid_video(file_path):
    try:
        with VideoFileClip(file_path) as video:
            return True
    except Exception as e:
        print(f"Invalid video {file_path}: {str(e)}")
        return False

def load_hashes(file_path):
    try:
        with open(file_path, 'r') as file:
            return json.load(file)
    except FileNotFoundError:
        return {}

def save_hashes(hashes, file_path):
    with open(file_path, 'w') as file:
        json.dump(hashes, file, indent=4)

hashes = load_hashes('video_hashes.json')
video_directory = 'STORAGE'

for username in os.listdir(video_directory):
    user_dir = os.path.join(video_directory, username)
    if not os.path.isdir(user_dir):
        continue
    for video_file in os.listdir(user_dir):
        video_path = os.path.join(user_dir, video_file)
        if not video_file.endswith(('.mp4', '.mkv', '.avi')) or not is_valid_video(video_path):
            continue
        if username in hashes and any(v[0] == video_file for v in hashes[username]):
            continue
        try:
            video_hash = VideoHash(path=video_path)
            if username in hashes:
                hashes[username].append((video_file, video_hash.hash))
            else:
                hashes[username] = [(video_file, video_hash.hash)]
        except Exception as e:
            print(f"Error processing {video_file}: {str(e)}")

save_hashes(hashes, 'video_hashes.json')

@@ -0,0 +1,44 @@
from moviepy.editor import VideoFileClip
import json

def is_valid_video(file_path):
    try:
        with VideoFileClip(file_path) as video:
            return True
    except Exception as e:
        print(f"Invalid video {file_path}: {str(e)}")
        return False

def load_hashes(file_path):
    try:
        with open(file_path, 'r') as file:
            return json.load(file)
    except FileNotFoundError:
        return {}

def save_hashes(hashes, file_path):
    with open(file_path, 'w') as file:
        json.dump(hashes, file, indent=4)

def find_duplicates(video_hashes):
    hash_map = {}
    for video, v_hash in video_hashes:
        if v_hash in hash_map:
            hash_map[v_hash].append(video)
        else:
            hash_map[v_hash] = [video]
    duplicates = {h: vids for h, vids in hash_map.items() if len(vids) > 1}
    return duplicates

hashes = load_hashes('video_hashes.json')
for username, user_hashes in hashes.items():
    print(f"Checking for duplicates in '{username}' videos:")
    duplicates = find_duplicates(user_hashes)
    if duplicates:
        for dup_hash, dup_videos in duplicates.items():
            print(f"Duplicate hash: {dup_hash}")
            for vid in dup_videos:
                print(f" - {vid}")
    else:
        print("No duplicates found.")

@@ -0,0 +1,48 @@
from videohash import VideoHash
import os

# Directory containing videos grouped by username
video_directory = '/path/to/videos'
hashes = {}

for username in os.listdir(video_directory):
    user_dir = os.path.join(video_directory, username)
    if os.path.isdir(user_dir):
        for video_file in os.listdir(user_dir):
            if video_file.endswith(('.mp4', '.mkv', '.avi')): # Ensure it's a video file
                video_path = os.path.join(user_dir, video_file)
                try:
                    # Calculate the hash for each video
                    video_hash = VideoHash(path=video_path)
                    print(f"Hash for {video_file}: {video_hash.hash}")
                    # Store hashes in a dictionary
                    if username in hashes:
                        hashes[username].append((video_file, video_hash.hash))
                    else:
                        hashes[username] = [(video_file, video_hash.hash)]
                except Exception as e:
                    print(f"Error processing {video_file}: {str(e)}")

def find_duplicates(hashes):
    duplicate_videos = []
    all_hashes = [(user, video, hsh) for user, videos in hashes.items() for video, hsh in videos]
    hash_dict = {}
    for user, video, hsh in all_hashes:
        if hsh in hash_dict:
            hash_dict[hsh].append((user, video))
        else:
            hash_dict[hsh] = [(user, video)]
    for videos in hash_dict.values():
        if len(videos) > 1:
            duplicate_videos.append(videos)
    return duplicate_videos

duplicates = find_duplicates(hashes)
for duplicate in duplicates:
    print("Duplicate videos found:")
    for video_info in duplicate:
        print(f"User: {video_info[0]}, Video: {video_info[1]}")

@@ -0,0 +1,49 @@
import os, json

def get_file_type(filepath):
    if filepath.endswith('.jpg') or filepath.endswith('.png'):
        return 'image'
    elif filepath.endswith('.mp4'):
        return 'video'
    else:
        return None

def get_files(directory):
    files = []
    for root, dirs, filenames in os.walk(directory):
        for filename in filenames:
            files.append(os.path.join(root, filename))
    return files

files = get_files('STORAGE/')
os.makedirs('images', exist_ok=True)
os.makedirs('videos', exist_ok=True)

for filepath in files:
    if not os.path.exists(filepath):
        print(f"File {filepath} does not exist, skipping.")
        continue
    # Extract the username from the filepath assuming the structure STORAGE/{username}/{filename}
    filepath = filepath.replace('\\', '/') # Replace backslashes with forward slashes
    parts = filepath.split('/') # Split the path by the system's separator
    if len(parts) < 3 or parts[0] != 'STORAGE': # Check if the structure is valid
        print(f"Unexpected filepath format: {filepath}")
        continue
    username = parts[1] # Extract the username from the second part
    fileType = get_file_type(filepath) # Determine the type of the file
    if not fileType:
        print(f"Unknown file type for {filepath}")
        continue
    if fileType == 'image':
        newpath = os.path.join('images', username, os.path.basename(filepath))
    elif fileType == 'video':
        newpath = os.path.join('videos', username, os.path.basename(filepath))
    else:
        print(f"Unknown media type {fileType} for {filepath}")
        continue
    os.makedirs(os.path.dirname(newpath), exist_ok=True) # Create directory structure if it doesn't exist
    os.rename(filepath, newpath) # Move the file to the new location

@ -0,0 +1,34 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Image Gallery</title>
<style>
.gallery {
display: flex;
flex-wrap: wrap;
}
.gallery img {
margin: 10px;
max-width: 200px;
height: auto;
}
.gallery div {
text-align: center;
margin: 10px;
}
</style>
</head>
<body>
<h1>Image Gallery</h1>
<div class="gallery">
{% for image in images %}
<div>
<h3>{{ image['username'] }}</h3>
<img src="{{ image['media_url'] }}" alt="Image for {{ image['username'] }}">
</div>
{% endfor %}
</div>
</body>
</html>

@@ -0,0 +1,84 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Media Gallery</title>
    <style>
        body {
            display: flex;
            justify-content: center;
        }
        .container {
            max-width: 1600px;
            width: 100%;
            padding: 20px;
        }
        .media-container {
            column-count: 4;
            column-gap: 10px;
        }
        .media-item {
            break-inside: avoid;
            margin-bottom: 10px;
        }
        img, video {
            width: 100%;
            height: auto;
            display: block;
        }
    </style>
</head>
<body>
    <div class="container">
        <h1>Media Gallery</h1>
        <div class="media-container" id="media-container"></div>
    </div>
    <script>
        let page = 0;

        async function loadMore() {
            const response = await fetch(`/load-more?page=${page}`);
            const mediaFiles = await response.json();
            const container = document.getElementById('media-container');
            mediaFiles.forEach(file => {
                const mediaItem = document.createElement('div');
                mediaItem.className = 'media-item';
                if (file.endsWith('.png') || file.endsWith('.jpg') || file.endsWith('.jpeg') || file.endsWith('.gif')) {
                    const img = document.createElement('img');
                    img.src = `/media/${file}`;
                    img.alt = file;
                    mediaItem.appendChild(img);
                } else if (file.endsWith('.mp4') || file.endsWith('.mkv') || file.endsWith('.mov')) {
                    const video = document.createElement('video');
                    video.controls = false;
                    video.autoplay = true;
                    video.muted = true;
                    video.loop = true;
                    const source = document.createElement('source');
                    source.src = `/media/${file}`;
                    source.type = 'video/mp4';
                    video.appendChild(source);
                    mediaItem.appendChild(video);
                }
                container.appendChild(mediaItem);
            });
            page += 1;
        }

        window.addEventListener('scroll', () => {
            if (window.innerHeight + window.scrollY >= document.body.offsetHeight) {
                loadMore();
            }
        });

        // Initial load
        loadMore();
    </script>
</body>
</html>

@@ -0,0 +1,32 @@
from flask import Flask, render_template, send_from_directory, jsonify, request
import os

app = Flask(__name__)
media_dir = 'storysaver'
MEDIA_PER_PAGE = 20

def get_media_files(start, count):
    media_files = []
    for root, dirs, files in os.walk(media_dir):
        for filename in files:
            if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.mp4', '.mkv', '.mov')):
                file_path = os.path.relpath(os.path.join(root, filename), media_dir)
                media_files.append(file_path)
    return media_files[start:start + count]

@app.route('/')
def index():
    return render_template('index.html')

@app.route('/media/<path:filename>')
def media(filename):
    return send_from_directory(media_dir, filename)

@app.route('/load-more')
def load_more():
    page = int(request.args.get('page', 0))
    media_files = get_media_files(page * MEDIA_PER_PAGE, MEDIA_PER_PAGE)
    return jsonify(media_files)

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000, debug=True)
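A quick way to exercise the endpoints once the app is running locally (a sketch; the returned paths are whatever storysaver/ contains):

import requests

files = requests.get('http://localhost:5000/load-more', params={'page': 0}).json()
print(files[:5])  # first few relative paths, each servable via /media/<path>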

@ -0,0 +1,26 @@
import json, requests
def findPost(filePath = 'test.json'):
params = {'av': '17841401225494803','__a': '1','__req': '1','__hs': '19906.HYP:instagram_web_pkg.2.1..0.1','dpr': '1','__ccg': 'UNKNOWN','__rev': '1014609539','__s': 'guk60j:651i2v:pmhu0r','__hsi': '7386834689999716220','__dyn': '7xe5WwlEnwn8K2Wmm1twpUnwgU7S6EdF8aUco38w5ux609vCwjE1xoswaq0yE6u0nS4oaEd86a3a1YwBgao1aU2swbOU2zxe2GewGw9a362W2K0zEnwhEe82mwww4cwJCwLyES1TwTwFwIwbS1LwTwKG1pg2Xwr86C1mwrd6goK3ibxKi2K7ErwYCz8rwHw','__csr': 'igAzIj5OgR5YBHdRtivbkyFv-zJIZE_ykzfahdAydeHCHAAAqyk4pqBgDzeV4-qlbBF29UlCxFpVokDwAyosyV9KWUmx6iu58WqdwSDCDAFwHxi3C00lWy2FG4k583NxW8yFE0bUyxd06lxO5C2a8yFm2u290ejg1JU2Gw2rQ061U','__comet_req': '7','fb_dtsg': 'NAcPDfX2XufdLkctek6zNxz3DWxPW4t-cJzz39QtOQ5KS-_Rq3erT4A:17843708194158284:1719013044','jazoest': '26262','lsd': 'D0zmaX16yIQu_GwDXKTbMc','__spin_r': '1014609539','__spin_b': 'trunk','__spin_t': '1719881474','__jssesw': '1','fb_api_caller_class': 'RelayModern','fb_api_req_friendly_name': 'PolarisProfilePageContentDirectQuery', 'variables': '{"id":"57771591453","render_surface":"PROFILE"}','server_timestamps': 'true','doc_id': '7663723823674585'}
response = requests.get('https://www.instagram.com/graphql/query', params=params)  # send the query parameters and decode the JSON body
posts = response.json()['data']['xdt_api__v1__feed__user_timeline_graphql_connection']['edges']
posts = [post['node'] for post in posts]
return max(posts, key=lambda post: max(c['width'] * c['height'] for c in post['image_versions2']['candidates']))
def getHDProfilePicture():
    url = 'https://www.save-free.com/process'
    zoom_data = {'instagram_url': 'natahalieeee', 'type': 'profile', 'resource': 'zoom'}
    data = {'instagram_url': 'natahalieeee', 'type': 'profile', 'resource': 'save'}
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36', 'Referer': 'https://www.save-free.com/profile-downloader/'}
    requests.post(url, data=data, headers=headers)  # first request ('save'); its response is unused
    response = requests.post(url, data=zoom_data, headers=headers)  # the 'zoom' response carries the HD image bytes
    with open('image.jpg', 'wb') as f:
        f.write(response.content)
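For reference, the max() selection in findPost keeps the post whose largest rendition has the most pixels. A toy run with made-up candidate data (the image_versions2/candidates shape mirrors the code above; the values are invented):

sample_posts = [
    {'image_versions2': {'candidates': [{'url': 'a.jpg', 'width': 1080, 'height': 1350}]}},
    {'image_versions2': {'candidates': [{'url': 'b.jpg', 'width': 640, 'height': 800},
                                        {'url': 'b_hd.jpg', 'width': 1440, 'height': 1800}]}},
]
best = max(sample_posts, key=lambda post: max(c['width'] * c['height'] for c in post['image_versions2']['candidates']))
print(best['image_versions2']['candidates'][-1]['url'])  # b_hd.jpg: 1440*1800 beats 1080*1350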

@ -0,0 +1,149 @@
from BunnyCDN.Storage import Storage
from datetime import datetime
import os, config, funcs, cv2
from PIL import Image
def UploadMedia(media):
media_id = media['media_id']
username = media['username']
timestamp = media['timestamp']
user_id = media['user_id']
filepath = media['filepath']
highlight_id = media['highlight_id']
thumbnail_url = None
phash = None
if media_id and int(media_id) in existing_files:
print('Duplicate file detected. Removing...')
os.remove(filepath)
return True
filename = os.path.basename(filepath)
file_extension = os.path.splitext(filename)[1].lower()
media_type = funcs.get_media_type(filename)
post_type = funcs.determine_post_type(filepath, media_type)
if not post_type:
print(f'Error determining post type for {filename}. Skipping...')
return False
file_hash = funcs.calculate_file_hash(filepath)
post_date = datetime.fromtimestamp(int(timestamp)) if timestamp else datetime.now()
if media_type == 'video':
    width, height = funcs.get_video_dimensions(filepath)
else:
    with Image.open(filepath) as img:  # context manager releases the handle so os.remove below works on Windows
        width, height = img.size
duration = funcs.get_video_duration(filepath) if media_type == 'video' else 0 # slower
if media_type == 'video':
try:
thumbPath = f'temp/{media_id}.jpg'
cap = cv2.VideoCapture(filepath)
ret, frame = cap.read()
cv2.imwrite(thumbPath, frame)
cap.release()
obj_storage.PutFile(thumbPath, f'thumbnails/{media_id}.jpg') # slower
thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{media_id}.jpg"
phash = funcs.generate_phash(thumbPath)
os.remove(thumbPath)
except Exception as e:
    print(f'Error generating thumbnail for {filename}: {e}. Skipping...')
    return False
elif media_type == 'image':
phash = funcs.generate_phash(filepath)
newFilename = f'{media_id}{file_extension}'
server_path = f'media/{post_type}/{username}/{newFilename}'
file_url = f"https://storysave.b-cdn.net/{server_path}"
obj_storage.PutFile(filepath, server_path) # slow as fuck
if highlight_id:
newCursor.execute("INSERT IGNORE INTO highlights (highlight_id, user_id, media_id) VALUES (%s, %s, %s)", (highlight_id, user_id, media_id))
newDB.commit()
print(f'[{newCursor.rowcount}] added highlight {highlight_id} to user {user_id}')
post_type = 'story' if post_type == 'stories' else 'post'
query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, media_id, post_type, date, user_id, hash, filename, duration, thumbnail, phash) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
values = (username, media_type, file_url, width, height, media_id, post_type, post_date, user_id, file_hash, filename, duration, thumbnail_url, phash)
newCursor.execute(query, values) # slower
newDB.commit()
print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')
os.remove(filepath)
return True
def get_user_id(username):
username = username.lower()
if username in existing_users:
return existing_users[username]
return None
def get_media_data(filepath):
filename = os.path.basename(filepath)
parts = filename.split('~')
if len(parts) < 4:
return False
username = parts[0]
timestamp = parts[1]
media_id = parts[2]
user_id = parts[3].split('_')[-1].split('.')[0]
highlight_id = user_id.replace('highlight', '') if 'highlight' in user_id else None
if highlight_id:
user_id = get_user_id(username)
try:
    media_id = int(media_id)
except ValueError:
    print(f'Invalid media_id for file {filename}. Continuing without one...')
    media_id = None
data = {'username': username, 'timestamp': timestamp, 'media_id': media_id, 'user_id': user_id, 'filepath': filepath, 'highlight_id': highlight_id}
return data
def get_media(folder_path):
medias = []
for root, dirs, files in os.walk(folder_path):
for filename in files:
filepath = os.path.join(root, filename)
data = get_media_data(filepath)
if data:
medias.append(data)
return medias
def dump_instagram(folder_path):
medias = get_media(folder_path)
for media in medias:
UploadMedia(media)
existing_files.append(media['media_id'])
if __name__ == '__main__':
print('Starting processing...')
newDB, newCursor = config.gen_connection()
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
newCursor.execute("SELECT media_id FROM media WHERE media_id IS NOT NULL")
existing_files = [image[0] for image in newCursor.fetchall()]
newCursor.execute("SELECT DISTINCT username, user_id FROM media WHERE user_id IS NOT NULL")
existing_users = {user[0].lower(): user[1] for user in newCursor.fetchall()}  # user_id needs no case folding
dump_instagram('storysaver/')
print("Processing completed.")

@ -0,0 +1,137 @@
from BunnyCDN.Storage import Storage
from datetime import datetime
import os, config, funcs, cv2
from PIL import Image
def UploadMedia(media):
media_id = media['media_id']
username = media['username']
post_date = media['timestamp']
user_id = media['user_id']
filepath = media['filepath']
highlight_id = media['highlight_id']
post_type = media['post_type']
thumbnail_url = None
phash = None
if media_id and int(media_id) in existing_files:
print('Duplicate file detected. Removing...')
os.remove(filepath)
return True
filename = os.path.basename(filepath)
file_extension = os.path.splitext(filename)[1].lower()
media_type = funcs.get_media_type(filename)
file_hash = funcs.calculate_file_hash(filepath)
if media_type == 'video':
    width, height = funcs.get_video_dimensions(filepath)
else:
    with Image.open(filepath) as img:  # context manager releases the handle so os.remove below works on Windows
        width, height = img.size
duration = funcs.get_video_duration(filepath) if media_type == 'video' else 0 # slower
if media_type == 'video':
try:
thumbPath = f'temp/{media_id}.jpg'
cap = cv2.VideoCapture(filepath)
ret, frame = cap.read()
cv2.imwrite(thumbPath, frame)
cap.release()
obj_storage.PutFile(thumbPath, f'thumbnails/{media_id}.jpg') # slower
thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{media_id}.jpg"
phash = funcs.generate_phash(thumbPath)
os.remove(thumbPath)
except Exception as e:
    print(f'Error generating thumbnail for {filename}: {e}. Skipping...')
    return False
elif media_type == 'image':
phash = funcs.generate_phash(filepath)
if media_id:
newFilename = f'{media_id}{file_extension}'
else:
newFilename = f'{file_hash}{file_extension}'
server_path = f'media/{post_type}/{username}/{newFilename}'
file_url = f"https://storysave.b-cdn.net/{server_path}"
obj_storage.PutFile(filepath, server_path) # slow as fuck
if highlight_id:
newCursor.execute("INSERT IGNORE INTO highlights (highlight_id, user_id, media_id) VALUES (%s, %s, %s)", (highlight_id, user_id, media_id))
newDB.commit()
print(f'[{newCursor.rowcount}] added highlight {highlight_id} to user {user_id}')
query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, media_id, post_type, date, user_id, hash, filename, duration, thumbnail, phash) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
values = (username, media_type, file_url, width, height, media_id, post_type, post_date, user_id, file_hash, filename, duration, thumbnail_url, phash)
newCursor.execute(query, values) # slower
newDB.commit()
print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')
os.remove(filepath)
return True
def get_user_id(username):
username = username.lower()
if username in existing_users:
return existing_users[username]
return None
def get_media():
medias = []
post_types = {
'posts': 'post',
'stories': 'story',
'profile': 'profile',
}
for post_type in os.listdir('media'):
    if post_type not in post_types:
        continue  # ignore folders that are not a known post type
    users = os.listdir(f'media/{post_type}')
for user in users:
user_path = f'media/{post_type}/{user}'
for filename in os.listdir(user_path):
data = {}
filepath = os.path.join(user_path, filename)
data['post_type'] = post_types[post_type]
data['username'] = user
if 'com.instagram.android__' in filename:
    stamp = filename.split('__')[-1].split('.')[0]
    data['timestamp'] = datetime.strptime(stamp, '%Y%m%d%H%M%S%f')
else:
    data['timestamp'] = datetime.now()
data['filepath'] = filepath
data['media_id'] = None
data['user_id'] = get_user_id(data['username'])
data['highlight_id'] = None
medias.append(data)
return medias
def dump_instagram():
medias = get_media()
for media in medias:
UploadMedia(media)
existing_files.append(media['media_id'])
if __name__ == '__main__':
print('Starting processing...')
newDB, newCursor = config.gen_connection()
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
newCursor.execute("SELECT media_id FROM media WHERE media_id IS NOT NULL")
existing_files = [image[0] for image in newCursor.fetchall()]
newCursor.execute("SELECT DISTINCT username, user_id FROM media WHERE user_id IS NOT NULL")
existing_users = {user[0].lower(): user[1] for user in newCursor.fetchall()}  # user_id needs no case folding
dump_instagram()
print("Processing completed.")

@ -0,0 +1,36 @@
import os, shutil, time
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
class DownloadHandler(FileSystemEventHandler):
def process_file(self, file_path):
file = os.path.basename(file_path)
if 'crdownload' not in file and file.count('~') == 3:
print(f'Moving {file}...')
outputPath = os.path.join('storysaver', file)
try:
shutil.move(file_path, outputPath)
except Exception as e:
print(f'Failed to move file: {e}')
def on_created(self, event):
if not event.is_directory and 'crdownload' not in event.src_path:
self.process_file(event.src_path)
def on_moved(self, event):
if not event.is_directory and 'crdownload' not in event.dest_path:
self.process_file(event.dest_path)
if __name__ == "__main__":
downloadPath = os.path.join(os.path.expanduser('~'), 'Downloads')
event_handler = DownloadHandler()
observer = Observer()
observer.schedule(event_handler, downloadPath, recursive=False)
observer.start()
try:
while True:
time.sleep(1) # Add a 1-second sleep to reduce CPU usage
except KeyboardInterrupt:
observer.stop()
observer.join()
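The count('~') == 3 filter matches the four-part username~timestamp~media_id~user_id naming that get_media_data in the uploader splits on. A quick sanity check with an invented filename:

name = 'someuser~1719881474~3391929118284441051~57771591453.jpg'
assert name.count('~') == 3
username, timestamp, media_id, rest = name.split('~')
user_id = rest.split('_')[-1].split('.')[0]  # same parsing as get_media_data
print(username, timestamp, media_id, user_id)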

@ -50,9 +50,6 @@ def login(force=False):
else:
raise FileNotFoundError
except (FileNotFoundError, json.JSONDecodeError):
# username = input("Enter your Instagram username: ")
# password = getpass.getpass("Enter your Instagram password: ")
with open("p.enc", "rb") as encrypted_file:
encrypted_data = encrypted_file.read()

@ -1,30 +1,33 @@
import cv2, os, json, config, hashlib, requests
from concurrent.futures import ThreadPoolExecutor
from moviepy.editor import VideoFileClip
from cryptography.fernet import Fernet
from BunnyCDN.Storage import Storage
from instagrapi import Client
from uuid import uuid4
from PIL import Image
import os, config, funcs
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36"}
proxies={"http": "http://yehyuxsl-rotate:4tl5bvrwkz5e@p.webshare.io:80/","https": "http://yehyuxsl-rotate:4tl5bvrwkz5e@p.webshare.io:80/"}
def file_hash(filename):
    h = hashlib.new('sha256')
    with open(filename, "rb") as file:
        while chunk := file.read(8192):
            h.update(chunk)
    return h.hexdigest()
def insert_highlight_items(media_ids, highlight_id, title, user_id):
    try:
        db, cursor = config.gen_connection()
        query = "INSERT IGNORE INTO highlights (media_id, highlight_id, title, user_id) VALUES (%s, %s, %s, %s)"
        values = [(media_id, highlight_id, title, user_id) for media_id in media_ids]
        cursor.executemany(query, values)
        db.commit()
        if cursor.rowcount > 0:
            print(f"Added {cursor.rowcount} highlight items to the database.")
    except Exception as e:
        print(f"Failed to add highlight items to the database. Error: {e}")
def get_video_duration(file_path):
    try:
        with VideoFileClip(file_path) as video:
            return video.duration
    except Exception:
        return 0
def upload_to_storage(local_path, server_path):
    try:
        obj_storage = Storage("345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e", "storysave")
        obj_storage.PutFile(local_path, server_path)
        print(f"Uploaded to https://storysave.b-cdn.net/{server_path}")
    except Exception as e:
        print(f"Failed to upload {local_path} to {server_path}. Error: {e}")
def login():
@ -67,33 +70,6 @@ def parse_media_data(media_item):
return mediaInfo
def download_file(url, filePath):
try:
response = requests.get(url, stream=True, headers=headers)
response.raise_for_status()
directory = os.path.dirname(filePath)
if not os.path.exists(directory):
os.makedirs(directory)
with open(filePath, "wb") as out_file:
for chunk in response.iter_content(chunk_size=8192):
out_file.write(chunk)
print(f"Downloaded {filePath}")
except Exception as e:
print(f"Failed to download {url}. Error: {e}")
def upload_to_storage(local_path, server_path):
try:
obj_storage = Storage("345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e", "storysave")
obj_storage.PutFile(local_path, server_path)
print(f"Uploaded to https://storysave.b-cdn.net/{server_path}")
except Exception as e:
print(f"Failed to upload {local_path} to {server_path}. Error: {e}")
def add_media_to_db(mediaInfo):
media_id = mediaInfo["media_id"]
user_id = mediaInfo["user_id"]
@ -124,30 +100,22 @@ def add_media_to_db(mediaInfo):
print(f"Failed to add media for {username} to the database. Error: {e}")
def insert_highlight_items(media_ids, highlight_id, title, user_id):
try:
db, cursor = config.gen_connection()
query = "INSERT IGNORE INTO highlights (media_id, highlight_id, title, user_id) VALUES (%s, %s, %s, %s)"
values = [(media_id, highlight_id, title, user_id) for media_id in media_ids]
cursor.executemany(query, values)
db.commit()
if cursor.rowcount > 0:
print(f"Added {cursor.rowcount} highlight items to the database.")
except Exception as e:
print(f"Failed to add highlight items to the database. Error: {e}")
def get_profile_picture(client, user_id, username):
    mediaInfo = {}
    mediaInfo['mediaDir'] = 'profile'
    mediaInfo['username'] = username
    mediaInfo['user_id'] = user_id
    mediaInfo['media_id'] = None
    mediaInfo['media_type'] = 'image'
    mediaInfo['post_type'] = 'profile'
    mediaInfo['media_url'] = client.user_info(user_id).profile_pic_url_hd
    mediaInfo['duration'] = 0
    mediaInfo['filename'] = f"{uuid4()}.jpg"
    return mediaInfo
def get_video_dimensions(video_path):
    cap = cv2.VideoCapture(video_path)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    cap.release()
    return width, height
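Worth noting: when OpenCV cannot open a file, VideoCapture.get simply returns 0.0 instead of raising, so get_video_dimensions reports (0, 0) for unreadable streams. Callers can guard on that; the path below is invented:

width, height = get_video_dimensions('clip.mp4')
if not width or not height:
    print('OpenCV could not read the stream; skipping this file')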
def getAllStories(client, user_id, firstImport=False):
def get_all_stories(client, user_id, firstImport=False):
stories = client.user_stories(user_id)
highlights = client.user_highlights(user_id)
@ -163,7 +131,7 @@ def getAllStories(client, user_id, firstImport=False):
return stories
def getAllPosts(client, user_id):
def get_all_posts(client, user_id):
posts = client.user_medias(user_id, 36)
medias = []
@ -187,25 +155,13 @@ if __name__ == "__main__":
db, cursor = config.gen_connection()
cursor.execute("SELECT instagram_username, instagram_user_id, favorite FROM following ORDER BY id DESC;")
cursor.execute("SELECT instagram_username, instagram_user_id, favorite FROM following ORDER BY favorite DESC;")
following = cursor.fetchall()
new_following = []
for user in following:
username, user_id, favorite = user
if bool(favorite):
new_following.insert(0, user)
else:
new_following.append(user)
following = new_following
cursor.execute("SELECT media_id FROM media WHERE media_id IS NOT NULL;")
existing_files = [media[0] for media in cursor.fetchall()]
continueFromLast = input("Continue from the last user? (y/N): ").lower() == "y"
if continueFromLast:
cursor.execute("SELECT username FROM media ORDER BY id DESC LIMIT 1;")
lastUser = cursor.fetchone()
@ -218,76 +174,56 @@ if __name__ == "__main__":
for user in following:
while True:
try:
firstImport = False
username, user_id, isFavorite = user
if not user_id:
firstImport = True
user_id = client.user_id_from_username(username)
cursor.execute("UPDATE following SET instagram_user_id = %s WHERE instagram_username = %s;", (user_id, username))
db.commit()
print(f"Updated user ID for {username} to {user_id}")
#################### profile picture ####################
#profilePath = os.path.join('media', 'profile', username, 'profile.jpg')
#profileURL = client.user_info(user_id).profile_pic_url_hd
#download_file(profileURL, profilePath)
#fileHash = file_hash(profilePath)
#serverPath = os.path.join(os.path.dirname(profilePath), f"{fileHash}.jpg")
#upload_to_storage(profilePath, serverPath)
#mediaInfo = {
# 'username': username,
# 'user_id': user_id,
# 'media_id': None,
# 'media_type': 'image',
# 'post_type': 'profile',
# 'media_url': f"https://storysave.b-cdn.net/{serverPath}",
# 'duration': 0,
# 'hash': fileHash
#}
#add_media_to_db(mediaInfo)
#################### profile picture ####################
allStories = getAllStories(client, user_id, firstImport)
allPosts = getAllPosts(client, user_id)
medias = allStories + allPosts
for media in medias:
mediaInfo = parse_media_data(media)
mediaType = "stories" if mediaInfo["post_type"] == "story" else "posts"
filePath = os.path.join('media', mediaType, username, mediaInfo['filename'])
mediaInfo["hash"] = file_hash(filePath)
download_file(mediaInfo['media_url'], filePath)
if mediaInfo["media_type"] == "image":
with Image.open(filePath) as img:
mediaInfo["width"], mediaInfo["height"] = img.size
else:
mediaInfo["width"], mediaInfo["height"] = get_video_dimensions(filePath)
mediaInfo["duration"] = get_video_duration(filePath)
upload_to_storage(filePath, filePath)
add_media_to_db(mediaInfo)
os.remove(filePath)
existing_files.append(mediaInfo["media_id"])
print("=====================================")
break
except Exception as e:
    if "login_required" in str(e):
        print("Please log in to your account again.")
        os.remove("session_data.json")
        client = login()
    else:
        print("An unexpected error occurred:", e)
        break
try:
firstImport = False
username, user_id, isFavorite = user
if not user_id:
firstImport = True
user_id = client.user_id_from_username(username)
cursor.execute("UPDATE following SET instagram_user_id = %s WHERE instagram_username = %s;", (user_id, username))
db.commit()
print(f"Updated user ID for {username} to {user_id}")
profile = get_profile_picture(client, user_id, username)
allStories = get_all_stories(client, user_id, firstImport)
allPosts = get_all_posts(client, user_id)
medias = allStories + allPosts
for mediaInfo in medias:
filePath = os.path.join('media', mediaInfo['mediaDir'], username, mediaInfo['filename'])
funcs.download_file(mediaInfo['media_url'], filePath)
mediaInfo["hash"] = funcs.calculate_file_hash(filePath)
if mediaInfo["media_type"] == "image":
with Image.open(filePath) as img:
mediaInfo["width"], mediaInfo["height"] = img.size
else:
    mediaInfo["width"], mediaInfo["height"] = funcs.get_video_dimensions(filePath)
mediaInfo["duration"] = funcs.get_video_duration(filePath)
upload_to_storage(filePath, filePath)
add_media_to_db(mediaInfo)
os.remove(filePath)
existing_files.append(mediaInfo["media_id"])
except Exception as e:
if "login_required" in str(e):
print("Please log in to your account again.")
os.remove("session_data.json")
client = login()
else:
print("An unexpected error occurred:", e)
#https://www.instagram.com/anya_shtril/
#https://www.instagram.com/anyarodionov/
#https://www.instagram.com/neomi_hanukayev/
#https://www.instagram.com/osher_yakir/
#https://www.instagram.com/m1ry2m_/
#https://www.instagram.com/4m1t_f1shpot/
#https://www.instagram.com/yarden.bengigi/
#https://www.instagram.com/a.roniiiiii/
#https://www.instagram.com/nonsalemwitch/