new altpins update

main
oscar 11 months ago
parent 1d8bb3c85f
commit e9f3404d1c

BIN
.DS_Store vendored

Binary file not shown.

48
.gitignore vendored

@ -1,25 +1,25 @@
# Content
storysaver/
facebook/
media/
cache/
temp/
*.pyc
/old_altpins_cache
/__pycache__
/STORAGE_IMPORTED
/STREAM_VIDEOS
/STREAM_VIDEOS_IMPORTED
/STORAGE
/other
/Sort
# python files
*.pyc
/images
/sortlater
/videos
/duplicates
/ready_to_upload
/archive
/images
/images
/sorted
__pycache__
# Content
storysaver
facebook
media
cache
temp
ready_to_upload
check_if_exists
data
media
old_snapchats
OnlyFans
process
processed_tiktoks
screenshot_stories
snapchat
snapchat_new
sorted
uploadlater

@ -1,24 +0,0 @@
import config
altpins_db, altpins_cursor = config.altpins_gen_connection()
db, cursor = config.gen_connection()
altpins_cursor.execute("SELECT id, title, hash, url FROM pins WHERE hash IS NOT NULL;")
altpins_results = { (row[1], row[2]): (row[0], row[3]) for row in altpins_cursor.fetchall() }
cursor.execute("SELECT id, username, hash, media_url FROM media WHERE hash IS NOT NULL;")
media_results = { (row[1], row[2]): (row[0], row[3]) for row in cursor.fetchall() }
common_items = set(altpins_results.keys()) & set(media_results.keys())
for title, hash_value in common_items:
altpins_id, altpins_url = altpins_results[(title, hash_value)]
media_id, media_url = media_results[(title, hash_value)]
print(f"Found a match for hash {hash_value} with title {title}")
print(f"Altpins URL: {altpins_url}")
print(f"Media URL: {media_url}")
altpins_cursor.execute("DELETE FROM pins WHERE id = %s;", [altpins_id])
altpins_db.commit()
print(f"Deleted pin {altpins_id}. {altpins_cursor.rowcount} rows affected")

@ -1,17 +0,0 @@
you have not failed enough
you care what other people think
you think you're smarter than everyone
you lack curiosity
you don't ask enough questions
you can't handle the truth
you don't see opportunities
resell to the people you already sold to
staff either save you time or make you money
sell on people's weaknesses, insecurities and addictions
make people think they NEED your product
any business can be started with no money
business is money IN and not money OUT
take money, not make money
use whatever you've got
act with speed

99
_todo

@ -1,99 +0,0 @@
fix recorder
recover missing streams
re-encode all videos to 10 seconds
scan last dood videos with missing download_link
delete all "deleted" self-hosted videosdump all useless self-hosted videos to mix/dood
fix camsrip crawler
move camsrip to new server
finish converting download_link faster
check failed mixdrop uploads
add cache for .ts files bunny/nvme
manage all domains
pay for onlycats domain
onlyfans crawler
telegram crawler
optimize instagram crawler
do ethernet cables
get access to xn
paint light switches/phone case/pc cases
microscope shorts
fix / sell scooter
paperless ngx
do hand scan
go to psychiatrist
do general checkup on body
fix and brush teeth
SFP and NTP
phishing ig
xss tate shop
finish and improve opsec
delete internet t
clean cry
warm up pay
install wasabi
install / try gaming linux
finish atrazat on
set up nas
dump last stories
photoshop originals
finish ab recoverer/cleaner
fix controller
fix hdd 100% load on video server
replace exoclick
fake comments bot
advanced tags/streamer data bot
self host all thumbs with bunny
reupload all dmca'd videos with new id's
generate shorts
use user's tokens to record private shows
create alert system
set up streaming server
minimize amount of scripts i need
normalize database
load balancers for web server
set up recu downloader
handle premium expired / purchases and upgrades
create bunny-like database and api for videos
save file sizes for videos
add payment options like paypal, usdt and more
re-generate thumbs for all videos self-hosted
download all mixdrop/dood/xpo videos
add streamate and cherrytv to recorder and website
delete stripchat dupes
delete "fav" dupes
blacklist ruta and other dmca agencies' crawlers
send emails to potential premiums
fix streamers db having 2 queries with and without gender
create storage manager for recorder
visualize nginx logs to track dmca bots
append all cutoff streams
add ssh keys
frontend:
add forums
add width sections for video player
coins/credit system (basically affiliate)
enable user uploaded content
performer accounts
advanced search system
affiliate system - optimize and create a panel where i can easily manage all
sort by dmca and most popular on /profile
change comments, follow and save to js
add payment options
optimize history/following
create contests and affiliates for premium
"copy" saved videos
keep views uncached on main page
add heatmap for player
fix missing animated thumbs in saved page
fix duplicates in saved videos page
add ip logging for security
require phone numbers for logging in?
add recu affiliate?
fix history dupes
try node.js to get the mp4 url from mixdrop
add profile pictures in search
add collections
mark premium videos
add credit card payment with skrill or others

@ -1,62 +0,0 @@
import os
import tarfile
from datetime import datetime
import sys # Import sys for command line arguments
from BunnyCDN.Storage import Storage
def is_hidden(path):
"""
Check if the given path is a hidden folder or file.
"""
return path.startswith('.') or '/.' in path
def should_exclude(path, excluded_items):
"""
Check if the given path should be excluded.
"""
if is_hidden(path):
return True
for item in excluded_items:
if path.startswith(item):
return True
return False
def backup(folder_path, excluded_folders=[], excluded_files=[]):
"""
Create a compressed backup of the specified folder, excluding specified items and hidden folders.
"""
timestamp = int(datetime.timestamp(datetime.now()))
backup_file = os.path.join(folder_path, f'backup-{timestamp}.tar.gz')
with tarfile.open(backup_file, "w:gz") as tar:
for root, dirs, file_names in os.walk(folder_path):
if should_exclude(root, excluded_folders):
continue
for file_name in file_names:
file_path = os.path.join(root, file_name)
if should_exclude(file_path, excluded_files):
continue
print("Adding %s" % file_path)
tar.add(file_path, arcname=os.path.relpath(file_path, start=folder_path))
return backup_file
if __name__ == "__main__":
if len(sys.argv) != 2:
print("Usage: python script.py <folder_path>")
sys.exit(1)
folder_path = sys.argv[1]
if not os.path.isdir(folder_path):
print(f"Error: The folder '{folder_path}' does not exist.")
sys.exit(1)
backup_file = backup(folder_path)
obj_storage = Storage('99f4c72b-2674-4e6a-a1825c269cc0-b959-48a1', 'ab-backups')
obj_storage.PutFile(backup_file, f'backups/{os.path.basename(backup_file)}')
print("Backup and upload successful.")

@ -0,0 +1,79 @@
import os, re, requests, config
from snapchat import get_stories, get_highlight_stories, get_all_users_data
def get_file_extension(url):
response = requests.head(url)
if response.status_code != 200:
print(f"Failed to access media {url}")
return None
content_type = response.headers.get('Content-Type', '')
if 'image' in content_type:
return '.jpg'
elif 'video' in content_type:
return '.mp4'
else:
print(f"Unknown content type for media {url}")
return None
def extract_file_type(url):
# Use a regular expression to extract the file type number
match = re.search(r"/d/[^.]+\.([0-9]+)\.", url)
if match:
return match.group(1) # Return the number as a string
return None
def map_file_type_to_extension(urls):
file_type_to_extension = {}
seen_file_types = set()
for url in urls:
# Extract the file type number
file_type_number = extract_file_type(url)
if not file_type_number:
continue
# Skip if we've already checked this file type
if file_type_number in seen_file_types:
continue
# Use the get_file_extension function to determine the extension
file_extension = get_file_extension(url)
if file_extension:
file_type_to_extension[file_type_number] = file_extension
seen_file_types.add(file_type_number)
return file_type_to_extension
def main():
cursor.execute("SELECT username FROM following WHERE platform = 'snapchat' ORDER BY id DESC")
usernames = [row[0] for row in cursor.fetchall()]
snapchat_users_data = get_all_users_data(usernames)
all_stories = [get_stories(data) + get_highlight_stories(data) for data in snapchat_users_data.values()]
processed_stories = []
for stories in all_stories:
processed_stories.extend(stories)
all_urls = [story['url'] for story in processed_stories]
# Map file type numbers to extensions
file_type_to_extension = map_file_type_to_extension(all_urls)
# Print the mapping
print("File Type to Extension Mapping:")
for file_type, extension in file_type_to_extension.items():
print(f"File Type {file_type}: {extension}")
if __name__ == '__main__':
print('Starting snappy...')
db, cursor = config.gen_connection()
obj_storage = config.get_storage()
main()
print("Processing completed.")

@ -1,10 +1,8 @@
import os
import json
import config
import imagehash
from PIL import Image
from funcs import get_files, calculate_file_hash
from funcs import get_files
def generate_image_phash(filepath, hash_size=8):
try:

@ -1,99 +0,0 @@
from BunnyCDN.Storage import Storage
import os, uuid, config, funcs
from datetime import datetime
from PIL import Image
def dump_facebook(folder_path):
for filename in os.listdir(folder_path):
if os.path.isdir(os.path.join(folder_path, filename)):
continue
username = filename.split("'")[0]
filepath = os.path.join(folder_path, filename)
mediatype = funcs.get_media_type(filename)
post_type = funcs.determine_post_type(filepath, mediatype)
upload_file(username=username, media_type=mediatype, filepath=filepath, post_type=post_type)
for folder in os.listdir(folder_path):
if os.path.isdir(os.path.join(folder_path, folder)):
username = folder
for filename in os.listdir(os.path.join(folder_path, folder)):
filepath = os.path.join(folder_path, folder, filename)
mediatype = funcs.get_media_type(filename)
post_type = funcs.determine_post_type(filepath, mediatype)
upload_file(username=username, media_type=mediatype, filepath=filepath, post_type=post_type)
def upload_file(filepath, username, media_type='image', post_type='story', timestamp=None, user_id=None):
filename = os.path.basename(filepath)
file_extension = os.path.splitext(filename)[1].lower()
file_hash = funcs.calculate_file_hash(filepath)
if file_hash in existing_files:
print('Duplicate file detected. Removing...')
os.remove(filepath)
return False
duration = funcs.get_video_duration(filepath) if media_type == 'video' else 0
if "FB_IMG" in filename: media_id = filename.split("_")[2].split(".")[0]
else: media_id = uuid.uuid4().hex
dirtype = funcs.determine_post_type(filepath, media_type)
server_path = os.path.join('media', dirtype, username, f'{media_id}{file_extension}')
obj_storage.PutFile(filepath, server_path)
file_url = f"https://storysave.b-cdn.net/{server_path}"
if media_type == 'image':
with Image.open(filepath) as img:
width, height = img.size
else:
width, height = funcs.get_video_dimensions(filepath)
post_date = datetime.fromtimestamp(int(timestamp)) if timestamp else datetime.now()
if post_type == 'stories':
post_type = 'story'
else:
post_type = 'post'
query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, date, user_id, platform, hash, filename, duration) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
values = (username, media_type, file_url, width, height, post_type, post_date, user_id, 'facebook', file_hash, filename, duration)
try:
newCursor.execute(query, values)
newDB.commit()
print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')
except Exception as e:
print(f"Database error: {e}")
return False
try:
if newCursor.rowcount > 0:
os.remove(filepath)
except Exception as e:
print(f"Failed to remove local file {filepath}: {e}")
return True
if __name__ == '__main__':
print('Starting processing...')
newDB, newCursor = config.gen_connection()
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
newCursor.execute("SELECT hash FROM media WHERE platform='facebook' AND hash IS NOT NULL")
existing_files = [image[0] for image in newCursor.fetchall()]
dump_facebook('facebook/')
print("Processing completed.")

@ -1,19 +0,0 @@
from BunnyCDN.Storage import Storage
import dump_instagram as storysaver
import time, config
if __name__ == '__main__':
print('Starting processing...')
newDB, newCursor = config.gen_connection()
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
newCursor.execute("SELECT media_id FROM media WHERE platform='instagram' AND media_id IS NOT NULL")
existing_files = [image[0] for image in newCursor.fetchall()]
while True:
print("Processing...")
storysaver.dump_instagram('storysaver/')
print("Processing completed.")
time.sleep(15)

@ -1,133 +0,0 @@
from BunnyCDN.Storage import Storage
from datetime import datetime
import os, config, funcs, cv2
from PIL import Image
def UploadMedia(media):
media_id = media['media_id']
username = media['username']
timestamp = media['timestamp']
user_id = media['user_id']
filepath = media['filepath']
filename = os.path.basename(filepath)
file_extension = os.path.splitext(filename)[1].lower()
media_type = funcs.get_media_type(filename)
post_type = funcs.determine_post_type(filepath, media_type)
file_hash = funcs.calculate_file_hash(filepath)
duration = funcs.get_video_duration(filepath) if media_type == 'video' else 0
post_date = datetime.fromtimestamp(int(timestamp)) if timestamp else datetime.now()
width, height = funcs.get_video_dimensions(filepath) if media_type == 'video' else Image.open(filepath).size
thumbnail_url = None
if media_type == 'video':
try:
thumbPath = f'temp/{media_id}.jpg'
cap = cv2.VideoCapture(filepath)
ret, frame = cap.read()
cv2.imwrite(thumbPath, frame)
cap.release()
obj_storage.PutFile(thumbPath, f'thumbnails/{media_id}.jpg')
thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{media_id}.jpg"
except Exception as e:
print(f'Error generating thumbnail: {e}. Skipping...')
return False
server_path = f'media/{post_type}/{username}/{media_id}{file_extension}'
file_url = f"https://storysave.b-cdn.net/{server_path}"
if user_id and 'highlight' in user_id:
highlight_id = user_id.replace('highlight', '')
user_id = None
try:
newCursor.execute("SELECT user_id FROM media WHERE username=%s", (username,))
user_id = newCursor.fetchall()[0][0]
except Exception:
print(f'User {username} not found in database. Skipping...')
user_id = None
newCursor.execute("INSERT IGNORE INTO highlights (highlight_id, user_id, media_id) VALUES (%s, %s, %s)", (highlight_id, user_id, media_id))
newDB.commit()
print(f'[{newCursor.rowcount}] added highlight {highlight_id} to user {user_id}')
obj_storage.PutFile(filepath, server_path)
query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, media_id, post_type, date, user_id, hash, filename, duration, thumbnail) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
values = (username, media_type, file_url, width, height, media_id, post_type, post_date, user_id, file_hash, filename, duration, thumbnail_url)
newCursor.execute(query, values)
newDB.commit()
print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')
os.remove(filepath)
return True
def getMedias(folder_path):
medias = []
for filename in os.listdir(folder_path):
parts = filename.split('~')
if len(parts) < 4:
continue
username = parts[0]
timestamp = parts[1]
media_id = parts[2]
user_id = parts[3].split('_')[-1].split('.')[0]
filepath = os.path.join(folder_path, filename)
if not media_id:
print(f'Invalid media_id for file {filename}. Skipping...')
continue
try:
media_id = int(media_id)
except (ValueError, TypeError):
print(f'Invalid media_id for file {filename}. Skipping...')
continue
data = {
'username': username,
'timestamp': timestamp,
'media_id': media_id,
'user_id': user_id,
'filepath': filepath
}
medias.append(data)
return medias
def dump_instagram(folder_path):
medias = getMedias(folder_path)
for media in medias:
if media['media_id'] in existing_files:
print('Duplicate file detected. Removing...')
os.remove(media['filepath'])
for media in medias:
UploadMedia(media)
if __name__ == '__main__':
print('Starting processing...')
newDB, newCursor = config.gen_connection()
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
newCursor.execute("SELECT media_id FROM media WHERE platform='instagram' AND media_id IS NOT NULL")
existing_files = [image[0] for image in newCursor.fetchall()]
dump_instagram('storysaver/')
print("Processing completed.")

@ -1,82 +0,0 @@
from BunnyCDN.Storage import Storage
from datetime import datetime
import os, config, funcs
from PIL import Image
def dump_instagram(folder_path):
for filename in os.listdir(folder_path):
parts = filename.split('_')
try:
username = '_'.join(parts[:-2]) # Join all except last two
timestamp = int(parts[-2]) # Second last is timestamp
user_id = int(parts[-1].split('.')[0]) # Last part before extension is user_id
except ValueError as e:
print(f"Invalid filename: {filename}. Error: {e}")
continue
filepath = os.path.join(folder_path, filename)
mediatype = funcs.get_media_type(filename)
post_type = funcs.determine_post_type(filepath, mediatype)
UploadMedia(username=username, media_type=mediatype, filepath=filepath, post_type=post_type, timestamp=timestamp, user_id=user_id)
def UploadMedia(filepath, username, media_type='image', post_type='story', timestamp=None, user_id=None):
filename = os.path.basename(filepath)
file_extension = os.path.splitext(filename)[1].lower()
file_hash = funcs.calculate_file_hash(filepath)
duration = funcs.get_video_duration(filepath) if media_type == 'video' else 0
post_date = datetime.fromtimestamp(int(timestamp)) if timestamp else datetime.now()
dirtype = funcs.determine_post_type(filepath, media_type)
server_path = f'media/{dirtype}/{username}/{file_hash}{file_extension}'
file_url = f"https://storysave.b-cdn.net/{server_path}"
if file_hash in existing_files:
print('Duplicate file detected. Removing...')
os.remove(filepath)
return True
obj_storage.PutFile(filepath, server_path)
if media_type == 'image':
with Image.open(filepath) as img:
width, height = img.size
else:
width, height = funcs.get_video_dimensions(filepath)
query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, date, user_id, hash, filename, duration) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
values = (username, media_type, file_url, width, height, post_type, post_date, user_id, file_hash, filename, duration)
newCursor.execute(query, values)
newDB.commit()
print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')
os.remove(filepath)
return True
if __name__ == '__main__':
print('Starting processing...')
newDB, newCursor = config.gen_connection()
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
newCursor.execute("SELECT hash FROM media WHERE platform='instagram' AND hash IS NOT NULL")
existing_files = [image[0] for image in newCursor.fetchall()]
dump_instagram('storysaver/missing/')
print("Processing completed.")

@ -1,67 +0,0 @@
from BunnyCDN.Storage import Storage
import os, uuid, config, funcs
from datetime import datetime
from PIL import Image
def dump_tiktok(folder_path):
for folder in os.listdir(folder_path):
if os.path.isdir(os.path.join(folder_path, folder)):
username = folder
for filename in os.listdir(os.path.join(folder_path, folder)):
filepath = os.path.join(folder_path, folder, filename)
upload_file(username=username, filepath=filepath)
def upload_file(filepath, username):
filename = os.path.basename(filepath)
media_id = filename.split('.')[0]
file_extension = os.path.splitext(filename)[1].lower()
media_type = funcs.get_media_type(filename)
file_hash = funcs.calculate_file_hash(filepath)
duration = funcs.get_video_duration(filepath) if media_type == 'video' else 0
width, height = funcs.get_video_dimensions(filepath) if media_type == 'video' else Image.open(filepath).size
dirtype = funcs.determine_post_type(filepath, media_type)
server_path = os.path.join('media', dirtype, username, f'{media_id}{file_extension}')
file_url = f"https://storysave.b-cdn.net/{server_path}"
# check for duplicates before uploading so duplicate files never reach the CDN
if file_hash in existing_files:
print('Duplicate file detected. Removing...')
os.remove(filepath)
return False
obj_storage.PutFile(filepath, server_path)
query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, platform, hash, filename, duration, media_id) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
values = (username, media_type, file_url, width, height, 'tiktok', file_hash, filename, duration, media_id)
newCursor.execute(query, values)
newDB.commit()
print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')
if newCursor.rowcount > 0:
os.remove(filepath)
return True
if __name__ == '__main__':
print('Starting processing...')
newDB, newCursor = config.gen_connection()
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
newCursor.execute("SELECT hash FROM media WHERE platform='tiktok' AND hash IS NOT NULL")
existing_files = [image[0] for image in newCursor.fetchall()]
dump_tiktok('tiktok/')
print("Processing completed.")

@ -1,13 +0,0 @@
from BunnyCDN.Storage import Storage
from datetime import datetime
import os, config, funcs
from PIL import Image
if __name__ == '__main__':
print('Starting processing...')
files = os.listdir('storysaver/missing_data/')
for file in files:
filePath = os.path.join('storysaver/missing_data/', file)
file_hash = funcs.calculate_file_hash(filePath)

@ -1,22 +0,0 @@
from BunnyCDN.Storage import Storage
from datetime import datetime
import os, config, funcs
from PIL import Image
if __name__ == '__main__':
print('Starting processing...')
newDB, newCursor = config.gen_connection()
newCursor.execute("SELECT hash FROM media WHERE platform='instagram' AND hash IS NOT NULL")
existing_files = [image[0] for image in newCursor.fetchall()]
files = os.listdir('storysaver/missing_data/')
for file in files:
filePath = os.path.join('storysaver/missing_data/', file)
file_hash = funcs.calculate_file_hash(filePath)
if file_hash in existing_files:
print(f'Duplicate file detected. Removing {filePath}...')
os.rename(filePath, f'storysaver/dupes/{file}')

@ -1,12 +0,0 @@
import os
def remove_empty_folders(folder):
for root, dirs, files in os.walk(folder):
for dir in dirs:
dirpath = os.path.join(root, dir)
if not os.listdir(dirpath):
print(f"Removing empty folder {dirpath}")
os.rmdir(dirpath)
folder = 'media'
remove_empty_folders(folder)

@ -1,10 +0,0 @@
ChallengeResolve: Unknown step_name "submit_phone" for "olivercury" in challenge resolver: {'step_name': 'submit_phone', 'step_data': {'phone_number': '+972522618221', 'show_whatsapp_otp_choice': True, 'whatsapp': False}, 'flow_render_type': 3, 'bloks_action': 'com.instagram.challenge.navigation.take_challenge', 'cni': 18436897147040850, 'challenge_context': 'Af6pVKkiomiOMxWvLzouGukazqMMhFbzNERezSMhBU-dHrO_DNGfTJpUPp8-di6HHm8WfAfL6_PQaLkV6sOkb6CC68ugfQtLMd3OgMVasZkOI5O6YdnoqMtBzNBGd944VtUNEEkl9bNVM5yQbfMskCuKTUf7AQOIYD2zEuvd8wC-AUBPziP105a1xq3GbaSeyJ9QnEJHHWgpFenBURUNbdLvQ9lzs5j62zCxo_0fe4Fw', 'challenge_type_enum_str': 'SMS', 'status': 'ok'}
requests.exceptions.HTTPError: 401 Client Error: Unauthorized for url: https://www.instagram.com/graphql/query/?variables=%7B%22user_id%22%3A%226208321762%22%2C%22include_reel%22%3Atrue%7D&query_hash=ad99dd9d3646cc3c0dda65debcd266a7
During handling of the above exception, another exception occurred:
instagrapi.exceptions.ClientUnauthorizedError: 401 Client Error: Unauthorized for url: https://www.instagram.com/graphql/query/?variables=%7B%22user_id%22%3A%226208321762%22%2C%22include_reel%22%3Atrue%7D&query_hash=ad99dd9d3646cc3c0dda65debcd266a7
During handling of the above exception, another exception occurred:
requests.exceptions.HTTPError: 401 Client Error: Unauthorized for url: https://i.instagram.com/api/v1/users/6208321762/info/
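All of these boil down to a dead or challenged session; a minimal sketch of one way to recover, assuming instagrapi's Client with a previously saved session (the session path and retry policy are illustrative):

from instagrapi import Client
from instagrapi.exceptions import ChallengeRequired, ClientUnauthorizedError

cl = Client()
cl.load_settings('session.json')  # hypothetical path; assumes credentials were saved with the session

def safe_user_info(user_id):
    try:
        return cl.user_info(user_id)
    except (ClientUnauthorizedError, ChallengeRequired):
        cl.relogin()  # re-authenticate once, then retry
        return cl.user_info(user_id)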

@ -1,53 +0,0 @@
import os
import shutil
import hashlib
def clean_empty_folders(directory):
for foldername, subfolders, filenames in os.walk(directory, topdown=False):
for subfolder in subfolders:
folder_path = os.path.join(foldername, subfolder)
if not os.listdir(folder_path):
os.rmdir(folder_path)
print(f"Removed empty folder: {folder_path}")
def calculate_file_hash(file_path, hash_func='sha256'):
h = hashlib.new(hash_func)
with open(file_path, 'rb') as file:
chunk = file.read(8192)
while chunk:
h.update(chunk)
chunk = file.read(8192)
return h.hexdigest()
def get_media_type(filename):
extensions = {
'.jpg': 'image', '.jpeg': 'image', '.webp': 'image', '.png': 'image', '.gif': 'image',
'.mp4': 'video', '.mov': 'video'
}
for ext, media_type in extensions.items():
if filename.lower().endswith(ext):
return media_type
return None
def move_files(source_root, destination_root):
for root, dirs, files in os.walk(source_root):
for file in files:
if "~" in file or 'FB_IMG' in file or 's instagram' in file:
username = file.split("'")[0]
source_path = os.path.join(root, file)
rel_path = os.path.relpath(root, source_root)
destination_path = os.path.join(destination_root, username, rel_path)
if not os.path.exists(destination_path):
os.makedirs(destination_path)
shutil.move(source_path, os.path.join(destination_path, file))
print(f"Moved {file} to {destination_path}")
if __name__ == '__main__':
print('Starting processing...')
source_directory = 'StorySave_Sort/Sort/StorySave'
destination_directory = 'StorySave_Sort/Final/Stories'
move_files(source_directory, destination_directory)
clean_empty_folders(source_directory)
print("Processing completed.")

@ -1,85 +0,0 @@
import os
import config
import cv2
from funcs import get_files # Assuming this is defined elsewhere
import imagehash
from PIL import Image
def generate_thumbnail_phash(filepath, hash_size=8): # Set hash_size to 8
cap = cv2.VideoCapture(filepath)
ret, frame = cap.read()
cap.release()
if not ret:
print(f"Error reading frame from {filepath}")
return None
# Resize frame to a standard size
standard_size = (320, 240)
resized_frame = cv2.resize(frame, standard_size, interpolation=cv2.INTER_AREA)
# Convert OpenCV image (BGR) to PIL Image (RGB)
image_rgb = cv2.cvtColor(resized_frame, cv2.COLOR_BGR2RGB)
pil_image = Image.fromarray(image_rgb)
# Compute pHash
phash = imagehash.phash(pil_image, hash_size=hash_size)
return phash
def are_phashes_duplicates(phash1, phash2, threshold=5):
# Compute Hamming distance between the pHashes
try:
distance = phash1 - phash2
except TypeError as e:
print(f"Error comparing pHashes: {e}")
return False
return distance <= threshold
def get_media_by_phash(phash, username, existing_medias, threshold=5):
for media in existing_medias:
existing_phash_str = media[1]
existing_username = media[2]
if existing_username != username:
continue
# Convert stored phash string to ImageHash object
existing_phash = imagehash.hex_to_hash(existing_phash_str)
if are_phashes_duplicates(phash, existing_phash, threshold=threshold):
return media
return None
# Database connection
db, cursor = config.gen_connection()
# Fetch existing videos with pHashes
cursor.execute("SELECT id, phash, username FROM media WHERE media_type = %s AND phash IS NOT NULL", ['video'])
existing_medias = cursor.fetchall()
users = os.listdir('videos')
for username in users:
user_videos_path = os.path.join('videos', username)
if not os.path.isdir(user_videos_path):
continue
videos = os.listdir(user_videos_path)
for video in videos:
print(f'Processing {video}...')
filepath = os.path.join(user_videos_path, video)
phash = generate_thumbnail_phash(filepath, hash_size=8) # Use hash_size=8
if phash is None:
continue
phash_str = str(phash)
duplicate_media = get_media_by_phash(phash, username, existing_medias, threshold=5)
if duplicate_media:
print(f'Duplicate url found: https://altpins.com/pin/{duplicate_media[0]}')
print(f'Duplicate video path: {filepath}')
newpath = filepath.replace('videos', 'duplicates')
os.makedirs(os.path.dirname(newpath), exist_ok=True)
os.rename(filepath, newpath)
print(f'Moved {video} to duplicates/')

@ -1,2 +0,0 @@
https://www.instagram.com/anya_shtril/
https://www.instagram.com/anyarodionov/

@ -1,40 +0,0 @@
import config, os, json
from PIL import Image
import imagehash
def find_file(filename, directory):
filename = filename.lower().split('.')[0]
for root, dirs, files in os.walk(directory):
for file in files:
if filename in file:
return os.path.join(root, file)
return None
def generate_phash(image_path):
image = Image.open(image_path)
return str(imagehash.phash(image))
count = 0
cacheDir = 'sorted'
dataPath = 'pins.json'
os.makedirs(cacheDir, exist_ok=True)
medias = json.load(open(dataPath))
for item in medias:
count += 1
filepath = item['filepath']
if os.path.exists(filepath):
continue
newfilepath = find_file(os.path.basename(filepath), cacheDir)
if newfilepath:
print(f"Found file {newfilepath} for {filepath}")
item['filepath'] = newfilepath
with open(dataPath, 'w') as f:
json.dump(medias, f)

@ -1,94 +0,0 @@
from BunnyCDN.Storage import Storage
from moviepy.editor import VideoFileClip
import config
import hashlib
import requests
import os
def file_hash_from_url(url, hash_algo='sha256'):
h = hashlib.new(hash_algo)
response = requests.get(url, stream=True)
if response.status_code == 200:
for chunk in response.iter_content(8192):
h.update(chunk)
return h.hexdigest()
else:
raise Exception(f"Failed to download file: Status code {response.status_code}")
def get_video_duration(file_path):
"""
Returns the duration of the video file in seconds.
:param file_path: Path to the video file
:return: Duration in seconds
"""
try:
with VideoFileClip(file_path) as video:
return video.duration
except:
return 0
def file_hash(filename, hash_algo='sha256'):
"""
Compute the hash of a file.
:param filename: Path to the file.
:param hash_algo: Hashing algorithm to use (e.g., 'sha256', 'md5').
:return: Hexadecimal hash string.
"""
# Create a hash object
h = hashlib.new(hash_algo)
# Open the file in binary mode and read in chunks
with open(filename, 'rb') as file:
while chunk := file.read(8192):
h.update(chunk)
# Return the hexadecimal digest of the hash
return h.hexdigest()
# the hashes of the images differ because the optimizer re-encodes them
#obj_storage = Storage('577cb82d-8176-4ccf-935ce0a574bf-fe4c-4012', 'altpins')
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
db, cursor = config.gen_connection()
cursor.execute("SELECT id, media_id, media_url FROM media WHERE duration = 0 AND media_type = 'video' AND status != 'deleted';")
results = cursor.fetchall()
count = 0
print(f"Found {len(results)} files to process.")
cacheDir = 'cache'
for result in results:
count += 1
videoID = result[0]
mediaID = result[1]
mediaURL = result[2]
extension = mediaURL.split('.')[-1]
serverPath = result[2].replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))
if os.path.exists(localFilePath):
print(f"File already exists: {localFilePath}")
else:
obj_storage.DownloadFile(storage_path=serverPath, download_path=cacheDir)
duration = get_video_duration(localFilePath)
if duration == 0:
print(f"Failed to get duration for {localFilePath}")
continue
if duration < 1:
duration = 1
cursor.execute("UPDATE media SET duration = %s WHERE id = %s;", (duration, result[0]))
db.commit()
print(f"[{count}/{len(results)}] {result[1]}: {duration}, {cursor.rowcount}")

@ -1,47 +0,0 @@
from BunnyCDN.Storage import Storage
import config, os, funcs
from PIL import Image
# the hashes of the images differ because the optimizer re-encodes them
#obj_storage = Storage('577cb82d-8176-4ccf-935ce0a574bf-fe4c-4012', 'altpins')
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
db, cursor = config.gen_connection()
cursor.execute("SELECT id, media_id, media_url FROM media WHERE width = 0;")
results = cursor.fetchall()
count = 0
print(f"Found {len(results)} files to process.")
cacheDir = 'cache'
for result in results:
count += 1
videoID = result[0]
mediaID = result[1]
mediaURL = result[2]
extension = mediaURL.split('.')[-1]
serverPath = result[2].replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))
if os.path.exists(localFilePath):
print(f"File already exists: {localFilePath}")
else:
obj_storage.DownloadFile(storage_path=serverPath, download_path=cacheDir)
mediaType = funcs.get_media_type(localFilePath)
if mediaType == 'image':
with Image.open(localFilePath) as img:
width, height = img.size
elif mediaType == 'video':
width, height = funcs.get_video_dimensions(localFilePath)
cursor.execute("UPDATE media SET width = %s, height=%s WHERE id = %s;", (width, height, videoID))
db.commit()
print(f"[{count}/{len(results)}] width: {width}, height: {height} {cursor.rowcount}")

@ -1,63 +0,0 @@
from BunnyCDN.Storage import Storage
import config, os, cv2
from concurrent.futures import ThreadPoolExecutor
# this script will take a screenshot of the first frame of each video and upload it as a thumbnail to BunnyCDN
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
db, cursor = config.gen_connection()
cursor.execute("SELECT id, media_id, media_url FROM media WHERE media_type = 'video' AND thumbnail IS NULL and status = 'public';")
results = cursor.fetchall()
count = 0
print(f"Found {len(results)} files to process.")
cacheDir = 'cache'
def DownloadFile(serverPath, cacheDir):
localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))
if os.path.exists(localFilePath):
print(f"File already exists: {localFilePath}")
return localFilePath
obj_storage.DownloadFile(storage_path=serverPath, download_path=cacheDir)
print(f"Downloaded {serverPath} to {localFilePath}")
return localFilePath
def ImportMedias():
with ThreadPoolExecutor(max_workers=10) as executor:
for video in results:
serverPath = video[2].replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
executor.submit(DownloadFile, serverPath, cacheDir)
for result in results:
count += 1
itemID = result[0]
mediaID = result[1]
mediaURL = result[2]
extension = mediaURL.split('.')[-1]
serverPath = result[2].replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))
filePath = DownloadFile(serverPath, cacheDir)
cap = cv2.VideoCapture(localFilePath)
ret, frame = cap.read()
cap.release()
if not ret:
print(f"Failed to read a frame from {localFilePath}, skipping.")
continue
cv2.imwrite('thumbnail.jpg', frame)
thumbnailURL = f"https://storysave.b-cdn.net/thumbnails/{itemID}.jpg"
obj_storage.PutFile('thumbnail.jpg', f'thumbnails/{itemID}.jpg')
cursor.execute("UPDATE media SET thumbnail = %s WHERE id = %s;", (thumbnailURL, itemID))
db.commit()
print(f"[{count}/{len(results)}] thumbnail: {thumbnailURL} {cursor.rowcount}")

@ -129,8 +129,10 @@ def download_file(url, filePath):
out_file.write(chunk)
print(f"Downloaded {filePath}")
return True
except Exception as e:
print(f"Failed to download {url}. Error: {e}")
return False
def determine_post_type(filepath):
width, height = get_media_dimensions(filepath)
@ -164,7 +166,6 @@ def get_video_duration(file_path):
video_types = {".mp4", ".mov", ".mkv"}
extension = os.path.splitext(file_path.lower())[1]
if extension not in video_types:
print(f"File is not a video: {file_path}")
return 0
try:

@ -1,19 +0,0 @@
# skit idea for movie avigail and the hackers at 05:58
import subprocess
import tkinter as tk
window = tk.Tk()
window.title("ENTER PIN BOOM BOOM HURUMPH HACKER OOOOHHHH")
label = tk.Label(window, text="Enter PIN to hack:")
label.pack()
pin_entry = tk.Entry(window, show=".")
pin_entry.pack()
pin_entry.bind("<Return>", lambda event: subprocess.run(["python", "hack.py", pin_entry.get()]))
window.mainloop()  # run Tk's event loop instead of busy-waiting on update()

BIN
old/.DS_Store vendored

Binary file not shown.

@ -1,23 +0,0 @@
import json
with open('bunny_data/missing_videos.json', 'r') as f:
missing_videos = json.load(f)
with open('bunny_data/allVideos.json', 'r') as f:
all_videos = json.load(f)
all_videos_guids = {video['guid'] for video in all_videos}
for video in missing_videos:
if video['guid'] in all_videos_guids:
video['imported'] = True
combined_data = {
"missing_videos": missing_videos,
"all_videos": all_videos
}
with open('bunny_data/combined_videos.json', 'w') as f:
json.dump(combined_data, f, indent=4)
print("Combined data has been written to bunny_data/combined_videos.json")

@ -1,16 +0,0 @@
import os, json
with open('db_pins.json', 'r') as f:
pins = json.load(f)
with open('db_pins_imported.json', 'r') as f:
importedPins = json.load(f)
allPins = pins + importedPins
print(len(allPins))
with open('allPins.json', 'r') as f:
finalPins = json.load(f)
print(len(finalPins))

@ -1,19 +0,0 @@
import os, config, funcs
if __name__ == '__main__':
print('Starting processing...')
newDB, newCursor = config.gen_connection()
newCursor.execute("SELECT hash FROM media WHERE platform='instagram' AND hash IS NOT NULL")
existing_files = [image[0] for image in newCursor.fetchall()]
files = os.listdir('storysaver/missingdata/')
for file in files:
filePath = os.path.join('storysaver/missingdata/', file)
file_hash = funcs.calculate_file_hash(filePath)
if file_hash in existing_files:
print(f'Duplicate file detected. Removing {filePath}...')
os.rename(filePath, f'storysaver/dupes/{file}')

@ -1,38 +0,0 @@
import os, json
def getMedia(filename, list):
for item in list:
if filename.split('.')[0] in item['filepath']:
return item
return None
data = json.loads(open('oldpins.json').read())
files = os.listdir('STORAGE')
count = 0
for file in files:
filepath = f'STORAGE/{file}'
if os.path.isdir(filepath):
continue
media = getMedia(file, data)
if not media:
continue
username = media['title']
filetype = media['type']
filetype = 'jpg' if filetype == 'image' else 'mp4'
filename = media['filepath'].split('/')[-1] + '.' + filetype
output = os.path.join('STORAGE', username, filename)
os.makedirs(os.path.dirname(output), exist_ok=True)
if os.path.exists(output):
os.remove(output)
output = os.path.join('STORAGE', username, file)
os.rename(filepath, output)
count += 1
print(f'File: {file}')
print(f'Total: {count}')

@ -1,45 +0,0 @@
import funcs, json, os, config
db, newCursor = config.gen_connection()
newCursor.execute("SELECT hash FROM media")
hashes = [row[0] for row in newCursor.fetchall()]
file = 'bunnyVideos.json'
data = json.loads(open(file).read())
for media in data:
if media['imported'] == True:
if not os.path.exists(media['filepath']):
print(f'File {media["filepath"]} does not exist. Skipping...')
continue
countImported = 0
countSkipped = 0
for media in data:
filepath = os.path.join('STREAM_VIDEOS_IMPORTED', media['guid'] + '.mp4')
if media['imported'] == True:
countImported += 1
print('File already imported. Skipping...')
continue
countSkipped += 1
if not os.path.exists(filepath):
print(f'File {filepath} does not exist. Skipping...')
continue
file_hash = funcs.calculate_file_hash(filepath)
if '67caa15e-390c-4223-b7b9-4d7842f3b443' in filepath:
print(f'Skipping {filepath}...')
continue
if file_hash in hashes:
print('Duplicate file detected. Removing...')
print(f'Imported: {countImported}')
print(f'Skipped: {countSkipped}')

@ -1,17 +0,0 @@
from funcs import get_files, generate_phash
import os, config
db, cursor = config.gen_connection()
cursor.execute("SELECT phash FROM media WHERE phash IS NOT NULL;")
phashes = [x[0] for x in cursor.fetchall()]
files = get_files('images')
for item in files:
phash = generate_phash(item)
if phash in phashes:
print(item)
newpath = item.replace('images', 'duplicates')
newdir = os.path.dirname(newpath)
os.makedirs(newdir, exist_ok=True)
os.rename(item, newpath)

@ -1,56 +0,0 @@
from BunnyCDN.Storage import Storage
import os, config, requests
from moviepy.editor import VideoFileClip
def get_media_type(filename):
image_extensions = {".jpg", ".jpeg", ".png", ".gif", ".webp"}
video_extensions = {".mp4", ".mov"}
extension = os.path.splitext(filename.lower())[1]
if extension in image_extensions:
return 'image'
elif extension in video_extensions:
return 'video'
else:
return 'unknown'
def determine_post_type(media_type):
# Assuming the post type is directly based on media type.
return media_type
def get_video_dimensions(filepath):
with VideoFileClip(filepath) as clip:
width, height = clip.size
return width, height
def download_file(url):
local_filename = url.split('/')[-1]
# Note: Stream=True to avoid loading the whole file into memory
with requests.get(url, stream=True) as r:
r.raise_for_status()
with open(local_filename, 'wb') as f:
for chunk in r.iter_content(chunk_size=8192):
f.write(chunk)
return local_filename
if __name__ == '__main__':
newDB, newCursor = config.gen_connection()
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
posts = open('fucked', 'r')
for item in posts:
username, url = item.strip().split('~')
media_id = url.split('/')[-1].split('.')[0]
media_type = get_media_type(url)
query = "INSERT IGNORE INTO media (username, media_type, platform, media_url) VALUES (%s, %s, %s, %s)"
values = (username, media_type, 'facebook', url)
try:
newCursor.execute(query, values)
newDB.commit()
print(f'[{newCursor.rowcount}] records updated.{url}')
except Exception as e:
print(f"Database error: {e}")
posts.close()

@ -1,41 +0,0 @@
import config, os
from PIL import Image
import imagehash
def generate_phash(image_path):
image = Image.open(image_path)
return str(imagehash.phash(image))
db, cursor = config.gen_connection()
cursor.execute("SELECT id, media_id, media_url FROM media WHERE media_type = 'image' AND phash IS NULL;")
results = cursor.fetchall()
count = 0
cacheDir = 'cache'
os.makedirs(cacheDir, exist_ok=True)
print(f"Found {len(results)} files to process.")
for result in results:
count += 1
itemID = result[0]
mediaID = result[1]
mediaURL = result[2]
serverPath = mediaURL.replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))
if not os.path.exists(localFilePath):
print(f"File {localFilePath} does not exist, skipping.")
continue
try:
phash = generate_phash(localFilePath)
cursor.execute("UPDATE media SET phash = %s WHERE id = %s", (phash, itemID))
db.commit()
print(f"Processed {count}/{len(results)}: {mediaID} with pHash {phash}")
except Exception as e:
print(f"Error processing {mediaID}: {e}")

@ -1,35 +0,0 @@
from concurrent.futures import ThreadPoolExecutor
from BunnyCDN.Storage import Storage
import config, os
def DownloadFile(serverPath, cacheDir):
localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))
if os.path.exists(localFilePath):
print(f"File already exists: {localFilePath}")
return localFilePath
obj_storage.DownloadFile(storage_path=serverPath, download_path=cacheDir)
print(f"Downloaded {serverPath} to {localFilePath}")
return localFilePath
def ImportMedias(results):
with ThreadPoolExecutor(max_workers=10) as executor:
for video in results:
serverPath = video[2].replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
executor.submit(DownloadFile, serverPath, cacheDir)
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
db, cursor = config.gen_connection()
cursor.execute("SELECT id, media_id, media_url FROM media WHERE media_type = 'image' AND phash IS NULL;")
results = cursor.fetchall()
count = 0
cacheDir = 'cache'
print(f"Found {len(results)} files to process.")
ImportMedias(results)

@ -1,47 +0,0 @@
from BunnyCDN.Storage import Storage
import config
import hashlib
import os
def file_hash(filename, hash_algo='sha256'):
"""
Compute the hash of a file.
:param filename: Path to the file.
:param hash_algo: Hashing algorithm to use (e.g., 'sha256', 'md5').
:return: Hexadecimal hash string.
"""
h = hashlib.new(hash_algo)
with open(filename, 'rb') as file:
while chunk := file.read(8192):
h.update(chunk)
return h.hexdigest()
#obj_storage = Storage('577cb82d-8176-4ccf-935ce0a574bf-fe4c-4012', 'altpins')
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
db, cursor = config.gen_connection()
cursor.execute("SELECT id, media_id, media_url FROM media WHERE hash IS NULL;")
results = cursor.fetchall()
count = 0
print(f"Found {len(results)} files to process.")
for result in results:
count += 1
serverPath = result[2].replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
localFilePath = os.path.join(os.getcwd(), 'temp', os.path.basename(serverPath))
if not os.path.exists(localFilePath):
obj_storage.DownloadFile(storage_path=serverPath, download_path=os.path.join(os.getcwd(), 'temp'))
filehash = file_hash(localFilePath)
cursor.execute("UPDATE media SET hash = %s WHERE id = %s;", (filehash, result[0]))
db.commit()
print(f"[{count}/{len(results)}] {result[1]}: {filehash}, {cursor.rowcount}")

@ -1,24 +0,0 @@
import os, json
from funcs import generate_phash
count = 0
cacheDir = 'cache'
dataPath = 'pins.json'
os.makedirs(cacheDir, exist_ok=True)
medias = json.load(open(dataPath))
for item in medias:
count += 1
if item['type'] == 'image':
filepath = item['filepath']
if not os.path.exists(filepath):
print(f"File {filepath} does not exist, skipping.")
continue
phash = generate_phash(filepath)
item['phash'] = phash
print(f"Processed {count}/{len(medias)}: with pHash {phash}")
with open(dataPath, 'w') as f:
json.dump(medias, f)

@ -1,33 +0,0 @@
import config
from funcs import generate_phash
count = 0
storage = config.get_storage()
db, cursor = config.gen_connection()
cursor.execute("SELECT id, media_url FROM media WHERE media_type = %s AND phash IS NULL;", ['image'])
medias = cursor.fetchall()
for item in medias:
count += 1
itemID = item[0]
media_url = item[1]
server_path = media_url.replace('https://storysave.b-cdn.net/', '').replace('\\', '/')
filepath = storage.DownloadFile(server_path, 'temp')
if not filepath:
print(f"Error downloading {server_path}")
continue
phash = generate_phash(filepath)
if not phash:
print(f"Error generating pHash for {filepath}")
continue
cursor.execute("UPDATE media SET phash = %s WHERE id = %s", [phash, itemID])
db.commit()
print(f"[{cursor.rowcount}] Processed {count}/{len(medias)}: with pHash {phash}")

@ -1,33 +0,0 @@
import config
from funcs import generate_phash
count = 0
storage = config.get_storage()
db, cursor = config.gen_connection()
cursor.execute("SELECT id, thumbnail FROM media WHERE media_type = %s AND phash IS NULL AND thumbnail IS NOT NULL;", ['video'])
medias = cursor.fetchall()
for item in medias:
count += 1
itemID = item[0]
media_url = item[1]
server_path = media_url.replace('https://storysave.b-cdn.net/', '').replace('\\', '/')
filepath = storage.DownloadFile(server_path, 'temp')
if not filepath:
print(f"Error downloading {server_path}")
continue
phash = generate_phash(filepath)
if not phash:
print(f"Error generating pHash for {filepath}")
continue
cursor.execute("UPDATE media SET phash = %s WHERE id = %s", [phash, itemID])
db.commit()
print(f"[{cursor.rowcount}] Processed {count}/{len(medias)}: with pHash {phash}")

@ -1,24 +0,0 @@
import config
altpins_db, altpins_cursor = config.altpins_gen_connection()
db, cursor = config.gen_connection()
altpins_cursor.execute("SELECT id, title, hash, url FROM pins WHERE hash IS NOT NULL;")
altpins_results = { (row[1], row[2]): (row[0], row[3]) for row in altpins_cursor.fetchall() }
cursor.execute("SELECT id, username, hash, media_url FROM media WHERE hash IS NOT NULL;")
media_results = { (row[1], row[2]): (row[0], row[3]) for row in cursor.fetchall() }
common_items = set(altpins_results.keys()) & set(media_results.keys())
for title, hash_value in common_items:
altpins_id, altpins_url = altpins_results[(title, hash_value)]
media_id, media_url = media_results[(title, hash_value)]
print(f"Found a match for hash {hash_value} with title {title}")
print(f"Altpins URL: {altpins_url}")
print(f"Media URL: {media_url}")
altpins_cursor.execute("DELETE FROM pins WHERE id = %s;", [altpins_id])
altpins_db.commit()
print(f"Deleted pin {altpins_id}. {altpins_cursor.rowcount} rows affected")

@ -1,27 +0,0 @@
import os, json
folderPath = 'STREAM_IMPORTED'
jsonFile = 'bunnyVideos.json'
data = json.load(open(jsonFile))
for item in data:
username = item['title']
filepath = os.path.join(folderPath, item['guid'] + '.mp4')
if username in filepath:
continue
output = os.path.join(folderPath, username, os.path.basename(filepath))
os.makedirs(os.path.dirname(output), exist_ok=True)
if os.path.exists(filepath):
os.rename(filepath, output)
item['filepath'] = output
# save the updated data back to the json file
with open(jsonFile, 'w') as f:
json.dump(data, f, indent=4)

@ -1,44 +0,0 @@
from moviepy.editor import VideoFileClip
import json
def is_valid_video(file_path):
try:
with VideoFileClip(file_path) as video:
return True
except Exception as e:
print(f"Invalid video {file_path}: {str(e)}")
return False
def load_hashes(file_path):
try:
with open(file_path, 'r') as file:
return json.load(file)
except FileNotFoundError:
return {}
def save_hashes(hashes, file_path):
with open(file_path, 'w') as file:
json.dump(hashes, file, indent=4)
def find_duplicates(video_hashes):
hash_map = {}
for video, v_hash in video_hashes:
if v_hash in hash_map:
hash_map[v_hash].append(video)
else:
hash_map[v_hash] = [video]
duplicates = {h: vids for h, vids in hash_map.items() if len(vids) > 1}
return duplicates
hashes = load_hashes('video_hashes.json')
for username, user_hashes in hashes.items():
print(f"Checking for duplicates in '{username}' videos:")
duplicates = find_duplicates(user_hashes)
if duplicates:
for dup_hash, dup_videos in duplicates.items():
print(f"Duplicate hash: {dup_hash}")
for vid in dup_videos:
print(f" - {vid}")
else:
print("No duplicates found.")

@ -1,48 +0,0 @@
from videohash import VideoHash
import os
# Directory containing videos grouped by username
video_directory = '/path/to/videos'
hashes = {}
for username in os.listdir(video_directory):
user_dir = os.path.join(video_directory, username)
if os.path.isdir(user_dir):
for video_file in os.listdir(user_dir):
if video_file.endswith(('.mp4', '.mkv', '.avi')): # Ensure it's a video file
video_path = os.path.join(user_dir, video_file)
try:
# Calculate the hash for each video
video_hash = VideoHash(path=video_path)
print(f"Hash for {video_file}: {video_hash.hash}")
# Store hashes in a dictionary
if username in hashes:
hashes[username].append((video_file, video_hash.hash))
else:
hashes[username] = [(video_file, video_hash.hash)]
except Exception as e:
print(f"Error processing {video_file}: {str(e)}")
def find_duplicates(hashes):
duplicate_videos = []
all_hashes = [(user, video, hsh) for user, videos in hashes.items() for video, hsh in videos]
hash_dict = {}
for user, video, hsh in all_hashes:
if hsh in hash_dict:
hash_dict[hsh].append((user, video))
else:
hash_dict[hsh] = [(user, video)]
for videos in hash_dict.values():
if len(videos) > 1:
duplicate_videos.append(videos)
return duplicate_videos
duplicates = find_duplicates(hashes)
for duplicate in duplicates:
print("Duplicate videos found:")
for video_info in duplicate:
print(f"User: {video_info[0]}, Video: {video_info[1]}")

@ -1,49 +0,0 @@
import os, json
def get_file_type(filepath):
if filepath.endswith('.jpg') or filepath.endswith('.png'):
return 'image'
elif filepath.endswith('.mp4'):
return 'video'
else:
return None
def get_files(directory):
files = []
for root, dirs, filenames in os.walk(directory):
for filename in filenames:
files.append(os.path.join(root, filename))
return files
files = get_files('STORAGE/')
os.makedirs('images', exist_ok=True)
os.makedirs('videos', exist_ok=True)
for filepath in files:
if not os.path.exists(filepath):
print(f"File {filepath} does not exist, skipping.")
continue
# Extract the username from the filepath assuming the structure STORAGE/{username}/{filename}
filepath = filepath.replace('\\', '/') # Replace backslashes with forward slashes
parts = filepath.split('/') # Split the path by the system's separator
if len(parts) < 3 or parts[0] != 'STORAGE': # Check if the structure is valid
print(f"Unexpected filepath format: {filepath}")
continue
username = parts[1] # Extract the username from the second part
fileType = get_file_type(filepath) # Determine the type of the file
if not fileType:
print(f"Unknown file type for {filepath}")
continue
if fileType == 'image':
newpath = os.path.join('images', username, os.path.basename(filepath))
elif fileType == 'video':
newpath = os.path.join('videos', username, os.path.basename(filepath))
else:
print(f"Unknown media type {fileType} for {filepath}")
continue
os.makedirs(os.path.dirname(newpath), exist_ok=True) # Create directory structure if it doesn't exist
os.rename(filepath, newpath) # Move the file to the new location

@ -1,19 +0,0 @@
import re
def process_func(input_hex):
keywords = ['set', 'b64d', 'href', 'domain', 'decode', '5', '.com/', 'document', 'prototype', '?id=', 'giabk', 'innerHeight', 'ver', 'gdd', '2000226', 'gcu', 'oSu', 'gdn', 'memory', 'instantiate', '37420168dpUfmN', 'isy', 'oCu', 'head', 'oDlu', '=([a-z.]+)&?', 'ast', 'then', '1155005PQhArT', 'from', '4896414PJJfCB', 'location', 'length', 'createElement', 'ghrde', '7127624hswjPR', 'navigator', 'ins', '2', 'buffer', '1482980WeuWEm', 'AGFzbQEAAAABHAVgAAF/YAN/f38Bf2ADf39/AX5gAX8AYAF/AX8DCQgAAQIBAAMEAAQFAXABAQEFBgEBgAKAAgYJAX8BQcCIwAILB2cHBm1lbW9yeQIAA3VybAADGV9faW5kaXJlY3RfZnVuY3Rpb25fdGFibGUBABBfX2Vycm5vX2xvY2F0aW9uAAcJc3RhY2tTYXZlAAQMc3RhY2tSZXN0b3JlAAUKc3RhY2tBbGxvYwAGCroFCCEBAX9BuAhBuAgoAgBBE2xBoRxqQYfC1y9wIgA2AgAgAAuTAQEFfxAAIAEgAGtBAWpwIABqIgQEQEEAIQBBAyEBA0AgAUEDIABBA3AiBxshARAAIgZBFHBBkAhqLQAAIQMCfyAFQQAgBxtFBEBBACAGIAFwDQEaIAZBBnBBgAhqLQAAIQMLQQELIQUgACACaiADQawILQAAazoAACABQQFrIQEgAEEBaiIAIARJDQALCyACIARqC3ECA38CfgJAIAFBAEwNAANAIARBAWohAyACIAUgACAEai0AAEEsRmoiBUYEQCABIANMDQIDQCAAIANqMAAAIgdCLFENAyAGQgp+IAd8QjB9IQYgA0EBaiIDIAFHDQALDAILIAMhBCABIANKDQALCyAGC+sCAgl/An5BuAggACABQQMQAiIMQbAIKQMAIg0gDCANVBtBqAgoAgAiA0EyaiIEIARsQegHbK2AIg0gA0EOaiIJIANBBGsgDEKAgPHtxzBUIgobrYA+AgAQABoQABogAkLo6NGDt87Oly83AABBB0EKIAxCgJaineUwVCIFG0ELQQwgBRsgAkEIahABIQMQABojAEEQayIEJAAgA0EuOgAAIARB4961AzYCDCADQQFqIQZBACEDIARBDGoiCy0AACIHBEADQCADIAZqIAc6AAAgCyADQQFqIgNqLQAAIgcNAAsLIARBEGokACADIAZqIQNBuAggDSAJrYBCAEKAgIAgQoCAgDBCgICAGCAMQoCYxq7PMVQbIAUbIAobhCAAIAFBBRACQhuGhD4CABAAGkECQQQQAEEDcCIAGyEBA0AgA0EvOgAAIAAgCEYhBCABQQUgA0EBahABIQMgCEEBaiEIIARFDQALIAMgAmsLBAAjAAsGACAAJAALEAAjACAAa0FwcSIAJAAgAAsFAEG8CAsLOwMAQYAICwaeoqassrYAQZAICxSfoKGjpKWnqKmqq62ur7Cxs7S1twBBqAgLDgoAAAA9AAAAAKzMX48B', 'src', 'match', '=(\d+)', 'userAgent', '__ab', 'oRu', '4936011fRStfE', 'type', 'gru', 'appendChild', 'oAu', '2zLdXaM', 'join', 'gfu', 'url', 'resolve', '__cngfg', 'concat', 'win', 'gfco', 'gau', 'hostname', 'time', 'script', 'gdlu', 'exports', 'sessionStorage', 'gcuk', '7461560KheCri'];
tricky_var = (int(input_hex, 16) - 0x154) % len(keywords)
changing_var = keywords[tricky_var]
return changing_var
with open("TEST.HTML", "r", encoding='utf-8') as file:
content = file.read()
pattern = r'processFunc\(0x([0-9a-fA-F]+)\)'
matches = re.findall(pattern, content)
for hex_val in set(matches):
replacement = process_func(hex_val)
content = re.sub(f'processFunc\(0x{hex_val}\)', f"'{replacement}'", content)
with open("TEST.HTML", "w", encoding='utf-8') as file:
file.write(content)
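Since the decoder just offsets into the keyword table from 0x154, a couple of spot checks against the table above:

print(process_func('154'))  # (0x154 - 0x154) % len = index 0 -> 'set'
print(process_func('158'))  # index 4 -> 'decode'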

@ -0,0 +1,21 @@
requests
terminaltables
pyzmq
flask
termcolor
beautifulsoup4
websocket-client
ffmpy
m3u8
bunnycdnpython
mysql-connector-python
requests_toolbelt
opencv-python
lxml
undetected_chromedriver
python-telegram-bot
tqdm
webdriver-manager
moviepy
instagrapi
ImageHash

@ -1,42 +0,0 @@
import os
from PIL import Image
def resize_image(image_path, max_width, max_height):
try:
image = Image.open(image_path)
width, height = image.size
if width > max_width or height > max_height:
aspect_ratio = width / height
if width > max_width:
new_width = max_width
new_height = int(new_width / aspect_ratio)
else:
new_height = max_height
new_width = int(new_height * aspect_ratio)
resized_image = image.resize((new_width, new_height))
resized_image.save(image_path)
print("Image resized successfully:", image_path)
else:
print("Image dimensions are within the desired limits:", image_path)
except Exception as e:
print(f'Failed to resize {image_path}: {e}')
def process_images_in_folder(folder_path, max_width, max_height):
for root, _, files in os.walk(folder_path):
for file_name in files:
if file_name.lower().endswith((".jpg", ".jpeg", ".png", ".bmp", ".gif")):
image_path = os.path.join(root, file_name)
resize_image(image_path, max_width, max_height)
folder_path = input('Path to folder:')
max_width = 720
max_height = 1280
process_images_in_folder(folder_path, max_width, max_height)

@ -1,42 +0,0 @@
https://rule34.xxx/index.php?page=post&s=view&id=8829721
https://rule34.xxx/index.php?page=post&s=view&id=9416031
https://rule34.xxx/index.php?page=post&s=view&id=10105236
https://rule34.xxx/index.php?page=post&s=view&id=9885293
https://rule34.xxx/index.php?page=post&s=view&id=10034199
https://rule34.xxx/index.php?page=post&s=view&id=10102882
https://rule34.xxx/index.php?page=post&s=view&id=10125394
https://rule34.xxx/index.php?page=post&s=view&id=7225351
https://rule34.xxx/index.php?page=post&s=view&id=8648800
https://rule34.xxx/index.php?page=post&s=view&id=8805292
https://rule34.xxx/index.php?page=post&s=view&id=9279505
https://rule34.xxx/index.php?page=post&s=view&id=9443010
https://rule34.xxx/index.php?page=post&s=view&id=9609049
https://rule34.xxx/index.php?page=post&s=view&id=9955496
https://rule34.xxx/index.php?page=post&s=view&id=9745604
https://rule34.xxx/index.php?page=post&s=view&id=9669668
https://rule34.xxx/index.php?page=post&s=view&id=9670073
https://rule34.xxx/index.php?page=post&s=view&id=9900309
https://rule34.xxx/index.php?page=post&s=view&id=10114922
https://rule34.xxx/index.php?page=post&s=view&id=9900309
https://rule34.xxx/index.php?page=post&s=view&id=9530599
https://rule34.xxx/index.php?page=post&s=view&id=7983487
https://rule34.xxx/index.php?page=post&s=view&id=9664965
https://rule34.xxx/index.php?page=post&s=view&id=10025400
https://rule34.xxx/index.php?page=post&s=view&id=4710252
https://rule34.xxx/index.php?page=post&s=view&id=8858439
https://rule34.xxx/index.php?page=post&s=view&id=9423465
https://rule34.xxx/index.php?page=post&s=list&tags=checkpik+animated+
https://rule34.xxx/index.php?page=post&s=list&tags=pewposterous+animated+
https://rule34.xxx/index.php?page=post&s=list&tags=realistic+animated+
https://rule34.xxx/index.php?page=post&s=list&tags=speedosausage
https://rule34.xxx/index.php?page=post&s=list&tags=animated+cute
https://rule34.xxx/index.php?page=post&s=list&tags=lerico213+animated+
https://rule34.xxx/index.php?page=post&s=list&tags=ivan_e_recshun+animated+
https://rule34.xxx/index.php?page=post&s=list&tags=chloeangelva+animated+
https://rule34.xxx/index.php?page=post&s=list&tags=zmsfm+animated+
https://rule34.xxx/index.php?page=post&s=list&tags=d.va+animated
https://rule34.xxx/index.php?page=post&s=list&tags=youngiesed
https://rule34.xxx/index.php?page=post&s=list&tags=dzooworks+animated
https://rule34.xxx/index.php?page=post&s=list&tags=sageofosiris+animated
https://rule34.xxx/index.php?page=post&s=list&tags=shirami_%28artist%29+animated+

@ -1,70 +1,95 @@
import os, requests, json
import requests, json
from bs4 import BeautifulSoup
from funcs import download_file
from concurrent.futures import ThreadPoolExecutor, as_completed
headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36"}
def get_data(username):
url = f"https://www.snapchat.com/add/{username}"
headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36"}
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.text, "html.parser")
data = soup.find("script", id="__NEXT_DATA__")
data = json.loads(data.string)
data_script = soup.find("script", id="__NEXT_DATA__")
if not data_script:
print(f"No data found for {username}.")
return None
data = json.loads(data_script.string)
return data
def get_all_users_data(usernames):
all_data = {}
# Define a helper function for threading
def fetch_data(username):
return username, get_data(username)
# Use ThreadPoolExecutor for concurrent fetching
with ThreadPoolExecutor() as executor:
futures = {executor.submit(fetch_data, username): username for username in usernames}
for future in as_completed(futures):
username = futures[future]
try:
username, data = future.result()
all_data[username] = data
except Exception as e:
print(f"Error fetching data for {username}: {e}")
all_data[username] = None
return all_data
def parse_stories(stories):
parsed_stories = []
for story in stories:
snap_id = story['snapId']['value']
snap_url = story['snapUrls']['mediaUrl']
timestamp = story['timestampInSec']['value']
parsed_stories.append({"media_id": snap_id, "url": snap_url, "timestamp": timestamp})
for story in stories:
parsed_story = parse_story(story)
parsed_stories.append(parsed_story)
return parsed_stories
def get_stories(data):
stories = data['props']['pageProps']['story']['snapList']
stories = parse_stories(stories)
return stories
try:
stories = data['props']['pageProps']['story']['snapList']
return parse_stories(stories)
except KeyError:
return []
def get_highlights(data):
highlights = data['props']['pageProps']['curatedHighlights']
highlights = []
page_props = data.get('props', {}).get('pageProps', {})
# Possible keys that might contain highlights
possible_highlight_keys = ['curatedHighlights', 'savedHighlights', 'highlights']
for key in possible_highlight_keys:
highlight_data = page_props.get(key, [])
if highlight_data:
highlights.extend(highlight_data)
return highlights
def get_highlight_stories(data):
highlights = get_highlights(data)
stories = []
for highlight in highlights:
stories.extend(parse_stories(highlight['snapList']))
return stories
def main():
directory = "snapchat_stories"
usernames = ['little.warren1', 'neiima22', 'awesome.nads', 'noordabash', 'aleximarianna', ]
for username in usernames:
print(f"Getting stories for {username}...")
data = get_data(username)
print("Getting stories...")
stories = get_stories(data)
print("Getting highlights...")
stories.extend(get_highlight_stories(data))
for story in stories:
media_id = story['media_id']
url = story['url']
timestamp = story['timestamp']
def parse_story(story):
original_snap_id = story.get('snapId', {}).get('value', '')
snap_url = story.get('snapUrls', {}).get('mediaUrl', '')
timestamp = story.get('timestampInSec', {}).get('value', '')
filename = f"{media_id}.jpg"
filepath = os.path.join(directory, filename)
return {
"original_snap_id": original_snap_id,
"snap_id": get_snap_id(snap_url),
"url": snap_url,
"timestamp": timestamp,
"platform": "snapchat",
"type": "story",
}
download_file(url, filepath)
def get_snap_id(url):
return url.split('/')[-1].split('.')[0]
print(f"Downloaded {filename} at {timestamp}")
if __name__ == "__main__":
main()
def get_highlight_stories(data):
stories = []
highlights = get_highlights(data)
for highlight in highlights:
snap_list = highlight.get('snapList', [])
for snap in snap_list:
story = parse_story(snap)
stories.append(story)
return stories
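A minimal usage sketch of this module, assuming it is saved as snapchat.py (the username is a placeholder and a network connection is required):

import json
from snapchat import get_data, get_stories, get_highlight_stories

data = get_data('example_user')  # placeholder username
if data:
    stories = get_stories(data) + get_highlight_stories(data)
    print(f"{len(stories)} snaps found")
    print(json.dumps(stories[:1], indent=2))  # each item carries snap_id, url, timestamp, ...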

@ -1,99 +1,85 @@
import os
import requests
import json
from bs4 import BeautifulSoup
def get_data(username):
url = f"https://www.snapchat.com/add/{username}"
headers = {
"user-agent": ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
"AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/129.0.0.0 Safari/537.36")
}
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.text, "html.parser")
data_script = soup.find("script", id="__NEXT_DATA__")
if not data_script:
print(f"No data found for {username}.")
return None
data = json.loads(data_script.string)
return data
def parse_stories(stories):
parsed_stories = []
for story in stories:
snap_id = story.get('snapId', {}).get('value', '')
snap_url = story.get('snapUrls', {}).get('mediaUrl', '')
timestamp = story.get('timestampInSec', {}).get('value', '')
if snap_url and timestamp and snap_id:
parsed_stories.append({
"media_id": snap_id,
"url": snap_url,
"timestamp": timestamp
})
return parsed_stories
def get_stories(data):
try:
stories = data['props']['pageProps']['story']['snapList']
return parse_stories(stories)
except KeyError:
return []
def get_highlights(data):
highlights = []
page_props = data.get('props', {}).get('pageProps', {})
# Possible keys that might contain highlights
possible_highlight_keys = ['curatedHighlights', 'savedHighlights', 'highlights']
for key in possible_highlight_keys:
highlight_data = page_props.get(key, [])
if highlight_data:
highlights.extend(highlight_data)
return highlights
def get_highlight_stories(data):
stories = []
highlights = get_highlights(data)
for highlight in highlights:
snap_list = highlight.get('snapList', [])
for snap in snap_list:
timestamp = snap.get('timestampInSec', {}).get('value', '')
snap_url = snap.get('snapUrls', {}).get('mediaUrl', '')
stories.append({
"media_id": snap.get('snapId', {}).get('value', ''),
"url": snap_url,
"timestamp": timestamp
})
return stories
def get_existing_media_ids(directory):
# get each file's base filename without extension, split it by '~' and take the 3rd element (the snap id)
existing_media_ids = set()
from datetime import datetime
import os, requests, config, json
from snapchat import get_data, get_stories, get_highlight_stories
"""
media_url_filename = url.split('/')[-1].split('?')[0]
etag = response.headers.get('ETag', '').replace('"', '')
filename = f"{username}~{timestamp}-{media_url_filename}~{etag}{extension}"
filepath = os.path.join(directory, 'highlights', filename)
"""
directory = "snapchat"
data_directory = "data"
def get_existing_snap_ids(directory):
existing_snap_ids = set()
for root, _, files in os.walk(directory):
for file in files:
if '~' not in file:
continue
filename, _ = os.path.splitext(file)
media_id = filename.split('~')[2]
existing_media_ids.add(media_id)
return existing_media_ids
snap_id = filename.split('~')[2]
existing_snap_ids.add(snap_id)
return existing_snap_ids
def find_duplicate_snap(existing_snaps, snap_id, username):
for snap in existing_snaps:
if username == snap[2]:
if snap_id in snap[1]:
return snap
return False
def archive_data(data, username):
data_filename = f"{username}~{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.json"
data_filepath = os.path.join(data_directory, data_filename)
with open(data_filepath, 'w') as f:
f.write(json.dumps(data))
print(f"Archived data for {username} at {data_filepath}")
def get_file_extension(url):
response = requests.head(url)
if response.status_code != 200:
print(f"Failed to access media {url}")
return None
content_type = response.headers.get('Content-Type', '')
if 'image' in content_type:
return '.jpg'
elif 'video' in content_type:
return '.mp4'
else:
print(f"Unknown content type for media {url}")
return None
def download_media(url, filepath):
if os.path.exists(filepath):
print(f"File {filepath} already exists. Skipping download.")
return filepath
response = requests.get(url)
if response.status_code != 200:
print(f"Failed to download media {url}")
return None
with open(filepath, 'wb') as f:
f.write(response.content)
return filepath
def main():
directory = "snapchat"
if not os.path.exists(directory):
os.makedirs(directory)
usernames = [
'aleximarianna', 'little.warren1', 'neiima22', 'awesome.nads', 'noordabash',
'jaynagirl', 'sierracannon', 'stefaniedra6',
'ciaoxxw', 'nadia-stone', 'c.aitknight', 'aimeejaiii',
'leonanaomii', 'ratskelet0n',
]
existing_media_ids = get_existing_media_ids(directory)
db, cursor = config.gen_connection()
cursor.execute("SELECT username FROM following WHERE platform = 'snapchat'")
usernames = [row[0] for row in cursor.fetchall()]
cursor.execute("SELECT id, filename, username FROM media WHERE filename IS NOT NULL AND platform = 'snapchat'")
existing_medias = cursor.fetchall()
existing_snap_ids = get_existing_snap_ids(directory)
for username in usernames:
print(f"Getting stories for {username}...")
@ -101,6 +87,8 @@ def main():
if not data:
continue
archive_data(data, username)
print("Getting stories...")
stories = get_stories(data)
@ -108,56 +96,36 @@ def main():
stories.extend(get_highlight_stories(data))
for story in stories:
media_id = story['media_id']
snap_id = story['snap_id']
url = story['url']
timestamp = story['timestamp']
# Check if media already exists
if media_id in existing_media_ids:
print(f"Media {media_id} already exists. Skipping download.")
duplicate_snap = find_duplicate_snap(existing_medias, snap_id, username)
if duplicate_snap:
print(f"Media {snap_id} already exists. Skipping download.")
continue
# Determine file extension using HEAD request
response = requests.head(url)
if response.status_code != 200:
print(f"Failed to access media {media_id}")
# Check if media already exists
if snap_id in existing_snap_ids:
print(f"Media {snap_id} already exists. Skipping download.")
continue
content_type = response.headers.get('Content-Type', '')
if 'image' in content_type:
extension = '.jpg'
elif 'video' in content_type:
extension = '.mp4'
else:
print(f"Unknown content type for media {media_id}")
# Determine file extension using HEAD request.
# TODO: find a better way to determine file extension without downloading the file.
extension = get_file_extension(url)
if not extension:
continue
if media_id:
filename = f"{username}~{timestamp}~{media_id}{extension}"
filepath = os.path.join(directory, filename)
else:
media_url_filename = url.split('/')[-1].split('?')[0]
etag = response.headers.get('ETag', '').replace('"', '')
filename = f"{username}~{timestamp}-{media_url_filename}~{etag}{extension}"
filepath = os.path.join(directory, 'highlights', filename)
filename = f"{username}~{timestamp}~{snap_id}{extension}"
filepath = os.path.join(directory, filename)
# Check if file already exists
if os.path.exists(filepath):
print(f"File {filename} already exists. Skipping download.")
continue
# Download the media
response = requests.get(url, stream=True)
if response.status_code != 200:
print(f"Failed to download media {media_id}")
continue
# Save the file
with open(filepath, 'wb') as f:
for chunk in response.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
filepath = download_media(url, filepath)
print(f"Downloaded {filename} at {timestamp}")
if __name__ == "__main__":

@ -1,24 +1,26 @@
from BunnyCDN.Storage import Storage
from datetime import datetime
import os, config, funcs, cv2
from PIL import Image
from uuid import uuid4
directory = 'snapchat'
def UploadMedia(media):
username = media['username']
timestamp = media['timestamp']
filepath = media['filepath']
filename = os.path.basename(filepath)
media_id = media['media_id']
thumbnail_url = None
phash = None
filename = os.path.basename(filepath)
file_extension = os.path.splitext(filename)[1].lower()
if filename in existing_files:
print('Duplicate file detected. Removing...')
os.remove(filepath)
return True
if media_id in existing_files:
print('Duplicate file detected. Removing...')
return True
media_type = funcs.get_media_type(filename)
@ -26,32 +28,29 @@ def UploadMedia(media):
if '-' in timestamp:
timestamp = timestamp.split('-')[0]
post_date = datetime.fromtimestamp(int(timestamp)) if timestamp else datetime.now()
width, height = funcs.get_video_dimensions(filepath) if media_type == 'video' else Image.open(filepath).size
width, height = funcs.get_media_dimensions(filepath)
duration = funcs.get_video_duration(filepath) if media_type == 'video' else 0 # slower
duration = funcs.get_video_duration(filepath)
if media_type == 'video':
if media_type == 'image':
phash = funcs.generate_phash(filepath)
elif media_type == 'video':
try:
thumbPath = f'temp/{file_hash}.jpg'
cap = cv2.VideoCapture(filepath)
ret, frame = cap.read()
cv2.imwrite(thumbPath, frame)
cap.release()
obj_storage.PutFile(thumbPath, f'thumbnails/{file_hash}.jpg') # slower
thumb_path = generate_thumbnail(filepath)
obj_storage.PutFile(thumb_path, f'thumbnails/{file_hash}.jpg') # this might be a problem in case of duplicate hashes
thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{file_hash}.jpg"
phash = funcs.generate_phash(thumbPath)
os.remove(thumbPath)
phash = funcs.generate_phash(thumb_path)
os.remove(thumb_path)
except:
print('Error generating thumbnail. Skipping...')
return False
elif media_type == 'image':
phash = funcs.generate_phash(filepath)
newFilename = f'{file_hash}{file_extension}'
server_path = f'media/snaps/{username}/{newFilename}'
file_extension = os.path.splitext(filename)[1].lower()
new_filename = f'{file_hash}{file_extension}'
server_path = f'media/snaps/{username}/{filename}'
file_url = f"https://storysave.b-cdn.net/{server_path}"
obj_storage.PutFile(filepath, server_path) # slow as fuck
@ -67,6 +66,14 @@ def UploadMedia(media):
return True
def generate_thumbnail(filepath):
thumb_path = f'temp/{uuid4()}.jpg'
cap = cv2.VideoCapture(filepath)
ret, frame = cap.read()
cv2.imwrite(thumb_path, frame)
cap.release()
return thumb_path
def get_media_data(filepath):
filename = os.path.basename(filepath)
parts = filename.split('~')
@ -75,8 +82,10 @@ def get_media_data(filepath):
username = parts[0]
timestamp = parts[1]
snap_id = parts[2]
snap_id = os.path.splitext(snap_id)[0]
data = {'username': username, 'timestamp': timestamp, 'filepath': filepath}
data = {'username': username, 'timestamp': timestamp, 'filepath': filepath, 'media_id': snap_id}
return data
@ -99,22 +108,32 @@ def dump(folder_path):
for media in medias:
UploadMedia(media)
def process_snap_ids(filenames):
snap_ids = []
for filename in filenames:
snap_id = filename.split('~')[2]
snap_id = os.path.splitext(snap_id)[0]
if snap_id not in snap_ids:
snap_ids.append(snap_id)
return snap_ids
directory = 'snapchat/'
if __name__ == '__main__':
print('Starting processing...')
if not os.listdir(directory):
print('No files to process. Exiting...')
exit()
newDB, newCursor = config.gen_connection()
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
obj_storage = config.get_storage()
newCursor.execute("SELECT filename FROM media WHERE filename IS NOT NULL AND platform = 'snapchat'")
existing_files = [image[0] for image in newCursor.fetchall()]
existing_files = process_snap_ids(existing_files)
dump(directory)
print("Processing completed.")
print("Processing completed.")

@ -0,0 +1,313 @@
from uuid import uuid4
from datetime import datetime
import os, requests, config, json, funcs, cv2, re
from snapchat import get_stories, get_highlight_stories, get_all_users_data
directory = "snapchat"
data_directory = "data"
def find_duplicate_snap(existing_snaps, snap_id, username):
"""
Find a snap in the existing_snaps rows fetched from the database.
"""
for snap in existing_snaps:
if username == snap[2]:
if snap_id in snap[1]:
return snap
return False
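# Note: each existing_snaps row is an (id, filename, username) tuple from the
# media table, so the snap_id check above is a substring match on the filename.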
def archive_data(data, username):
data_filename = f"{username}~{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.json"
data_filepath = os.path.join(data_directory, data_filename)
with open(data_filepath, 'w') as f:
f.write(json.dumps(data))
print(f"Archived data for {username} at {data_filepath}")
def get_file_extension(url):
response = requests.head(url)
if response.status_code != 200:
print(f"Failed to access media {url}")
return None
content_type = response.headers.get('Content-Type', '')
if 'image' in content_type:
return '.jpg'
elif 'video' in content_type:
return '.mp4'
else:
print(f"Unknown content type for media {url}")
return None
def extract_file_type(url):
file_types = {
'400': '.jpg',
'1322': '.mp4',
'1325': '.mp4',
'1034': '.mp4',
'1023': '.jpg'
}
base_url = url.split("?")[0] # Remove query string
snap_data = base_url.split('/')[-1]
# Extract the file type number
data_parts = snap_data.split('.')
if len(data_parts) > 1:
file_type_number = data_parts[1]
if file_type_number in file_types:
return file_types[file_type_number]
else:
print(f"Unexpected URL format: {base_url}")
return None
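# Illustrative example (URL shape is an assumption, not from the source):
# for ".../abc123.1322.mp4?mc=...", base_url's last segment is
# "abc123.1322.mp4", so data_parts[1] == '1322', which maps to '.mp4'.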
def download_media(url, filepath):
if os.path.exists(filepath):
print(f"File {filepath} already exists. Skipping download.")
return filepath
response = requests.get(url)
if response.status_code != 200:
print(f"Failed to download media {url}")
return None
with open(filepath, 'wb') as f:
f.write(response.content)
return filepath
def get_all_stories(usernames):
snapchat_users_data = get_all_users_data(usernames)
all_stories = []
for username in usernames:
print(f"Getting stories for {username}...")
data = snapchat_users_data.get(username)
if not data:
print(f"Failed to get data for {username}. Skipping.")
continue
archive_data(data, username)
print("Getting stories...")
stories = get_stories(data)
print("Getting highlights...")
stories.extend(get_highlight_stories(data))
for story in stories:
snap_id = story['snap_id']
url = story['url']
timestamp = story['timestamp']
# Determine file extension using HEAD request.
extension = extract_file_type(url)
if not extension:
print(f"Failed to determine file extension for {url}. Skipping.")
continue
filename = f"{username}~{timestamp}~{snap_id}{extension}"
filepath = os.path.join(directory, filename)
media = {
'username': username,
'timestamp': timestamp,
'filepath': filepath,
'snap_id': snap_id,
'original_snap_id': story['original_snap_id'],
'media_url': url,
}
all_stories.append(media)
print(f"Media {snap_id} ready for download.")
return all_stories
def get_snapchat_stories():
os.makedirs(directory, exist_ok=True)
os.makedirs(data_directory, exist_ok=True)
cursor.execute("SELECT username FROM following WHERE platform = 'snapchat' ORDER BY id DESC")
usernames = [row[0] for row in cursor.fetchall()]
cursor.execute("SELECT id, filename, username FROM media WHERE filename IS NOT NULL AND platform = 'snapchat' ORDER BY id DESC")
existing_medias = cursor.fetchall()
snapchat_users_data = get_all_users_data(usernames)
ready_stories = []
for username in usernames:
print(f"Getting stories for {username}...")
data = snapchat_users_data.get(username)
if not data:
print(f"Failed to get data for {username}. Skipping.")
continue
archive_data(data, username)
print("Getting stories...")
stories = get_stories(data)
print("Getting highlights...")
stories.extend(get_highlight_stories(data))
for story in stories:
snap_id = story['snap_id']
url = story['url']
timestamp = story['timestamp']
duplicate_snap = find_duplicate_snap(existing_medias, snap_id, username)
if duplicate_snap:
print(f"Media {snap_id} already exists. Skipping download.")
continue
# Determine file extension using HEAD request.
extension = extract_file_type(url)
if not extension:
print(f"Failed to determine file extension for {url}. Skipping.")
continue
filename = f"{username}~{timestamp}~{snap_id}{extension}"
filepath = os.path.join(directory, filename)
media = {
'username': username,
'timestamp': timestamp,
'filepath': filepath,
'snap_id': snap_id,
'original_snap_id': story['original_snap_id'],
'media_url': url,
}
ready_stories.append(media)
print(f"Media {snap_id} ready for download.")
# sort ready_stories by timestamp from oldest to newest
ready_stories.sort(key=lambda x: x['timestamp'])
return ready_stories
def download_stories(stories):
for story in stories:
# Download the media
filepath = story['filepath']
url = story['media_url'] if 'media_url' in story else None
filename = os.path.basename(filepath)
timestamp = story['timestamp']
filepath = download_media(url, filepath)
if not filepath:
continue
print(f"Downloaded {filename} at {timestamp}")
story['filepath'] = filepath
UploadMedia(story)
def main():
ready_stories = get_snapchat_stories()
stories_from_files = funcs.get_files(directory)
stories_from_files = [get_media_data(filepath) for filepath in stories_from_files]
stories_from_files = [story for story in stories_from_files if story]
ready_stories.extend(stories_from_files)
download_stories(ready_stories)
def UploadMedia(media):
username = media['username']
timestamp = media['timestamp']
filepath = media['filepath']
filename = os.path.basename(filepath)
snap_id = media['snap_id']
original_snap_id = media['original_snap_id']
thumbnail_url = None
phash = None
media_type = funcs.get_media_type(filename)
file_hash = funcs.calculate_file_hash(filepath)
post_date = datetime.fromtimestamp(int(timestamp))
width, height = funcs.get_media_dimensions(filepath)
duration = funcs.get_video_duration(filepath)
if media_type == 'image':
phash = funcs.generate_phash(filepath)
elif media_type == 'video':
try:
thumb_path = generate_thumbnail(filepath)
obj_storage.PutFile(thumb_path, f'thumbnails/{filename}')
thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{filename}"
phash = funcs.generate_phash(thumb_path)
os.remove(thumb_path)
except:
print('Error generating thumbnail. Skipping...')
return False
server_path = f'media/snaps/{username}/{filename}'
file_url = f"https://storysave.b-cdn.net/{server_path}"
obj_storage.PutFile(filepath, server_path)
query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, date, hash, filename, duration, thumbnail, phash, platform, snap_id, original_snap_id) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
values = (username, media_type, file_url, width, height, 'story', post_date, file_hash, filename, duration, thumbnail_url, phash, 'snapchat', snap_id, original_snap_id)
cursor.execute(query, values)
db.commit()
print(f'[{cursor.rowcount}] records updated. File {filename} uploaded to {file_url}')
os.remove(filepath)
return True
def generate_thumbnail(filepath):
thumb_path = f'temp/{uuid4()}.jpg'
cap = cv2.VideoCapture(filepath)
ret, frame = cap.read()
cv2.imwrite(thumb_path, frame)
cap.release()
return thumb_path
def get_media_data(filepath):
filename = os.path.basename(filepath)
parts = filename.split('~')
if len(parts) < 3:
return False
username = parts[0]
timestamp = parts[1]
snap_id = parts[2]
snap_id = os.path.splitext(snap_id)[0]
data = {'username': username, 'timestamp': timestamp, 'filepath': filepath, 'snap_id': snap_id, 'original_snap_id': None}
return data
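# Example (hypothetical filename): "alice~1700000000~ab12cd.mp4" parses to
# username='alice', timestamp='1700000000', snap_id='ab12cd', matching the
# f"{username}~{timestamp}~{snap_id}{extension}" convention used above.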
def process_snap_ids(filenames):
snap_ids = []
for filename in filenames:
snap_id = filename.split('~')[2]
snap_id = os.path.splitext(snap_id)[0]
if snap_id not in snap_ids:
snap_ids.append(snap_id)
return snap_ids
if __name__ == '__main__':
print('Starting snappy...')
db, cursor = config.gen_connection()
obj_storage = config.get_storage()
main()
print("Processing completed.")

@ -0,0 +1,239 @@
from uuid import uuid4
from datetime import datetime
import os, requests, config, json, funcs, cv2
from snapchat import get_stories, get_highlight_stories, get_all_users_data
directory = "snapchat"
data_directory = "data"
def get_existing_snap_ids(directory):
existing_snap_ids = set()
for root, _, files in os.walk(directory):
for file in files:
if '~' not in file:
continue
filename, _ = os.path.splitext(file)
snap_id = filename.split('~')[2]
existing_snap_ids.add(snap_id)
return existing_snap_ids
def find_duplicate_snap(existing_snaps, snap_id, username):
for snap in existing_snaps:
if username == snap[2]:
if snap_id in snap[1]:
return snap
return False
def archive_data(data, username):
data_filename = f"{username}~{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.json"
data_filepath = os.path.join(data_directory, data_filename)
with open(data_filepath, 'w') as f:
f.write(json.dumps(data))
print(f"Archived data for {username} at {data_filepath}")
def get_file_extension(url):
response = requests.head(url)
if response.status_code != 200:
print(f"Failed to access media {url}")
return None
content_type = response.headers.get('Content-Type', '')
if 'image' in content_type:
return '.jpg'
elif 'video' in content_type:
return '.mp4'
else:
print(f"Unknown content type for media {url}")
return None
def download_media(url, filepath):
if os.path.exists(filepath):
print(f"File {filepath} already exists. Skipping download.")
return filepath
response = requests.get(url)
if response.status_code != 200:
print(f"Failed to download media {url}")
return None
with open(filepath, 'wb') as f:
f.write(response.content)
return filepath
def main():
os.makedirs(directory, exist_ok=True)
os.makedirs(data_directory, exist_ok=True)
cursor.execute("SELECT username FROM following WHERE platform = 'snapchat' ORDER BY id DESC")
usernames = [row[0] for row in cursor.fetchall()]
cursor.execute("SELECT id, filename, username FROM media WHERE filename IS NOT NULL AND platform = 'snapchat' ORDER BY id DESC")
existing_medias = cursor.fetchall()
existing_snap_ids = get_existing_snap_ids(directory)
snapchat_users_data = get_all_users_data(usernames)
ready_stories = []
for username in usernames:
print(f"Getting stories for {username}...")
data = snapchat_users_data.get(username)
if not data:
print(f"Failed to get data for {username}. Skipping.")
continue
archive_data(data, username)
print("Getting stories...")
stories = get_stories(data)
print("Getting highlights...")
stories.extend(get_highlight_stories(data))
for story in stories:
snap_id = story['snap_id']
url = story['url']
timestamp = story['timestamp']
duplicate_snap = find_duplicate_snap(existing_medias, snap_id, username)
if duplicate_snap:
print(f"Media {snap_id} already exists. Skipping download.")
continue
# Check if media already exists
if snap_id in existing_snap_ids:
print(f"Media {snap_id} already exists. Skipping download.")
continue
# Determine file extension using HEAD request.
extension = get_file_extension(url)
if not extension:
continue
filename = f"{username}~{timestamp}~{snap_id}{extension}"
filepath = os.path.join(directory, filename)
# Check if file already exists
if os.path.exists(filepath):
print(f"File {filename} already exists. Skipping download.")
continue
media = {
'username': username,
'timestamp': timestamp,
'filepath': filepath,
'snap_id': snap_id,
'original_snap_id': story['original_snap_id'],
'media_url': url,
}
ready_stories.append(media)
print(f"Media {snap_id} ready for download.")
for media in ready_stories:
# Download the media using this story's own url and path, not leftovers from the loop above
filepath = download_media(media['media_url'], media['filepath'])
if not filepath:
continue
media['filepath'] = filepath
print(f"Downloaded {os.path.basename(filepath)} at {media['timestamp']}")
UploadMedia(media)
def UploadMedia(media):
username = media['username']
timestamp = media['timestamp']
filepath = media['filepath']
filename = os.path.basename(filepath)
snap_id = media['snap_id']
original_snap_id = media['original_snap_id']
thumbnail_url = None
phash = None
media_type = funcs.get_media_type(filename)
file_hash = funcs.calculate_file_hash(filepath)
post_date = datetime.fromtimestamp(int(timestamp)) if timestamp else datetime.now()
width, height = funcs.get_media_dimensions(filepath)
duration = funcs.get_video_duration(filepath)
if media_type == 'image':
phash = funcs.generate_phash(filepath)
elif media_type == 'video':
try:
thumb_path = generate_thumbnail(filepath)
obj_storage.PutFile(thumb_path, f'thumbnails/{filename}')
thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{filename}"
phash = funcs.generate_phash(thumb_path)
os.remove(thumb_path)
except:
print('Error generating thumbnail. Skipping...')
return False
server_path = f'media/snaps/{username}/{filename}'
file_url = f"https://storysave.b-cdn.net/{server_path}"
obj_storage.PutFile(filepath, server_path)
query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, date, hash, filename, duration, thumbnail, phash, platform, snap_id, original_snap_id) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
values = (username, media_type, file_url, width, height, 'story', post_date, file_hash, filename, duration, thumbnail_url, phash, 'snapchat', snap_id, original_snap_id)
cursor.execute(query, values)
db.commit()
print(f'[{cursor.rowcount}] records updated. File {filename} uploaded to {file_url}')
os.remove(filepath)
return True
def generate_thumbnail(filepath):
thumb_path = f'temp/{uuid4()}.jpg'
cap = cv2.VideoCapture(filepath)
ret, frame = cap.read()
cv2.imwrite(thumb_path, frame)
cap.release()
return thumb_path
def get_media_data(filepath):
filename = os.path.basename(filepath)
parts = filename.split('~')
if len(parts) < 3:
return False
username = parts[0]
timestamp = parts[1]
snap_id = parts[2]
snap_id = os.path.splitext(snap_id)[0]
data = {'username': username, 'timestamp': timestamp, 'filepath': filepath, 'media_id': snap_id}
return data
def process_snap_ids(filenames):
snap_ids = []
for filename in filenames:
snap_id = filename.split('~')[2]
snap_id = os.path.splitext(snap_id)[0]
if snap_id not in snap_ids:
snap_ids.append(snap_id)
return snap_ids
if __name__ == '__main__':
print('Starting snappy...')
db, cursor = config.gen_connection()
obj_storage = config.get_storage()
main()
print("Processing completed.")

@ -1,8 +1,11 @@
from BunnyCDN.Storage import Storage
from datetime import datetime
import os, config, funcs, cv2
from uuid import uuid4
directory = 'storysaver'
def UploadMedia(media):
platform = 'Instagram'
media_id = media['media_id']
username = media['username']
timestamp = media['timestamp']
@ -43,17 +46,13 @@ def UploadMedia(media):
phash = funcs.generate_phash(filepath)
elif media_type == 'video':
try:
thumbPath = f'temp/{media_id}.jpg'
cap = cv2.VideoCapture(filepath)
ret, frame = cap.read()
cv2.imwrite(thumbPath, frame)
cap.release()
obj_storage.PutFile(thumbPath, f'thumbnails/{media_id}.jpg') # slower
thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{media_id}.jpg"
phash = funcs.generate_phash(thumbPath)
os.remove(thumbPath)
except:
print('Error generating thumbnail. Skipping...')
thumb_path = generate_thumbnail(filepath)
obj_storage.PutFile(thumb_path, f'thumbnails/{file_hash}.jpg') # this might be a problem in case of duplicate hashes
thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{file_hash}.jpg"
phash = funcs.generate_phash(thumb_path)
os.remove(thumb_path)
except Exception as e:
print(f'Error generating thumbnail: {e}. Skipping...')
return False
newFilename = f'{media_id}{file_extension}'
@ -70,7 +69,7 @@ def UploadMedia(media):
post_type = 'story' if post_type == 'stories' else 'post'
query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, media_id, post_type, date, user_id, hash, filename, duration, thumbnail, phash, platform, file_size) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
values = (username, media_type, file_url, width, height, media_id, post_type, post_date, user_id, file_hash, filename, duration, thumbnail_url, phash, 'instagram', file_size)
values = (username, media_type, file_url, width, height, media_id, post_type, post_date, user_id, file_hash, filename, duration, thumbnail_url, phash, platform, file_size)
newCursor.execute(query, values) # slower
newDB.commit()
@ -80,6 +79,14 @@ def UploadMedia(media):
return True
def generate_thumbnail(filepath):
thumb_path = f'temp/{uuid4()}.jpg'
cap = cv2.VideoCapture(filepath)
ret, frame = cap.read()
cv2.imwrite(thumb_path, frame)
cap.release()
return thumb_path
def get_user_id(username):
username = username.lower()
if username in existing_users:
@ -127,22 +134,47 @@ def get_media(folder_path):
def dump_instagram(folder_path):
medias = get_media(folder_path)
if cleanup_dupe_stories(medias):
medias = get_media(folder_path)
for media in medias:
UploadMedia(media)
existing_files.append(media['media_id'])
def cleanup_dupe_stories(medias):
removed_count = 0
for media in medias:
media_id = media['media_id']
filepath = media['filepath']
if not media_id:
print(f'Invalid media_id for file {filepath}. Skipping...')
continue
if media_id in existing_files:
removed_count += 1
print(f'Found duplicate file {filepath}. Removing...')
os.remove(filepath)
if '(1)' in filepath:
removed_count += 1
print(f'Found duplicate file {filepath}. Removing...')
os.remove(filepath)
print(f'Removed {removed_count} duplicate files.')
return removed_count
if __name__ == '__main__':
print('Starting processing...')
if not os.listdir('storysaver/'):
if not os.listdir(directory):
print('No files to process. Exiting...')
exit()
newDB, newCursor = config.gen_connection()
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
obj_storage = config.get_storage()
newCursor.execute("SELECT media_id FROM media WHERE media_id IS NOT NULL")
existing_files = [image[0] for image in newCursor.fetchall()]
@ -150,6 +182,6 @@ if __name__ == '__main__':
newCursor.execute("SELECT DISTINCT username, user_id FROM media WHERE user_id IS NOT NULL")
existing_users = {user[0].lower(): user[1].lower() for user in newCursor.fetchall()}
dump_instagram('storysaver/')
dump_instagram(directory)
print("Processing completed.")

@ -1,8 +1,7 @@
from BunnyCDN.Storage import Storage
from datetime import datetime
import os, config, funcs, cv2
from PIL import Image
directory = 'storysaver'
def UploadMedia(media):
media_id = media['media_id']
@ -27,9 +26,9 @@ def UploadMedia(media):
file_hash = funcs.calculate_file_hash(filepath)
width, height = funcs.get_video_dimensions(filepath) if media_type == 'video' else Image.open(filepath).size
width, height = funcs.get_media_dimensions(filepath)
duration = funcs.get_video_duration(filepath) if media_type == 'video' else 0 # slower
duration = funcs.get_video_duration(filepath)
if media_type == 'video':
try:
@ -117,18 +116,17 @@ def dump_instagram():
for media in medias:
UploadMedia(media)
existing_files.append(media['media_id'])
if __name__ == '__main__':
print('Starting processing...')
if not os.listdir('storysaver/'):
if not os.listdir(directory):
print('No files to process. Exiting...')
exit()
newDB, newCursor = config.gen_connection()
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
obj_storage = config.get_storage()
newCursor.execute("SELECT media_id FROM media WHERE media_id IS NOT NULL")
existing_files = [image[0] for image in newCursor.fetchall()]
@ -136,6 +134,6 @@ if __name__ == '__main__':
newCursor.execute("SELECT DISTINCT username, user_id FROM media WHERE user_id IS NOT NULL")
existing_users = {user[0].lower(): user[1].lower() for user in newCursor.fetchall()}
dump_instagram()
dump_instagram(directory)
print("Processing completed.")

@ -1,137 +0,0 @@
from BunnyCDN.Storage import Storage
from datetime import datetime
import os, config, funcs, cv2
from PIL import Image
def UploadMedia(media):
media_id = media['media_id']
username = media['username']
post_date = media['timestamp']
user_id = media['user_id']
filepath = media['filepath']
highlight_id = media['highlight_id']
post_type = media['post_type']
thumbnail_url = None
phash = None
if media_id and int(media_id) in existing_files:
print('Duplicate file detected. Removing...')
os.remove(filepath)
return True
filename = os.path.basename(filepath)
file_extension = os.path.splitext(filename)[1].lower()
media_type = funcs.get_media_type(filename)
file_hash = funcs.calculate_file_hash(filepath)
width, height = funcs.get_video_dimensions(filepath) if media_type == 'video' else Image.open(filepath).size
duration = funcs.get_video_duration(filepath) if media_type == 'video' else 0 # slower
if media_type == 'video':
try:
thumbPath = f'temp/{media_id}.jpg'
cap = cv2.VideoCapture(filepath)
ret, frame = cap.read()
cv2.imwrite(thumbPath, frame)
cap.release()
obj_storage.PutFile(thumbPath, f'thumbnails/{media_id}.jpg') # slower
thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{media_id}.jpg"
phash = funcs.generate_phash(thumbPath)
os.remove(thumbPath)
except:
print('Error generating thumbnail. Skipping...')
return False
elif media_type == 'image':
phash = funcs.generate_phash(filepath)
if media_id:
newFilename = f'{media_id}{file_extension}'
else:
newFilename = f'{file_hash}{file_extension}'
server_path = f'media/{post_type}/{username}/{newFilename}'
file_url = f"https://storysave.b-cdn.net/{server_path}"
obj_storage.PutFile(filepath, server_path) # slow as fuck
if highlight_id:
newCursor.execute("INSERT IGNORE INTO highlights (highlight_id, user_id, media_id) VALUES (%s, %s, %s)", (highlight_id, user_id, media_id))
newDB.commit()
print(f'[{newCursor.rowcount}] added highlight {highlight_id} to user {user_id}')
query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, media_id, post_type, date, user_id, hash, filename, duration, thumbnail, phash) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
values = (username, media_type, file_url, width, height, media_id, post_type, post_date, user_id, file_hash, filename, duration, thumbnail_url, phash)
newCursor.execute(query, values) # slower
newDB.commit()
print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')
os.remove(filepath)
return True
def get_user_id(username):
username = username.lower()
if username in existing_users:
return existing_users[username]
return None
def get_media():
medias = []
post_types = {
'posts': 'post',
'stories': 'story',
'profile': 'profile',
}
for post_type in os.listdir('media'):
users = os.listdir(f'media/{post_type}')
for user in users:
user_path = f'media/{post_type}/{user}'
for filename in os.listdir(user_path):
data = {}
filepath = os.path.join(user_path, filename)
data['post_type'] = post_types[post_type]
data['username'] = user
data['timestamp'] = filename.split('__')[-1].split('.')[0] if 'com.instagram.android__' in filename else datetime.now()
if 'com.instagram.android__' in filename:
data['timestamp'] = datetime.strptime(data['timestamp'], '%Y%m%d%H%M%S%f')
data['filepath'] = filepath
data['media_id'] = None
data['user_id'] = get_user_id(data['username'])
data['highlight_id'] = None
medias.append(data)
return medias
def dump_instagram():
medias = get_media()
for media in medias:
UploadMedia(media)
existing_files.append(media['media_id'])
if __name__ == '__main__':
print('Starting processing...')
newDB, newCursor = config.gen_connection()
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
newCursor.execute("SELECT media_id FROM media WHERE media_id IS NOT NULL")
existing_files = [image[0] for image in newCursor.fetchall()]
newCursor.execute("SELECT DISTINCT username, user_id FROM media WHERE user_id IS NOT NULL")
existing_users = {user[0].lower(): user[1].lower() for user in newCursor.fetchall()}
dump_instagram()
print("Processing completed.")

@ -1,38 +1,137 @@
import os
from datetime import datetime
import os, config, funcs, cv2
from uuid import uuid4
directory = 'processed_tiktoks'
# file name : masstik_caammmyyy_1310_655_going blonde wednesdayyyy.mp4
# file name : masstiktok_aleksandraverse__#fyp #trending #viral #foryou.mp4
# where the first item is prefix, second is username and after those is the tiktok title
def UploadMedia(media):
platform = 'TikTok'
username = media['username']
filepath = media['filepath']
file_size = os.path.getsize(filepath)
thumbnail_url = None
phash = None
filename = os.path.basename(filepath)
file_extension = os.path.splitext(filename)[1].lower()
processed_dir = 'processed_tiktoks'
os.makedirs(processed_dir, exist_ok=True)
media_type = funcs.get_media_type(filename)
if not media_type:
print(f'Error determining media type for {filename}. Skipping...')
return False
users = os.listdir('tiktoks')
post_type = funcs.determine_post_type(filepath)
if not post_type:
print(f'Error determining post type for {filename}. Skipping...')
return False
for user in users:
files = os.path.join('tiktoks', user)
for file in os.listdir(files):
if 'masstik' not in file and 'masstiktok' not in file:
print(f"Skipping {file}")
file_hash = funcs.calculate_file_hash(filepath)
if file_hash in existing_hashes:
print(f'File {filename} already exists. Skipping...')
return False
post_date = datetime.now()
width, height = funcs.get_media_dimensions(filepath)
duration = funcs.get_video_duration(filepath)
if media_type == 'image':
phash = funcs.generate_phash(filepath)
elif media_type == 'video':
try:
thumb_path = generate_thumbnail(filepath)
obj_storage.PutFile(thumb_path, f'thumbnails/{file_hash}.jpg') # this might be a problem in case of duplicate hashes
thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{file_hash}.jpg"
phash = funcs.generate_phash(thumb_path)
os.remove(thumb_path)
except:
print('Error generating thumbnail. Skipping...')
return False
newFilename = f'{file_hash}{file_extension}'
server_path = f'media/tiktoks/{username}/{newFilename}'
file_url = f"https://storysave.b-cdn.net/{server_path}"
obj_storage.PutFile(filepath, server_path) # slow as fuck
post_type = 'story' if post_type == 'stories' else 'post'
query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, date, hash, filename, duration, thumbnail, phash, platform, file_size) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
values = (username, media_type, file_url, width, height, post_type, post_date, file_hash, filename, duration, thumbnail_url, phash, platform, file_size)
newCursor.execute(query, values) # slower
newDB.commit()
print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')
os.remove(filepath)
return True
def generate_thumbnail(filepath):
thumb_path = f'temp/{uuid4()}.jpg'
cap = cv2.VideoCapture(filepath)
ret, frame = cap.read()
cv2.imwrite(thumb_path, frame)
cap.release()
return thumb_path
def get_media_data(filepath):
filename = os.path.basename(filepath)
parts = filename.split('~')
if len(parts) == 3:
username, title, tiktok_id = parts
elif len(parts) == 2:
username, title = parts
tiktok_id = None
else:
return False
data = {'username': username, 'filepath': filepath, 'tiktok_id': tiktok_id, 'title': title}
return data
def get_media(folder_path):
medias = []
users = os.listdir(folder_path)
for user in users:
user_folder = os.path.join(folder_path, user)
if not os.path.isdir(user_folder):
print(f"Skipping {user}")
continue
filepath = os.path.join(files, file)
file_ext = os.path.splitext(file)[1]
data = file.split('_')
prefix = data[0]
username = data[1]
username = username.replace('@', '')
title = ' '.join(data[2:])
title = os.path.splitext(title)[0]
print("="*100)
title = title.encode('utf-8', 'ignore').decode('utf-8')
print(f"Prefix: {prefix}\nUsername: {username}\nTitle: {title}")
print("="*100)
new_filename = f"{username}~{title}.{file_ext}"
new_filepath = os.path.join(processed_dir, new_filename)
os.rename(filepath, new_filepath)
print(f"Renamed {file} to {new_filename}")
files = os.listdir(user_folder)
for filename in files:
filepath = os.path.join(user_folder, filename)
data = get_media_data(filepath)
if data:
medias.append(data)
return medias
def dump_instagram(folder_path):
medias = get_media(folder_path)
for media in medias:
UploadMedia(media)
if __name__ == '__main__':
print('Starting processing...')
if not os.listdir(directory):
print('No files to process. Exiting...')
exit()
newDB, newCursor = config.gen_connection()
obj_storage = config.get_storage()
newCursor.execute("SELECT hash FROM media WHERE hash IS NOT NULL AND platform = 'TikTok'")
existing_hashes = [row[0] for row in newCursor.fetchall()]
dump_instagram(directory)
print("Processing completed.")

@ -0,0 +1,62 @@
import os
from uuid import uuid4
import uuid
def is_valid_uuid(uuid_to_test, version=4):
try:
uuid_obj = uuid.UUID(uuid_to_test, version=version)
except ValueError:
return False
return str(uuid_obj) == uuid_to_test
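# Example: is_valid_uuid(str(uuid.uuid4())) -> True
#          is_valid_uuid('not-a-uuid')      -> False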
# file name : masstik_caammmyyy_1310_655_going blonde wednesdayyyy.mp4
# file name : masstiktok_aleksandraverse__#fyp #trending #viral #foryou.mp4
# where the first item is prefix, second is username and after those is the tiktok title
source_dir = 'tiktoks/'
processed_dir = 'processed_tiktoks'
os.makedirs(processed_dir, exist_ok=True)
users = os.listdir(source_dir)
for user in users:
user_dir = os.path.join(source_dir, user)
if not os.path.isdir(user_dir):
print(f"Skipping {user}")
continue
for file in os.listdir(user_dir):
filename = os.path.splitext(file)[0]
filepath = os.path.join(user_dir, file)
file_ext = os.path.splitext(file)[1]
tiktok_id = str(uuid4())
username = user
if is_valid_uuid(filename):
title = ''
tiktok_id = filename
elif 'masstik' in file or 'masstiktok' in file:
data = file.split('_')
title = filename.split('_')[-1]
else:
title = filename
print("="*100)
title = title.encode('utf-8', 'ignore').decode('utf-8')
print(f"Username: {username}\nTitle: {title}")
new_filename = f"{username}~{title}~{tiktok_id}{file_ext}"
new_filepath = os.path.join(processed_dir, username, new_filename)
os.makedirs(os.path.dirname(new_filepath), exist_ok=True)
if not os.path.exists(new_filepath):
os.rename(filepath, new_filepath)
print(f"Renamed {file} to {new_filepath}")
else:
print("File with the same name already exists. Renaming aborted.")
print("="*100)

@ -1,20 +1,23 @@
from BunnyCDN.Storage import Storage
from datetime import datetime
import os, config, funcs, cv2
from PIL import Image
from uuid import uuid4
directory = 'ready_to_upload/'
directory = 'ready_to_upload'
def UploadMedia(username, user_id, filepath):
thumbnail_url = None
phash = None
platform = 'Instagram'
filename = os.path.basename(filepath)
file_extension = os.path.splitext(filename)[1].lower()
media_type = funcs.get_media_type(filename)
if not media_type:
print(f'Error determining media type for {filename}. Skipping...')
return False
post_type = funcs.determine_post_type(filepath, media_type)
post_type = funcs.determine_post_type(filepath)
if not post_type:
print(f'Error determining post type for {filename}. Skipping...')
return False
@ -23,26 +26,22 @@ def UploadMedia(username, user_id, filepath):
post_date = datetime.now()
width, height = funcs.get_video_dimensions(filepath) if media_type == 'video' else Image.open(filepath).size
width, height = funcs.get_media_dimensions(filepath)
duration = funcs.get_video_duration(filepath) if media_type == 'video' else 0 # slower
duration = funcs.get_video_duration(filepath)
if media_type == 'video':
if media_type == 'image':
phash = funcs.generate_phash(filepath)
elif media_type == 'video':
try:
thumbPath = f'temp/{file_hash}.jpg'
cap = cv2.VideoCapture(filepath)
ret, frame = cap.read()
cv2.imwrite(thumbPath, frame)
cap.release()
obj_storage.PutFile(thumbPath, f'thumbnails/{file_hash}.jpg') # slower
thumb_path = generate_thumbnail(filepath)
obj_storage.PutFile(thumb_path, f'thumbnails/{file_hash}.jpg') # this might be a problem in case of duplicate hashes
thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{file_hash}.jpg"
phash = funcs.generate_phash(thumbPath)
os.remove(thumbPath)
phash = funcs.generate_phash(thumb_path)
os.remove(thumb_path)
except:
print('Error generating thumbnail. Skipping...')
return False
elif media_type == 'image':
phash = funcs.generate_phash(filepath)
newFilename = f'{file_hash}{file_extension}'
server_path = f'media/{post_type}/{username}/{newFilename}'
@ -53,7 +52,7 @@ def UploadMedia(username, user_id, filepath):
post_type = 'story' if post_type == 'stories' else 'post'
query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, date, user_id, hash, filename, duration, thumbnail, phash, platform) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
values = (username, media_type, file_url, width, height, post_type, post_date, user_id, file_hash, filename, duration, thumbnail_url, phash, 'instagram')
values = (username, media_type, file_url, width, height, post_type, post_date, user_id, file_hash, filename, duration, thumbnail_url, phash, platform)
newCursor.execute(query, values) # slower
newDB.commit()
@ -63,6 +62,14 @@ def UploadMedia(username, user_id, filepath):
return True
def generate_thumbnail(filepath):
thumb_path = f'temp/{uuid4()}.jpg'
cap = cv2.VideoCapture(filepath)
ret, frame = cap.read()
cv2.imwrite(thumb_path, frame)
cap.release()
return thumb_path
def get_user_id(username):
username = username.lower()
if username in existing_users:
@ -99,7 +106,7 @@ if __name__ == '__main__':
newDB, newCursor = config.gen_connection()
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
obj_storage = config.get_storage()
newCursor.execute("SELECT DISTINCT username, user_id FROM media WHERE user_id IS NOT NULL")
existing_users = {user[0].lower(): user[1].lower() for user in newCursor.fetchall()}

@ -1,383 +0,0 @@
import cv2, os, json, config, time, hashlib, requests
from concurrent.futures import ThreadPoolExecutor
from moviepy.editor import VideoFileClip
from cryptography.fernet import Fernet
from BunnyCDN.Storage import Storage
from instagrapi import Client
from PIL import Image
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36"}
proxies={
"http": "http://yehyuxsl-rotate:4tl5bvrwkz5e@p.webshare.io:80/",
"https": "http://yehyuxsl-rotate:4tl5bvrwkz5e@p.webshare.io:80/"
}
def file_hash(filename, hash_algo='sha256'):
"""
Compute the hash of a file.
:param filename: Path to the file.
:param hash_algo: Hashing algorithm to use (e.g., 'sha256', 'md5').
:return: Hexadecimal hash string.
"""
h = hashlib.new(hash_algo)
with open(filename, 'rb') as file:
while chunk := file.read(8192):
h.update(chunk)
return h.hexdigest()
def get_video_duration(file_path):
"""
Returns the duration of the video file in seconds.
:param file_path: Path to the video file
:return: Duration in seconds
"""
try:
with VideoFileClip(file_path) as video:
return video.duration
except:
return 0
def login(force=False):
client = Client()
try:
if not force:
client.load_settings("session_data.json")
else:
raise FileNotFoundError
except (FileNotFoundError, json.JSONDecodeError):
#username = input("Enter your Instagram username: ")
#password = getpass.getpass("Enter your Instagram password: ")
with open('p.enc', 'rb') as encrypted_file:
encrypted_data = encrypted_file.read()
fernet = Fernet(open('key.enc', 'r').read())
password = str(fernet.decrypt(encrypted_data), 'utf-8')
username = 'olivercury'
auth = input("Enter your 2FA code (leave blank if not enabled): ")
if auth:
client.login(username=username, password=password, verification_code=auth)
else:
client.login(username, password)
client.dump_settings("session_data.json")
print("Logged in successfully.")
return client
def parse_media_data(media_item):
mediaTypes = {1: 'image', 2: 'video', 8: 'album'}
try:taken_at = media_item.taken_at
except:taken_at = None
try:post_type = media_item.product_type
except:post_type = None
mediaInfo = {'taken_at': taken_at, 'post_type' : post_type, 'media_type': mediaTypes[media_item.media_type]}
if media_item.media_type == 1: # Image
mediaInfo['media_id'] = int(media_item.pk)
mediaInfo['fileURL'] = media_item.thumbnail_url
mediaInfo['filename'] = f"{media_item.pk}.jpg"
elif media_item.media_type == 2: # Video
mediaInfo['media_id'] = int(media_item.pk)
mediaInfo['fileURL'] = media_item.video_url
try:mediaInfo['duration'] = media_item.video_duration
except:mediaInfo['duration'] = 0
mediaInfo['filename'] = f"{media_item.pk}.mp4"
else:
print(f"Unsupported media type with ID {media_item.pk}")
return None
return mediaInfo
def download_file(url, filePath):
try:
response = requests.get(url, stream=True, headers=headers, proxies=proxies)
response.raise_for_status()
directory = os.path.dirname(filePath)
if not os.path.exists(directory):
os.makedirs(directory)
with open(filePath, 'wb') as out_file:
for chunk in response.iter_content(chunk_size=8192):
out_file.write(chunk)
print(f"Downloaded {filePath}")
except Exception as e:
print(f"Failed to download {url}. Error: {e}")
def process_media(mediaInfo, filePath):
if mediaInfo['media_type'] == 'image':
with Image.open(filePath) as img:
mediaInfo['width'], mediaInfo['height'] = img.size
else:
mediaInfo['width'], mediaInfo['height'] = get_video_dimensions(filePath)
mediaInfo['duration'] = get_video_duration(filePath)
if 'hash' not in mediaInfo:
mediaInfo['hash'] = file_hash(filePath)
def upload_to_storage(local_path, server_path):
try:
obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
obj_storage.PutFile(local_path, server_path)
print(f"Uploaded to https://storysave.b-cdn.net/{server_path}")
except Exception as e:
print(f"Failed to upload {local_path} to {server_path}. Error: {e}")
def add_media_to_db(mediaInfo):
media_id = mediaInfo['media_id']
user_id = mediaInfo['user_id']
username = mediaInfo['username']
date = mediaInfo['taken_at'] if 'taken_at' in mediaInfo else None
media_type = mediaInfo['media_type']
post_type = mediaInfo['post_type']
duration = mediaInfo.get('duration', 0)
media_url = mediaInfo['media_url']
width = mediaInfo['width']
height = mediaInfo['height']
filehash = mediaInfo['hash']
try:
db, cursor = config.gen_connection()
query = """
INSERT INTO media (user_id, username, date, media_type, post_type, media_url, duration, width, height, media_id, hash)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
"""
data = (user_id, username, date, media_type, post_type, media_url, duration, width, height, media_id, filehash)
cursor.execute(query, data)
db.commit()
print(f"Added media for {username} to the database.")
except Exception as e:
print(f"Failed to add media for {username} to the database. Error: {e}")
def insert_highlight_items(media_ids, highlight_id, title, user_id):
try:
db, cursor = config.gen_connection()
query = "INSERT IGNORE INTO highlights (media_id, highlight_id, title, user_id) VALUES (%s, %s, %s, %s)"
values = [(media_id, highlight_id, title, user_id) for media_id in media_ids]
cursor.executemany(query, values)
db.commit()
if cursor.rowcount > 0:
print(f"Added {cursor.rowcount} highlight items to the database.")
except Exception as e:
print(f"Failed to add highlight items to the database. Error: {e}")
def get_video_dimensions(video_path):
    cap = cv2.VideoCapture(video_path)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    cap.release()
    return width, height
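
# get_video_duration is called in process_media but defined elsewhere in this file;
# a minimal cv2-based sketch of what it presumably does (name suffixed so it cannot
# shadow the real implementation):
def get_video_duration_sketch(video_path):
    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    cap.release()
    return frame_count / fps if fps else 0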
if __name__ == '__main__':
    client = login()
    client.set_proxy(proxies['https'])

    db, cursor = config.gen_connection()

    cursor.execute("SELECT instagram_username, instagram_user_id, favorite FROM following ORDER BY id DESC;")
    following = cursor.fetchall()

    # move favorited accounts to the front of the queue
    new_following = []
    for user in following:
        username, user_id, favorite = user
        if bool(favorite):
            new_following.insert(0, user)
        else:
            new_following.append(user)
    following = new_following

    cursor.execute("SELECT media_id FROM media WHERE media_id IS NOT NULL;")
    existing_files = [media[0] for media in cursor.fetchall()]
    continueFromLast = input("Continue from the last user? (y/n): ").lower() == 'y'
    if continueFromLast:
        cursor.execute("SELECT username FROM media ORDER BY id DESC LIMIT 1;")
        lastUser = cursor.fetchone()
        if lastUser:
            lastUser = lastUser[0]
            # skip users until the last processed one is at the front; the emptiness
            # check prevents an infinite loop if that user is no longer followed
            while following and following[0][0] != lastUser:
                following.pop(0)
    actionsTaken = 0
    with ThreadPoolExecutor(max_workers=10) as executor:
        for user in following:
            while True:
                try:
                    firstImport = False
                    username, user_id, isFavorite = user

                    if not user_id:
                        firstImport = True
                        user_id = client.user_id_from_username(username)  # API request
                        actionsTaken += 1
                        cursor.execute("UPDATE following SET instagram_user_id = %s WHERE instagram_username = %s;", (user_id, username))
                        db.commit()
                        print(f"Updated user ID for {username} to {user_id}")

                    #################### profile picture ####################
                    profilePath = os.path.join('media', 'profile', username, 'profile.jpg')
                    profileURL = client.user_info(user_id).profile_pic_url_hd
                    download_file(profileURL, profilePath)

                    fileHash = file_hash(profilePath)
                    serverPath = os.path.join(os.path.dirname(profilePath), f"{fileHash}.jpg")
                    upload_to_storage(profilePath, serverPath)

                    mediaInfo = {
                        'username': username,
                        'user_id': user_id,
                        'media_id': None,
                        'media_type': 'image',
                        'post_type': 'profile',
                        'media_url': f"https://storysave.b-cdn.net/{serverPath}",
                        'duration': 0,
                        'hash': fileHash
                    }

                    process_media(mediaInfo, profilePath)
                    add_media_to_db(mediaInfo)
                    #################### profile picture ####################

                    #################### stories ####################
                    print(f"[{username}]\nChecking: Stories")

                    # fetch user stories
                    stories = client.user_stories(user_id)  # API request
                    actionsTaken += 1

                    # fetch the user's highlights and add their items to stories
                    if firstImport or isFavorite:
                        highlights = client.user_highlights(user_id)  # API request
                        actionsTaken += 1
                        for highlight in highlights:
                            try:
                                highlight_items = client.highlight_info_v1(highlight.pk).items  # API request
                                actionsTaken += 1
                            except Exception:
                                # skip this highlight instead of reusing stale items from the previous one
                                print(f"Failed to get highlight items for {highlight.pk}")
                                time.sleep(5)
                                continue
                            media_ids = [item.pk for item in highlight_items]
                            executor.submit(insert_highlight_items, media_ids, highlight.pk, highlight.title, user_id)
                            stories.extend(highlight_items)

                    # process stories and highlight stories
                    newStoryCount = 0
                    for story in stories:
                        try:
                            mediaInfo = parse_media_data(story)
                            if not mediaInfo:  # unsupported media type
                                continue

                            # skip duplicates
                            if mediaInfo['media_id'] in existing_files:
                                continue

                            newStoryCount += 1
                            mediaInfo['user_id'] = user_id
                            mediaInfo['username'] = username
                            mediaInfo['post_type'] = 'story'

                            if mediaInfo['fileURL'] and mediaInfo['filename']:
                                filePath = os.path.join('media', 'stories', username, mediaInfo['filename'])
                                mediaInfo['media_url'] = f"https://storysave.b-cdn.net/{filePath}"
                                download_file(mediaInfo['fileURL'], filePath)
                                process_media(mediaInfo, filePath)
                                upload_to_storage(filePath, filePath)
                                add_media_to_db(mediaInfo)
                                os.remove(filePath)
                                existing_files.append(mediaInfo['media_id'])
                        except Exception as e:
                            print(f"Failed to process story for {username}. Error: {e}")
                    #################### stories ####################

                    #################### posts ####################
                    print("Checking: Posts")

                    medias = client.user_medias(user_id, 36)  # API request
                    actionsTaken += 1

                    # flatten albums into their individual items
                    posts = []
                    for post in medias:
                        if post.media_type == 8:  # album
                            posts.extend(post.resources)
                            continue
                        posts.append(post)

                    newPostsCount = 0
                    for post in posts:
                        mediaInfo = parse_media_data(post)
                        if not mediaInfo:  # unsupported media type
                            continue

                        if mediaInfo['media_id'] in existing_files:
                            continue

                        newPostsCount += 1
                        mediaInfo['user_id'] = user_id
                        mediaInfo['username'] = username
                        mediaInfo['post_type'] = 'post'

                        if mediaInfo['fileURL'] and mediaInfo['filename']:
                            filePath = os.path.join('media', 'posts', username, mediaInfo['filename'])
                            mediaInfo['media_url'] = f"https://storysave.b-cdn.net/{filePath}"
                            download_file(mediaInfo['fileURL'], filePath)
                            process_media(mediaInfo, filePath)
                            upload_to_storage(filePath, filePath)
                            add_media_to_db(mediaInfo)
                            os.remove(filePath)
                            existing_files.append(mediaInfo['media_id'])
                    #################### posts ####################

                    print(f"New stories: {newStoryCount}\tNew Posts: {newPostsCount}")
                    print(f"Actions taken: {actionsTaken}")
                    print("=====================================")
                    break
                except Exception as e:
                    if "login_required" in str(e):
                        print("Please log in to your account again.")
                        client = login(force=True)
                    elif "Please wait a few minutes before you try again." in str(e):
                        print("Rate limited. Waiting for 5 minutes...")
                        time.sleep(300)  # actually wait out the rate limit before retrying
                        client = login(force=True)
                    else:
                        print("An unexpected error occurred:", e)
                        break
# TO DO
# ADD DATE TO POSTS / STORIES
# FETCH ONLY THE NEW STORIES
# MINIMIZE DATABASE CONNECTIONS
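
# Sketch for the "FETCH ONLY THE NEW STORIES" TODO above (an assumption, not wired in):
# drop already-stored media ids before any download or upload work happens.
def filter_new_stories(stories, existing_files):
    fresh = []
    for story in stories:
        if int(story.pk) not in existing_files:
            fresh.append(story)
    return fresh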

File diff suppressed because it is too large

@ -1,32 +0,0 @@
import requests

url = 'https://www.save-free.com/process'

data = {
    'instagram_url': 'natahalieeee',
    'type': 'profile',
    'resource': 'save'
}

zoom_data = {
    'instagram_url': 'natahalieeee',
    'type': 'profile',
    'resource': 'zoom'
}

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
    'Referer': 'https://www.save-free.com/profile-downloader/',
}

# defined but not passed to the requests below
proxies = {
    "http": "http://yehyuxsl-rotate:4tl5bvrwkz5e@p.webshare.io:80/",
    "https": "http://yehyuxsl-rotate:4tl5bvrwkz5e@p.webshare.io:80/"
}

# the first ('save') response is discarded; only the 'zoom' response is written out
response = requests.post(url, data=data, headers=headers)
response = requests.post(url, data=zoom_data, headers=headers)

with open('image.jpg', 'wb') as f:
    f.write(response.content)

@ -1,24 +0,0 @@
import json

filePath = 'test.json'

with open(filePath, 'r', encoding='utf-8') as f:
    data = json.load(f)

print(data)

posts = data['data']['xdt_api__v1__feed__user_timeline_graphql_connection']['edges']
posts = [post['node'] for post in posts]

for post in posts:
    # track the highest-resolution candidate per post; the old version overwrote a
    # single goodPost across posts and only printed the last one
    biggestRes = 0
    bestImage = None
    for image in post['image_versions2']['candidates']:
        width = image['width']
        height = image['height']
        if width * height > biggestRes:
            biggestRes = width * height
            bestImage = image
    print(bestImage)

@ -1,37 +0,0 @@
import requests
from bs4 import BeautifulSoup
import cloudscraper
from zenrows import ZenRowsClient

headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36"}

def get_tiktok_video(url):
    client = ZenRowsClient("39cf41d4c1ffcb944fca23a95fee8a2722bf4f28")
    data = client.get(url, headers=headers)
    soup = BeautifulSoup(data.text, 'html.parser')
    video_url = soup.find('div', class_='video_html5').find('video').get('src')
    return video_url

def get_user_videos(username):
    url = f'https://urlebird.com/user/{username}/'
    client = ZenRowsClient("39cf41d4c1ffcb944fca23a95fee8a2722bf4f28")
    data = client.get(url)
    soup = BeautifulSoup(data.text, 'html.parser')

    video_urls = []
    foundVideos = soup.find_all('div', class_='thumb')
    for video in foundVideos:
        videoURL = video.find_all('a')[-1].get('href')
        video_urls.append(videoURL)
    return video_urls

# the return value was previously discarded; print it so the single-video path does something
print(get_tiktok_video('https://urlebird.com/video/7295074788165373190/'))

videos = get_user_videos('liliashaked')
for video in videos:
    print(get_tiktok_video(video))

@ -1,2 +0,0 @@
https://www.redgifs.com/watch/terrificexhaustedgannet#rel=tag%3Anaomi-soraya%2Ca;order=trending
https://www.sex.com/pins

@ -0,0 +1,70 @@
import os, requests, config
from snapchat import get_data, get_stories, get_highlight_stories

headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36"}

directory = "snapchat"

def get_existing_media_ids(directory):
    # collect each file's base name without extension, split it on '~',
    # and take the 3rd element (the media id)
    existing_media_ids = set()
    for root, _, files in os.walk(directory):
        for file in files:
            if '~' not in file:
                continue
            filename, _ = os.path.splitext(file)
            media_id = filename.split('~')[2]
            existing_media_ids.add(media_id)
    return existing_media_ids

def get_media_id(url):
    return url.split('/')[-1].split('?')[0].split('.')[0]

def find_duplicate_snap(existing_snaps, snap_id):
    for snap in existing_snaps:
        if snap_id in snap[1]:
            return snap
    return False

def main():
    if not os.path.exists(directory):
        os.makedirs(directory)

    db, cursor = config.gen_connection()

    cursor.execute("SELECT username FROM following WHERE platform = 'snapchat'")
    usernames = [row[0] for row in cursor.fetchall()]

    cursor.execute("SELECT id, filename FROM media WHERE filename IS NOT NULL AND platform = 'snapchat' AND snap_id IS NULL")
    existing_medias = cursor.fetchall()

    existing_media_ids = get_existing_media_ids(directory)

    for username in usernames:
        print(f"Getting stories for {username}...")
        data = get_data(username)
        if not data:
            continue

        print("Getting stories...")
        stories = get_stories(data)

        print("Getting highlights...")
        stories.extend(get_highlight_stories(data))

        for story in stories:
            media_id = story['media_id']
            url = story['url']
            timestamp = story['timestamp']
            snap_id = get_media_id(url)

            duplicate_snap = find_duplicate_snap(existing_medias, snap_id)
            if duplicate_snap:
                # snap_id was already computed above; no need to derive it a second time
                cursor.execute("UPDATE media SET snap_id = %s WHERE id = %s", (snap_id, duplicate_snap[0]))
                db.commit()
                print(f"{cursor.rowcount} Media {snap_id} updated.")
                continue

if __name__ == "__main__":
    main()
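
# Sketch only: the loop above backfills snap_id for known files but never saves new
# snaps. A download step might look like this (the 'media_type' key is an assumption;
# the filename follows the username~timestamp~media_id shape get_existing_media_ids expects).
def download_snap(story, username):
    snap_id = get_media_id(story['url'])
    ext = '.mp4' if story.get('media_type') == 'video' else '.jpg'
    filePath = os.path.join(directory, f"{username}~{story['timestamp']}~{snap_id}{ext}")
    response = requests.get(story['url'], headers=headers)
    response.raise_for_status()
    with open(filePath, 'wb') as f:
        f.write(response.content)
    return filePath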

@ -1,6 +0,0 @@
https://www.instagram.com/neomi_hanukayev/
https://www.instagram.com/osher_yakir/
https://www.instagram.com/m1ry2m_/
https://www.instagram.com/4m1t_f1shpot/
https://www.instagram.com/yarden.bengigi/
https://www.instagram.com/a.roniiiiii/