main
oscar 9 months ago
parent ad39eeaed1
commit 445b0ad9f0

@ -1,37 +0,0 @@
{
"uuids": {
"phone_id": "53c03380-c7b9-44ab-b10e-1b585e8e428b",
"uuid": "2a9c7a37-c902-4332-8a32-1fd903acd991",
"client_session_id": "2b0a28f0-86c4-4cd4-b044-c4effd953cc9",
"advertising_id": "d330f041-56f1-4f45-906d-d3740717f0b1",
"android_device_id": "android-df5a2572f9762ff7",
"request_id": "35de6403-02e2-46b4-a02c-403cea1fe9c6",
"tray_session_id": "ed1874f7-cb8d-4ed6-bea8-13c53b9c3d67"
},
"mid": "ZwOR_QABAAGgkEbeoytBO3EL-dgC",
"ig_u_rur": null,
"ig_www_claim": null,
"authorization_data": {
"ds_user_id": "1587432849",
"sessionid": "1587432849%3Ak5q9QqmHia2WWq%3A18%3AAYcDFsLKMiFCtVhCcqYl7KZrFLw5IOSgf1pNfQZYLA"
},
"cookies": {},
"last_login": 1728287241.130515,
"device_settings": {
"app_version": "269.0.0.18.75",
"android_version": 26,
"android_release": "8.0.0",
"dpi": "480dpi",
"resolution": "1080x1920",
"manufacturer": "OnePlus",
"device": "devitron",
"model": "6T Dev",
"cpu": "qcom",
"version_code": "314665256"
},
"user_agent": "Instagram 269.0.0.18.75 Android (26/8.0.0; 480dpi; 1080x1920; OnePlus; 6T Dev; devitron; qcom; en_US; 314665256)",
"country": "US",
"country_code": 1,
"locale": "en_US",
"timezone_offset": -14400
}

File diff suppressed because it is too large Load Diff

@ -1,142 +0,0 @@
from datetime import datetime
import config
import funcs
import cv2
import os
# Folder that the entry point lists before doing any work; when it is empty
# the script prints a message and exits.
directory = 'storysaver'
def UploadMedia(media):
    """Upload one local media file and record it in the ``media`` table.

    Args:
        media: dict with the keys ``media_id``, ``username``, ``timestamp``,
            ``user_id``, ``filepath``, ``highlight_id`` and ``post_type``.

    Returns:
        True when the file was uploaded and recorded, or when it was deleted
        as a duplicate. False when a video thumbnail could not be produced;
        the local file is left in place in that case.

    Uses the module-level ``existing_files``, ``obj_storage``, ``newCursor``
    and ``newDB`` objects that the script entry point creates.
    """
    media_id = media['media_id']
    username = media['username']
    post_date = media['timestamp']
    user_id = media['user_id']
    filepath = media['filepath']
    highlight_id = media['highlight_id']
    post_type = media['post_type']

    thumbnail_url = None
    phash = None

    if media_id and int(media_id) in existing_files:
        print('Duplicate file detected. Removing...')
        os.remove(filepath)
        return True

    filename = os.path.basename(filepath)
    file_extension = os.path.splitext(filename)[1].lower()

    media_type = funcs.get_media_type(filename)
    file_hash = funcs.calculate_file_hash(filepath)
    width, height = funcs.get_media_dimensions(filepath)
    duration = funcs.get_video_duration(filepath)

    # One naming rule for both the media object and its thumbnail: prefer the
    # media id, fall back to the content hash. Without the fallback every
    # video lacking an id would overwrite the same "None.jpg" thumbnail.
    storage_stem = media_id if media_id else file_hash

    if media_type == 'video':
        thumbPath = f'temp/{storage_stem}.jpg'
        cap = None
        try:
            cap = cv2.VideoCapture(filepath)
            ret, frame = cap.read()
            if not ret:
                raise ValueError(f'no frame could be read from {filepath}')
            if not cv2.imwrite(thumbPath, frame):
                raise OSError(f'could not write thumbnail {thumbPath}')

            obj_storage.PutFile(thumbPath, f'thumbnails/{storage_stem}.jpg')  # slower
            thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{storage_stem}.jpg"
            phash = funcs.generate_phash(thumbPath)
        except Exception as error:
            # Best effort: a broken video must not abort the whole run, but
            # the reason is reported instead of being swallowed silently.
            print('Error generating thumbnail. Skipping...')
            print(f'Reason: {error!r}')
            return False
        finally:
            # Runs on success and on failure, so neither the capture handle
            # nor the temporary JPEG is leaked.
            if cap is not None:
                cap.release()
            if os.path.exists(thumbPath):
                os.remove(thumbPath)
    elif media_type == 'image':
        phash = funcs.generate_phash(filepath)

    newFilename = f'{storage_stem}{file_extension}'
    server_path = f'media/{post_type}/{username}/{newFilename}'
    file_url = f"https://storysave.b-cdn.net/{server_path}"

    obj_storage.PutFile(filepath, server_path)  # slow

    if highlight_id:
        newCursor.execute(
            "INSERT IGNORE INTO highlights (highlight_id, user_id, media_id) VALUES (%s, %s, %s)",
            (highlight_id, user_id, media_id),
        )
        newDB.commit()
        print(f'[{newCursor.rowcount}] added highlight {highlight_id} to user {user_id}')

    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, media_id, post_type, date, user_id, hash, filename, duration, thumbnail, phash, platform) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    values = (username, media_type, file_url, width, height, media_id, post_type, post_date, user_id, file_hash, filename, duration, thumbnail_url, phash, 'instagram')

    newCursor.execute(query, values)  # slower
    newDB.commit()
    print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')

    # The local copy is only removed once the upload and the insert are done.
    os.remove(filepath)

    return True
def get_user_id(username):
    """Return the known user id for *username*, or None when it is not known.

    The lookup lower-cases the name before consulting the module-level
    ``existing_users`` mapping.
    """
    return existing_users.get(username.lower())
def get_media():
    """Build an upload descriptor for every file under ``media/<type>/<user>/``.

    Returns:
        A list of dicts with the keys ``post_type``, ``username``,
        ``timestamp``, ``filepath``, ``media_id``, ``user_id`` and
        ``highlight_id``. ``media_id`` and ``highlight_id`` are always None.

    Raises:
        KeyError: if a folder directly under ``media`` is not one of
            ``posts``, ``stories`` or ``profile``.
        ValueError: if a ``com.instagram.android__`` file name does not end
            in a timestamp matching ``%Y%m%d%H%M%S%f``.
    """
    post_types = {
        'posts': 'post',
        'stories': 'story',
        'profile': 'profile',
    }
    app_marker = 'com.instagram.android__'

    medias = []
    for post_type in os.listdir('media'):
        for user in os.listdir(f'media/{post_type}'):
            user_path = f'media/{post_type}/{user}'
            for filename in os.listdir(user_path):
                if app_marker in filename:
                    # The timestamp is the text after the last "__", up to
                    # the first dot. Parse that string, not the descriptor
                    # dict (the previous code passed the dict and raised
                    # TypeError).
                    stamp = filename.split('__')[-1].split('.')[0]
                    timestamp = datetime.strptime(stamp, '%Y%m%d%H%M%S%f')
                else:
                    timestamp = datetime.now()

                medias.append({
                    'post_type': post_types[post_type],
                    'username': user,
                    'timestamp': timestamp,
                    'filepath': os.path.join(user_path, filename),
                    'media_id': None,
                    'user_id': get_user_id(user),
                    'highlight_id': None,
                })

    return medias
def dump_instagram():
    """Upload every file returned by ``get_media`` and track uploaded ids.

    A media id is added to the module-level ``existing_files`` list only when
    ``UploadMedia`` reported success and the id is present. Recording failed
    uploads would make a later file with the same id look like a duplicate,
    and ``UploadMedia`` deletes duplicates.
    """
    for media in get_media():
        uploaded = UploadMedia(media)
        media_id = media['media_id']
        if uploaded and media_id:
            # UploadMedia compares int(media_id) against this list, so the
            # value is stored in the same form.
            existing_files.append(int(media_id))
if __name__ == '__main__':
    # Script entry point: set up the database and storage handles, load the
    # ids and users already known, then upload everything that is pending.
    print('Starting processing...')

    if not os.listdir(directory):
        print('No files to process. Exiting...')
        exit()

    newDB, newCursor = config.gen_connection()
    obj_storage = config.get_storage()

    # Media ids already stored, used by UploadMedia for duplicate detection.
    newCursor.execute("SELECT media_id FROM media WHERE media_id IS NOT NULL")
    existing_files = [image[0] for image in newCursor.fetchall()]

    # Lower-cased username -> user id, used by get_user_id.
    newCursor.execute("SELECT DISTINCT username, user_id FROM media WHERE user_id IS NOT NULL")
    existing_users = {user[0].lower(): user[1].lower() for user in newCursor.fetchall()}

    # dump_instagram takes no arguments; passing `directory` raised TypeError.
    # NOTE(review): the emptiness check above looks at `directory`, while
    # get_media scans the hard-coded 'media' folder - confirm which is meant.
    dump_instagram()

    print("Processing completed.")

@ -1,140 +0,0 @@
from datetime import datetime
from uuid import uuid4
import funcs
import config
import cv2
import os
# Root folder scanned for files to upload; the entry point exits early when
# it is empty and otherwise passes it to dump_instagram.
directory = 'processed_tiktoks'
def UploadMedia(media):
    """Upload one local TikTok file and record it in the ``media`` table.

    Args:
        media: dict providing at least ``username`` and ``filepath``.

    Returns:
        True when the file was uploaded, recorded and removed locally.
        False when the media type or post type cannot be determined, when the
        file hash is already known, or when a video thumbnail could not be
        produced. The local file is kept in all of those cases.

    Uses the module-level ``existing_hashes``, ``obj_storage``, ``newCursor``
    and ``newDB`` objects that the script entry point creates.
    """
    platform = 'TikTok'
    username = media['username']
    filepath = media['filepath']
    file_size = os.path.getsize(filepath)
    thumbnail_url = None
    phash = None

    filename = os.path.basename(filepath)
    file_extension = os.path.splitext(filename)[1].lower()

    media_type = funcs.get_media_type(filename)
    if not media_type:
        print(f'Error determining media type for {filename}. Skipping...')
        return False

    post_type = funcs.determine_post_type(filepath)
    if not post_type:
        print(f'Error determining post type for {filename}. Skipping...')
        return False

    file_hash = funcs.calculate_file_hash(filepath)
    if file_hash in existing_hashes:
        print(f'File {filename} already exists. Skipping...')
        return False

    post_date = datetime.now()
    width, height = funcs.get_media_dimensions(filepath)
    duration = funcs.get_video_duration(filepath)

    if media_type == 'image':
        phash = funcs.generate_phash(filepath)
    elif media_type == 'video':
        thumb_path = None
        try:
            thumb_path = generate_thumbnail(filepath)
            # Thumbnails are keyed by content hash, so identical files share one.
            obj_storage.PutFile(thumb_path, f'thumbnails/{file_hash}.jpg')
            thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{file_hash}.jpg"
            phash = funcs.generate_phash(thumb_path)
        except Exception as error:
            # Best effort: one bad video must not stop the run, but the cause
            # is reported instead of being swallowed silently.
            print('Error generating thumbnail. Skipping...')
            print(f'Reason: {error!r}')
            return False
        finally:
            # Remove the temporary JPEG on failure as well as on success.
            if thumb_path and os.path.exists(thumb_path):
                os.remove(thumb_path)

    newFilename = f'{file_hash}{file_extension}'
    server_path = f'media/tiktoks/{username}/{newFilename}'
    file_url = f"https://storysave.b-cdn.net/{server_path}"

    obj_storage.PutFile(filepath, server_path)  # slow

    # Anything that is not 'stories' is stored as a regular post.
    post_type = 'story' if post_type == 'stories' else 'post'

    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, date, hash, filename, duration, thumbnail, phash, platform, file_size) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    values = (username, media_type, file_url, width, height, post_type, post_date, file_hash, filename, duration, thumbnail_url, phash, platform, file_size)

    newCursor.execute(query, values)  # slower
    newDB.commit()
    print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')

    # The local copy is only removed once the upload and the insert are done.
    os.remove(filepath)

    return True
def generate_thumbnail(filepath):
    """Save the first frame of a video as a JPEG under ``temp/``.

    Args:
        filepath: path of the video file to read.

    Returns:
        Path of the written JPEG, named with a fresh random UUID.

    Raises:
        RuntimeError: if no frame can be read from the video or the image
            cannot be written.
    """
    thumb_path = f'temp/{uuid4()}.jpg'
    # imwrite reports failure only through its return value, and a missing
    # folder is one way for it to fail, so make sure the folder exists.
    os.makedirs(os.path.dirname(thumb_path), exist_ok=True)

    cap = cv2.VideoCapture(filepath)
    try:
        ret, frame = cap.read()
    finally:
        # Release the capture handle even when reading raises.
        cap.release()

    if not ret:
        raise RuntimeError(f'no frame could be read from {filepath}')
    if not cv2.imwrite(thumb_path, frame):
        raise RuntimeError(f'could not write thumbnail {thumb_path}')

    return thumb_path
def get_media_data(filepath):
    """Parse a ``username~title[~tiktok_id].ext`` file name into a descriptor.

    Args:
        filepath: path of the file; only its base name is parsed.

    Returns:
        A dict with the keys ``username``, ``filepath``, ``tiktok_id`` and
        ``title`` (``tiktok_id`` is None for two-field names), or False when
        the name does not consist of two or three ``~``-separated fields.
    """
    # Strip the extension before splitting; otherwise it stays glued to the
    # last field and the id comes back as e.g. "1234.mp4".
    stem = os.path.splitext(os.path.basename(filepath))[0]
    parts = stem.split('~')

    if len(parts) == 3:
        username, title, tiktok_id = parts
    elif len(parts) == 2:
        username, title = parts
        tiktok_id = None
    else:
        return False

    return {
        'username': username,
        'filepath': filepath,
        'tiktok_id': tiktok_id,
        'title': title,
    }
def get_media(folder_path):
    """Collect media descriptors for every file inside each user sub-folder.

    Entries of *folder_path* that are not directories are reported and
    ignored. Files for which ``get_media_data`` returns a falsy value are
    left out of the result.

    Returns:
        List of the descriptors produced by ``get_media_data``.
    """
    collected = []
    for entry in os.listdir(folder_path):
        entry_path = os.path.join(folder_path, entry)
        if os.path.isdir(entry_path):
            for name in os.listdir(entry_path):
                descriptor = get_media_data(os.path.join(entry_path, name))
                if descriptor:
                    collected.append(descriptor)
        else:
            print(f"Skipping {entry}")
    return collected
def dump_instagram(folder_path):
    """Run ``UploadMedia`` on every descriptor found under *folder_path*."""
    for item in get_media(folder_path):
        UploadMedia(item)
if __name__ == '__main__':
    # Script entry point: set up the database and storage handles, load the
    # hashes already stored for TikTok, then upload everything pending.
    print('Starting processing...')

    if not os.listdir(directory):
        print('No files to process. Exiting...')
        exit()

    newDB, newCursor = config.gen_connection()
    obj_storage = config.get_storage()

    newCursor.execute("SELECT hash FROM media WHERE hash IS NOT NULL AND platform = 'TikTok'")
    # A set, because UploadMedia only does a membership test per file and a
    # list would be scanned from start to end every time.
    existing_hashes = {row[0] for row in newCursor.fetchall()}

    dump_instagram(directory)

    print("Processing completed.")

@ -1,58 +0,0 @@
from uuid import uuid4
import uuid
import os
def is_valid_uuid(uuid_to_test, version=4):
    """Tell whether *uuid_to_test* is the canonical text of a UUID of *version*.

    The string is parsed with the requested version and rendered back; it is
    accepted only when the rendering equals the input exactly. Strings that
    ``uuid.UUID`` rejects with ValueError yield False.
    """
    try:
        canonical = str(uuid.UUID(uuid_to_test, version=version))
    except ValueError:
        return False
    else:
        return canonical == uuid_to_test
# Move every file from tiktoks/<user>/ to processed_tiktoks/<user>/ under the
# name "<username>~<title>~<tiktok_id><ext>".
source_dir = 'tiktoks/'
processed_dir = 'processed_tiktoks'

os.makedirs(processed_dir, exist_ok=True)

users = os.listdir(source_dir)

for user in users:
    user_dir = os.path.join(source_dir, user)

    if not os.path.isdir(user_dir):
        print(f"Skipping {user}")
        continue

    for file in os.listdir(user_dir):
        filename, file_ext = os.path.splitext(file)
        filepath = os.path.join(user_dir, file)
        username = user

        if is_valid_uuid(filename):
            # The file is already named by its id and carries no title.
            title = ''
            tiktok_id = filename
        else:
            tiktok_id = str(uuid4())
            if 'masstik' in file:
                # Also matches 'masstiktok'; the title is the text after the
                # last underscore.
                title = filename.split('_')[-1]
            else:
                title = filename

        print("="*100)
        # Drop characters that cannot be encoded as UTF-8.
        title = title.encode('utf-8', 'ignore').decode('utf-8')
        # '~' separates the fields of the new name; a title containing it
        # would add extra fields and make the name ambiguous.
        title = title.replace('~', '-')

        print(f"Username: {username}\nTitle: {title}")

        new_filename = f"{username}~{title}~{tiktok_id}{file_ext}"
        new_filepath = os.path.join(processed_dir, username, new_filename)

        os.makedirs(os.path.dirname(new_filepath), exist_ok=True)
        if not os.path.exists(new_filepath):
            os.rename(filepath, new_filepath)
            print(f"Renamed {file} to {new_filepath}")
        else:
            print("File with the same name already exists. Renaming aborted.")

        print("="*100)
Loading…
Cancel
Save