cleanup
parent
ad39eeaed1
commit
445b0ad9f0
@ -1,37 +0,0 @@
|
||||
{
|
||||
"uuids": {
|
||||
"phone_id": "53c03380-c7b9-44ab-b10e-1b585e8e428b",
|
||||
"uuid": "2a9c7a37-c902-4332-8a32-1fd903acd991",
|
||||
"client_session_id": "2b0a28f0-86c4-4cd4-b044-c4effd953cc9",
|
||||
"advertising_id": "d330f041-56f1-4f45-906d-d3740717f0b1",
|
||||
"android_device_id": "android-df5a2572f9762ff7",
|
||||
"request_id": "35de6403-02e2-46b4-a02c-403cea1fe9c6",
|
||||
"tray_session_id": "ed1874f7-cb8d-4ed6-bea8-13c53b9c3d67"
|
||||
},
|
||||
"mid": "ZwOR_QABAAGgkEbeoytBO3EL-dgC",
|
||||
"ig_u_rur": null,
|
||||
"ig_www_claim": null,
|
||||
"authorization_data": {
|
||||
"ds_user_id": "1587432849",
|
||||
"sessionid": "1587432849%3Ak5q9QqmHia2WWq%3A18%3AAYcDFsLKMiFCtVhCcqYl7KZrFLw5IOSgf1pNfQZYLA"
|
||||
},
|
||||
"cookies": {},
|
||||
"last_login": 1728287241.130515,
|
||||
"device_settings": {
|
||||
"app_version": "269.0.0.18.75",
|
||||
"android_version": 26,
|
||||
"android_release": "8.0.0",
|
||||
"dpi": "480dpi",
|
||||
"resolution": "1080x1920",
|
||||
"manufacturer": "OnePlus",
|
||||
"device": "devitron",
|
||||
"model": "6T Dev",
|
||||
"cpu": "qcom",
|
||||
"version_code": "314665256"
|
||||
},
|
||||
"user_agent": "Instagram 269.0.0.18.75 Android (26/8.0.0; 480dpi; 1080x1920; OnePlus; 6T Dev; devitron; qcom; en_US; 314665256)",
|
||||
"country": "US",
|
||||
"country_code": 1,
|
||||
"locale": "en_US",
|
||||
"timezone_offset": -14400
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,142 +0,0 @@
|
||||
from datetime import datetime
|
||||
import config
|
||||
import funcs
|
||||
import cv2
|
||||
import os
|
||||
|
||||
# Folder checked at startup: the script exits early when it contains no entries.
directory = 'storysaver'
|
||||
|
||||
def UploadMedia(media):
    """Upload one local media file to object storage and record it in the database.

    Relies on the module-level globals ``existing_files``, ``obj_storage``,
    ``newCursor`` and ``newDB`` that the ``__main__`` section creates.

    Args:
        media: dict with the keys ``media_id``, ``username``, ``timestamp``,
            ``user_id``, ``filepath``, ``highlight_id`` and ``post_type``.

    Returns:
        True when the file was uploaded (or was a known duplicate and removed),
        False when thumbnail generation for a video failed.
    """
    media_id = media['media_id']
    username = media['username']
    post_date = media['timestamp']
    user_id = media['user_id']
    filepath = media['filepath']
    highlight_id = media['highlight_id']
    post_type = media['post_type']
    thumbnail_url = None
    phash = None

    # Already stored under this media id: drop the local copy and report success.
    if media_id and int(media_id) in existing_files:
        print('Duplicate file detected. Removing...')
        os.remove(filepath)
        return True

    filename = os.path.basename(filepath)
    file_extension = os.path.splitext(filename)[1].lower()

    media_type = funcs.get_media_type(filename)
    file_hash = funcs.calculate_file_hash(filepath)
    width, height = funcs.get_media_dimensions(filepath)
    duration = funcs.get_video_duration(filepath)

    # Files without a media id are keyed by their content hash. Using the same
    # key for the thumbnail keeps id-less videos from all colliding on
    # "thumbnails/None.jpg".
    storage_key = media_id if media_id else file_hash

    if media_type == 'video':
        thumb_path = f'temp/{storage_key}.jpg'
        cap = None
        try:
            cap = cv2.VideoCapture(filepath)
            ret, frame = cap.read()
            if not ret:
                raise ValueError(f'no frame could be read from {filepath}')
            cv2.imwrite(thumb_path, frame)
            obj_storage.PutFile(thumb_path, f'thumbnails/{storage_key}.jpg')
            thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{storage_key}.jpg"
            phash = funcs.generate_phash(thumb_path)
        except Exception:
            # Best effort: a video without a usable thumbnail is skipped, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            print('Error generating thumbnail. Skipping...')
            return False
        finally:
            # Always release the capture handle and the temporary thumbnail,
            # including on the failure path.
            if cap is not None:
                cap.release()
            if os.path.exists(thumb_path):
                os.remove(thumb_path)
    elif media_type == 'image':
        phash = funcs.generate_phash(filepath)

    newFilename = f'{storage_key}{file_extension}'
    server_path = f'media/{post_type}/{username}/{newFilename}'
    file_url = f"https://storysave.b-cdn.net/{server_path}"

    # The storage upload is the slowest step of the whole function.
    obj_storage.PutFile(filepath, server_path)

    if highlight_id:
        newCursor.execute("INSERT IGNORE INTO highlights (highlight_id, user_id, media_id) VALUES (%s, %s, %s)", (highlight_id, user_id, media_id))
        newDB.commit()
        print(f'[{newCursor.rowcount}] added highlight {highlight_id} to user {user_id}')

    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, media_id, post_type, date, user_id, hash, filename, duration, thumbnail, phash, platform) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    values = (username, media_type, file_url, width, height, media_id, post_type, post_date, user_id, file_hash, filename, duration, thumbnail_url, phash, 'instagram')

    newCursor.execute(query, values)
    newDB.commit()
    print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')

    # The local copy is only removed once storage and database both succeeded.
    os.remove(filepath)

    return True
|
||||
|
||||
def get_user_id(username):
    """Look up the stored user id for *username*, ignoring case.

    Reads the module-level ``existing_users`` mapping (lower-cased username
    to user id) and returns None when the username is not in it.
    """
    return existing_users.get(username.lower())
|
||||
|
||||
def get_media():
    """Collect upload descriptors for every file under ``media/<type>/<user>/``.

    The top-level folder name selects the post type (``posts``, ``stories``,
    ``profile``). Entries that are not directories, or whose name is not a
    known post type, are skipped instead of raising.

    Returns:
        A list of dicts with the keys ``post_type``, ``username``,
        ``timestamp``, ``filepath``, ``media_id``, ``user_id`` and
        ``highlight_id``. ``media_id`` and ``highlight_id`` are always None.

    Raises:
        ValueError: if a file name carries the ``com.instagram.android__``
            marker but its trailing part is not a ``%Y%m%d%H%M%S%f`` timestamp.
    """
    medias = []
    post_types = {
        'posts': 'post',
        'stories': 'story',
        'profile': 'profile',
    }
    marker = 'com.instagram.android__'

    for post_type in os.listdir('media'):
        type_path = f'media/{post_type}'
        if post_type not in post_types or not os.path.isdir(type_path):
            print(f"Skipping {post_type}")
            continue

        for user in os.listdir(type_path):
            user_path = f'media/{post_type}/{user}'
            if not os.path.isdir(user_path):
                print(f"Skipping {user}")
                continue

            for filename in os.listdir(user_path):
                if marker in filename:
                    # The capture time is the part after the last "__", up to
                    # the first dot. Parse that string, not the whole record.
                    raw_timestamp = filename.split('__')[-1].split('.')[0]
                    timestamp = datetime.strptime(raw_timestamp, '%Y%m%d%H%M%S%f')
                else:
                    # No timestamp in the name: fall back to the processing time.
                    timestamp = datetime.now()

                medias.append({
                    'post_type': post_types[post_type],
                    'username': user,
                    'timestamp': timestamp,
                    'filepath': os.path.join(user_path, filename),
                    'media_id': None,
                    'user_id': get_user_id(user),
                    'highlight_id': None,
                })

    return medias
|
||||
|
||||
def dump_instagram():
    """Upload every file found by ``get_media()`` and remember what was stored.

    A media id is added to the module-level ``existing_files`` list only when
    the upload reported success and the id is set. It is stored as an int
    because the duplicate check in ``UploadMedia`` compares ``int(media_id)``.
    """
    for media in get_media():
        uploaded = UploadMedia(media)
        media_id = media['media_id']
        if uploaded and media_id:
            existing_files.append(int(media_id))
|
||||
|
||||
if __name__ == '__main__':
    print('Starting processing...')

    # NOTE(review): this checks `directory`, while get_media() scans the
    # "media" folder -- confirm which folder is the real input.
    if not os.listdir(directory):
        print('No files to process. Exiting...')
        exit()

    # Shared by the functions above through module-level names.
    newDB, newCursor = config.gen_connection()
    obj_storage = config.get_storage()

    # Media ids already stored, used for duplicate detection.
    newCursor.execute("SELECT media_id FROM media WHERE media_id IS NOT NULL")
    existing_files = [image[0] for image in newCursor.fetchall()]

    # Lower-cased username -> user id, used by get_user_id().
    # assumes user_id comes back as a string (it is lower-cased) -- TODO confirm
    newCursor.execute("SELECT DISTINCT username, user_id FROM media WHERE user_id IS NOT NULL")
    existing_users = {user[0].lower(): user[1].lower() for user in newCursor.fetchall()}

    # dump_instagram() takes no parameters; passing `directory` raised TypeError.
    dump_instagram()

    print("Processing completed.")
|
||||
@ -1,140 +0,0 @@
|
||||
from datetime import datetime
|
||||
from uuid import uuid4
|
||||
import funcs
|
||||
import config
|
||||
import cv2
|
||||
import os
|
||||
|
||||
# Folder scanned for per-user subfolders of files to upload; the script exits
# early when it contains no entries.
directory = 'processed_tiktoks'
|
||||
|
||||
def UploadMedia(media):
    """Upload one TikTok file to object storage and record it in the database.

    Relies on the module-level globals ``existing_hashes``, ``obj_storage``,
    ``newCursor`` and ``newDB`` that the ``__main__`` section creates.

    Args:
        media: dict with at least the keys ``username`` and ``filepath``.

    Returns:
        True when the file was uploaded and recorded. False when the media
        type or post type could not be determined, the content hash is already
        known, or thumbnail generation failed. In the False cases the local
        file is left in place.
    """
    platform = 'TikTok'
    username = media['username']
    filepath = media['filepath']
    file_size = os.path.getsize(filepath)
    thumbnail_url = None
    phash = None

    filename = os.path.basename(filepath)
    file_extension = os.path.splitext(filename)[1].lower()

    media_type = funcs.get_media_type(filename)
    if not media_type:
        print(f'Error determining media type for {filename}. Skipping...')
        return False

    post_type = funcs.determine_post_type(filepath)
    if not post_type:
        print(f'Error determining post type for {filename}. Skipping...')
        return False

    file_hash = funcs.calculate_file_hash(filepath)
    if file_hash in existing_hashes:
        print(f'File {filename} already exists. Skipping...')
        return False

    # The file name carries no date, so the processing time is recorded.
    post_date = datetime.now()

    width, height = funcs.get_media_dimensions(filepath)
    duration = funcs.get_video_duration(filepath)

    if media_type == 'image':
        phash = funcs.generate_phash(filepath)
    elif media_type == 'video':
        thumb_path = None
        try:
            thumb_path = generate_thumbnail(filepath)
            obj_storage.PutFile(thumb_path, f'thumbnails/{file_hash}.jpg')
            thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{file_hash}.jpg"
            phash = funcs.generate_phash(thumb_path)
        except Exception:
            # Best effort: skip the video, but let KeyboardInterrupt/SystemExit
            # propagate.
            print('Error generating thumbnail. Skipping...')
            return False
        finally:
            # Remove the temporary thumbnail on the failure path as well.
            if thumb_path and os.path.exists(thumb_path):
                os.remove(thumb_path)

    newFilename = f'{file_hash}{file_extension}'
    server_path = f'media/tiktoks/{username}/{newFilename}'
    file_url = f"https://storysave.b-cdn.net/{server_path}"

    # The storage upload is the slowest step of the whole function.
    obj_storage.PutFile(filepath, server_path)

    post_type = 'story' if post_type == 'stories' else 'post'
    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, date, hash, filename, duration, thumbnail, phash, platform, file_size) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    values = (username, media_type, file_url, width, height, post_type, post_date, file_hash, filename, duration, thumbnail_url, phash, platform, file_size)

    newCursor.execute(query, values)
    newDB.commit()
    print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')

    # Remember the hash so an identical file later in this run is treated the
    # same way as one already recorded in the database.
    existing_hashes.append(file_hash)

    os.remove(filepath)

    return True
|
||||
|
||||
def generate_thumbnail(filepath):
    """Write the first frame of the video at *filepath* to a JPEG under ``temp/``.

    Returns:
        Path of the written thumbnail (``temp/<random uuid>.jpg``).

    Raises:
        ValueError: if no frame could be read from the video.
        OSError: if the thumbnail could not be written.
    """
    thumb_path = f'temp/{uuid4()}.jpg'

    cap = cv2.VideoCapture(filepath)
    try:
        ret, frame = cap.read()
    finally:
        # Release the capture handle even when reading fails.
        cap.release()

    if not ret or frame is None:
        raise ValueError(f'Could not read a frame from {filepath}')

    # imwrite reports some failures through its return value rather than an
    # exception, so check it explicitly.
    if not cv2.imwrite(thumb_path, frame):
        raise OSError(f'Could not write thumbnail to {thumb_path}')

    return thumb_path
|
||||
|
||||
def get_media_data(filepath):
    """Parse a ``username~title~tiktok_id.ext`` file name into a descriptor.

    The extension is removed before splitting, so it no longer ends up inside
    ``tiktok_id`` (or inside ``title`` for the two-part form).

    Args:
        filepath: path whose base name is ``username~title~tiktok_id.ext`` or
            ``username~title.ext``.

    Returns:
        A dict with the keys ``username``, ``filepath``, ``tiktok_id`` and
        ``title`` (``tiktok_id`` is None for the two-part form), or False when
        the name does not have two or three ``~``-separated parts.
    """
    filename = os.path.basename(filepath)
    stem = os.path.splitext(filename)[0]
    parts = stem.split('~')

    if len(parts) == 3:
        username, title, tiktok_id = parts
    elif len(parts) == 2:
        username, title = parts
        tiktok_id = None
    else:
        return False

    return {'username': username, 'filepath': filepath, 'tiktok_id': tiktok_id, 'title': title}
|
||||
|
||||
def get_media(folder_path):
    """Gather parsed descriptors for every file in each user subfolder.

    Entries of *folder_path* that are not directories are reported and
    skipped. Files whose name ``get_media_data`` cannot parse are left out.
    """
    collected = []

    for entry in os.listdir(folder_path):
        entry_path = os.path.join(folder_path, entry)

        if os.path.isdir(entry_path):
            for name in os.listdir(entry_path):
                info = get_media_data(os.path.join(entry_path, name))
                if info:
                    collected.append(info)
        else:
            print(f"Skipping {entry}")

    return collected
|
||||
|
||||
def dump_instagram(folder_path):
    """Upload every media file discovered under *folder_path*."""
    for item in get_media(folder_path):
        UploadMedia(item)
|
||||
|
||||
if __name__ == '__main__':
    print('Starting processing...')

    # Nothing to do when the input folder has no entries.
    if not os.listdir(directory):
        print('No files to process. Exiting...')
        exit()

    # Shared by the functions above through module-level names.
    newDB, newCursor = config.gen_connection()
    obj_storage = config.get_storage()

    # Hashes already recorded for this platform, used to skip duplicates.
    newCursor.execute("SELECT hash FROM media WHERE hash IS NOT NULL AND platform = 'TikTok'")
    existing_hashes = []
    for record in newCursor.fetchall():
        existing_hashes.append(record[0])

    dump_instagram(directory)

    print("Processing completed.")
|
||||
@ -1,58 +0,0 @@
|
||||
from uuid import uuid4
|
||||
import uuid
|
||||
import os
|
||||
|
||||
def is_valid_uuid(uuid_to_test, version=4):
    """Return True if *uuid_to_test* is the canonical text of a UUID of *version*.

    The value must round-trip exactly through ``uuid.UUID``, so upper-case,
    un-hyphenated or wrong-version strings are rejected. Non-string input
    (None, numbers, ...) returns False instead of raising.

    Args:
        uuid_to_test: candidate value, normally a string.
        version: UUID version to validate against (default 4).
    """
    try:
        uuid_obj = uuid.UUID(uuid_to_test, version=version)
    except (ValueError, AttributeError, TypeError):
        # ValueError: malformed string. AttributeError/TypeError: not a string.
        return False

    # UUID() forces the version and variant bits, so a string of another
    # version parses but no longer matches its own text.
    return str(uuid_obj) == uuid_to_test
|
||||
|
||||
# Moves every file from tiktoks/<user>/ to
# processed_tiktoks/<user>/<user>~<title>~<id><ext>.
source_dir = 'tiktoks/'
processed_dir = 'processed_tiktoks'

os.makedirs(processed_dir, exist_ok=True)

users = os.listdir(source_dir)

for user in users:
    user_dir = os.path.join(source_dir, user)
    if not os.path.isdir(user_dir):
        print(f"Skipping {user}")
        continue

    for file in os.listdir(user_dir):
        filename, file_ext = os.path.splitext(file)
        filepath = os.path.join(user_dir, file)

        # Default id; replaced below when the file name already is a UUID.
        tiktok_id = str(uuid4())
        username = user

        if is_valid_uuid(filename):
            title = ''
            tiktok_id = filename
        elif 'masstik' in file:
            # 'masstik' also matches 'masstiktok' names; the title is the part
            # after the last underscore.
            title = filename.split('_')[-1]
        else:
            title = filename

        print("="*100)
        # Drop characters that cannot be encoded as UTF-8 (e.g. lone surrogates).
        title = title.encode('utf-8', 'ignore').decode('utf-8')
        # '~' separates the fields of the new name; a title containing it would
        # produce a name with more than three parts.
        title = title.replace('~', '-')
        print(f"Username: {username}\nTitle: {title}")

        new_filename = f"{username}~{title}~{tiktok_id}{file_ext}"
        new_filepath = os.path.join(processed_dir, username, new_filename)

        os.makedirs(os.path.dirname(new_filepath), exist_ok=True)
        if not os.path.exists(new_filepath):
            os.rename(filepath, new_filepath)
            print(f"Renamed {file} to {new_filepath}")
        else:
            print("File with the same name already exists. Renaming aborted.")

        print("="*100)
|
||||
Loading…
Reference in New Issue