You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
120 lines
3.6 KiB
Python
120 lines
3.6 KiB
Python
from BunnyCDN.Storage import Storage
|
|
from datetime import datetime
|
|
import os, config, funcs, cv2
|
|
from PIL import Image
|
|
|
|
|
|
def UploadMedia(media):
    """Upload one story file to storage and record it in the ``media`` table.

    Relies on module-level state created by the script entry point:
    ``existing_files``, ``obj_storage``, ``newCursor`` and ``newDB``.

    Args:
        media: Mapping with the keys ``'username'``, ``'timestamp'`` and
            ``'filepath'``.

    Returns:
        True when the file was uploaded and recorded, or when it was deleted
        because its name is already in ``existing_files``. False when any
        step of thumbnail creation for a video failed; the local file is
        kept in that case.
    """
    username = media['username']
    timestamp = media['timestamp']
    filepath = media['filepath']
    thumbnail_url = None
    phash = None

    filename = os.path.basename(filepath)
    file_extension = os.path.splitext(filename)[1].lower()

    if filename in existing_files:
        print('Duplicate file detected. Removing...')
        os.remove(filepath)
        return True

    media_type = funcs.get_media_type(filename)
    file_hash = funcs.calculate_file_hash(filepath)

    # Only the part before the first '-' is used as the epoch value.
    if '-' in timestamp:
        timestamp = timestamp.split('-')[0]
    # An empty timestamp falls back to the current time.
    post_date = datetime.fromtimestamp(int(timestamp)) if timestamp else datetime.now()

    if media_type == 'video':
        width, height = funcs.get_video_dimensions(filepath)
        duration = funcs.get_video_duration(filepath)  # slower
    else:
        # Context manager so the image file handle is closed right away.
        with Image.open(filepath) as img:
            width, height = img.size
        duration = 0

    if media_type == 'video':
        thumb_path = f'temp/{file_hash}.jpg'
        try:
            cap = cv2.VideoCapture(filepath)
            try:
                ret, frame = cap.read()
            finally:
                # Release the capture even when reading the frame fails.
                cap.release()
            if not ret or not cv2.imwrite(thumb_path, frame):
                raise RuntimeError('could not extract the first video frame')
            obj_storage.PutFile(thumb_path, f'thumbnails/{file_hash}.jpg')  # slower
            thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{file_hash}.jpg"
            phash = funcs.generate_phash(thumb_path)
        except Exception:
            print('Error generating thumbnail. Skipping...')
            return False
        finally:
            # Remove the temporary thumbnail on success and on failure alike.
            if os.path.exists(thumb_path):
                os.remove(thumb_path)
    elif media_type == 'image':
        phash = funcs.generate_phash(filepath)

    # The stored object is named after the content hash, not the original name.
    newFilename = f'{file_hash}{file_extension}'
    server_path = f'media/snaps/{username}/{newFilename}'
    file_url = f"https://storysave.b-cdn.net/{server_path}"

    obj_storage.PutFile(filepath, server_path)  # slowest step: full media upload

    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, date, hash, filename, duration, thumbnail, phash, platform) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    values = (username, media_type, file_url, width, height, 'story', post_date, file_hash, filename, duration, thumbnail_url, phash, 'snapchat')

    newCursor.execute(query, values)  # slower
    newDB.commit()
    print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')

    # The local copy is only deleted after the upload and the commit succeeded.
    os.remove(filepath)

    return True
|
|
|
|
def get_media_data(filepath):
    """Extract the username and timestamp encoded in a file name.

    The base name is split on ``'~'``; the first piece is taken as the
    username and the second as the timestamp.

    Args:
        filepath: Path of the media file.

    Returns:
        A dict with the keys ``'username'``, ``'timestamp'`` and
        ``'filepath'``, or False when the base name has fewer than three
        ``'~'``-separated pieces.
    """
    pieces = os.path.basename(filepath).split('~')
    if len(pieces) >= 3:
        return {
            'username': pieces[0],
            'timestamp': pieces[1],
            'filepath': filepath,
        }
    return False
|
|
|
|
def get_media(folder_path):
    """Collect media descriptors for every parseable file under a folder.

    Walks ``folder_path`` recursively and passes each file path to
    ``get_media_data``; paths for which it returns a falsy value are skipped.

    Args:
        folder_path: Root directory to walk.

    Returns:
        A list of the dicts returned by ``get_media_data``.
    """
    found = []
    for root, _dirs, files in os.walk(folder_path):
        parsed = (get_media_data(os.path.join(root, name)) for name in files)
        found.extend(entry for entry in parsed if entry)
    return found
|
|
|
|
def dump(folder_path):
    """Run ``UploadMedia`` on every media descriptor found under ``folder_path``.

    The return values of ``UploadMedia`` are ignored.
    """
    for item in get_media(folder_path):
        UploadMedia(item)
|
|
|
|
if __name__ == '__main__':
    # Script entry point: sets up the module-level database and storage
    # handles that UploadMedia reads, then processes the download folder.
    print('Starting processing...')

    directory = 'snapchat/'

    # A missing folder is treated like an empty one instead of crashing
    # with FileNotFoundError.
    if not os.path.isdir(directory) or not os.listdir(directory):
        print('No files to process. Exiting...')
        # Same effect as exit(), without relying on the `site` helper.
        raise SystemExit

    newDB, newCursor = config.gen_connection()

    # NOTE(review): the storage access key is hard-coded in source; it should
    # be rotated and loaded from configuration or the environment instead.
    obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')

    newCursor.execute("SELECT filename FROM media WHERE filename IS NOT NULL AND platform = 'snapchat'")
    # A set gives O(1) duplicate checks; UploadMedia only tests membership.
    existing_files = {row[0] for row in newCursor.fetchall()}

    dump(directory)

    print("Processing completed.")