integrated dump into snappy master
parent
4cd3983055
commit
93c9d660f9
@ -1,139 +0,0 @@
|
||||
from datetime import datetime
|
||||
import os, config, funcs, cv2
|
||||
from uuid import uuid4
|
||||
|
||||
# Folder scanned for snap files; the __main__ section passes it to os.listdir() and dump().
directory = 'snapchat'
|
||||
|
||||
def UploadMedia(media):
    """Upload one snap to object storage and record it in the ``media`` table.

    Reads the module-level ``existing_files``, ``obj_storage``, ``newCursor``
    and ``newDB`` that the ``__main__`` section creates before calling
    ``dump``.

    Args:
        media: dict with the keys ``username``, ``timestamp``, ``filepath``
            and ``media_id`` (the shape built by ``get_media_data``).

    Returns:
        True when the file was uploaded or recognised as a duplicate,
        False when thumbnail handling for a video failed.
    """
    username = media['username']
    timestamp = media['timestamp']
    filepath = media['filepath']
    filename = os.path.basename(filepath)
    media_id = media['media_id']
    thumbnail_url = None
    phash = None

    if filename in existing_files:
        print('Duplicate file detected. Removing...')
        os.remove(filepath)
        return True

    # NOTE(review): unlike the filename check above, this branch keeps the
    # local file even though the message says "Removing" -- confirm intent.
    if media_id in existing_files:
        print('Duplicate file detected. Removing...')
        return True

    media_type = funcs.get_media_type(filename)
    file_hash = funcs.calculate_file_hash(filepath)

    # Only the part before the first '-' is parsed as the epoch value.
    if '-' in timestamp:
        timestamp = timestamp.split('-')[0]

    post_date = datetime.fromtimestamp(int(timestamp)) if timestamp else datetime.now()

    width, height = funcs.get_media_dimensions(filepath)
    duration = funcs.get_video_duration(filepath)

    if media_type == 'image':
        phash = funcs.generate_phash(filepath)
    elif media_type == 'video':
        thumb_path = None
        try:
            thumb_path = generate_thumbnail(filepath)
            # this might be a problem in case of duplicate hashes
            obj_storage.PutFile(thumb_path, f'thumbnails/{file_hash}.jpg')
            thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{file_hash}.jpg"
            phash = funcs.generate_phash(thumb_path)
        except Exception as exc:
            # Best-effort: skip this file, but report why and let
            # KeyboardInterrupt / SystemExit propagate.
            print('Error generating thumbnail. Skipping...')
            print(f'Thumbnail error: {exc!r}')
            return False
        finally:
            # Remove the temporary thumbnail on success and on failure alike.
            if thumb_path and os.path.exists(thumb_path):
                os.remove(thumb_path)

    # The remote object keeps the original filename.
    server_path = f'media/snaps/{username}/{filename}'
    file_url = f"https://storysave.b-cdn.net/{server_path}"

    obj_storage.PutFile(filepath, server_path)  # the upload is a slow step

    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, post_type, date, hash, filename, duration, thumbnail, phash, platform) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    values = (username, media_type, file_url, width, height, 'story', post_date, file_hash, filename, duration, thumbnail_url, phash, 'snapchat')

    newCursor.execute(query, values)  # slower still
    newDB.commit()
    print(f'[{newCursor.rowcount}] records updated. File {filename} uploaded to {file_url}')

    # The local copy is removed only after the upload and the commit.
    os.remove(filepath)

    return True
|
||||
|
||||
def generate_thumbnail(filepath):
    """Save the first frame of a video as a JPEG under ``temp/``.

    Args:
        filepath: path of the video file to read.

    Returns:
        Path of the written thumbnail, ``temp/<uuid4>.jpg``.

    Raises:
        RuntimeError: if no frame could be read from the video or the
            thumbnail could not be written.
    """
    thumb_path = f'temp/{uuid4()}.jpg'
    # Make sure the target folder exists before writing into it.
    os.makedirs(os.path.dirname(thumb_path), exist_ok=True)

    cap = cv2.VideoCapture(filepath)
    try:
        ret, frame = cap.read()
    finally:
        # Release the capture even when reading raises.
        cap.release()

    if not ret or frame is None:
        raise RuntimeError(f'Could not read a frame from {filepath}')

    # imwrite reports failure through its return value; surface it here.
    if not cv2.imwrite(thumb_path, frame):
        raise RuntimeError(f'Could not write thumbnail to {thumb_path}')

    return thumb_path
|
||||
|
||||
def get_media_data(filepath):
    """Parse a ``username~timestamp~snapid.ext`` file name into a media dict.

    Args:
        filepath: path whose base name is split on ``~``.

    Returns:
        A dict with ``username``, ``timestamp``, ``filepath`` and
        ``media_id`` (third field, extension stripped), or False when the
        base name has fewer than three ``~``-separated fields.
    """
    pieces = os.path.basename(filepath).split('~')

    if len(pieces) >= 3:
        # Fields beyond the third are ignored.
        user, stamp, tail = pieces[:3]
        stem, _ext = os.path.splitext(tail)
        return {
            'username': user,
            'timestamp': stamp,
            'filepath': filepath,
            'media_id': stem,
        }

    return False
|
||||
|
||||
def get_media(folder_path):
    """Collect media dicts for every parsable file below ``folder_path``.

    Walks the folder recursively and keeps each truthy result of
    ``get_media_data``; files it rejects are skipped.

    Args:
        folder_path: root folder to walk.

    Returns:
        List of media dicts in ``os.walk`` order.
    """
    found = []

    for root, _dirs, files in os.walk(folder_path):
        parsed = (get_media_data(os.path.join(root, name)) for name in files)
        found.extend(item for item in parsed if item)

    return found
|
||||
|
||||
def dump(folder_path):
    """Run ``UploadMedia`` on every media entry found under ``folder_path``.

    The return value of each ``UploadMedia`` call is ignored.
    """
    for entry in get_media(folder_path):
        UploadMedia(entry)
|
||||
|
||||
def process_snap_ids(filenames):
    """Extract the unique snap ids from ``username~timestamp~snapid.ext`` names.

    Names with fewer than three ``~``-separated fields are skipped instead
    of raising, so one unexpected filename cannot abort the whole run.

    Args:
        filenames: iterable of file names.

    Returns:
        List of snap ids (third field, extension stripped), de-duplicated
        and in first-seen order.
    """
    snap_ids = []
    seen = set()  # O(1) membership test; the list keeps first-seen order

    for filename in filenames:
        parts = filename.split('~')
        if len(parts) < 3:
            continue

        snap_id = os.path.splitext(parts[2])[0]
        if snap_id not in seen:
            seen.add(snap_id)
            snap_ids.append(snap_id)

    return snap_ids
|
||||
|
||||
if __name__ == '__main__':
    print('Starting processing...')

    # A missing folder is handled like an empty one instead of crashing in
    # os.listdir().
    if not os.path.isdir(directory) or not os.listdir(directory):
        print('No files to process. Exiting...')
        # exit() is a helper injected by the site module; SystemExit always exists.
        raise SystemExit(0)

    # The names below are read as module-level globals by UploadMedia.
    newDB, newCursor = config.gen_connection()

    obj_storage = config.get_storage()

    newCursor.execute("SELECT filename FROM media WHERE filename IS NOT NULL AND platform = 'snapchat'")
    existing_files = [row[0] for row in newCursor.fetchall()]

    # Reduce the stored filenames to snap ids, which UploadMedia compares
    # against each file's media_id.
    existing_files = process_snap_ids(existing_files)

    dump(directory)

    print("Processing completed.")
|
||||
Loading…
Reference in New Issue