From 73889be10ea1935ed00a50f22bfe142720c01a15 Mon Sep 17 00:00:00 2001
From: oscar <>
Date: Fri, 29 Nov 2024 14:22:40 -0800
Subject: [PATCH] Generate video thumbnails on upload; add resolution and thumbnail backfill scripts

---
 .gitignore        |  5 +++-
 dump_instagram.py | 68 ++++++++++++++++++++++++++++++++++++++---------
 fixresolution.py  | 47 ++++++++++++++++++++++++++++++++
 fixthumbnails.py  | 63 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 170 insertions(+), 13 deletions(-)
 create mode 100644 fixresolution.py
 create mode 100644 fixthumbnails.py

diff --git a/.gitignore b/.gitignore
index 4180796..9c5186c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,7 @@
 # Content
 storysaver/
 facebook/
-media/
\ No newline at end of file
+media/
+cache/
+temp/
+*.pyc
\ No newline at end of file
diff --git a/dump_instagram.py b/dump_instagram.py
index c504526..4376d4a 100644
--- a/dump_instagram.py
+++ b/dump_instagram.py
@@ -1,15 +1,16 @@
 from BunnyCDN.Storage import Storage
 from datetime import datetime
-import os, config, funcs
+import os, config, funcs, cv2
 from PIL import Image
 
 
-def UploadMedia(filepath, username, media_id, timestamp = None, user_id = None):
-    if media_id and int(media_id) in existing_files:
-        print('Duplicate file detected. Removing...')
-        os.remove(filepath)
-        return True
-    
+def UploadMedia(media):
+    media_id = media['media_id']
+    username = media['username']
+    timestamp = media['timestamp']
+    user_id = media['user_id']
+    filepath = media['filepath']
+        
     filename = os.path.basename(filepath)
     file_extension = os.path.splitext(filename)[1].lower()
 
@@ -25,6 +26,20 @@ def UploadMedia(filepath, username, media_id, timestamp = None, user_id = None):
 
     width, height = funcs.get_video_dimensions(filepath) if media_type == 'video' else Image.open(filepath).size
 
+    thumbnail_url = None
+    if media_type == 'video':
+        try:
+            thumbPath = f'temp/{media_id}.jpg'
+            cap = cv2.VideoCapture(filepath)
+            ret, frame = cap.read()
+            cv2.imwrite(thumbPath, frame)
+            cap.release()
+            obj_storage.PutFile(thumbPath, f'thumbnails/{media_id}.jpg')
+            thumbnail_url = f"https://storysave.b-cdn.net/thumbnails/{media_id}.jpg"
+        except:
+            print('Error generating thumbnail. Skipping...')
+            return False
+
     server_path = f'media/{post_type}/{username}/{media_id}{file_extension}'
 
     file_url = f"https://storysave.b-cdn.net/{server_path}"
@@ -47,8 +62,8 @@ def UploadMedia(filepath, username, media_id, timestamp = None, user_id = None):
 
     obj_storage.PutFile(filepath, server_path)
 
-    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, media_id, post_type, date, user_id, hash, filename, duration) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
-    values = (username, media_type, file_url, width, height, media_id, post_type, post_date, user_id, file_hash, filename, duration)
+    query = "INSERT IGNORE INTO media (username, media_type, media_url, width, height, media_id, post_type, date, user_id, hash, filename, duration, thumbnail) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
+    values = (username, media_type, file_url, width, height, media_id, post_type, post_date, user_id, file_hash, filename, duration, thumbnail_url)
 
     newCursor.execute(query, values)
     newDB.commit()
@@ -58,8 +73,8 @@ def UploadMedia(filepath, username, media_id, timestamp = None, user_id = None):
 
     return True
 
-
-def dump_instagram(folder_path):
+def getMedias(folder_path):
+    medias = []
     for filename in os.listdir(folder_path):
         parts = filename.split('~')
         if len(parts) < 4:
@@ -71,7 +86,36 @@ def dump_instagram(folder_path):
         user_id = parts[3].split('_')[-1].split('.')[0]
         
         filepath = os.path.join(folder_path, filename)
-        UploadMedia(username=username, filepath=filepath, media_id=media_id, timestamp=timestamp, user_id=user_id)
+
+        if not media_id:
+            print(f'Invalid media_id for file {filename}. Skipping...')
+            continue
+
+        try:media_id = int(media_id)
+        except:
+            print(f'Invalid media_id for file {filename}. Skipping...')
+            continue
+        
+        data = {
+            'username': username,
+            'timestamp': timestamp,
+            'media_id': media_id,
+            'user_id': user_id,
+            'filepath': filepath
+        }
+        medias.append(data)
+    return medias
+        
+def dump_instagram(folder_path):
+    """Upload every media file parsed from folder_path; delete, and do not upload, known duplicates."""
+    for media in getMedias(folder_path):
+        if media['media_id'] in existing_files:
+            # The file is deleted here, so it must not reach UploadMedia, which opens it.
+            print('Duplicate file detected. Removing...')
+            os.remove(media['filepath'])
+            continue
+
+        UploadMedia(media)
  
 
 if __name__ == '__main__':
diff --git a/fixresolution.py b/fixresolution.py
new file mode 100644
index 0000000..4fa15e9
--- /dev/null
+++ b/fixresolution.py
@@ -0,0 +1,53 @@
+from BunnyCDN.Storage import Storage
+import config, os, funcs
+from PIL import Image
+
+# Backfill script: for every media row stored with width = 0, download the file,
+# measure it locally and write the real width/height back to the database.
+# The hashes of the images are different due to the optimizer.
+
+# NOTE(review): this looks like a hard-coded storage credential - consider loading it from config.
+obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
+
+db, cursor = config.gen_connection()
+
+cursor.execute("SELECT id, media_id, media_url FROM media WHERE width = 0;")
+results = cursor.fetchall()
+
+print(f"Found {len(results)} files to process.")
+
+cacheDir = 'cache'
+# Make sure the download target exists before the first download.
+os.makedirs(cacheDir, exist_ok=True)
+
+for count, result in enumerate(results, start=1):
+    rowID = result[0]
+    mediaURL = result[2]
+
+    # Strip the CDN host to get the path that is passed to the storage client.
+    serverPath = mediaURL.replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
+
+    localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))
+
+    if os.path.exists(localFilePath):
+        print(f"File already exists: {localFilePath}")
+    else:
+        obj_storage.DownloadFile(storage_path=serverPath, download_path=cacheDir)
+
+    mediaType = funcs.get_media_type(localFilePath)
+
+    if mediaType == 'image':
+        with Image.open(localFilePath) as img:
+            width, height = img.size
+    elif mediaType == 'video':
+        width, height = funcs.get_video_dimensions(localFilePath)
+    else:
+        # Without this guard the UPDATE below would reuse the previous row's
+        # dimensions (or raise NameError on the very first row).
+        print(f"[{count}/{len(results)}] Unsupported media type for {localFilePath}. Skipping...")
+        continue
+
+    cursor.execute("UPDATE media SET width = %s, height=%s WHERE id = %s;", (width, height, rowID))
+    db.commit()
+
+    print(f"[{count}/{len(results)}] width: {width}, height: {height} {cursor.rowcount}")
\ No newline at end of file
diff --git a/fixthumbnails.py b/fixthumbnails.py
new file mode 100644
index 0000000..94861ca
--- /dev/null
+++ b/fixthumbnails.py
@@ -0,0 +1,88 @@
+from BunnyCDN.Storage import Storage
+import config, os, cv2
+from concurrent.futures import ThreadPoolExecutor
+
+# this script will take a screenshot of the first frame of each video and upload it as a thumbnail to BunnyCDN
+
+# NOTE(review): this looks like a hard-coded storage credential - consider loading it from config.
+obj_storage = Storage('345697f9-d9aa-4a6b-a5ec8bffc16d-ceaf-453e', 'storysave')
+
+db, cursor = config.gen_connection()
+
+cursor.execute("SELECT id, media_id, media_url FROM media WHERE media_type = 'video' AND thumbnail IS NULL and status = 'public';")
+results = cursor.fetchall()
+
+print(f"Found {len(results)} files to process.")
+
+cacheDir = 'cache'
+# Make sure the download target exists before the first download.
+os.makedirs(cacheDir, exist_ok=True)
+
+# Scratch file that is overwritten for every video before being uploaded.
+thumbnailPath = 'thumbnail.jpg'
+
+
+def GetServerPath(mediaURL):
+    """Strip the CDN host from mediaURL and normalise slashes to get the storage path."""
+    return mediaURL.replace("https://storysave.b-cdn.net/", '').replace('//', '/').replace('\\', '/')
+
+
+def DownloadFile(serverPath, cacheDir):
+    """Download serverPath into cacheDir unless that file is already cached.
+
+    Returns the local path, which is cacheDir joined with the basename of serverPath.
+    """
+    localFilePath = os.path.join(cacheDir, os.path.basename(serverPath))
+
+    if os.path.exists(localFilePath):
+        print(f"File already exists: {localFilePath}")
+        return localFilePath
+
+    obj_storage.DownloadFile(storage_path=serverPath, download_path=cacheDir)
+    print(f"Downloaded {serverPath} to {localFilePath}")
+    return localFilePath
+
+
+def ImportMedias():
+    """Download every selected video into cacheDir using 10 worker threads.
+
+    Nothing in this script calls it; the main loop below downloads files one by one.
+    """
+    with ThreadPoolExecutor(max_workers=10) as executor:
+        for video in results:
+            executor.submit(DownloadFile, GetServerPath(video[2]), cacheDir)
+
+
+def SaveFirstFrame(videoPath, outputPath):
+    """Write the first frame of videoPath to outputPath and return True on success."""
+    cap = cv2.VideoCapture(videoPath)
+    try:
+        ret, frame = cap.read()
+    finally:
+        # Release the capture even if reading raises.
+        cap.release()
+
+    # read() reports failure through ret; passing an empty frame to imwrite would raise.
+    if not ret or frame is None:
+        return False
+    return bool(cv2.imwrite(outputPath, frame))
+
+
+for count, result in enumerate(results, start=1):
+    itemID = result[0]
+
+    localFilePath = DownloadFile(GetServerPath(result[2]), cacheDir)
+
+    if not SaveFirstFrame(localFilePath, thumbnailPath):
+        # Leave thumbnail NULL so the row is picked up again on a later run.
+        print(f"[{count}/{len(results)}] Could not read first frame of {localFilePath}. Skipping...")
+        continue
+
+    thumbnailURL = f"https://storysave.b-cdn.net/thumbnails/{itemID}.jpg"
+
+    obj_storage.PutFile(thumbnailPath, f'thumbnails/{itemID}.jpg')
+
+    cursor.execute("UPDATE media SET thumbnail = %s WHERE id = %s;", (thumbnailURL, itemID))
+    db.commit()
+
+    print(f"[{count}/{len(results)}] thumbnail: {thumbnailURL} {cursor.rowcount}")
\ No newline at end of file