from concurrent.futures import ThreadPoolExecutor, as_completed
import json

import requests
from bs4 import BeautifulSoup

# Present as a desktop browser so Snapchat serves the full profile page.
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                  "(KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36"
}

# Known snapMediaType codes mapped to [label, kind]. Entries marked 'idk'
# have not been identified yet.
snap_types = {
    27: ['spotlight', 'video'],
    256: ['thumbnail', 'image'],
    400: ['idk', 'image'],
    1023: ['idk', 'image'],
    1034: ['downscaled_video', 'video'],
    1322: ['idk', 'video'],
    1325: ['idk', 'video'],
}


def get_data(username):
    """Fetch a public profile page and return its embedded __NEXT_DATA__ JSON."""
    url = f"https://www.snapchat.com/add/{username}"
    response = requests.get(url, headers=headers, timeout=30)
    soup = BeautifulSoup(response.text, "html.parser")
    data_script = soup.find("script", id="__NEXT_DATA__")
    if not data_script:
        print(f"No data found for {username}.")
        return None
    return json.loads(data_script.string)


def get_social_medias(data):
    """Return the profile's website URL, if one is listed."""
    try:
        return data['props']['pageProps']['userProfile']['publicProfileInfo']['websiteUrl']
    except KeyError:
        return None


def get_related_profiles(data):
    """Return the usernames of profiles Snapchat lists as related."""
    related_profiles = []
    try:
        related_profiles_data = data['props']['pageProps']['userProfile']['relatedProfiles']
        for profile in related_profiles_data:
            related_profiles.append(profile['username'])
    except KeyError:
        pass
    return related_profiles


def get_all_users_data(usernames):
    """Fetch profile data for several usernames concurrently."""
    all_data = {}

    # Helper that pairs each result with its username for threading.
    def fetch_data(username):
        return username, get_data(username)

    # Use ThreadPoolExecutor for concurrent fetching.
    with ThreadPoolExecutor() as executor:
        futures = {executor.submit(fetch_data, username): username for username in usernames}
        for future in as_completed(futures):
            username = futures[future]
            try:
                username, data = future.result()
                all_data[username] = data
            except Exception as e:
                print(f"Error fetching data for {username}: {e}")
                all_data[username] = None

    return all_data


def parse_stories(stories):
    """Normalize a list of raw snap entries."""
    return [parse_story(story) for story in stories]


def get_stories(data):
    """Extract the story list from the JSON data, newest first."""
    try:
        stories = data['props']['pageProps']['story']['snapList']
        if not isinstance(stories, list):
            return []
        # Default missing snapIndex values to 0 so the sort cannot raise.
        stories.sort(key=lambda x: x.get('snapIndex') or 0, reverse=True)
        return stories
    except (KeyError, TypeError):
        return []


def get_highlights(data):
    """Extract highlights from possible highlight keys in JSON data."""
    highlights = []
    page_props = data.get('props', {}).get('pageProps', {})
    possible_highlight_keys = ['curatedHighlights', 'savedHighlights', 'highlights']
    for key in possible_highlight_keys:
        highlight_data = page_props.get(key, [])
        if highlight_data:
            highlights.extend(highlight_data)
    return highlights


def parse_story(story):
    """Flatten one raw snap entry into a normalized dict."""
    original_snap_id = story.get('snapId', {}).get('value', '')
    snap_url = story.get('snapUrls', {}).get('mediaUrl', '')
    timestamp = story.get('timestampInSec', {}).get('value', '')
    # snapMediaType 0 is an image; anything else is treated as video.
    media_type = 'image' if story.get('snapMediaType') == 0 else 'video'
    return {
        "original_snap_id": original_snap_id,
        "snap_id": get_snap_id(snap_url),
        "url": snap_url,
        "timestamp": timestamp,
        "platform": "snapchat",
        "type": "story",
        "username": story.get('username', ''),
        "media_type": media_type,
    }


def get_snap_id(url):
    """Derive a snap ID from the media URL: the filename, minus extension and query string."""
    return url.split('?')[0].split('/')[-1].split('.')[0]


def get_highlight_stories(data):
    """Flatten every highlight's snapList into a single list of parsed stories."""
    stories = []
    for highlight in get_highlights(data):
        for snap in highlight.get('snapList', []):
            stories.append(parse_story(snap))
    return stories


def get_spotlight_metadata(data):
    """Extract spotlight metadata from JSON data."""
    try:
        return data['props']['pageProps']['spotlightStoryMetadata']
    except KeyError:
        return []


def get_username(data):
    """Extract username from JSON data."""
    try:
        return data['props']['pageProps']['userProfile']['publicProfileInfo']['username']
    except KeyError:
        return None
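

# Usage sketch (not part of the original script): fetch a few profiles
# concurrently, then pull stories, highlight snaps, and related profiles out
# of each payload. The usernames below are hypothetical placeholders.
if __name__ == "__main__":
    usernames = ["example_user_1", "example_user_2"]  # placeholder handles
    all_data = get_all_users_data(usernames)
    for username, data in all_data.items():
        if data is None:
            continue  # fetch failed or the page had no __NEXT_DATA__ payload
        stories = parse_stories(get_stories(data))
        highlight_stories = get_highlight_stories(data)
        related = get_related_profiles(data)
        print(f"{username}: {len(stories)} stories, "
              f"{len(highlight_stories)} highlight snaps, "
              f"{len(related)} related profiles")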