|
|
|
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
import requests
|
|
|
|
|
import json
|
|
|
|
|
|
|
|
|
|
# Desktop-Chrome User-Agent; sent with every request so Snapchat serves the
# full profile page rather than a bot-filtered response.
headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36"}

# Maps Snapchat's numeric snap-type codes to [label, media kind].
# NOTE(review): entries labelled 'idk' are unidentified codes, and this table
# is not referenced by any function visible in this file — presumably kept for
# future media-type dispatch; confirm before removing.
snap_types = {
    27 : ['spotlight', 'video'],
    256 : ['thumbnail', 'image'],
    400 : ['idk', 'image'],
    1023 : ['idk', 'image'],
    1034 : ['downscaled_video', 'video'],
    1322 : ['idk', 'video'],
    1325 : ['idk', 'video'],
}
|
|
|
|
|
|
|
|
|
|
def get_data(username):
    """Fetch a Snapchat profile page and return its embedded __NEXT_DATA__ JSON.

    Args:
        username: Snapchat handle to look up (appended to the /add/ URL).

    Returns:
        The parsed JSON payload as a dict, or None when the page carries no
        __NEXT_DATA__ script tag (e.g. unknown or private profile).

    Raises:
        requests.RequestException: on network failure or timeout.
    """
    url = f"https://www.snapchat.com/add/{username}"
    # Always set a timeout: without one a dead connection blocks the calling
    # (worker) thread indefinitely.
    response = requests.get(url, headers=headers, timeout=15)
    soup = BeautifulSoup(response.text, "html.parser")
    # Next.js embeds the page state as JSON in this script tag.
    data_script = soup.find("script", id="__NEXT_DATA__")
    if not data_script:
        print(f"No data found for {username}.")
        return None
    return json.loads(data_script.string)
|
|
|
|
|
|
|
|
|
|
def get_social_medias(data):
    """Return the profile's external website URL, or None when not present."""
    try:
        profile_info = data['props']['pageProps']['userProfile']['publicProfileInfo']
        return profile_info['websiteUrl']
    except KeyError:
        # Any missing level in the JSON means no website is published.
        return None
|
|
|
|
|
|
|
|
|
|
def get_related_profiles(data):
    """Return the usernames of profiles Snapchat lists as related to this one."""
    found = []
    try:
        candidates = data['props']['pageProps']['userProfile']['relatedProfiles']
        for candidate in candidates:
            found.append(candidate['username'])
    except KeyError:
        # Missing section or a malformed entry: keep whatever was collected.
        pass
    return found
|
|
|
|
|
|
|
|
|
|
def get_all_users_data(usernames):
    """Fetch profile data for every username concurrently.

    Returns a dict mapping each username to its parsed JSON payload, or to
    None when the fetch raised an exception.
    """
    results = {}

    def _fetch(name):
        # Pair the username with its payload so completion order is irrelevant.
        return name, get_data(name)

    with ThreadPoolExecutor() as pool:
        pending = {pool.submit(_fetch, name): name for name in usernames}

        for done in as_completed(pending):
            name = pending[done]
            try:
                name, payload = done.result()
                results[name] = payload
            except Exception as e:
                print(f"Error fetching data for {name}: {e}")
                results[name] = None

    return results
|
|
|
|
|
|
|
|
|
|
def parse_stories(stories):
    """Convert every raw snap dict in *stories* into a normalized story record."""
    return [parse_story(raw) for raw in stories]
|
|
|
|
|
|
|
|
|
|
def get_stories(data):
    """Extract the story snap list from the JSON data.

    Returns the profile's snaps sorted newest-first by 'snapIndex', or []
    when the story section is missing or malformed.
    """
    try:
        stories = data['props']['pageProps']['story']['snapList']

        # isinstance, not type(...) ==, is the idiomatic (subclass-aware) check.
        if not isinstance(stories, list):
            return []

        # Newest first. A missing/mixed-type 'snapIndex' makes sort raise
        # TypeError, which the except clause below converts to [].
        stories.sort(key=lambda snap: snap.get('snapIndex'), reverse=True)
        return stories
    # Narrowed from a bare `except:`, which also swallowed KeyboardInterrupt
    # and SystemExit. These three cover missing keys, non-dict levels, and
    # non-dict snap entries.
    except (KeyError, TypeError, AttributeError):
        return []
|
|
|
|
|
|
|
|
|
|
def get_highlights(data):
    """Collect highlight entries from every known highlight key in the JSON data."""
    page_props = data.get('props', {}).get('pageProps', {})

    # The highlight payload may live under any of these keys depending on the
    # profile; concatenate whatever is present, preserving key order.
    return [
        entry
        for key in ('curatedHighlights', 'savedHighlights', 'highlights')
        for entry in (page_props.get(key) or [])
    ]
|
|
|
|
|
|
|
|
|
|
def parse_story(story):
    """Normalize one raw snap dict into the flat story record used downstream."""
    media_url = story.get('snapUrls', {}).get('mediaUrl', '')
    # snapMediaType 0 means image; anything else (including missing) is video.
    kind = 'image' if story.get('snapMediaType') == 0 else 'video'

    return {
        "original_snap_id": story.get('snapId', {}).get('value', ''),
        "snap_id": get_snap_id(media_url),
        "url": media_url,
        "timestamp": story.get('timestampInSec', {}).get('value', ''),
        "platform": "snapchat",
        "type": "story",
        "username": story.get('username', ''),
        "media_type": kind,
    }
|
|
|
|
|
|
|
|
|
|
def get_snap_id(url):
    """Return the snap id embedded in a media URL: the basename, stripped of
    any query string and file extension."""
    without_query = url.partition('?')[0]
    basename = without_query.rsplit('/', 1)[-1]
    return basename.partition('.')[0]
|
|
|
|
|
|
|
|
|
|
def get_highlight_stories(data):
    """Flatten every highlight's snap list into parsed story records."""
    return [
        parse_story(snap)
        for highlight in get_highlights(data)
        for snap in highlight.get('snapList', [])
    ]
|
|
|
|
|
|
|
|
|
|
def get_spotlight_metadata(data):
    """Extract spotlight metadata from JSON data; [] when the section is absent."""
    try:
        page_props = data['props']['pageProps']
        return page_props['spotlightStoryMetadata']
    except KeyError:
        return []
|
|
|
|
|
|
|
|
|
|
def get_username(data):
    """Extract the canonical username from JSON data; None when unavailable."""
    try:
        profile = data['props']['pageProps']['userProfile']
        return profile['publicProfileInfo']['username']
    except KeyError:
        return None
|
|
|
|
|
|
|
|
|
|
|