You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

96 lines
2.9 KiB
Python

11 months ago
import requests, json
11 months ago
from bs4 import BeautifulSoup
11 months ago
from concurrent.futures import ThreadPoolExecutor, as_completed
headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36"}
11 months ago
def get_data(username):
    """Fetch the public Snapchat profile page for *username* and return
    the embedded __NEXT_DATA__ JSON payload as a dict.

    Returns None when the page has no usable __NEXT_DATA__ script tag
    (e.g. unknown or private user). Network errors propagate to the
    caller, where get_all_users_data() catches them per-username.
    """
    url = f"https://www.snapchat.com/add/{username}"
    # timeout: requests has NO default timeout, so a stalled connection
    # would otherwise hang its worker thread indefinitely
    response = requests.get(url, headers=headers, timeout=15)
    soup = BeautifulSoup(response.text, "html.parser")

    data_script = soup.find("script", id="__NEXT_DATA__")
    # also guard .string being None (empty tag) — json.loads(None)
    # would raise TypeError
    if not data_script or not data_script.string:
        print(f"No data found for {username}.")
        return None
    return json.loads(data_script.string)
11 months ago
def get_all_users_data(usernames):
    """Fetch profile data for every username concurrently.

    Returns a dict mapping each username to its data payload, or to
    None when the fetch failed with an exception.
    """
    results = {}

    def _fetch(name):
        # Pair the username with its payload so the completion loop can
        # record the result directly from the future's value.
        return name, get_data(name)

    with ThreadPoolExecutor() as pool:
        pending = {pool.submit(_fetch, name): name for name in usernames}
        for done in as_completed(pending):
            username = pending[done]
            try:
                username, payload = done.result()
                results[username] = payload
            except Exception as e:
                print(f"Error fetching data for {username}: {e}")
                results[username] = None
    return results
11 months ago
def parse_stories(stories):
    """Normalize every raw snap entry in *stories* via parse_story()."""
    return [parse_story(entry) for entry in stories]
def get_stories(data):
    """Extract and parse the profile's current story snaps from *data*.

    Returns [] when the expected nesting of keys is absent.
    """
    try:
        snap_list = data['props']['pageProps']['story']['snapList']
    except KeyError:
        # No story section on this profile page.
        return []
    return parse_stories(snap_list)
11 months ago
def get_highlights(data):
    """Collect highlight entries from every known pageProps key.

    The page data has carried highlights under several different keys,
    so each candidate is checked and their contents concatenated in a
    fixed order.
    """
    page_props = data.get('props', {}).get('pageProps', {})
    collected = []
    for key in ('curatedHighlights', 'savedHighlights', 'highlights'):
        collected.extend(page_props.get(key) or [])
    return collected
11 months ago
def parse_story(story):
    """Flatten one raw snap record into the normalized story schema.

    Missing fields degrade to '' rather than raising, since the page
    payload is not guaranteed to carry every key.
    """
    urls = story.get('snapUrls', {})
    media_url = urls.get('mediaUrl', '')
    return {
        "original_snap_id": story.get('snapId', {}).get('value', ''),
        "snap_id": get_snap_id(media_url),
        "url": media_url,
        "timestamp": story.get('timestampInSec', {}).get('value', ''),
        "platform": "snapchat",
        "type": "story",
    }
11 months ago
11 months ago
def get_snap_id(url):
    """Return the snap id: the URL's final path segment minus its extension."""
    filename = url.rsplit('/', 1)[-1]
    return filename.partition('.')[0]
11 months ago
11 months ago
def get_highlight_stories(data):
    """Parse every snap inside every highlight found in *data*."""
    return [
        parse_story(snap)
        for highlight in get_highlights(data)
        for snap in highlight.get('snapList', [])
    ]