You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
123 lines
3.0 KiB
Python
123 lines
3.0 KiB
Python
from selenium.webdriver.common.by import By
|
|
import undetected_chromedriver as uc
|
|
import requests
|
|
import base64
|
|
import re
|
|
import os
|
|
|
|
def format_url(url):
    """Return ``url`` with every uppercase-hex percent escape removed.

    Each ``%XX`` sequence whose two characters are drawn from ``0-9A-F`` is
    deleted outright (it is not decoded). Escapes written with lowercase
    hex digits, such as ``%2f``, are left untouched.
    """
    percent_escape = re.compile(r'%[0-9A-F]{2}')
    return percent_escape.sub('', url)
|
|
|
|
def encode_offset(offset_num):
    """Return the Base64 text for the string form of ``offset_num``.

    The value is converted with ``str()``, encoded as UTF-8, Base64-encoded,
    and the resulting bytes are decoded back into a ``str``.
    """
    return base64.b64encode(str(offset_num).encode('utf-8')).decode('utf-8')
|
|
|
|
def get_clips(username):
    """Fetch the clips of a Twitch user through the GQL endpoint.

    Posts the persisted ``ClipsCards__User`` query for ``username`` with a
    limit of 100 and hands the returned clip edges to ``parse_clips``.

    Args:
        username: Login name sent as the query's ``login`` variable.

    Returns:
        The list built by ``parse_clips``. An empty list is returned when
        the response contains no user or no clips object for that login.

    Raises:
        requests.HTTPError: The endpoint answered with a 4xx/5xx status.
        requests.RequestException: The request failed or timed out.
    """
    url = 'https://gql.twitch.tv/gql'

    user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'

    headers = {
        'client-id': 'kimne78kx3ncx6brgo4mv6wki5h1ko',
        'Content-Type': 'text/plain;charset=UTF-8',
        'User-Agent': user_agent,
    }

    data = {
        "operationName": "ClipsCards__User",
        "variables": {"login": username, "limit": 100},
        "extensions": {"persistedQuery": {"version": 1, "sha256Hash": "4eb8f85fc41a36c481d809e8e99b2a32127fdb7647c336d27743ec4a88c4ea44"}},
    }

    # Without a timeout requests may wait forever on a stalled connection.
    response = requests.post(url, headers=headers, json=data, timeout=30)
    # Surface HTTP failures here instead of as a confusing KeyError below.
    response.raise_for_status()

    payload = response.json()

    # NOTE(review): an unknown login appears to yield a null "user" -- confirm.
    # Each level is guarded so a missing object gives [] rather than TypeError.
    user = (payload.get('data') or {}).get('user')
    if not user:
        return []

    edges = (user.get('clips') or {}).get('edges') or []

    return parse_clips(edges)
|
|
|
|
|
|
def parse_clips(clips):
    """Flatten clip edges into plain summary dictionaries.

    ``clips`` is a list of dictionaries, each holding the clip data under
    its ``'node'`` key. For every entry a new dictionary with the keys
    ``id``, ``url``, ``title``, ``views`` and ``duration`` is produced, and
    the list of those dictionaries is returned in input order.
    """
    # (output key, key inside the node) pairs, in the order they are read
    field_map = (
        ('id', 'id'),
        ('url', 'url'),
        ('title', 'title'),
        ('views', 'viewCount'),
        ('duration', 'durationSeconds'),
    )

    return [
        {target: edge['node'][source] for target, source in field_map}
        for edge in clips
    ]
|
|
|
|
def get_video_url(video_url, driver):
    """Load ``video_url`` in ``driver`` and return the video's ``src``.

    The driver navigates to the page, the ``video`` element is looked up by
    tag name, and the value of its ``src`` attribute is returned.
    """
    driver.get(video_url)

    # Find the player element and read its source in a single expression
    return driver.find_element(By.TAG_NAME, 'video').get_attribute('src')
|
|
|
|
def download_video(video_url, filepath):
    """Download ``video_url`` to ``filepath`` unless the file already exists.

    Args:
        video_url: Address the video is fetched from.
        filepath: Destination path. Its parent directory is created when
            missing.

    Returns:
        ``filepath``, whether the file was just downloaded or already there.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.RequestException: The request failed or timed out.
    """
    if os.path.exists(filepath):
        return filepath

    parent_dir = os.path.dirname(filepath)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Write to a side file first: an interrupted download must not leave a
    # truncated file at ``filepath`` that the exists() check would accept.
    partial_path = f"{filepath}.part"

    # stream=True defers reading the body so it is really fetched in chunks;
    # the context manager releases the connection when done.
    with requests.get(video_url, stream=True, timeout=30) as response:
        # Refuse to store an error page as if it were the video.
        response.raise_for_status()

        with open(partial_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=64 * 1024):
                f.write(chunk)

    os.replace(partial_path, filepath)

    return filepath
|
|
|
|
|
|
# Set up an undetected Chrome driver in headless mode
opts = uc.ChromeOptions()
opts.add_argument("--headless")
opts.add_argument("--window-size=1920,1080")

driver = uc.Chrome(use_subprocess=True, options=opts)

# Download every clip of the user into the local 'clips' directory, one
# '<clip id>.mp4' file per clip. The try/finally makes sure the browser is
# shut down even when a request or download fails.
try:
    username = 'didicandy666'
    clips = get_clips(username)

    # open() does not create missing directories, so make sure it exists
    os.makedirs('clips', exist_ok=True)

    for clip in clips:
        # parse_clips() stores the clip page address under 'url'
        clip_url = clip['url']

        filename = f"{clip['id']}.mp4"
        filepath = os.path.join('clips', filename)

        if os.path.exists(filepath):
            print(f"Already downloaded {filename}")
            continue

        video_url = get_video_url(clip_url, driver)

        # The video element may carry no src; skip rather than abort the run
        if not video_url:
            print(f"No video source found for {filename}")
            continue

        download_video(video_url, filepath)
        print(f"Downloaded {filename}")
finally:
    driver.quit()