public-scripts/youtube-download-channel.sh

92 lines
4.4 KiB
Bash
Executable file
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
#Collects metadata for the recent videos of a YouTube channel (or of your own
#subscription feed) with yt-dlp and turns it into a playlist JSON file, a CSV
#listing and a yt-dlp download archive, all stored in a folder next to this
#script.
#Parameters:
#1st parameter: Channel you want to turn into a playlist. Leave blank to save your subscriptions (cookie file required)
channel=${1:-"subscriptions"}
#2nd parameter: Time limit for the download (passed to yt-dlp as --dateafter). Leave blank to save all videos from the last month.
breaktime=${2:-"today-1month"}
#3rd parameter: Seconds between data requests. Decrease to make downloads faster, but your account may be temporarily blocked if you use a number too low.
sleeptime=${3:-"1.0"}
#Internal variables:
#Directory that contains this script.
#Via https://stackoverflow.com/questions/59895/how-do-i-get-the-directory-where-a-bash-script-is-located-from-within-the-script
folder=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
#Required to download your own subscriptions.
#Obtain this file through the procedure listed at
# https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp
#and place it next to your script.
cookies="${folder}/yt-cookies.txt"
#Every output file lives in a per-channel folder and is named after the channel.
subfolder="${folder}/${channel}"
archive="${subfolder}/${channel}.txt"
sortcsv="${subfolder}/${channel}-sort.csv"
csv="${subfolder}/${channel}.csv"
json="${subfolder}/${channel}.json"
#Prefer the interpreter and the yt-dlp copy from /opt/venv when they exist.
python="python"
if [[ -f "/opt/venv/bin/python" ]]; then
  python="/opt/venv/bin/python"
fi
ytdl="/usr/bin/yt-dlp"
if [[ -f "/opt/venv/bin/yt-dlp" ]]; then
  ytdl="/opt/venv/bin/yt-dlp"
fi
#Create the output folder on first use. The test has to be "is not a
#directory": the path string itself is never empty, so a -z test would never
#create the folder and the cd below would abort the very first run.
if [[ ! -d "${subfolder}" ]]; then
  mkdir -p -- "${subfolder}" || exit
fi
#Everything below works with paths relative to the output folder.
cd -- "${subfolder}" || exit
#Fetch the metadata (one .info.json per video, no media files) into the
#current folder.
#If available, you can use the cookies from your browser directly:
# --cookies-from-browser "firefox"
url="https://www.youtube.com/@${channel}"
if [[ "${channel}" = "subscriptions" ]]; then
  url="https://www.youtube.com/feed/subscriptions"
fi
#Options shared by both the anonymous and the logged-in request.
ytdl_args=(
  --skip-download --download-archive "${archive}"
  --dateafter "${breaktime}"
  --extractor-args youtubetab:approximate_date
  --break-on-reject --lazy-playlist --write-info-json
  --sleep-requests "${sleeptime}"
)
#Only pass the cookie file when it really exists. The path string is never
#empty, so testing it with -z would always select the --cookies variant.
if [[ -f "${cookies}" ]]; then
  ytdl_args=(--cookies "${cookies}" "${ytdl_args[@]}")
elif [[ "${channel}" = "subscriptions" ]]; then
  echo "Warning: ${cookies} not found; the subscription feed requires a cookie file." >&2
fi
"${python}" "${ytdl}" "${url}" "${ytdl_args[@]}"
#Rebuild the CSV listing from scratch on every run.
rm -rf -- "${csv}"
#Appends the data of one video to the download archive, the CSV listing and
#the sort index.
#Globals:   subfolder (read); archive, csv, sortcsv (appended to)
#Arguments: $1 - name of an .info.json file, relative to ${subfolder}
#Outputs:   every appended line is also echoed to stdout
record_info_json() {
  local name="$1"
  local meta="${subfolder}/${name}"
  local row
  #Archive line: the word "youtube" followed by the video id.
  printf 'youtube %s\n' "$(jq -cr '.id' "${meta}")" | tee -a "${archive}"
  #CSV row: the compact JSON array without its brackets; escaped quotes (\")
  #inside values are removed.
  jq -c '[.upload_date, .timestamp, .uploader , .title, .webpage_url]' "${meta}" |
    sed -e 's/^\[//' -e 's/\]$//' -e 's/\\"//g' |
    tee -a "${csv}"
  #Sort index row: upload date, timestamp, file name.
  jq -c '[.upload_date, .timestamp]' "${meta}" |
    while IFS= read -r row; do
      printf '%s,%s\n' "${row}" "${name}"
    done |
    sed -e 's/^\[//' -e 's/\],/,/g' -e 's/\\"//g' |
    tee -a "${sortcsv}"
}
#Upper bound for concurrently running workers.
max_jobs=$(($(getconf _NPROCESSORS_ONLN) * 3 * 2))
#The loop reads from a process substitution instead of a pipe so that it runs
#in the main shell: only then are the workers children of this shell, and only
#then does the final "wait" really block until every row has been written.
#The order in which files are visited does not matter here.
while IFS= read -r -d '' xp; do
  record_info_json "${xp##./}" &
  if [[ $(jobs -r -p | wc -l) -ge ${max_jobs} ]]; then
    wait -n
  fi
done < <(find . -type f -iname "*.info.json" -print0)
wait
#Build the playlist file ${json} from the per-video metadata, ordered by the
#first two columns of ${sortcsv} (upload date, timestamp).
#Scratch files come from mktemp instead of fixed names in /tmp.
ordered=$(mktemp) || exit
videos=$(mktemp) || exit
sort "${sortcsv}" | uniq >"${ordered}"
while IFS= read -r line; do
  #Columns 1-2 are the sort keys; everything after the second comma is the
  #file name, which may itself contain commas.
  file=$(cut -d ',' -f3- <<<"${line}")
  echo "${file}"
  #One compact JSON object per video, with a fresh random playlistItemId.
  jq -c --argjson now "$(date +%s)" \
    --arg uuid "$(cat /proc/sys/kernel/random/uuid)" \
    '{videoId: .id, title: .title, author: .uploader, authorId: .channel_id, lengthSeconds: .duration, published: .epoch, timeAdded: $now, playlistItemId: $uuid, type: "video"}' \
    "${subfolder}/${file}" | tee -a "${videos}"
done <"${ordered}"
rm -f -- "${json}"
#Let jq assemble the final document instead of gluing JSON text together:
#entries with any null field are dropped as before, but dropping the first
#entry can no longer leave a stray comma that makes the whole file invalid,
#and the channel name is escaped properly.
if jq -c -s --arg name "${channel}" --argjson now "$(date +%s)" \
  '{playlistName: $name, protected: false, description: "Videos to watch later", videos: map(select(all(.[]; . != null))), _id: $name, createdAt: $now, lastUpdatedAt: $now}' \
  "${videos}" >"${json}"; then
  rm -f -- "${videos}"
else
  #Keep the intermediate file for inspection when the playlist cannot be built.
  echo "Could not build ${json}; the per-video entries were kept in ${videos}" >&2
fi
rm -f -- "${ordered}" "${sortcsv}"
#Final clean-up: sort and de-duplicate the CSV listing (adding a header row)
#and the download archive.
#A mktemp scratch file replaces the fixed names in /tmp, which could collide
#between concurrent runs. The results are written back with a redirection, so
#${csv} and ${archive} are not replaced by the private scratch file itself.
scratch=$(mktemp) || exit
sort "${csv}" | uniq >"${scratch}"
{
  echo '"Upload Date", "Timestamp", "Uploader", "Title", "Webpage URL"'
  cat "${scratch}"
} >"${csv}"
sort "${archive}" | uniq >"${scratch}"
cat "${scratch}" >"${archive}"
rm -f -- "${scratch}"