From fedd8f544d43af11d58ca32cdbb88fb4f8f43d64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Sol=C3=ADs?= Date: Fri, 30 May 2025 16:09:56 +0000 Subject: [PATCH] fix: Correct parsing, add comments --- youtube-download-channel.sh | 121 +++++++++++++++++++----------------- 1 file changed, 63 insertions(+), 58 deletions(-) diff --git a/youtube-download-channel.sh b/youtube-download-channel.sh index ca4b8c5..db0e1b5 100755 --- a/youtube-download-channel.sh +++ b/youtube-download-channel.sh @@ -1,83 +1,88 @@ #!/bin/bash +#Parameters: +#1st parameter: Channel you want to turn into a playlist. Leave blank to save your subscriptions (cookie file required) channel=${1:-"subscriptions"} +#2nd parameter: Time limit for the download. Leave blank to save all videos from the last month. breaktime=${2:-"today-1month"} +#3rd parameter: Seconds between data requests. Decrease to make downloads faster, but your account may be temporarily blocked if you use a number too low. sleeptime=${3:-"1.0"} +#Internal variables: #Via https://stackoverflow.com/questions/59895/how-do-i-get-the-directory-where-a-bash-script-is-located-from-within-the-script -folder=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +folder=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) #Required to download your own subscriptions. #Obtain this file through the procedure listed at # https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp #and place it next to your script. -cookies="$folder/yt-cookies.txt" -subfolder="$folder/$channel" -archive="$subfolder/$channel.txt" -sortcsv="$subfolder/$channel-sort.csv" -csv="$subfolder/$channel.csv" -json="$subfolder/$channel.json" +cookies="${folder}/yt-cookies.txt" +subfolder="${folder}/${channel}" +archive="${subfolder}/${channel}.txt" +sortcsv="${subfolder}/${channel}-sort.csv" +csv="${subfolder}/${channel}.csv" +json="${subfolder}/${channel}.json" python="python" if [[ -f "/opt/venv/bin/python" ]]; then - python="/opt/venv/bin/python" + python="/opt/venv/bin/python" fi ytdl="/usr/bin/yt-dlp" if [[ -f "/opt/venv/bin/yt-dlp" ]]; then - ytdl="/opt/venv/bin/yt-dlp" + ytdl="/opt/venv/bin/yt-dlp" fi -if [[ -z "$subfolder" ]]; then - mkdir "$subfolder" +if [[ -z "${subfolder}" ]]; then + mkdir "${subfolder}" fi -cd "$subfolder" || exit +cd "${subfolder}" || exit #If available, you can use the cookies from your browser directly: # --cookies-from-browser "firefox" -url="https://www.youtube.com/@$channel" -if [[ "$channel" = "subscriptions" ]]; then - url="https://www.youtube.com/feed/subscriptions" +url="https://www.youtube.com/@${channel}" +if [[ "${channel}" = "subscriptions" ]]; then + url="https://www.youtube.com/feed/subscriptions" fi -if [[ -z "$cookies" ]]; then - "$python" "$ytdl" "$url" \ - --skip-download --download-archive "$archive" \ - --dateafter "$breaktime" \ - --extractor-args youtubetab:approximate_date \ - --break-on-reject --lazy-playlist --write-info-json \ - --sleep-requests "$sleeptime" +if [[ -z "${cookies}" ]]; then + "${python}" "${ytdl}" "${url}" \ + --skip-download --download-archive "${archive}" \ + --dateafter "${breaktime}" \ + --extractor-args youtubetab:approximate_date \ + --break-on-reject --lazy-playlist --write-info-json \ + --sleep-requests "${sleeptime}" else - "$python" "$ytdl" "$url" \ - --cookies "$cookies" \ - --skip-download --download-archive "$archive" \ - --dateafter "$breaktime" \ - --extractor-args youtubetab:approximate_date \ - --break-on-reject --lazy-playlist --write-info-json \ - --sleep-requests "$sleeptime" + "${python}" "${ytdl}" "${url}" \ + --cookies "${cookies}" \ + --skip-download --download-archive "${archive}" \ + --dateafter "${breaktime}" \ + --extractor-args youtubetab:approximate_date \ + --break-on-reject --lazy-playlist --write-info-json \ + --sleep-requests "${sleeptime}" fi -rm -rf "$csv" +rm -rf "${csv}" ls -t | grep -e ".info.json" | while read -r x; do - echo youtube $(jq -c '.id' "$x" | sed -e "s/\"//g") | tee -a "$archive" & - jq -c '[.upload_date, .timestamp, .uploader , .title, .webpage_url]' "$subfolder/$x" | while read -r i; do - echo "$i" | sed -e "s/^\[//g" -e "s/\]$//g" -e "s/\\\\\"/"/g" | tee -a "$csv" & - done - jq -c '[.upload_date, .timestamp]' "$subfolder/$x" | while read -r i; do - echo "$i,$x" | sed -e "s/^\[//g" -e "s/\],/,/g" -e "s/\\\\\"/"/g" | tee -a "$sortcsv" & - done - if [[ $(jobs -r -p | wc -l) -ge $(($(getconf _NPROCESSORS_ONLN) * 3 * 2 )) ]]; then - wait -n - fi + echo youtube $(jq -c '.id' "${x}" | sed -e "s/\"//g") | tee -a "${archive}" & + jq -c '[.upload_date, .timestamp, .uploader , .title, .webpage_url]' "${subfolder}/${x}" | while read -r i; do + echo "${i}" | sed -e "s/^\[//g" -e "s/\]$//g" -e "s/\\\\\"/"/g" | tee -a "${csv}" & + done + jq -c '[.upload_date, .timestamp]' "${subfolder}/${x}" | while read -r i; do + echo "${i},${x}" | sed -e "s/^\[//g" -e "s/\],/,/g" -e "s/\\\\\"/"/g" | tee -a "${sortcsv}" & + done + if [[ $(jobs -r -p | wc -l) -ge $(($(getconf _NPROCESSORS_ONLN) * 3 * 2)) ]]; then + wait -n + fi done wait -sort "$sortcsv" | uniq > "/tmp/$channel-sort-ordered.csv" -echo "{\"playlistName\":\"$channel\",\"protected\":false,\"description\":\"Videos to watch later\",\"videos\":[" > "/tmp/$channel.db" -cat "/tmp/$channel-sort-ordered.csv" | while read -r line; do - file=$(echo "$line" | cut -d ',' -f3-) - echo "$file" - jq -c "{\"videoId\": .id, \"title\": .title, \"author\": .uploader, \"authorId\": .channel_id, \"lengthSeconds\": .duration, \"published\": .epoch, \"timeAdded\": $(date +%s), \"playlistItemId\": \"$(cat /proc/sys/kernel/random/uuid)\", \"type\": \"video\"}" "$subfolder/$file" | tee -a "/tmp/$channel.db" - echo "," >> "/tmp/$channel.db" +sort "${sortcsv}" | uniq >"/tmp/${channel}-sort-ordered.csv" +echo "{\"playlistName\":\"${channel}\",\"protected\":false,\"description\":\"Videos to watch later\",\"videos\":[" >"/tmp/${channel}.db" +cat "/tmp/${channel}-sort-ordered.csv" | while read -r line; do + file=$(echo "${line}" | cut -d ',' -f3-) + echo "${file}" + jq -c "{\"videoId\": .id, \"title\": .title, \"author\": .uploader, \"authorId\": .channel_id, \"lengthSeconds\": .duration, \"published\": .epoch, \"timeAdded\": $(date +%s), \"playlistItemId\": \"$(cat /proc/sys/kernel/random/uuid)\", \"type\": \"video\"}" "${subfolder}/${file}" | tee -a "/tmp/${channel}.db" + echo "," >>"/tmp/${channel}.db" done -echo "],\"_id\":\"$channel\",\"createdAt\":$(date +%s),\"lastUpdatedAt\":$(date +%s)}" >> "/tmp/$channel.db" -rm "$json" -cat "/tmp/$channel.db" | tr '\n' '\r' | sed -e "s/,\r\]/\]/g" | tr '\r' '\n' | jq -c "." > "$json" && rm "/tmp/$channel.db" -rm "/tmp/$channel-sort-ordered.csv" -sort "$csv" | uniq > "/tmp/$channel-without-header.csv" -echo '"Upload Date", "Timestamp", "Uploader", "Title", "Webpage URL"' > "/tmp/$channel.csv" -cat "/tmp/$channel-without-header.csv" >> "/tmp/$channel.csv" -mv "/tmp/$channel.csv" "$csv" -rm "/tmp/$channel-without-header.csv" -sort "$archive" | uniq > "/tmp/$channel.txt" -mv "/tmp/$channel.txt" "$archive" +echo "],\"_id\":\"${channel}\",\"createdAt\":$(date +%s),\"lastUpdatedAt\":$(date +%s)}" >>"/tmp/${channel}.db" +rm "${json}" +cat "/tmp/${channel}.db" | grep -v -e ":[ ]*null" | tr '\n' '\r' | sed -e "s/,\r\]/\]/g" | tr '\r' '\n' | jq -c "." >"${json}" && rm "/tmp/${channel}.db" +rm "/tmp/${channel}-sort-ordered.csv" "${sortcsv}" +sort "${csv}" | uniq >"/tmp/${channel}-without-header.csv" +echo '"Upload Date", "Timestamp", "Uploader", "Title", "Webpage URL"' >"/tmp/${channel}.csv" +cat "/tmp/${channel}-without-header.csv" >>"/tmp/${channel}.csv" +mv "/tmp/${channel}.csv" "${csv}" +rm "/tmp/${channel}-without-header.csv" +sort "${archive}" | uniq >"/tmp/${channel}.txt" +mv "/tmp/${channel}.txt" "${archive}"