#!/bin/bash
#Turn a YouTube channel (or your subscription feed) into a FreeTube playlist DB and/or CSV.
#Usage: script.sh [channel] [breaktime] [sleeptime] [enabledb] [enablecsv]

#Parameters:
#1st parameter: Channel you want to turn into a playlist. Leave blank to save your subscriptions (cookie file required)
channel=${1:-"subscriptions"}
#2nd parameter: Time limit for the download. Leave blank to save all videos from the last month.
breaktime=${2:-"today-1month"}
#3rd parameter: Seconds between data requests. Decrease to make downloads faster, but your account may be temporarily blocked if you use a number too low.
sleeptime=${3:-"1.0"}
#4th parameter: Whether to enable exporting to FreeTube playlist database (1=on by default, 0=off)
enabledb=${4:-"1"}
#5th parameter: Whether to enable exporting to a CSV file (1=on by default, 0=off)
enablecsv=${5:-"1"}
#Internal variables:

#Directory this script lives in.
#Via https://stackoverflow.com/questions/59895/how-do-i-get-the-directory-where-a-bash-script-is-located-from-within-the-script
folder=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)

#Required to download your own subscriptions.
#Obtain this file through the procedure listed at
# https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp
#and place it next to your script.
cookies="${folder}/yt-cookies.txt"
#Per-channel working directory and output file paths.
subfolder="${folder}/${channel}"
archive="${subfolder}/${channel}.txt"
sortcsv="${subfolder}/${channel}-sort.csv"
csv="${subfolder}/${channel}.csv"
json="${subfolder}/${channel}.db"
#Prefer the virtualenv copies of python and yt-dlp when they exist.
python="python"
if [[ -f "/opt/venv/bin/python" ]]; then
  python="/opt/venv/bin/python"
fi
ytdl="/usr/bin/yt-dlp"
if [[ -f "/opt/venv/bin/yt-dlp" ]]; then
  ytdl="/opt/venv/bin/yt-dlp"
fi
#Create and enter the per-channel working directory.
if [[ ! -d "${subfolder}" ]]; then
  mkdir -v "${subfolder}"
fi

cd "${subfolder}" || exit

#The download archive must exist before yt-dlp appends to it.
if [[ ! -f "${archive}" ]]; then
  touch "${archive}"
fi

#Restore previously packed metadata so the run stays incremental (see final tar step).
if [[ -f "${channel}.tar.zst" ]]; then
  tar -xvp -I zstd -f "${channel}.tar.zst"
fi
#If available, you can use the cookies from your browser directly:
# --cookies-from-browser "firefox"
url="https://www.youtube.com/@${channel}"
if [[ "${channel}" = "subscriptions" ]]; then
  url="https://www.youtube.com/feed/subscriptions"
fi

#NOTE(review): this used to be `-z "${cookies}"`, which is always false because
#${cookies} is assigned a non-empty literal above, so the cookie-less branch was
#dead and a missing cookie file made yt-dlp abort. Test for the file instead.
if [[ ! -f "${cookies}" ]]; then
  #No cookie file: fetch anonymously (fetching subscriptions will not work without one).
  "${python}" "${ytdl}" "${url}" \
    --skip-download --download-archive "${archive}" \
    --dateafter "${breaktime}" \
    --extractor-args youtubetab:approximate_date \
    --break-on-reject --lazy-playlist --write-info-json \
    --sleep-requests "${sleeptime}"
else
  "${python}" "${ytdl}" "${url}" \
    --cookies "${cookies}" \
    --skip-download --download-archive "${archive}" \
    --dateafter "${breaktime}" \
    --extractor-args youtubetab:approximate_date \
    --break-on-reject --lazy-playlist --write-info-json \
    --sleep-requests "${sleeptime}"
fi
#Rebuild the CSV from scratch each run (csv is a plain file; -r was unnecessary).
rm -f "${csv}"

if [[ ! -f "${sortcsv}" ]]; then
  touch "${sortcsv}"
fi

#Process every fetched .info.json, newest first (ls -t sorts by mtime).
find . -type f -iname "*.info.json" -exec ls -t {} + | while read -r xp; do
  x="${xp##./}"
  #Record the video id in the yt-dlp download archive format ("youtube <id>").
  echo "youtube $(jq -cr '.id' "${x}")" | tee -a "${archive}" &
  if [[ ${enablecsv} = "1" ]]; then
    jq -c '[.upload_date, .timestamp, .uploader, .title, .webpage_url]' "${subfolder}/${x}" | while read -r i; do
      #Strip the JSON array brackets and unescape embedded quotes to make a CSV row.
      echo "${i}" | sed -e "s/^\[//g" -e "s/\]$//g" -e "s/\\\\\"/\"/g" | tee -a "${csv}" &
    done
  fi
  if [[ ${enablecsv} = "1" || ${enabledb} = "1" ]]; then
    #Sort key file: "upload_date,timestamp,filename" — consumed by the export stage.
    jq -c '[.upload_date, .timestamp]' "${subfolder}/${x}" | while read -r i; do
      echo "${i},${x}" | sed -e "s/^\[//g" -e "s/\],/,/g" -e "s/\\\\\"/\"/g" | tee -a "${sortcsv}" &
    done
  fi
  #Throttle background jobs to 6x the online CPU count.
  if [[ $(jobs -r -p | wc -l) -ge $(($(getconf _NPROCESSORS_ONLN) * 3 * 2)) ]]; then
    wait -n
  fi
done
wait
if [[ ${enablecsv} = "1" || ${enabledb} = "1" ]]; then
  #Order videos by upload date/timestamp, dropping duplicates.
  sort -u "${sortcsv}" >"/tmp/${channel}-sort-ordered.csv"
fi
if [[ ${enabledb} = "1" ]]; then
  #-f: the temp db may not exist yet on a first run.
  rm -f "/tmp/${channel}.db"
  #FreeTube playlist header; the videos array is filled in below.
  echo "{\"playlistName\":\"${channel}\",\"protected\":false,\"description\":\"Videos to watch later\",\"videos\":[" >"/tmp/${channel}.db"
fi
if [[ ${enablecsv} = "1" || ${enabledb} = "1" ]]; then
  while read -r line; do
    #Field 3 onward is the .info.json filename (titles may contain commas upstream).
    file=$(echo "${line}" | cut -d ',' -f3-)
    echo "${file}"
    #A purely numeric breaktime (YYYYMMDD) enables pruning of metadata older than that date.
    if [[ "${breaktime}" =~ ^[0-9]+$ ]]; then
      uploaddate=$(echo "${line}" | cut -d ',' -f1 | sed -e "s/\"//g")
      if [[ "${uploaddate}" -lt "${breaktime}" ]]; then
        echo "Video ${file} uploaded on ${uploaddate}, removing..."
        rm "${file}"
      fi
    fi
    if [[ ${enabledb} = "1" ]]; then
      if [[ -f "${file}" ]]; then
        #timeAdded is epoch milliseconds: seconds concatenated with the first 3 nanosecond digits.
        jq -c "{\"videoId\": .id, \"title\": .title, \"author\": .uploader, \"authorId\": .channel_id, \"lengthSeconds\": .duration, \"published\": (.timestamp * 1000), \"timeAdded\": $(date +%s)$(date +%N | cut -c-3), \"playlistItemId\": \"$(cat /proc/sys/kernel/random/uuid)\", \"type\": \"video\"}" "${subfolder}/${file}" | tee -a "/tmp/${channel}.db"
        echo "," >>"/tmp/${channel}.db"
      fi
    fi
  done <"/tmp/${channel}-sort-ordered.csv"
fi
if [[ ${enabledb} = "1" ]]; then
  echo "],\"_id\":\"${channel}\",\"createdAt\": $(date +%s),\"lastUpdatedAt\": $(date +%s)}" >>"/tmp/${channel}.db"
  #-f: ${json} does not exist on a first run.
  rm -f "${json}"
  #Drop entries with null fields and the trailing commas they leave behind, then
  #validate/compact the result with jq; only delete the temp db if jq succeeded.
  grep -v -e ":[ ]*null" "/tmp/${channel}.db" | tr '\n' '\r' | sed -e "s/,\r[,\r]*/,\r/g" | sed -e "s/,\r\]/\]/g" -e "s/\[\r,/\[/g" | tr '\r' '\n' | jq -c . >"${json}" && rm "/tmp/${channel}.db"
fi
if [[ ${enablecsv} = "1" || ${enabledb} = "1" ]]; then
  rm -f "/tmp/${channel}-sort-ordered.csv" "${sortcsv}"
fi
if [[ ${enablecsv} = "1" ]]; then
  #Deduplicate rows, then prepend the CSV header.
  sort -u "${csv}" >"/tmp/${channel}-without-header.csv"
  echo '"Upload Date", "Timestamp", "Uploader", "Title", "Webpage URL"' >"/tmp/${channel}.csv"
  cat "/tmp/${channel}-without-header.csv" >>"/tmp/${channel}.csv"
  mv "/tmp/${channel}.csv" "${csv}"
  rm "/tmp/${channel}-without-header.csv"
fi
#Deduplicate the download archive in place.
sort -u "${archive}" >"/tmp/${channel}.txt"
mv "/tmp/${channel}.txt" "${archive}"

#Pack the metadata away; delete the loose .info.json files only if the tar succeeded.
tar -cvp -I zstd -f "${channel}.tar.zst" ./*.info.json && rm ./*.info.json