From 2c265e5f4c2fc02c74e0ae6e5e36be4feaa817e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Sol=C3=ADs?= Date: Wed, 11 Jun 2025 00:11:24 +0000 Subject: [PATCH] feat: Store temporary files in /tmp, add fixes --- youtube-download-channel.sh | 92 +++++++++++++++++++------------------ 1 file changed, 47 insertions(+), 45 deletions(-) diff --git a/youtube-download-channel.sh b/youtube-download-channel.sh index ad40fd2..bded69c 100755 --- a/youtube-download-channel.sh +++ b/youtube-download-channel.sh @@ -19,6 +19,7 @@ folder=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) #and place it next to your script. cookies="${folder}/yt-cookies.txt" subfolder="${folder}/subscriptions" +temporary="/tmp/subscriptions/${channel}" archive="${subfolder}/${channel}.txt" sortcsv="${subfolder}/${channel}-sort.csv" csv="${subfolder}/${channel}.csv" @@ -34,15 +35,21 @@ fi if [[ ! -d "${subfolder}" ]]; then mkdir -v "${subfolder}" fi -cd "${subfolder}" || exit +if [[ ! -d "/tmp/subscriptions" ]]; then + mkdir -v "/tmp/subscriptions" +fi +if [[ ! -d "${temporary}" ]]; then + mkdir -v "${temporary}" +fi +cd "${temporary}" || exit if [[ ! -f "${archive}" ]]; then touch "${archive}" fi -if [[ -f "${channel}.tar.zst" ]]; then +if [[ -f "${subfolder}/${channel}.tar.zst" ]]; then if [[ "${channel}" = "subscriptions" ]]; then - find . -iname "*.tar.zst" | while read -r c; do tar -xvp -I zstd -f "${c}"; done + find "${subfolder}" -iname "*.tar.zst" | while read -r c; do tar -xvp -I zstd -f "${c}"; done else - tar -xvp -I zstd -f "${channel}.tar.zst" + tar -xvp -I zstd -f "${subfolder}/${channel}.tar.zst" fi fi url="https://www.youtube.com/@${channel}" @@ -50,13 +57,6 @@ if [[ "${channel}" = "subscriptions" ]]; then url="https://www.youtube.com/feed/subscriptions" fi if [[ -f "${cookies}" && "${channel}" = "subscriptions" ]]; then - "${python}" "${ytdl}" "${url}" \ - --extractor-args "youtubetab:approximate_date" \ - --skip-download --download-archive "${archive}" \ - --dateafter "${breaktime}" \ - --break-on-reject --lazy-playlist --write-info-json \ - --sleep-requests "${sleeptime}" -else #If available, you can use the cookies from your browser directly. Substitute # --cookies "${cookies}" #for the below, substituting for your browser of choice: @@ -75,42 +75,50 @@ else --dateafter "${breaktime}" \ --break-on-reject --lazy-playlist --write-info-json \ --sleep-requests "${sleeptime}" +else + "${python}" "${ytdl}" "${url}" \ + --extractor-args "youtubetab:approximate_date" \ + --skip-download --download-archive "${archive}" \ + --dateafter "${breaktime}" \ + --break-on-reject --lazy-playlist --write-info-json \ + --sleep-requests "${sleeptime}" fi if [[ -f "${csv}" ]]; then rm -rf "${csv}" fi -if [[ ! -f "${sortcsv}" ]]; then - touch "${sortcsv}" +touch "${csv}" +if [[ -f "${sortcsv}" ]]; then + rm -rf "${sortcsv}" fi +touch "${sortcsv}" breaktime_timestamp=$(date -d"${breaktime}" +"%s") -find . -type f -iname "*.info.json" -exec ls -t {} + | while read -r xp; do +count=0 +total=$(find "${temporary}" -type f -iname "*.info.json" | wc -l) +find "${temporary}" -type f -iname "*.info.json" | while read -r x; do + count=$((count + 1)) ( - x="${xp##./}" - if [[ -f "${subfolder}/${x}" && "${channel}" != "subscriptions" && $(jq -rc ".uploader_id" "${subfolder}/${x}") != "@${channel}" ]]; then - echo "Video ${x} not uploaded from ${channel}, removing..." && rm "${subfolder}/${x}" + if [[ -f "${x}" && "${channel}" != "subscriptions" && $(jq -rc ".uploader_id" "${x}") != "@${channel}" ]]; then + echo "Video ${x} not uploaded from ${channel}, removing..." && rm "${x}" fi - if [[ -f "${subfolder}/${x}" && "${breaktime}" =~ ^[0-9]+$ ]]; then - file_timestamp=$(jq -rc '.timestamp' "${subfolder}/${x}") + if [[ -f "${x}" && "${breaktime}" =~ ^[0-9]+$ ]]; then + file_timestamp=$(jq -rc '.timestamp' "${x}") if [[ "${breaktime_timestamp}" -ge "${file_timestamp}" ]]; then - echo "Video ${x} uploaded before ${breaktime}, removing..." && rm "${subfolder}/${x}" + echo "Video ${x} uploaded before ${breaktime}, removing..." && rm "${x}" fi fi - if [[ -f "${subfolder}/${x}" ]]; then - file_timestamp=$(jq -rc '.timestamp' "${subfolder}/${x}") - touch "${subfolder}/${x}" -d "@${file_timestamp}" - fi - if [[ -f "${subfolder}/${x}" ]]; then - echo "youtube $(jq -cr '.id' "${subfolder}/${x}")" | tee -a "${archive}" & + if [[ -f "${x}" ]]; then + echo "youtube $(jq -cr '.id' "${x}")" >>"${archive}" if [[ ${enablecsv} = "1" ]]; then - jq -c '[.upload_date, .timestamp, .uploader , .title, .webpage_url]' "${subfolder}/${x}" | while read -r i; do - echo "${i}" | sed -e "s/^\[//g" -e "s/\]$//g" -e "s/\\\\\"/"/g" | tee -a "${csv}" + jq -c '[.upload_date, .timestamp, .uploader , .title, .webpage_url]' "${x}" | while read -r i; do + echo "${i}" | sed -e "s/^\[//g" -e "s/\]$//g" -e "s/\\\\\"/"/g" >>"${csv}" done fi if [[ ${enablecsv} = "1" || ${enabledb} = "1" ]]; then - jq -c '[.upload_date, .timestamp]' "${subfolder}/${x}" | while read -r i; do - echo "${i},${x}" | sed -e "s/^\[//g" -e "s/\],/,/g" -e "s/\\\\\"/"/g" | tee -a "${sortcsv}" + jq -c '[.upload_date, .timestamp]' "${x}" | while read -r i; do + echo "${i},${x##*/}" | sed -e "s/^\[//g" -e "s/\],/,/g" -e "s/\\\\\"/"/g" >>"${sortcsv}" done fi + echo "${count}/${total} ${x}" fi ) & if [[ $(jobs -r -p | wc -l) -ge $(getconf _NPROCESSORS_ONLN) ]]; then @@ -127,22 +135,15 @@ if [[ ${enabledb} = "1" ]]; then rm "/tmp/${channel}.db" fi echo "{\"playlistName\":\"${channel}\",\"protected\":false,\"description\":\"Videos from ${channel} to watch later\",\"videos\":[" >"/tmp/${channel}.db" -fi -if [[ ${enablecsv} = "1" || ${enabledb} = "1" ]]; then + count=0 + total=$(wc -l <"/tmp/${channel}-sort-ordered.csv") while read -r line; do + count=$((count + 1)) file=$(echo "${line}" | cut -d ',' -f3-) - #if [[ "${breaktime}" =~ ^[0-9]+$ ]]; then - # uploaddate=$(echo "${line}" | cut -d ',' -f1 | sed -e "s/\"//g") - # if [[ "${uploaddate}" -lt "${breaktime}" ]]; then - # echo "Video ${file} uploaded on ${uploaddate}, removing..." - # rm "${file}" - # fi - #fi - if [[ ${enabledb} = "1" ]]; then - if [[ -f "${file}" ]]; then - jq -c "{\"videoId\": .id, \"title\": .title, \"author\": .uploader, \"authorId\": .channel_id, \"lengthSeconds\": .duration, \"published\": ( .timestamp * 1000 ), \"timeAdded\": $(date +%s)$(date +%N | cut -c-3), \"playlistItemId\": \"$(cat /proc/sys/kernel/random/uuid)\", \"type\": \"video\"}" "${subfolder}/${file}" | tee -a "/tmp/${channel}.db" - echo "," >>"/tmp/${channel}.db" - fi + if [[ -f "${file}" ]]; then + jq -c "{\"videoId\": .id, \"title\": .title, \"author\": .uploader, \"authorId\": .channel_id, \"lengthSeconds\": .duration, \"published\": ( .timestamp * 1000 ), \"timeAdded\": $(date +%s)$(date +%N | cut -c-3), \"playlistItemId\": \"$(cat /proc/sys/kernel/random/uuid)\", \"type\": \"video\"}" "${temporary}/${file}" >>"/tmp/${channel}.db" + echo "," >>"/tmp/${channel}.db" + echo "${count}/${total} ${file}" fi done <"/tmp/${channel}-sort-ordered.csv" fi @@ -163,4 +164,5 @@ if [[ ${enablecsv} = "1" ]]; then fi sort "${archive}" | uniq >"/tmp/${channel}.txt" mv "/tmp/${channel}.txt" "${archive}" -tar -cvp -I zstd -f "${channel}.tar.zst" -- *.info.json && rm -- *.info.json +cd "${temporary}" || exit +tar -cvp -I "zstd -T0" -f "${subfolder}/${channel}.tar.zst" -- *.info.json && rm -- *.info.json && rm -rf "${temporary}"