feat: Add the ability to split CSV and DB generation
This commit is contained in:
parent
4b3c32e37b
commit
424f040158
2 changed files with 65 additions and 30 deletions
3
copy.sh
3
copy.sh
|
@ -1,4 +1,7 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
#Via https://stackoverflow.com/questions/59895/how-do-i-get-the-directory-where-a-bash-script-is-located-from-within-the-script
|
||||||
|
folder=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
|
||||||
|
cd "${folder}" || exit
|
||||||
for i in ./*.sh; do
|
for i in ./*.sh; do
|
||||||
shfmt -w "${i}"
|
shfmt -w "${i}"
|
||||||
shellcheck -o all -e SC2312 -f diff "${i}" | patch -p1
|
shellcheck -o all -e SC2312 -f diff "${i}" | patch -p1
|
||||||
|
|
|
@ -6,10 +6,13 @@ channel=${1:-"subscriptions"}
|
||||||
breaktime=${2:-"today-1month"}
|
breaktime=${2:-"today-1month"}
|
||||||
#3rd parameter: Seconds between data requests. Decrease to make downloads faster, but your account may be temporarily blocked if you use a number too low.
|
#3rd parameter: Seconds between data requests. Decrease to make downloads faster, but your account may be temporarily blocked if you use a number too low.
|
||||||
sleeptime=${3:-"1.0"}
|
sleeptime=${3:-"1.0"}
|
||||||
|
#4th parameter: Whether to enable exporting to a FreeTube playlist database (1=on by default, 0=off)
|
||||||
|
enabledb=${4:-"1"}
|
||||||
|
#5th parameter: Whether to enable exporting to a CSV file (1=on by default, 0=off)
|
||||||
|
enablecsv=${5:-"1"}
|
||||||
#Internal variables:
|
#Internal variables:
|
||||||
#Via https://stackoverflow.com/questions/59895/how-do-i-get-the-directory-where-a-bash-script-is-located-from-within-the-script
|
#Via https://stackoverflow.com/questions/59895/how-do-i-get-the-directory-where-a-bash-script-is-located-from-within-the-script
|
||||||
folder=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
|
folder=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
|
||||||
echo "${folder}"
|
|
||||||
#Required to download your own subscriptions.
|
#Required to download your own subscriptions.
|
||||||
#Obtain this file through the procedure listed at
|
#Obtain this file through the procedure listed at
|
||||||
# https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp
|
# https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp
|
||||||
|
@ -32,6 +35,9 @@ if [[ ! -d "${subfolder}" ]]; then
|
||||||
mkdir -v "${subfolder}"
|
mkdir -v "${subfolder}"
|
||||||
fi
|
fi
|
||||||
cd "${subfolder}" || exit
|
cd "${subfolder}" || exit
|
||||||
|
if [[ ! -f "${archive}" ]]; then
|
||||||
|
touch "${archive}"
|
||||||
|
fi
|
||||||
if [[ -f "${channel}.tar.zst" ]]; then
|
if [[ -f "${channel}.tar.zst" ]]; then
|
||||||
tar -xvp -I zstd -f "${channel}.tar.zst"
|
tar -xvp -I zstd -f "${channel}.tar.zst"
|
||||||
fi
|
fi
|
||||||
|
@ -41,7 +47,7 @@ url="https://www.youtube.com/@${channel}"
|
||||||
if [[ "${channel}" = "subscriptions" ]]; then
|
if [[ "${channel}" = "subscriptions" ]]; then
|
||||||
url="https://www.youtube.com/feed/subscriptions"
|
url="https://www.youtube.com/feed/subscriptions"
|
||||||
fi
|
fi
|
||||||
if [[ -z "${cookies}" && ${channel} = "subscriptions" ]]; then
|
if [[ -z "${cookies}" && "${channel}" = "subscriptions" ]]; then
|
||||||
"${python}" "${ytdl}" "${url}" \
|
"${python}" "${ytdl}" "${url}" \
|
||||||
--skip-download --download-archive "${archive}" \
|
--skip-download --download-archive "${archive}" \
|
||||||
--dateafter "${breaktime}" \
|
--dateafter "${breaktime}" \
|
||||||
|
@ -58,42 +64,68 @@ else
|
||||||
--sleep-requests "${sleeptime}"
|
--sleep-requests "${sleeptime}"
|
||||||
fi
|
fi
|
||||||
rm -rf "${csv}"
|
rm -rf "${csv}"
|
||||||
#ls -t | grep -e ".info.json" | while read -r x; do
|
if [[ ! -f "${sortcsv}" ]]; then
|
||||||
|
touch "${sortcsv}"
|
||||||
|
fi
|
||||||
find . -type f -iname "*.info.json" -exec ls -t {} + | while read -r xp; do
|
find . -type f -iname "*.info.json" -exec ls -t {} + | while read -r xp; do
|
||||||
x="${xp##./}"
|
x="${xp##./}"
|
||||||
#echo youtube $(jq -c '.id' "${x}" | sed -e "s/\"//g") | tee -a "${archive}" &
|
|
||||||
echo "youtube $(jq -cr '.id' "${x}")" | tee -a "${archive}" &
|
echo "youtube $(jq -cr '.id' "${x}")" | tee -a "${archive}" &
|
||||||
jq -c '[.upload_date, .timestamp, .uploader , .title, .webpage_url]' "${subfolder}/${x}" | while read -r i; do
|
if [[ ${enablecsv} = "1" ]]; then
|
||||||
echo "${i}" | sed -e "s/^\[//g" -e "s/\]$//g" -e "s/\\\\\"/"/g" | tee -a "${csv}" &
|
jq -c '[.upload_date, .timestamp, .uploader , .title, .webpage_url]' "${subfolder}/${x}" | while read -r i; do
|
||||||
done
|
echo "${i}" | sed -e "s/^\[//g" -e "s/\]$//g" -e "s/\\\\\"/"/g" | tee -a "${csv}" &
|
||||||
jq -c '[.upload_date, .timestamp]' "${subfolder}/${x}" | while read -r i; do
|
done
|
||||||
echo "${i},${x}" | sed -e "s/^\[//g" -e "s/\],/,/g" -e "s/\\\\\"/"/g" | tee -a "${sortcsv}" &
|
fi
|
||||||
done
|
if [[ ${enablecsv} = "1" || ${enabledb} = "1" ]]; then
|
||||||
|
jq -c '[.upload_date, .timestamp]' "${subfolder}/${x}" | while read -r i; do
|
||||||
|
echo "${i},${x}" | sed -e "s/^\[//g" -e "s/\],/,/g" -e "s/\\\\\"/"/g" | tee -a "${sortcsv}" &
|
||||||
|
done
|
||||||
|
fi
|
||||||
if [[ $(jobs -r -p | wc -l) -ge $(($(getconf _NPROCESSORS_ONLN) * 3 * 2)) ]]; then
|
if [[ $(jobs -r -p | wc -l) -ge $(($(getconf _NPROCESSORS_ONLN) * 3 * 2)) ]]; then
|
||||||
wait -n
|
wait -n
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
wait
|
wait
|
||||||
sort "${sortcsv}" | uniq >"/tmp/${channel}-sort-ordered.csv"
|
if [[ ${enablecsv} = "1" || ${enabledb} = "1" ]]; then
|
||||||
echo "{\"playlistName\":\"${channel}\",\"protected\":false,\"description\":\"Videos to watch later\",\"videos\":[" >"/tmp/${channel}.db"
|
sort "${sortcsv}" | uniq >"/tmp/${channel}-sort-ordered.csv"
|
||||||
#cat "/tmp/${channel}-sort-ordered.csv" | while read -r line; do
|
fi
|
||||||
while read -r line; do
|
if [[ ${enabledb} = "1" ]]; then
|
||||||
file=$(echo "${line}" | cut -d ',' -f3-)
|
rm "/tmp/${channel}.db"
|
||||||
echo "${file}"
|
echo "{\"playlistName\":\"${channel}\",\"protected\":false,\"description\":\"Videos to watch later\",\"videos\":[" >"/tmp/${channel}.db"
|
||||||
jq -c "{\"videoId\": .id, \"title\": .title, \"author\": .uploader, \"authorId\": .channel_id, \
|
fi
|
||||||
\"lengthSeconds\": .duration, \"published\": ( .timestamp * 1000 ) , \"timeAdded\": $(date +%s)$(date +%N | cut -c-3), \
|
if [[ ${enablecsv} = "1" || ${enabledb} = "1" ]]; then
|
||||||
\"playlistItemId\": \"$(cat /proc/sys/kernel/random/uuid)\", \"type\": \"video\"}" "${subfolder}/${file}" | tee -a "/tmp/${channel}.db"
|
while read -r line; do
|
||||||
echo "," >>"/tmp/${channel}.db"
|
file=$(echo "${line}" | cut -d ',' -f3-)
|
||||||
done <"/tmp/${channel}-sort-ordered.csv"
|
echo "${file}"
|
||||||
echo "],\"_id\":\"${channel}\",\"createdAt\":$(date +%s),\"lastUpdatedAt\":$(date +%s)}" >>"/tmp/${channel}.db"
|
if [[ "${breaktime}" =~ ^[0-9]+$ ]]; then
|
||||||
rm "${json}"
|
uploaddate=$(echo "${line}" | cut -d ',' -f1 | sed -e "s/\"//g")
|
||||||
grep -v -e ":[ ]*null" "/tmp/${channel}.db" | tr '\n' '\r' | sed -e "s/,\r[,\r]*/,\r/g" | sed -e "s/,\r\]/\]/g" | tr '\r' '\n' | jq -c . >"${json}" && rm "/tmp/${channel}.db"
|
if [[ "${uploaddate}" -lt "${breaktime}" ]]; then
|
||||||
rm "/tmp/${channel}-sort-ordered.csv" "${sortcsv}"
|
echo "Video ${file} uploaded on ${uploaddate}, removing..."
|
||||||
sort "${csv}" | uniq >"/tmp/${channel}-without-header.csv"
|
rm "${file}"
|
||||||
echo '"Upload Date", "Timestamp", "Uploader", "Title", "Webpage URL"' >"/tmp/${channel}.csv"
|
fi
|
||||||
cat "/tmp/${channel}-without-header.csv" >>"/tmp/${channel}.csv"
|
fi
|
||||||
mv "/tmp/${channel}.csv" "${csv}"
|
if [[ ${enabledb} = "1" ]]; then
|
||||||
rm "/tmp/${channel}-without-header.csv"
|
if [[ -f "${file}" ]]; then
|
||||||
|
jq -c "{\"videoId\": .id, \"title\": .title, \"author\": .uploader, \"authorId\": .channel_id, \"lengthSeconds\": .duration, \"published\": ( .timestamp * 1000 ), \"timeAdded\": $(date +%s)$(date +%N | cut -c-3), \"playlistItemId\": \"$(cat /proc/sys/kernel/random/uuid)\", \"type\": \"video\"}" "${subfolder}/${file}" | tee -a "/tmp/${channel}.db"
|
||||||
|
echo "," >>"/tmp/${channel}.db"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
done <"/tmp/${channel}-sort-ordered.csv"
|
||||||
|
fi
|
||||||
|
if [[ ${enabledb} = "1" ]]; then
|
||||||
|
echo "],\"_id\":\"${channel}\",\"createdAt\":$(date +%s),\"lastUpdatedAt\":$(date +%s)}" >>"/tmp/${channel}.db"
|
||||||
|
rm "${json}"
|
||||||
|
grep -v -e ":[ ]*null" "/tmp/${channel}.db" | tr '\n' '\r' | sed -e "s/,\r[,\r]*/,\r/g" | sed -e "s/,\r\]/\]/g" -e "s/\[\r,/\[/g" | tr '\r' '\n' | jq -c . >"${json}" && rm "/tmp/${channel}.db"
|
||||||
|
fi
|
||||||
|
if [[ ${enablecsv} = "1" || ${enabledb} = "1" ]]; then
|
||||||
|
rm "/tmp/${channel}-sort-ordered.csv" "${sortcsv}"
|
||||||
|
fi
|
||||||
|
if [[ ${enablecsv} = "1" ]]; then
|
||||||
|
sort "${csv}" | uniq >"/tmp/${channel}-without-header.csv"
|
||||||
|
echo '"Upload Date", "Timestamp", "Uploader", "Title", "Webpage URL"' >"/tmp/${channel}.csv"
|
||||||
|
cat "/tmp/${channel}-without-header.csv" >>"/tmp/${channel}.csv"
|
||||||
|
mv "/tmp/${channel}.csv" "${csv}"
|
||||||
|
rm "/tmp/${channel}-without-header.csv"
|
||||||
|
fi
|
||||||
sort "${archive}" | uniq >"/tmp/${channel}.txt"
|
sort "${archive}" | uniq >"/tmp/${channel}.txt"
|
||||||
mv "/tmp/${channel}.txt" "${archive}"
|
mv "/tmp/${channel}.txt" "${archive}"
|
||||||
tar -cvp -I zstd -f "${channel}.tar.zst" ./*.info.json && rm ./*.info.json
|
tar -cvp -I zstd -f "${channel}.tar.zst" ./*.info.json && rm ./*.info.json
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue