💯
TIL
  • TIL
  • Different options for doing LLM inference
  • Generate Subtitles locally using whisper
  • Compressing images from the CLI
  • Running parallel Ollama inference
  • Fetching Train availability in IRCTC
  • Changing Git Commit Email Address
  • Clone any website to local using Httrack
  • Adding Drag drop from terminal in Mac OS
  • Minimum Effort
    • YouTube Transcript Formatter
    • Virtual Talking AI companion to help out in tough times
    • Super fast voice based translation system
  • Proud Memories
    • Conducting an LLM Workshop
    • Honours degree from Education Minister
    • GIDS Conference Participant from TW
    • Speaking at MEC.Conf
    • Being a judge at Hack4Tomorrow hackathon
Powered by GitBook
On this page

Was this helpful?

Edit on GitHub
  1. Minimum Effort

YouTube Transcript Formatter

Bash script to download a YouTube transcript and format it into a prompt you can pass to an LLM

Previous: Adding Drag drop from terminal in Mac OS | Next: Virtual Talking AI companion to help out in tough times

Last updated 9 months ago

Was this helpful?

Demo here:

#!/bin/bash

# YouTube Transcript Extractor
# This script downloads and extracts the transcript from a YouTube video,
# calculates some statistics, and copies the result to the clipboard.

# Instructions:
# 1. Ensure you have the following dependencies installed:
#    - yt-dlp: https://github.com/yt-dlp/yt-dlp#installation
#    - jq: https://stedolan.github.io/jq/download/
#    - xclip (for Linux) or pbcopy (for macOS, comes pre-installed)
# 2. Make this script executable: chmod +x youtube_transcript_extractor.sh
# 3. Run the script with a YouTube URL: ./youtube_transcript_extractor.sh "https://www.youtube.com/watch?v=VIDEO_ID"

# Requirements:
# - Linux or macOS
# - Bash shell
# - yt-dlp
# - jq
# - xclip (Linux) or pbcopy (macOS)

# Copy standard input to the system clipboard.
# Uses pbcopy when it is available, otherwise xclip (clipboard selection).
# Arguments: none; the data to copy is read from standard input.
# Outputs:   an error message on stderr when no clipboard tool is found.
# Returns:   the exit status of the clipboard tool, or 1 when neither
#            pbcopy nor xclip can be found.
copy_to_clipboard() {
    if command -v pbcopy > /dev/null; then
        pbcopy
    elif command -v xclip > /dev/null; then
        xclip -selection clipboard
    else
        # Diagnostics go to stderr so they never mix with piped/captured stdout.
        echo "Unable to copy to clipboard: neither pbcopy nor xclip is available." >&2
        return 1
    fi
}

# Format a duration given in seconds as HH:MM:SS.
# Arguments: $1 - duration in seconds. A fractional part is truncated
#                 (12.9 -> 12); an empty or non-numeric value counts as 0.
# Outputs:   the formatted duration on stdout, without a trailing newline.
#            Hours are not capped, so 360000 prints as 100:00:00.
format_duration() {
    local total_seconds=${1:-0}

    # Keep only the whole-second part; shell arithmetic cannot handle "12.5".
    total_seconds=${total_seconds%%.*}

    # Anything that is not a plain run of digits is treated as zero.
    if [[ ! $total_seconds =~ ^[0-9]+$ ]]; then
        total_seconds=0
    fi

    # Force base 10 so a zero-padded value such as "08" is not read as octal.
    total_seconds=$((10#$total_seconds))

    local hours=$((total_seconds / 3600))
    local minutes=$(( (total_seconds % 3600) / 60 ))
    local seconds=$((total_seconds % 60))
    printf '%02d:%02d:%02d' "$hours" "$minutes" "$seconds"
}

# Check if a URL is provided
if [ $# -eq 0 ]; then
    echo "Please provide a YouTube video URL as an argument." >&2
    echo "Usage: $0 <YouTube_URL>" >&2
    exit 1
fi

# YouTube video URL
URL="$1"

# Fail early with a clear message if a required external tool is missing.
for tool in yt-dlp jq; do
    if ! command -v "$tool" > /dev/null; then
        echo "Missing required dependency: $tool" >&2
        exit 1
    fi
done

# Intermediate files that yt-dlp writes into the current directory.
SUBTITLE_FILE="transcript.en.srt"
INFO_FILE="transcript.info.json"

# Remove the intermediate files on every exit path, including early failures.
cleanup() {
    rm -f -- "$SUBTITLE_FILE" "$INFO_FILE"
}
trap cleanup EXIT

# Download subtitles and metadata.
# "--" stops option parsing so a URL/ID starting with "-" is not read as a flag.
if ! yt-dlp --skip-download --write-subs --write-auto-subs --sub-lang en \
        --sub-format ttml --convert-subs srt \
        --output "transcript.%(ext)s" --write-info-json -- "$URL"; then
    echo "yt-dlp failed to download subtitles/metadata for: $URL" >&2
    exit 1
fi

# yt-dlp may finish without writing subtitles (e.g. none exist in English),
# so verify the files the rest of the script depends on.
if [ ! -s "$SUBTITLE_FILE" ]; then
    echo "No English subtitles were found for this video." >&2
    exit 1
fi
if [ ! -s "$INFO_FILE" ]; then
    echo "Video metadata ($INFO_FILE) was not written." >&2
    exit 1
fi

# Create the final output file from the SRT cues:
#   - drop each timing line together with the cue index line preceding it
#     (an index of any length, so cues numbered 1000+ are removed as well),
#   - strip markup tags, and
#   - drop blank lines.
# A digits-only line that is NOT followed by a timing line is transcript text
# (e.g. "42") and is kept.
if ! awk '
    function flush_held() {
        if (have_held) {
            print held
            have_held = 0
        }
    }
    {
        sub(/\r$/, "")    # tolerate CRLF line endings
        if ($0 ~ /^[0-9][0-9]:[0-9][0-9]:[0-9][0-9][,.][0-9][0-9][0-9] --> [0-9][0-9]:[0-9][0-9]:[0-9][0-9][,.][0-9][0-9][0-9]/) {
            have_held = 0  # timing line: discard it and the held cue index
            next
        }
        flush_held()
        if ($0 ~ /^[0-9]+$/) {
            held = $0      # possibly a cue index; decided by the next line
            have_held = 1
            next
        }
        gsub(/<[^>]*>/, "")
        if ($0 !~ /^[ \t]*$/) {
            print
        }
    }
    END { flush_held() }
' "$SUBTITLE_FILE" > output.txt; then
    echo "Failed to clean up the subtitle file." >&2
    exit 1
fi

echo "Transcript extraction complete. The result is saved in output.txt"

# Calculate word count and line count.
# tr strips the leading padding that some wc implementations (BSD/macOS) emit.
WORD_COUNT=$(wc -w < output.txt | tr -d '[:space:]')
LINE_COUNT=$(wc -l < output.txt | tr -d '[:space:]')

# Extract metadata
if ! TITLE=$(jq -r '.title' "$INFO_FILE"); then
    echo "Failed to read video metadata from $INFO_FILE." >&2
    exit 1
fi
# A missing duration becomes 0 and a fractional one is floored, so the value
# is always a whole number of seconds.
DURATION=$(jq -r '(.duration // 0) | floor' "$INFO_FILE")
UPLOAD_DATE=$(jq -r '.upload_date' "$INFO_FILE")

# Format duration
DURATION_FORMATTED=$(format_duration "$DURATION")

# Prepare metadata string (upload_date is sliced as YYYYMMDD)
METADATA="Video Title: $TITLE
Duration: $DURATION_FORMATTED
Upload Date: ${UPLOAD_DATE:0:4}-${UPLOAD_DATE:4:2}-${UPLOAD_DATE:6:2}
Word Count: $WORD_COUNT
Line Count: $LINE_COUNT"

echo "Transcript Statistics and Metadata:"
echo "$METADATA"

# Copy the transcript to clipboard with the specified format and metadata.
# The metadata is part of the payload so that it matches the status messages
# below, which report that both transcript and metadata were copied.
# The pipeline's status is that of copy_to_clipboard, its last command.
if {
    echo "The following youtube video transcript:"
    echo ""
    echo "$METADATA"
    echo ""
    cat output.txt
    echo ""
} | copy_to_clipboard; then
    echo "Transcript and metadata have been copied to clipboard."
else
    echo "Transcript and metadata were not copied to clipboard due to an error."
fi

# The intermediate subtitle and metadata files are removed by the EXIT trap.

https://x.com/aldrinjenson/status/1815007108934332486