YouTube Transcript
==================
.. contents::
::
    import streamlit as st
    import os
    from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
    from openai import OpenAI
Configure the page and print a stylized banner to the console when the application starts. ``st.cache_data`` caches the function's return value, so the banner is printed once per process rather than on every Streamlit rerun.
::
    st.set_page_config(
        page_title="You-T"
    )

    @st.cache_data
    def print_banner():
        # Cached so the banner prints only once per process,
        # not on every Streamlit rerun.
        print("""
                )
             ( /(      *   )
             )\\())   ( `  ) /(
            ((_)\\  (  ))\\ ___ ( )(_))
            __ ((_) )\\ /((_)|___|(_(_())
            \\ \\ / /((_)(_))(    |_   _|
             \\ V // _ \\| || |     | |
              |_| \\___/ \\_,_|     |_|
        """)
        return 1  # return a value so st.cache_data has something to cache

    print_banner()
Get transcript from YouTube URL
-------------------------------
::
    youtube_url = st.text_input("YouTube URL")
    # ISO 639-1 codes: Russian, English, Belarusian.
    lang = st.radio("Language", ["ru", "en", "be"], horizontal=True)
    transcript_file = "transcript.txt"

    def transcript_as_text(url: str, lang: str = 'en') -> str:
        """
        Returns one plain-text string containing the caption lines.
        Falls back to auto-generated captions if no manual track exists.
        """
        # Naive parsing: assumes a standard watch?v=... URL.
        video_id = url.split("v=")[-1].split("&")[0]
        try:
            # get_transcript tries manual captions first, then auto-generated.
            transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=[lang])
            return " ".join(chunk['text'] for chunk in transcript)
        except TranscriptsDisabled:
            return "[Captions are disabled on this video.]"
        except NoTranscriptFound:
            return "[No transcript available in the requested language.]"

    def max_len(text: str, k: int) -> str:
        """Truncate text to roughly k characters, appending an ellipsis when cut."""
        if len(text) <= k:
            return text
        return text[:k] + '...'
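The split-based parsing above breaks on ``youtu.be`` short links and on URLs
without a ``v=`` parameter. A minimal sketch of a more robust extractor using
only the standard library (the helper name ``extract_video_id`` is
illustrative, not part of the app):

::

    from urllib.parse import urlparse, parse_qs

    def extract_video_id(url: str) -> str:
        """Handle watch?v=..., youtu.be/... and /embed/... URL shapes."""
        parsed = urlparse(url)
        if parsed.hostname == "youtu.be":
            # Short links carry the ID as the whole path.
            return parsed.path.lstrip("/")
        if parsed.path.startswith("/embed/"):
            return parsed.path.split("/embed/")[1]
        # Standard watch URLs keep the ID in the v= query parameter.
        return parse_qs(parsed.query).get("v", [""])[0]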
if st.button("Save Transcript", use_container_width=True):
transcript = transcript_as_text(youtube_url, lang)
st.text_area("Transcript", transcript, height=300)
with open(transcript_file, 'w', encoding='utf-8') as file:
file.write(transcript)
st.write(f"Transcript saved: `{transcript_file}`")
Get transcript summary
----------------------
::
    prompt_summary = """
    You will be given the transcript of a video.
    Your task: prepare a concise summary.
    """

    # Prices are assumed to be USD per 1M input tokens;
    # the Google models are listed at their free tier here.
    llm_prices = {
        "gemini-2.5-flash-preview-05-20": 0.0,
        "gemini-2.0-flash": 0.0,
        "gemma-3-27b-it": 0.0,
        "gpt-4.1-mini": 0.4,
        "gpt-4.1-nano": 0.1,
        "gpt-4.1": 2.0,
        "gpt-4o-mini": 0.15,
        "o4-mini": 1.10,
        "o3-mini": 1.10,
        "gpt-4o": 2.5,
        "o1": 15.0,
    }
    llm_models = list(llm_prices.keys())
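The chat completions response exposes token counts via ``response.usage``, so
the price table can drive a rough cost estimate. A sketch, assuming the table
values are USD per 1M input tokens (output-token pricing, which differs per
model, is ignored here):

::

    def estimate_input_cost(model: str, prompt_tokens: int) -> float:
        """Rough input-side cost in USD for a single call."""
        price_per_million = llm_prices.get(model, 0.0)
        return prompt_tokens / 1_000_000 * price_per_million

    # Example: a 12,000-token transcript sent to gpt-4.1-mini costs
    # roughly 12_000 / 1e6 * 0.4 = $0.0048 on the input side.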
MultiModel
----------
::
    class MultiModel:
        """
        Wrapper for multiple LLM APIs (OpenAI, Gemini, Gemma).
        """
        def __init__(self, llm_model: str, llm_temperature: float = 0.1) -> None:
            self.llm_model = llm_model
            self.llm_temperature = llm_temperature
            vendor = self._get_vendor(llm_model)
            if vendor == "google":
                # Google models are reached through their OpenAI-compatible endpoint.
                self.client = OpenAI(
                    api_key=os.getenv("GEMINI_API_KEY"),
                    base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
                )
            else:
                self.client = OpenAI()  # reads OPENAI_API_KEY from the environment

        @staticmethod
        def _get_vendor(llm_model: str) -> str:
            """
            Determines the vendor based on the model name.
            """
            if llm_model.lower().startswith(("gemini", "gemma")):
                return "google"
            return "openai"

        def _call_gpt(self, prompt: str, text: str):
            """
            Calls a GPT-like model with standard message format and temperature.
            """
            messages = [
                {"role": "system", "content": prompt},
                {"role": "user", "content": text},
            ]
            response = self.client.chat.completions.create(
                model=self.llm_model,
                messages=messages,
                temperature=self.llm_temperature,
            )
            return response.choices[0]

        def _call_gemma(self, prompt: str, text: str):
            """
            Calls a Gemma model, which takes no system role here: the prompt
            is prepended to the user message instead.
            """
            messages = [
                {"role": "user", "content": f"{prompt}\n{text}"},
            ]
            response = self.client.chat.completions.create(
                model=self.llm_model,
                messages=messages,
                temperature=self.llm_temperature,
            )
            return response.choices[0]

        def _call_o_model(self, prompt: str, text: str):
            """
            Calls an 'o'-prefixed reasoning model with standard message format
            but no temperature parameter, which these models do not accept.
            """
            messages = [
                {"role": "system", "content": prompt},
                {"role": "user", "content": text},
            ]
            response = self.client.chat.completions.create(
                model=self.llm_model,
                messages=messages,
            )
            return response.choices[0]

        def call_llm(self, prompt: str, text: str):
            """
            Dispatches to the appropriate call style based on the model name.
            """
            model = self.llm_model.lower()
            if model.startswith(("gemini", "gpt")):
                return self._call_gpt(prompt, text)
            elif model.startswith("gemma"):
                return self._call_gemma(prompt, text)
            elif model.startswith("o"):
                return self._call_o_model(prompt, text)
            else:
                raise ValueError(f"Unknown model prefix for: {self.llm_model}")
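A quick usage sketch of the wrapper (assumes ``OPENAI_API_KEY``, or
``GEMINI_API_KEY`` for Google models, is set in the environment; the sample
prompt and text are illustrative):

::

    llm = MultiModel("gpt-4.1-mini")
    choice = llm.call_llm(
        "Summarize the following text in one sentence.",
        "Streamlit reruns the whole script top to bottom on every widget interaction.",
    )
    print(choice.message.content)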
Select LLM
----------
::
    llm_model = st.selectbox("LLM Model", llm_models)

    def create_summary() -> str:
        # Guard against clicking "Summary" before a transcript was saved.
        if not os.path.exists(transcript_file):
            return "No transcript saved yet - click 'Save Transcript' first."
        with open(transcript_file, 'r', encoding='utf-8') as file:
            transcript = file.read()
        llm = MultiModel(llm_model)
        summary = llm.call_llm(prompt_summary, transcript)
        return summary.message.content
Summary button
--------------
::
if st.button("Summary", use_container_width=True):
st.session_state.summary = create_summary()
st.write(st.session_state.get("summary"))
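To let the user keep the result, the summary can also be offered as a file
download; a sketch using ``st.download_button`` (the ``summary.txt`` file
name is arbitrary):

::

    if st.session_state.get("summary"):
        st.download_button(
            "Download Summary",
            data=st.session_state.summary,
            file_name="summary.txt",
            mime="text/plain",
            use_container_width=True,
        )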