Book Chat (LC)
Use Calibre to convert EPUB files into HTML format. This script will then analyze the HTML content using RAG (Retrieval-Augmented Generation) with LangChain.
import streamlit as st
import os
import pyperclip
from langchain.chains import RetrievalQA
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_community.document_loaders import UnstructuredHTMLLoader
from langchain_community.vectorstores import FAISS
Prints a stylized banner to the console when the application starts.
@st.cache_data
def print_banner():
    """Print the ASCII-art application banner to the console and return 1.

    The function is wrapped in ``st.cache_data``; presumably this keeps the
    banner from being printed again on every script rerun (confirm against
    Streamlit's caching semantics).
    """
    banner = """
___. __ .__ __
\\_ |__ ____ ____ | | __ ____ | |__ _____ _/ |_
| __ \\ / _ \\ / _ \\| |/ / ______ _/ ___\\| | \\\\__ \\\\ __\\
| \\_\\ ( <_> | <_> ) < /_____/ \\ \\___| Y \\/ __ \\| |
|___ /\\____/ \\____/|__|_ \\ \\___ >___| (____ /__|
\\/ \\/ \\/ \\/ \\/
"""
    print(banner)
    # A constant return value gives the cache decorator something to store.
    return 1
# Emit the console banner, then set the logo shown by the Streamlit app.
print_banner()
st.logo(image="https://ea-books.netlify.app/lit/book_lc.svg")
Get GEMINI_API_KEY
# API key for the Google Generative AI clients; None when the variable is unset.
g_key = os.environ.get("GEMINI_API_KEY")
Select Embeddings
# Embedding models offered in the sidebar; the trailing comments carry the
# dates noted by the author for each model.
embed_model_names = [
    "gemini-embedding-exp-03-07",  # March 2025
    "text-embedding-004",  # April 2024
    "embedding-001",  # December 2023
]
embed_model_name = st.sidebar.selectbox(
    label="Embedding",
    options=embed_model_names,
)
# The embeddings client expects the model id with a "models/" prefix.
embedding = GoogleGenerativeAIEmbeddings(
    google_api_key=g_key,
    model="models/" + embed_model_name,
)
Folder to save index
# One index folder per embedding model, so indexes built with different
# models are stored side by side.
index_folder = "vectors/book-lc-" + embed_model_name
Input HTML file with the book’s contents and a log of the questions asked.
# Log of the questions asked, stored under the "vectors" directory.
history_file = "vectors/history.txt"
# HTML file holding the book's contents (relative to the working directory).
book_html = "html/index.html"
Print current folder name as a title
# Use the last component of the working directory as the page heading.
current_folder = os.path.split(os.getcwd())[1]
st.write("### " + current_folder)
Select LLM
# Chat models offered in the sidebar.
llm_models = [
    "gemini-2.5-pro-exp-03-25",
    "gemini-2.0-flash",
    "gemma-3-27b-it",
]
llm_model = st.sidebar.selectbox(
    label="LLM",
    options=llm_models,
)
llm = ChatGoogleGenerativeAI(
    google_api_key=g_key,
    model=llm_model,
)
Load history
# Current history text; replaced further down by the file contents and then
# by whatever the sidebar text area holds.
history = ""


def update_history(new_text):
    """Rewrite the history file with ``new_text`` placed before the history.

    The file at ``history_file`` is overwritten with ``new_text`` followed by
    the module-level ``history`` string.

    Args:
        new_text: Text to put at the top of the file; pass ``""`` to simply
            persist the current ``history`` value.

    Raises:
        OSError: If the directory or the file cannot be created or written.
    """
    # The history file lives in a directory that may not exist yet (nothing
    # else is guaranteed to have created it); opening for writing would
    # otherwise fail with FileNotFoundError.
    parent_dir = os.path.dirname(history_file)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(history_file, "w", encoding="utf-8") as file:
        file.write(new_text + history)
# Read any saved history; a missing file simply leaves `history` unchanged.
try:
    with open(history_file, "r", encoding="utf-8") as fin:
        history = fin.read()
except FileNotFoundError:
    pass

# The text area is seeded with the stored history; its current contents
# become the new value of `history`.
history = st.sidebar.text_area("History", value=history.strip(), height=200)

# Persist the text-area contents on demand.
update_clicked = st.sidebar.button(":recycle: Update history", use_container_width=True)
if update_clicked:
    update_history("")
    st.toast("History updated")
Create or load index
def create_index(input_file, persist_dir):
    """Build a FAISS index from an HTML file, save it, and cache it.

    Args:
        input_file: Path of the HTML file handed to ``UnstructuredHTMLLoader``.
        persist_dir: Folder the vector store is saved to via ``save_local``.

    The resulting store is also kept in ``st.session_state.vstore``.
    """
    # NOTE(review): the loaded documents are embedded as-is, with no text
    # splitting step -- confirm the loader yields suitably sized documents.
    docs = UnstructuredHTMLLoader(input_file).load()
    store = FAISS.from_documents(docs, embedding)
    store.save_local(persist_dir)
    st.session_state.vstore = store
def load_index(persist_dir):
    """Load a saved FAISS index into ``st.session_state.vstore``.

    Args:
        persist_dir: Folder previously written by ``save_local``.

    Any exception raised while loading is reported with ``st.error`` and
    swallowed; in that case ``st.session_state.vstore`` is not set.
    """
    try:
        # NOTE(review): allow_dangerous_deserialization=True opts in to
        # loading serialized data from disk; only point this at index folders
        # this application created itself.
        store = FAISS.load_local(
            persist_dir,
            embedding,
            allow_dangerous_deserialization=True,
        )
    except Exception as exc:
        st.error(f"Error loading index: {exc}")
    else:
        st.session_state.vstore = store
Handle indexing logic
# Make sure st.session_state.vstore holds the index for the currently
# selected embedding model, or halt the script until one is available.
if os.path.exists(index_folder):
    # index_folder depends on the embedding selection, so a store cached for a
    # different folder is stale and must be reloaded.
    cached_folder = st.session_state.get("vstore_folder")
    if "vstore" not in st.session_state or cached_folder != index_folder:
        if "vstore" in st.session_state:
            del st.session_state["vstore"]
        load_index(index_folder)
        if "vstore" not in st.session_state:
            # load_index reports failures with st.error and leaves vstore
            # unset; stop here rather than crash later on the missing store.
            st.stop()
        st.session_state.vstore_folder = index_folder
else:
    if st.sidebar.button(':construction: Create Index', type='primary', use_container_width=True):
        create_index(book_html, index_folder)
        # Record which folder the freshly built store belongs to, so the
        # rerun does not reload it from disk again.
        st.session_state.vstore_folder = index_folder
        st.rerun()
    else:
        # No index yet and none requested: nothing below can run.
        st.stop()
Setup QA chain
# Build the RetrievalQA chain once per (LLM, index folder) combination.
# Caching on "qa" alone would keep serving the chain built for the first LLM
# chosen in the session, silently ignoring later sidebar changes.
qa_signature = (llm_model, index_folder)
if "qa" not in st.session_state or st.session_state.get("qa_signature") != qa_signature:
    retriever = st.session_state.vstore.as_retriever()
    st.session_state.qa = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retriever,
        chain_type="stuff",
    )
    st.session_state.qa_signature = qa_signature
Ask a question
# Question input, answer display, and clipboard export.
question = st.text_area("Question", height=200)

if st.button(":question: Ask", use_container_width=True):
    if question.strip():
        # Log the question first, then store the chain's response so it
        # survives the rerun triggered below.
        update_history(question + "\n\n---\n")
        st.session_state.response = st.session_state.qa.invoke(question)
        st.rerun()
    else:
        # A blank submission would add an empty entry to the history file and
        # spend an LLM call on an empty prompt.
        st.warning("Please enter a question first.")

if "response" in st.session_state:
    answer = st.session_state.response["result"]
    st.write(answer)
    if st.sidebar.button(":clipboard: Copy to clipboard", use_container_width=True):
        # NOTE(review): pyperclip presumably targets the clipboard of the
        # machine running this script, and fails where no clipboard mechanism
        # is available -- report that instead of crashing the app.
        try:
            pyperclip.copy(answer)
        except pyperclip.PyperclipException as exc:
            st.error(f"Could not copy to clipboard: {exc}")
        else:
            st.toast("Copied to clipboard")