Obsidian-AI¶
Summarize an Obsidian page.
Get the Python source.
The script is written in a literate programming style.
| Name | URL |
|---|---|
| OpenAI API Examples | |
| How to count tokens with tiktoken | https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken |
import streamlit as st
import yaml
import json
import os
import tiktoken
from openai import OpenAI
import pyperclip
from typing import List
import time
from pathlib import Path
from collections import namedtuple
Print banner.
st.set_page_config(
page_title="O-AI"
)
@st.cache_data
def print_banner():
print("""
dBBBBP dBBBBBb dBP
dB'.BP BB
dB'.BP dBP BB dBP
dB'.BP dBBBBBP dBP BB dBP
dBBBBP dBBBBBBB dBP
""")
return 1
print_banner()
st.logo("https://ea-books.netlify.app/lit/ai_obsidian.svg")
Prompts¶
prompt_summary = """You will be provided with statements in markdown,
and your task is to summarize the content you are provided."""
prompt_summary_v2 = """You will be provided with statements in markdown,
and your task is to summarize the key topics and entities you are provided."""
prompt_questions = """
You will be provided with context in markdown,
and your task is to generate 3 questions this context can provide
specific answers to which are unlikely to be found elsewhere.
Higher-level summaries of surrounding context may be provided
as well. Try using these summaries to generate better questions
that this context can answer.
"""
prompt_questions_v2 = """
Given the provided context, generate 3 highly specific questions that:
- Can be answered precisely using only this context.
- Are unlikely to have answers easily found elsewhere.
- Benefit from any provided higher-level summaries of surrounding context.
Prioritize specificity and uniqueness in each question.
"""
prompt_improve = """You will be provided with statements in markdown,
and your task is to improve the content you are provided.
"""
prompt = prompt_summary
Select LLM¶
| Name | URL |
|---|---|
| OpenAI Models | |
| Gemini Models | |
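# Prices appear to be USD per 1M input tokens; count_tokens() below uses them to estimate the cost in cents.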
llm_prices = {
"gemini-2.5-flash-preview-05-20": 0.0,
"gemma-3-27b-it": 0.0,
"gemini-2.0-flash": 0.0,
"gpt-4.1-mini": 0.4,
"gpt-4.1-nano": 0.1,
"gpt-4.1": 2.0,
"gpt-4o-mini": 0.15,
"gpt-4o": 2.5,
"o3-mini": 1.10,
"o3": 2.0,
"o3-pro": 20.0,
}
def get_llm_properties(llm_model):
if llm_model.startswith("gemini"):
return {"google": True, "temperature": True, "xml": False}
elif llm_model.startswith("gemma"):
return {"google": True, "temperature": True, "xml": True}
elif llm_model.startswith("gpt"):
return {"google": False, "temperature": True, "xml": False}
else: #o3
return {"google": False, "temperature": False, "xml": False}
Persisted List¶
| Date | Comment |
|---|---|
| 2025-06-13 | New elements come first |
| | Copied from: explain_java.py |
class PersistedList:
"""
A tiny helper that remembers a list of strings on disk.
"""
def __init__(self, filename: str) -> None:
self.filename = Path(filename)
self.names: List[str] = self._read_from_file()
# ──────────────────────────────────────────────────────────────
# Private helpers
# ──────────────────────────────────────────────────────────────
def _read_from_file(self) -> List[str]:
"""
Return the list stored on disk (empty if the file is missing).
"""
if self.filename.exists():
with self.filename.open("r", encoding="utf-8") as fh:
return [line.strip() for line in fh if line.strip()]
return []
def _write_to_file(self) -> None:
"""
Persist the current list to disk (one item per line).
"""
self.filename.parent.mkdir(parents=True, exist_ok=True)
with self.filename.open("w", encoding="utf-8") as fh:
fh.write("\n".join(self.names))
@staticmethod
def _remove_strings(source: List[str], to_remove: List[str]) -> List[str]:
"""
Return a copy of *source* without any element that occurs in *to_remove*.
"""
removal_set = set(to_remove)
return [s for s in source if s not in removal_set]
# ──────────────────────────────────────────────────────────────
# Public API
# ──────────────────────────────────────────────────────────────
def sort_by_pattern(self, all_names: List[str]) -> List[str]:
"""
Sort *all_names* so that previously‑stored names keep their old
ordering, and every new name is appended alphabetically.
The internal list is updated and re‑written to disk.
"""
priority = {name: idx for idx, name in enumerate(self.names)}
sorted_names = sorted(
all_names,
key=lambda n: (1, priority[n]) if n in priority else (0, n)
)
self.names = sorted_names
self._write_to_file()
return sorted_names
def select(self, selected_name: str) -> None:
"""
Move *selected_name* to the top of the list (inserting it if it
wasn’t present) and persist the change.
"""
self.names = self._remove_strings(self.names, [selected_name])
self.names.insert(0, selected_name)
self._write_to_file()
# ──────────────────────────────────────────────────────────────
# Convenience
# ──────────────────────────────────────────────────────────────
def __iter__(self):
return iter(self.names)
def __repr__(self) -> str:
return f"{self.__class__.__name__}({self.filename!s}, {self.names})"
Select LLM. Remember which LLM was used last time.
llm_models = list(llm_prices.keys())
llm_models_persisted = PersistedList(".o-ai")
llm_models = llm_models_persisted.sort_by_pattern(llm_models)
llm_temperatures = [0, 0.1, 0.7, 1]
llm_model = st.sidebar.selectbox(
"LLM Model",
llm_models,
index = 0
)
llm_temperature = st.sidebar.select_slider(
"LLM Temperature",
options = llm_temperatures,
value = 0.1
)
Select Obsidian folder from recent vaults.
def reset_llm_result():
if "llm_result" in st.session_state:
del st.session_state["llm_result"]
if "note_name" in st.session_state:
del st.session_state["note_name"]
home_folder = os.path.expanduser('~')
obsidian_json_path = f"{home_folder}/Library/Application Support/obsidian/obsidian.json"
with open(obsidian_json_path, "r") as json_file:
obsidian_json = json.load(json_file)
obsidian_vaults = obsidian_json.get('vaults')
# Extract the values from the dictionary and sort them based on the 'ts' key
sorted_vaults = sorted(obsidian_vaults.values(), key=lambda x: x['ts'], reverse=True)
# Extract the 'path' from each sorted entry
obsidian_folders = [vault['path'] for vault in sorted_vaults]
note_home = st.selectbox(
"Obsidian folder",
obsidian_folders,
on_change=reset_llm_result
)
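For reference, the code above only relies on obsidian.json having a vaults mapping whose entries carry a ts timestamp and a path. An illustrative shape of the parsed file (vault IDs, paths, and timestamps are made up):
# {
#     "vaults": {
#         "a1b2c3d4e5f6a7b8": {"path": "/Users/me/Notes", "ts": 1718200000000},
#         "0f9e8d7c6b5a4321": {"path": "/Users/me/Work",  "ts": 1717000000000},
#     }
# }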
Load LLM prompts.
prompts_file = "openai_helper.yml"
with open(prompts_file, 'r') as file:
prompts = yaml.safe_load(file)
def get_prompt(name):
for entry in prompts:
if entry['name'] == name:
return entry.get('note')
return None
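get_prompt expects openai_helper.yml to parse into a list of entries with at least name and note keys. An illustrative (made-up) result of yaml.safe_load:
# [
#     {"name": "summary", "note": "You will be provided with statements in markdown, ..."},
# ]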
Get the `num_files` newest files from the provided directory.
def get_newest_files(directory, num_files):
# Check if the directory exists
if not os.path.isdir(directory):
raise ValueError(f"The directory {directory} does not exist.")
# Get a list of files in the directory with their full paths and modification times
files_with_paths = []
for file_name in os.listdir(directory):
file_path = os.path.join(directory, file_name)
if os.path.isfile(file_path):
files_with_paths.append((file_path, os.path.getmtime(file_path)))
# Sort files by modification time in descending order (newest first)
sorted_files = sorted(files_with_paths, key=lambda x: x[1], reverse=True)
# Extract the num_files newest file names
newest_files = [os.path.basename(file_with_path[0]) for file_with_path in sorted_files[:num_files]]
return newest_files
Select `note_name` from the 5 newest notes.
newest_files = get_newest_files(note_home, 5)
note_name = st.selectbox(
"Note",
newest_files,
on_change=reset_llm_result
)
Read the note text to get the number of tokens.
file_path = os.path.join(note_home, note_name)
with open(file_path, 'r', encoding='utf-8') as file:
text = file.read()
Tokens & Price¶
Certain models are not compatible with tiktoken `0.7.0`, so we have added a separate configuration for them.
def count_tokens():
llm_model_tiktoken = "gpt-4o-mini"
encoding = tiktoken.encoding_for_model(llm_model_tiktoken)
tokens = encoding.encode(text)
cents = round(len(tokens) * llm_prices[llm_model]/10000, 5)
st.sidebar.write(f'''
| Characters | Tokens | Cents |
|---|---|---|
| {len(text)} | {len(tokens)} | {cents} |
''')
#if llm_model.startswith("gpt-") or llm_model.startswith("o-"):
count_tokens()
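A more defensive variant (a sketch, not part of the script) would fall back to a generic encoding when tiktoken does not recognize the model name:
def get_encoding_safe(model_name):
    # Sketch only: try the model-specific encoding, otherwise fall back to o200k_base
    # (the encoding used by the gpt-4o family).
    try:
        return tiktoken.encoding_for_model(model_name)
    except KeyError:
        return tiktoken.get_encoding("o200k_base")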
OpenAI and Gemini clients. The Gemini models are reached through Google's OpenAI-compatible endpoint.
client = OpenAI()
g_key = os.getenv("GEMINI_API_KEY")
g_client = OpenAI(
api_key=g_key,
base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
)
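# OpenAI() reads its key from the OPENAI_API_KEY environment variable;
# the Gemini client needs GEMINI_API_KEY to be set.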
Generic LLM call: pick the client, build the messages, call the chat completions API, store the result in the session state, and copy it to the clipboard.
def call_llm():
start_time = time.time()
st.write('')
st.info(prompt, icon="🤔")
# Remember LLM to push it to the top of selectbox
llm_models_persisted.select(llm_model)
# Call LLM
props = get_llm_properties(llm_model)
llm_client = g_client if props["google"] else client
if props["xml"]:
messages = [
{"role": "user", "content": f"<prompt>{prompt}</prompt>\n<query>{text}</query>"},
]
else:
messages = [
{"role": "developer", "content": prompt},
{"role": "user", "content": text},
]
if props["temperature"]:
response = llm_client.chat.completions.create(
model=llm_model,
messages=messages,
temperature=llm_temperature,
)
else:
response = llm_client.chat.completions.create(
model=llm_model,
messages=messages,
)
choice = response.choices[0]
# Save result in session
st.session_state.llm_result = choice.message.content
st.session_state.note_name = note_name
# Save result to clipboard
pyperclip.copy(st.session_state.llm_result)
st.write(f'Copied to clipboard')
end_time = time.time()
st.session_state.execution_time = end_time - start_time
st.rerun()
Print the result.
if "llm_result" in st.session_state:
st.write('---')
st.write(st.session_state.llm_result)
st.write('---')
if "execution_time" in st.session_state:
st.sidebar.write(f"Execution time: `{round(st.session_state.execution_time, 1)}` sec")
Action buttons. The Summarize button sits in the main area; the rest are in the sidebar.
st.write('')
if st.button(':bulb: Summarize', type='primary', use_container_width=True):
prompt = prompt_summary
call_llm()
if st.sidebar.button(':question: Ask questions', use_container_width=True):
prompt = prompt_questions
call_llm()
if st.sidebar.button(':exclamation: Improve', use_container_width=True):
prompt = prompt_improve
call_llm()
if "llm_result" in st.session_state and st.sidebar.button(':clipboard: Copy to clipboard', use_container_width=True):
pyperclip.copy(st.session_state.llm_result)
st.sidebar.write('---')
if st.sidebar.button(f' `Summarize` {" "*8} :test_tube: `v.2`'):
prompt = prompt_summary_v2
call_llm()
if st.sidebar.button(f'`Ask questions` :test_tube: `v.2`'):
prompt = prompt_questions_v2
call_llm()