Obsidian-AI
===========

Summarize an Obsidian_ page.

- Get the `Python Source`_. The script is written in `literate programming`_ style.
- See the `PyLit Tutorial`_.
- See the `reStructuredText Primer`_.

.. _Obsidian: https://obsidian.md/
.. _Python Source: ../../ai_obsidian.py
.. _literate programming: https://en.wikipedia.org/wiki/Literate_programming
.. _reStructuredText Primer: https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html
.. _PyLit Tutorial: https://slott56.github.io/PyLit-3/_build/html/tutorial/index.html

.. csv-table:: Useful Links
   :header: "Name", "URL"
   :widths: 10 30

   "OpenAI API Examples", https://platform.openai.com/examples
   "How to count tokens with tiktoken", https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken

::

    import streamlit as st
    import yaml
    import json
    import os
    import tiktoken
    from openai import OpenAI
    import pyperclip
    from typing import List
    import time
    from pathlib import Path
    from collections import namedtuple

Print banner.

::

    st.set_page_config(
        page_title="O-AI"
    )

    @st.cache_data
    def print_banner():
        print("""
     dBBBBP          dBBBBBb    dBP
    dB'.BP                BB
    dB'.BP            dBP BB   dBP
    dB'.BP  dBBBBBP  dBP  BB  dBP
     dBBBBP         dBBBBBBB dBP
    """)
        return 1

    print_banner()

    st.logo("https://ea-books.netlify.app/lit/ai_obsidian.svg")

Prompts
-------

::

    prompt_summary = """You will be provided with statements in markdown, and your task is to summarize the content you are provided."""

    prompt_summary_v2 = """You will be provided with statements in markdown, and your task is to summarize the key topics and entities you are provided."""

    prompt_questions = """
    You will be provided with context in markdown, and your task is to generate 3 questions
    this context can provide specific answers to which are unlikely to be found elsewhere.

    Higher-level summaries of surrounding context may be provided as well.
    Try using these summaries to generate better questions that this context can answer.
    """

    prompt_questions_v2 = """
    Given the provided context, generate 3 highly specific questions that:

    - Can be answered precisely using only this context.
    - Are unlikely to have answers easily found elsewhere.
    - Benefit from any provided higher-level summaries of surrounding context.

    Prioritize specificity and uniqueness in each question.
    """

    prompt_improve = """You will be provided with statements in markdown, and your task is to improve the content you are provided."""

    prompt = prompt_summary

Select LLM
----------

.. csv-table:: Useful Links
   :header: "Name", "URL"
   :widths: 10 30

   "OpenAI Models", https://platform.openai.com/docs/models
   "Gemini Models", https://ai.google.dev/gemini-api/docs/models

Prices are input-token prices in dollars per 1M tokens; they feed the cost
estimate in `Tokens & Price`_.

::

    llm_prices = {
        "gemini-2.5-flash-preview-05-20": 0.0,
        "gemma-3-27b-it": 0.0,
        "gemini-2.0-flash": 0.0,
        "gpt-4.1-mini": 0.4,
        "gpt-4.1-nano": 0.1,
        "gpt-4.1": 2.0,
        "gpt-4o-mini": 0.15,
        "gpt-4o": 2.5,
        "o3-mini": 1.10,
        "o3": 2.0,
        "o3-pro": 20.0,
    }

    def get_llm_properties(llm_model):
        if llm_model.startswith("gemini"):
            return {"google": True, "temperature": True, "xml": False}
        elif llm_model.startswith("gemma"):
            return {"google": True, "temperature": True, "xml": True}
        elif llm_model.startswith("gpt"):
            return {"google": False, "temperature": True, "xml": False}
        else:  # o3 family
            return {"google": False, "temperature": False, "xml": False}

Persisted List
--------------

.. csv-table:: History
   :header: "Date", "Comment"
   :widths: 10 30

   "2025-06-13", "New elements come first"
   "", "Copied from: `explain_java.py`_"

.. _explain_java.py: explain_java.py.html#persisted-list

::

    class PersistedList:
        """
        A tiny helper that remembers a list of strings on disk.
        """

        def __init__(self, filename: str) -> None:
            self.filename = Path(filename)
            self.names: List[str] = self._read_from_file()

        # ──────────────────────────────────────────────────────────────
        # Private helpers
        # ──────────────────────────────────────────────────────────────
        def _read_from_file(self) -> List[str]:
            """
            Return the list stored on disk (empty if the file is missing).
            """
            if self.filename.exists():
                with self.filename.open("r", encoding="utf-8") as fh:
                    return [line.strip() for line in fh if line.strip()]
            return []

        def _write_to_file(self) -> None:
            """
            Persist the current list to disk (one item per line).
            """
            self.filename.parent.mkdir(parents=True, exist_ok=True)
            with self.filename.open("w", encoding="utf-8") as fh:
                fh.write("\n".join(self.names))

        @staticmethod
        def _remove_strings(source: List[str], to_remove: List[str]) -> List[str]:
            """
            Return a copy of *source* without any element that occurs in *to_remove*.
            """
            removal_set = set(to_remove)
            return [s for s in source if s not in removal_set]

        # ──────────────────────────────────────────────────────────────
        # Public API
        # ──────────────────────────────────────────────────────────────
        def sort_by_pattern(self, all_names: List[str]) -> List[str]:
            """
            Sort *all_names* so that new names come first, alphabetically,
            followed by the previously-stored names in their old ordering.

            The internal list is updated and re-written to disk.
            """
            priority = {name: idx for idx, name in enumerate(self.names)}
            sorted_names = sorted(
                all_names,
                key=lambda n: (1, priority[n]) if n in priority else (0, n)
            )
            self.names = sorted_names
            self._write_to_file()
            return sorted_names

        def select(self, selected_name: str) -> None:
            """
            Move *selected_name* to the top of the list (inserting it if it
            wasn't present) and persist the change.
            """
            self.names = self._remove_strings(self.names, [selected_name])
            self.names.insert(0, selected_name)
            self._write_to_file()

        # ──────────────────────────────────────────────────────────────
        # Convenience
        # ──────────────────────────────────────────────────────────────
        def __iter__(self):
            return iter(self.names)

        def __repr__(self) -> str:
            return f"{self.__class__.__name__}({self.filename!s}, {self.names})"
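The ordering contract in a quick sketch (the file name and the pre-seeded
state below are hypothetical, not part of the app)::

    plist = PersistedList("/tmp/demo-list")
    plist.names = ["b"]                            # pretend "b" was stored by an earlier session
    print(plist.sort_by_pattern(["a", "c", "b"]))  # ['a', 'c', 'b'] -- new names first, alphabetically
    plist.select("c")                              # move "c" to the top and persist
    print(list(plist))                             # ['c', 'a', 'b']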
Select LLM. Remember which LLM was used last time.

::

    llm_models = list(llm_prices.keys())
    llm_models_persisted = PersistedList(".o-ai")
    llm_models = llm_models_persisted.sort_by_pattern(llm_models)

    llm_temperatures = [0, 0.1, 0.7, 1]

    llm_model = st.sidebar.selectbox(
        "LLM Model",
        llm_models,
        index=0
    )

    llm_temperature = st.sidebar.select_slider(
        "LLM Temperature",
        options=llm_temperatures,
        value=0.1
    )

Select Obsidian folder from recent vaults. Obsidian keeps its list of known
vaults in ``obsidian.json``; the path below is the macOS location.

::

    def reset_llm_result():
        if "llm_result" in st.session_state:
            del st.session_state["llm_result"]
        if "note_name" in st.session_state:
            del st.session_state["note_name"]

    home_folder = os.path.expanduser('~')
    obsidian_json_path = f"{home_folder}/Library/Application Support/obsidian/obsidian.json"
    with open(obsidian_json_path, "r") as json_file:
        obsidian_json = json.load(json_file)
    obsidian_vaults = obsidian_json.get('vaults')

    # Extract the values from the dictionary and sort them based on the 'ts' key,
    # most recently used vault first
    sorted_vaults = sorted(obsidian_vaults.values(), key=lambda x: x['ts'], reverse=True)

    # Extract the 'path' from each sorted entry
    obsidian_folders = [vault['path'] for vault in sorted_vaults]

    note_home = st.selectbox(
        "Obsidian folder",
        obsidian_folders,
        on_change=reset_llm_result
    )
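The code above relies only on ``obsidian.json`` containing a ``vaults`` map
whose entries carry ``path`` and ``ts`` fields. A made-up sample of that
shape (vault IDs and paths are fabricated for illustration)::

    {
        "vaults": {
            "a1b2c3d4e5f6a7b8": {"path": "/Users/me/Notes", "ts": 1718000000000},
            "9f8e7d6c5b4a3210": {"path": "/Users/me/Work",  "ts": 1717000000000}
        }
    }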
Load LLM prompts.

::

    prompts_file = "openai_helper.yml"
    with open(prompts_file, 'r') as file:
        prompts = yaml.safe_load(file)

    def get_prompt(name):
        for entry in prompts:
            if entry['name'] == name:
                return entry.get('note')
        return None
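``get_prompt`` expects the YAML file to hold a list of ``name``/``note``
mappings. A hypothetical ``openai_helper.yml`` (entries invented for
illustration)::

    - name: summary
      note: Summarize the content you are provided.
    - name: questions
      note: Generate 3 questions this context can answer.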
Get ``num_files`` newest files from the provided ``directory``.

::

    def get_newest_files(directory, num_files):
        # Check if the directory exists
        if not os.path.isdir(directory):
            raise ValueError(f"The directory {directory} does not exist.")

        # Get a list of files in the directory with their full paths and modification times
        files_with_paths = []
        for file_name in os.listdir(directory):
            file_path = os.path.join(directory, file_name)
            if os.path.isfile(file_path):
                files_with_paths.append((file_path, os.path.getmtime(file_path)))

        # Sort files by modification time in descending order (newest first)
        sorted_files = sorted(files_with_paths, key=lambda x: x[1], reverse=True)

        # Extract the num_files newest file names
        newest_files = [os.path.basename(file_with_path[0]) for file_with_path in sorted_files[:num_files]]

        return newest_files

Select ``note_name`` from the 5 newest notes.

::

    newest_files = get_newest_files(note_home, 5)

    note_name = st.selectbox(
        "Note",
        newest_files,
        on_change=reset_llm_result
    )

Read the selected note; the token count and price estimate follow.

::

    file_path = os.path.join(note_home, note_name)
    with open(file_path, 'r', encoding='utf-8') as file:
        text = file.read()

Tokens & Price
--------------

Some model names (the Gemini and Gemma models in particular) are unknown to
``tiktoken 0.7.0``, so the token count is always computed with the
``gpt-4o-mini`` encoding as an approximation. Since ``llm_prices`` holds
dollars per 1M input tokens, the cost in cents is ``tokens * price / 10000``;
for example, a 2,000-token note on ``gpt-4.1`` ($2.00 per 1M tokens) comes to
``2000 * 2.0 / 10000 = 0.4`` cents.

::

    def count_tokens():
        llm_model_tiktoken = "gpt-4o-mini"
        encoding = tiktoken.encoding_for_model(llm_model_tiktoken)
        tokens = encoding.encode(text)
        cents = round(len(tokens) * llm_prices[llm_model] / 10000, 5)
        st.sidebar.write(f'''
    | Characters | Tokens | Cents |
    |---|---|---|
    | {len(text)} | {len(tokens)} | {cents} |
    ''')

    #if llm_model.startswith("gpt-") or llm_model.startswith("o-"):
    count_tokens()

OpenAI and Gemini clients. Gemini is reached through its OpenAI-compatible
endpoint.

::

    client = OpenAI()

    g_key = os.getenv("GEMINI_API_KEY")
    g_client = OpenAI(
        api_key=g_key,
        base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
    )

Generic LLM call.

::

    def call_llm():
        start_time = time.time()
        st.write('')
        st.info(prompt, icon="🤔")

        # Remember LLM to push it to the top of selectbox
        llm_models_persisted.select(llm_model)

        # Call LLM
        props = get_llm_properties(llm_model)
        llm_client = g_client if props["google"] else client
        if props["xml"]:
            # Gemma models get the prompt folded into a single user message
            messages = [
                {"role": "user", "content": f"{prompt}\n{text}"},
            ]
        else:
            messages = [
                {"role": "developer", "content": prompt},
                {"role": "user", "content": text},
            ]
        if props["temperature"]:
            response = llm_client.chat.completions.create(
                model=llm_model,
                messages=messages,
                temperature=llm_temperature,
            )
        else:
            # The o3 family is called without the temperature parameter
            response = llm_client.chat.completions.create(
                model=llm_model,
                messages=messages,
            )
        choice = response.choices[0]

        # Save result in session
        st.session_state.llm_result = choice.message.content
        st.session_state.note_name = note_name

        # Save result to clipboard
        pyperclip.copy(st.session_state.llm_result)
        st.write('Copied to clipboard')

        end_time = time.time()
        st.session_state.execution_time = end_time - start_time
        st.rerun()

Print result.

::

    if "llm_result" in st.session_state:
        st.write('---')
        st.write(st.session_state.llm_result)
        st.write('---')

    if "execution_time" in st.session_state:
        st.sidebar.write(f"Execution time: `{round(st.session_state.execution_time, 1)}` sec")

Sidebar buttons.

::

    st.write('')
    if st.button(':bulb:   Summarize', type='primary', use_container_width=True):
        prompt = prompt_summary
        call_llm()

    if st.sidebar.button(':question:   Ask questions', use_container_width=True):
        prompt = prompt_questions
        call_llm()

    if st.sidebar.button(':exclamation:   Improve', use_container_width=True):
        prompt = prompt_improve
        call_llm()

    if "llm_result" in st.session_state and st.sidebar.button(':clipboard:   Copy to clipboard', use_container_width=True):
        pyperclip.copy(st.session_state.llm_result)

    st.sidebar.write('---')

    if st.sidebar.button(f' `Summarize` {" "*8} :test_tube: `v.2`'):
        prompt = prompt_summary_v2
        call_llm()

    if st.sidebar.button('`Ask questions` :test_tube: `v.2`'):
        prompt = prompt_questions_v2
        call_llm()
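New actions follow the same pattern: set ``prompt``, then ``call_llm()``. A
hypothetical sketch of a fourth button (``prompt_translate`` does not exist
in this script and would have to be defined with the other prompts)::

    if st.sidebar.button(':globe_with_meridians:   Translate', use_container_width=True):
        prompt = prompt_translate  # hypothetical prompt, defined like the ones above
        call_llm()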