Commit e75bc9be authored by chenzk

v1.0
from mlx_lm import load, generate
model, tokenizer = load("mlx-community/SmolLM-1.7B-Instruct-8bit")
response = generate(model, tokenizer, prompt="hello", verbose=True)
print(response)
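# Note: this snippet assumes the mlx-lm package is installed (e.g. `pip install mlx-lm`)
# and an Apple-silicon Mac, since MLX targets Apple hardware.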
import { pipeline } from "@huggingface/transformers";
// Create a text generation pipeline
const generator = await pipeline(
"text-generation",
"HuggingFaceTB/SmolLM2-135M-Instruct",
{ dtype: "q4f16", revision: "refs/pr/6" },
);
// Define the list of messages
const messages = [
{ role: "system", content: "You are a helpful assistant." },
{ role: "user", content: "Rewrite the following: hello how r u?" },
];
// Generate a response
const output = await generator(messages, { max_new_tokens: 128 });
console.log(output[0].generated_text.at(-1).content);
// "Hello, how's it going?"
# Model code
modelCode=1120
# Model name
modelName=smollm_pytorch
# Model description
modelDescription=SmolLM2 1.7B, a rising star among on-device small models, beats Qwen 2.5 1.5B and Llama 3.2 1B and can run in under 2 GB of VRAM.
# Application scenarios
appScenario=inference, training, conversational QA, manufacturing, media, finance, energy, healthcare, smart home, education
# Framework type
frameType=pytorch
# Pre-training
We use the [nanotron](https://github.com/huggingface/nanotron/) library to train the SmolLM and SmolLM2 base models.
The scripts for training SmolLM v1 can be found in the `smollm1` folder. SmolLM2 has a similar architecture and setup but uses an improved data mixture that we curated, along with significantly longer training runs (11 trillion tokens for the 1.7B, 4 trillion for the 360M, and 2 trillion for the 135M). We will upload the SmolLM2 configs soon.
## Setup
Please refer to [nanotron](https://github.com/huggingface/nanotron/) for detailed instructions on setting up your training environment and launching jobs.
After setting up the environment and tokenizing the training datasets with [datatrove](https://github.com/huggingface/datatrove) (instructions available [here](https://github.com/huggingface/nanotron/blob/main/docs/nanoset.md#nanosets)), you can modify the configurations to match your number of nodes and local paths.
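As a rough sketch of the tokenization step (the reader, paths, and task count below are illustrative assumptions; follow the linked datatrove instructions for the exact pipeline), producing the tokenized folders the configs below point at might look like:
```python
from datatrove.executor import LocalPipelineExecutor
from datatrove.pipeline.readers import JsonlReader
from datatrove.pipeline.tokens import DocumentTokenizer

# Read raw documents and write tokenized shards that nanotron's Nanoset can consume.
# Input path and task count are placeholders; adjust to your data layout.
executor = LocalPipelineExecutor(
    pipeline=[
        JsonlReader("raw/fineweb-edu-dedup"),
        DocumentTokenizer(
            output_folder="datasets/fineweb-edu-dedup",
            tokenizer_name_or_path="HuggingFaceTB/cosmo2-tokenizer",
            eos_token="<|endoftext|>",  # assumed EOS for the cosmo2 tokenizer
        ),
    ],
    tasks=8,
)
executor.run()
```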
Below is an example of launching SmolLM1 135M training on a single node (set the DP value to 8 in the config and adjust the batch size accordingly), then run:
```bash
git clone https://github.com/huggingface/nanotron
cd nanotron
# follow the installation instructions in the nanotron README
CUDA_DEVICE_MAX_CONNECTIONS=1 torchrun --nproc_per_node=8 run_train.py --config-file smollm1/config_smollm1_135M.yaml
```
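For a single-node run, the relevant knob is the `parallelism` section of the YAML config (shown here with the values suggested above; you can also raise `batch_accumulation_per_replica` to keep the global batch size unchanged):
```yaml
parallelism:
  dp: 8  # 1 node with 8 GPUs (the released config uses dp: 32 for 4 nodes)
  pp: 1
  tp: 1
```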
If you are working on a Slurm cluster, you can modify `launch.slurm` and launch the training with:
```bash
sbatch launch.slurm
```
> [!NOTE]
> Don't forget to create the logs directory before launching the job:
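> ```bash
> mkdir -p logs
> ```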
#!/bin/bash
#SBATCH --job-name=smollm1-135M
#SBATCH --nodes=4
#SBATCH --gres=gpu:8
#SBATCH --qos=high
#SBATCH --output=./logs/train-%j.out
#SBATCH --error=./logs/train-%j.err
set -e
TRAINER_PYTHON_FILE="run_train.py"
CONFIG_PATH_YAML="smollm1/config_smollm1_135M.yaml"
nvidia-smi
# Show some environment variables
echo python3 version = `python3 --version`
echo "Python path: $(which python3)"
echo "NCCL version: $(python -c "import torch;print(torch.cuda.nccl.version())")"
echo "CUDA version: $(python -c "import torch;print(torch.version.cuda)")"
echo "START TIME: $(date)"
secs_to_human() {
echo "$(( ${1} / 3600 )):$(( (${1} / 60) % 60 )):$(( ${1} % 60 ))"
}
start=$(date +%s)
echo "$(date -d @${start} "+%Y-%m-%d %H:%M:%S"): ${SLURM_JOB_NAME} start id=${SLURM_JOB_ID}\n"
# SLURM stuff
export HOSTNAMES=`scontrol show hostnames "$SLURM_JOB_NODELIST"`
export MASTER_ADDR=$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)
export MASTER_PORT=6000
export COUNT_NODE=`scontrol show hostnames "$SLURM_JOB_NODELIST" | wc -l`
export CUDA_DEVICE_MAX_CONNECTIONS="1"
echo "Number of nodes: $COUNT_NODE"
echo "Hostnames: $HOSTNAMES"
CMD=" $TRAINER_PYTHON_FILE \
--config-file $CONFIG_PATH_YAML \
"
# --node_rank and --role are appended per task in the srun command below,
# where SLURM_PROCID is actually set
export LAUNCHER="torchrun \
--nproc_per_node 8 \
--nnodes $COUNT_NODE \
--max_restarts 0 \
--tee 3 \
"
# Sleep for a random duration between 0 and 1000 milliseconds to avoid too many concurrent requests to the Hub
random_milliseconds=$(( RANDOM % 1001 ))
sleep_time=$(bc <<< "scale=3; $random_milliseconds / 1000")
echo "Sleeping for $sleep_time seconds..."
sleep $sleep_time
srun $SRUN_ARGS -u bash -c "$LAUNCHER --node_rank $SLURM_PROCID --role $SLURMD_NODENAME: $CMD"
echo "END TIME: $(date)"
# SmolLM1 135M trained on 600B tokens
checkpoints:
checkpoint_interval: 2000
checkpoints_path: checkpoints
checkpoints_path_is_shared_file_system: false
resume_checkpoint_path: null
save_final_state: false
save_initial_state: false
data_stages:
- data:
dataset:
dataset_folder: # paths to tokenized datasets
- datasets/fineweb-edu-dedup
- datasets/cosmopedia-v2
- datasets/python-edu
- datasets/open-web-math
- datasets/stackoverflow
dataset_weights:
- 0.7
- 0.15
- 0.08
- 0.06
- 0.01
num_loading_workers: 1
seed: 42
name: training stage
start_training_step: 1
general:
benchmark_csv_path: null
consumed_train_samples: null
ignore_sanity_checks: true
project: smollm
run: smollm-135M
seed: 8
step: null
logging:
iteration_step_info_interval: 1
log_level: info
log_level_replica: info
model:
ddp_bucket_cap_mb: 25
dtype: bfloat16
init_method:
std: 0.0416 # 1/sqrt(hidden_size)
make_vocab_size_divisible_by: 1
model_config:
bos_token_id: 0
eos_token_id: 0
hidden_act: silu
hidden_size: 576
initializer_range: 0.02
intermediate_size: 1536
is_llama_config: true
max_position_embeddings: 2048
num_attention_heads: 9
num_hidden_layers: 30
num_key_value_heads: 3
pad_token_id: null
pretraining_tp: 1
rms_norm_eps: 1.0e-05
rope_scaling: null
rope_theta: 10000.0
tie_word_embeddings: true
use_cache: true
vocab_size: 49152
optimizer:
accumulate_grad_in_fp32: true
clip_grad: 1.0
learning_rate_scheduler:
learning_rate: 0.003
lr_decay_starting_step: 250000
lr_decay_steps: 50000
lr_decay_style: 1-sqrt
lr_warmup_steps: 2500
lr_warmup_style: linear
min_decay_lr: 0
optimizer_factory:
adam_beta1: 0.9
adam_beta2: 0.95
adam_eps: 1.0e-08
name: adamW
torch_adam_is_fused: true
weight_decay: 0.01
zero_stage: 0
parallelism:
dp: 32 # 4 nodes
expert_parallel_size: 1
pp: 1
pp_engine: 1f1b
recompute_layer: false
tp: 1
tp_linear_async_communication: true
tp_mode: REDUCE_SCATTER
tp_recompute_allgather: true
profiler: null
tokenizer:
tokenizer_max_length: null
tokenizer_name_or_path: HuggingFaceTB/cosmo2-tokenizer
tokenizer_revision: null
tokens:
batch_accumulation_per_replica: 2
limit_test_batches: 0
limit_val_batches: 0
micro_batch_size: 8 # GBS = 8*2*32*sequence_length = 512*sequence_length = 1M tokens
sequence_length: 2048
train_steps: 600000
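# 600k steps x ~1M tokens per step = ~600B tokens, matching the header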
val_check_interval: -1
# SmolLM1 1.7B trained on 1T tokens
checkpoints:
checkpoint_interval: 2000
checkpoints_path: checkpoints
checkpoints_path_is_shared_file_system: false
resume_checkpoint_path: null
save_final_state: false
save_initial_state: false
data_stages:
- data:
dataset:
dataset_folder: # paths to tokenized datasets
- datasets/fineweb-edu-dedup
- datasets/cosmopedia-v2
- datasets/open-web-math
- datasets/starcoderdata-python
- datasets/stackoverflow
dataset_weights:
- 0.7
- 0.15
- 0.06
- 0.08
- 0.01
num_loading_workers: 1
seed: 42
name: training stage
start_training_step: 1
- data:
dataset: # we change data mixture to use python-edu
dataset_folder:
- datasets/fineweb-edu-dedup
- datasets/cosmopedia-v2
- datasets/open-web-math
- datasets/python-edu
- datasets/stackoverflow
- datasets/deepmind_mathematics
dataset_weights:
- 0.7
- 0.15
- 0.055
- 0.08
- 0.01
- 0.005
num_loading_workers: 1
seed: 42
name: training stage 2
start_training_step: 300000
general:
benchmark_csv_path: null
consumed_train_samples: null
ignore_sanity_checks: true
project: smollm
run: smollm-1700M
seed: 8
step: null
logging:
iteration_step_info_interval: 1
log_level: info
log_level_replica: info
model:
ddp_bucket_cap_mb: 25
dtype: bfloat16
init_method:
std: 0.022097086912079608
make_vocab_size_divisible_by: 1
model_config:
bos_token_id: 0
eos_token_id: 0
hidden_act: silu
hidden_size: 2048
initializer_range: 0.02
intermediate_size: 8192
is_llama_config: true
max_position_embeddings: 2048
num_attention_heads: 32
num_hidden_layers: 24
num_key_value_heads: 32
pad_token_id: null
pretraining_tp: 1
rms_norm_eps: 1.0e-05
rope_scaling: null
rope_theta: 10000.0
tie_word_embeddings: true
use_cache: true
vocab_size: 49152
optimizer:
accumulate_grad_in_fp32: true
clip_grad: 1.0
learning_rate_scheduler:
learning_rate: 0.0005
lr_decay_starting_step: 400000
lr_decay_steps: 100000
lr_decay_style: 1-sqrt
lr_warmup_steps: 2000
lr_warmup_style: linear
min_decay_lr: 0
optimizer_factory:
adam_beta1: 0.9
adam_beta2: 0.95
adam_eps: 1.0e-08
name: adamW
torch_adam_is_fused: true
weight_decay: 0.01
zero_stage: 0
parallelism:
dp: 64 # 8 nodes
expert_parallel_size: 1
pp: 1
pp_engine: 1f1b
recompute_layer: false
tp: 1
tp_linear_async_communication: true
tp_mode: REDUCE_SCATTER
tp_recompute_allgather: true
profiler: null
tokenizer:
tokenizer_max_length: null
tokenizer_name_or_path: HuggingFaceTB/cosmo2-tokenizer
tokenizer_revision: null
tokens:
batch_accumulation_per_replica: 4
limit_test_batches: 0
limit_val_batches: 0
micro_batch_size: 4 # GBS = 4*4*64*sequence_length = 1024*sequence_length = 2.1M tokens
sequence_length: 2048
train_steps: 500000
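# 500k steps x ~2.1M tokens per step = ~1T tokens, matching the header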
val_check_interval: -1
# SmolLM1 360M trained on 600B tokens
checkpoints:
checkpoint_interval: 2000
checkpoints_path: checkpoints
checkpoints_path_is_shared_file_system: false
resume_checkpoint_path: null
save_final_state: false
save_initial_state: false
data_stages:
- data:
dataset:
dataset_folder: # paths to tokenized datasets
- datasets/fineweb-edu-dedup
- datasets/cosmopedia-v2
- datasets/python-edu
- datasets/open-web-math
- datasets/stackoverflow
dataset_weights:
- 0.7
- 0.15
- 0.08
- 0.06
- 0.01
num_loading_workers: 1
seed: 42
name: training stage
start_training_step: 1
general:
benchmark_csv_path: null
consumed_train_samples: null
ignore_sanity_checks: true
project: smollm
run: smollm-360M
seed: 8
step: null
logging:
iteration_step_info_interval: 1
log_level: info
log_level_replica: info
model:
ddp_bucket_cap_mb: 25
dtype: bfloat16
init_method:
std: 0.03227486121839514
make_vocab_size_divisible_by: 1
model_config:
bos_token_id: 0
eos_token_id: 0
hidden_act: silu
hidden_size: 960
initializer_range: 0.02
intermediate_size: 2560
is_llama_config: true
max_position_embeddings: 2048
num_attention_heads: 15
num_hidden_layers: 32
num_key_value_heads: 5
pad_token_id: null
pretraining_tp: 1
rms_norm_eps: 1.0e-05
rope_scaling: null
rope_theta: 10000.0
tie_word_embeddings: true
use_cache: true
vocab_size: 49152
optimizer:
accumulate_grad_in_fp32: true
clip_grad: 1.0
learning_rate_scheduler:
learning_rate: 0.003
lr_decay_starting_step: 500000
lr_decay_steps: 100000
lr_decay_style: 1-sqrt
lr_warmup_steps: 5000
lr_warmup_style: linear
min_decay_lr: 0
optimizer_factory:
adam_beta1: 0.9
adam_beta2: 0.95
adam_eps: 1.0e-08
name: adamW
torch_adam_is_fused: true
weight_decay: 0.01
zero_stage: 0
parallelism:
dp: 32
expert_parallel_size: 1
pp: 1
pp_engine: 1f1b
recompute_layer: false
tp: 1
tp_linear_async_communication: true
tp_mode: REDUCE_SCATTER
tp_recompute_allgather: true
profiler: null
tokenizer:
tokenizer_max_length: null
tokenizer_name_or_path: HuggingFaceTB/cosmo2-tokenizer
tokenizer_revision: null
tokens:
batch_accumulation_per_replica: 2
limit_test_batches: 0
limit_val_batches: 0
micro_batch_size: 8
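# GBS = 8*2*32*sequence_length = 512*sequence_length = ~1M tokens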
sequence_length: 2048
train_steps: 600000
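# 600k steps x ~1M tokens per step = ~600B tokens, matching the header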
val_check_interval: -1
# smol-tools
A collection of lightweight AI-powered tools built with llama.cpp and small language models. These tools are designed to run locally on your machine without requiring expensive GPU resources. They can also run offline, without any internet connection.
## Features
### SmolSummarizer
- Quick text summarization using SmolLM2-1.7B Instruct
- Maintains key points while providing concise summaries
- Able to reply to follow-up questions
### SmolRewriter
- Rewrites text to be more professional and approachable
- Maintains the original message's intent and key points
- Perfect for email and message drafting
### SmolAgent
- An AI agent that can perform various tasks through tool integration
- Built-in tools include:
- Weather lookup
- Random number generation
- Current time
- Web browser control
- Extensible tool system for adding new capabilities
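
New tools follow the `@tool` decorator pattern used in `smol_tools/agent.py`; a minimal sketch (the `roll_die` tool below is hypothetical, not one of the built-ins):
```python
from transformers import tool
import random

@tool
def roll_die(sides: int) -> int:
    """
    Rolls a die with the given number of sides.

    Args:
        sides: The number of sides on the die.

    Returns:
        A random integer between 1 and sides.
    """
    return random.randint(1, sides)
```
The agent matches the model's emitted tool calls against the tool's name and typed arguments, so the docstring's `Args` section and the type hints are required.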
## Installation
1. Clone the repository:
```bash
git clone https://github.com/huggingface/smollm.git
cd smollm/smol_tools
```
2. Install dependencies:
```bash
uv venv --python 3.11
source .venv/bin/activate
uv pip install -r requirements.txt
```
If you're on macOS and don't have tkinter installed, you can install it with:
```bash
brew install python-tk@3.11
```
On Linux, you can install it with:
```bash
sudo apt-get install python3-tk
```
On Windows, make sure to select the option that installs the tkinter (Tcl/Tk) component when installing Python.
## Usage
### GUI Demo
Run the Tkinter-based demo application:
```bash
python demo_tkinter.py
```
The demo provides a user-friendly interface with the following shortcuts:
- `F1`: Open SmolDraft interface
- `F2`: Summarize selected text
- `F5`: Open SmolChat interface
- `F10`: Open SmolAgent interface
### Programmatic Usage
```python
from smol_tools.summarizer import SmolSummarizer
from smol_tools.rewriter import SmolRewriter
from smol_tools.agent import SmolToolAgent
# Initialize tools
summarizer = SmolSummarizer()
rewriter = SmolRewriter()
agent = SmolToolAgent()
# Generate a summary
for summary in summarizer.process("Your text here"):
print(summary)
# Rewrite text
for improved in rewriter.process("Your text here"):
print(improved)
# Use the agent
for response in agent.process("What's the weather in London?"):
print(response)
```
## Models
All tools currently run on SmolLM2-1.7B Instruct; the summarizer, rewriter, chat, agent, and titler each load the same GGUF build.
The weights are stored in 16-bit floating point (F16) for efficient inference. The model was trained in BF16, but in our tests that format gives slower inference on Mac M-series chips.
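For reference, the tools load these weights through `llama-cpp-python`; this mirrors `smol_tools/base.py`, and the repo and filename are the ones the tools actually use:
```python
from llama_cpp import Llama

# Download (or reuse from cache) the GGUF file from the Hub and load it
model = Llama.from_pretrained(
    repo_id="andito/SmolLM2-1.7B-Instruct-F16-GGUF",
    filename="smollm2-1.7b-8k-dpo-f16.gguf",
    n_ctx=8192,
    verbose=False,
)
```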
## License
This project is licensed under the Apache License 2.0 - see the LICENSE file for details.
## Contributing
Contributions are welcome! Please feel free to submit a Pull Request.
import tkinter as tk
from tkmacosx import Button
import time
import threading
import queue
from pynput import keyboard
from smol_tools.summarizer import SmolSummarizer
from smol_tools.rewriter import SmolRewriter
from pynput.keyboard import Key, Controller
import pyperclip
from smol_tools.agent import SmolToolAgent
from smol_tools.chatter import SmolChatter
from smol_tools.titler import SmolTitler
import os
import getpass
class TextPopupApp:
def __init__(self, root):
self.root = root
self.root.withdraw() # Start with the window hidden
self.last_text = ""
self.active_popups = []
self.last_summary = ""
# Initialize tools
self.summarizer = SmolSummarizer()
self.rewriter = SmolRewriter()
self.titler = SmolTitler()
self.agent = SmolToolAgent()
self.chatter = SmolChatter()
self.keyboard_controller = Controller()
# Replace the keyboard listener with GlobalHotKeys
self.keyboard_listener = keyboard.GlobalHotKeys({
"<F1>": lambda: self.show_draft_input(
self.root.winfo_x(),
self.root.winfo_y(),
self.root.winfo_width()
),
"<F2>": self.generate_summary_from_selected_text,
"<F5>": self.show_chat_window,
"<F10>": self.show_agent_input,
})
self.keyboard_listener.start()
self.username = getpass.getuser() # Get system username
def generate_summary_from_selected_text(self):
selected_text = self.get_selected_text()
if selected_text:
# Directly generate summary instead of showing confirmation popup
self.generate_summary_direct(selected_text)
# New method to directly show summary window
def generate_summary_direct(self, text):
summary_popup = tk.Toplevel(self.root)
summary_popup.withdraw() # Hide the window initially
self.active_popups.append(summary_popup)
summary_popup.title("Summary Chat")
summary_popup.configure(bg='#f6f8fa')
# Set minimum window size
summary_popup.minsize(600, 600)
# Main container
container = tk.Frame(summary_popup, bg='#f6f8fa')
container.pack(fill=tk.BOTH, expand=True, padx=20, pady=20)
# Chat display area (top)
chat_frame = tk.Frame(
container,
bg='white',
highlightbackground='#e1e4e8',
highlightthickness=1,
bd=0,
relief=tk.FLAT
)
chat_frame.pack(fill=tk.BOTH, expand=True, pady=(0, 10))
# Add chat display with scrollbar
chat_display = tk.Text(
chat_frame,
wrap=tk.WORD,
borderwidth=0,
highlightthickness=0,
bg='white',
fg='#24292e',
font=('Segoe UI', 12),
padx=15,
pady=12
)
scrollbar = tk.Scrollbar(chat_frame, command=chat_display.yview)
chat_display.configure(yscrollcommand=scrollbar.set)
scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
chat_display.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
# Configure text tags
chat_display.tag_configure("assistant_name", foreground="#E57373", font=('Segoe UI', 12, 'bold'))
chat_display.tag_configure("user_name", foreground="#7986CB", font=('Segoe UI', 12, 'bold'))
chat_display.config(state='disabled')
# Input area (bottom)
input_container = tk.Frame(
container,
bg='white',
highlightbackground='#e1e4e8',
highlightthickness=1,
bd=0,
relief=tk.FLAT
)
input_container.pack(fill=tk.X)
# Text input
chat_input = tk.Text(
input_container,
height=3,
wrap=tk.WORD,
borderwidth=0,
highlightthickness=0,
bg='white',
fg='#24292e',
font=('Segoe UI', 12),
padx=15,
pady=12,
insertwidth=2, # Width of cursor
insertbackground='#0066FF', # Color of cursor matching our theme
insertofftime=500, # Cursor blink off time in milliseconds
insertontime=500 # Cursor blink on time in milliseconds
)
chat_input.pack(side=tk.LEFT, fill=tk.X, expand=True)
# Send button
send_btn = Button(
input_container,
text="Ask Question",
command=lambda: self.process_summary_question(
text, chat_input.get("1.0", "end-1c").strip(),
chat_display, chat_input
),
font=('Segoe UI', 12),
bg='#0066FF',
fg='white',
activebackground='#0052CC',
activeforeground='white',
borderless=True,
focuscolor='',
height=32,
padx=15
)
send_btn.pack(side=tk.RIGHT, padx=15, pady=8)
# Bind Enter key
chat_input.bind("<Return>", lambda e: [
self.process_summary_question(
text, chat_input.get("1.0", "end-1c").strip(),
chat_display, chat_input
),
"break"
][1])
# Display initial summary request
preview = text[:100] + "..." if len(text) > 100 else text
self.update_summary_chat(chat_display, self.username, f"Please summarize this text: {preview}")
# Position window
summary_popup.update_idletasks()
popup_width = 600
popup_height = 600
# Get mouse position and screen dimensions
mouse_x = self.root.winfo_pointerx()
mouse_y = self.root.winfo_pointery()
screen_width = self.root.winfo_screenwidth()
screen_height = self.root.winfo_screenheight()
# Calculate position
x = min(max(mouse_x - popup_width//2, 0), screen_width - popup_width)
y = min(max(mouse_y - popup_height//2, 0), screen_height - popup_height)
summary_popup.geometry(f"{popup_width}x{popup_height}+{x}+{y}")
summary_popup.deiconify()
def summarize(input_text):
try:
# First message from the model
self.root.after(0, lambda: self.update_summary_chat(
chat_display, self.summarizer.name, ""))
current_response = ""
for output in self.summarizer.process(input_text):
# Only send the new part of the response
if output.startswith(current_response):
new_text = output[len(current_response):]
if new_text: # Only update if there's new text
current_response = output
self.root.after(0, lambda t=new_text: chat_display.config(state='normal') or
chat_display.insert("end-1c", t) or
chat_display.config(state='disabled'))
except Exception as e:
print(e)
threading.Thread(target=lambda: summarize(text), daemon=True).start()
def update_summary_chat(self, chat_display: tk.Text, sender: str, message: str):
"""Update the summary chat display with new message"""
chat_display.config(state='normal')
# Add the message with appropriate styling
chat_display.insert(tk.END, "\n") # Add spacing
chat_display.insert(tk.END, sender,
"assistant_name" if sender == self.summarizer.name else "user_name")
chat_display.insert(tk.END, f": {message}")
chat_display.see(tk.END)
chat_display.config(state='disabled')
def process_summary_question(self, original_text: str, question: str,
chat_display: tk.Text, chat_input: tk.Text):
"""Process a follow-up question about the summarized text"""
if not question.strip():
return
# Clear input
chat_input.delete("1.0", tk.END)
# Display user question
self.update_summary_chat(chat_display, self.username, question)
def process_question():
try:
# First message from the model
self.root.after(0, lambda: self.update_summary_chat(
chat_display, self.summarizer.name, ""))
current_response = ""
for output in self.summarizer.process(original_text, question=question):
# Only send the new part of the response
if output.startswith(current_response):
new_text = output[len(current_response):]
if new_text: # Only update if there's new text
current_response = output
self.root.after(0, lambda t=new_text: chat_display.config(state='normal') or
chat_display.insert("end-1c", t) or
chat_display.config(state='disabled'))
except Exception as e:
print(e)
threading.Thread(target=process_question, daemon=True).start()
def get_selected_text(self):
# Copy selected text to clipboard
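# Note: Key.cmd is macOS-specific; on Windows/Linux this would be Key.ctrl instead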
with self.keyboard_controller.pressed(Key.cmd):
self.keyboard_controller.tap('c')
# Small delay to ensure clipboard is updated
time.sleep(0.1)
# Get text from clipboard
return pyperclip.paste()
def destroy_active_popups(self):
# Destroy all active popups
for popup in self.active_popups:
try:
popup.destroy()
except Exception:
pass # Popup might already be destroyed
self.active_popups = []
def show_draft_input(self, summary_x, summary_y, summary_width):
draft_popup = tk.Toplevel(self.root)
draft_popup.withdraw() # Hide initially
self.active_popups.append(draft_popup)
draft_popup.title("Draft Reply")
draft_popup.configure(bg='#f6f8fa')
# Create frame for the two columns
columns_frame = tk.Frame(draft_popup, bg='#f6f8fa')
columns_frame.pack(expand=True, fill='both', padx=20, pady=20)
# Calculate required height based on summary content
num_lines = len(self.last_summary.split('\n'))
line_height = max(num_lines * 1.5, 15)
widget_height = min(line_height, 40)
# Column 1: Draft Input
input_frame = tk.Frame(
columns_frame,
bg='white',
highlightbackground='#e1e4e8',
highlightthickness=1,
bd=0,
relief=tk.FLAT
)
input_frame.pack(side=tk.LEFT, padx=5, fill='both', expand=True)
tk.Label(input_frame, text="Your Reply", bg='white', fg='#24292e', font=('Segoe UI', 12)).pack(padx=15, pady=(12,0), anchor='w')
text_input = tk.Text(
input_frame,
height=widget_height,
width=30,
wrap=tk.WORD,
borderwidth=0,
highlightthickness=0,
selectbackground='#e1e4e8',
selectforeground='#24292e',
insertwidth=2, # Width of cursor
insertbackground='#0066FF', # Color of cursor matching our theme
insertofftime=500, # Cursor blink off time in milliseconds
insertontime=500 # Cursor blink on time in milliseconds
)
text_input.pack(fill='both', expand=True, padx=15, pady=12)
text_input.config(bg='white', fg='#24292e', font=('Segoe UI', 12))
# Column 2: Improved Text
improved_frame = tk.Frame(
columns_frame,
bg='white',
highlightbackground='#e1e4e8',
highlightthickness=1,
bd=0,
relief=tk.FLAT
)
improved_frame.pack(side=tk.LEFT, padx=5, fill='both', expand=True)
tk.Label(improved_frame, text="Improved Reply", bg='white', fg='#24292e', font=('Segoe UI', 12)).pack(padx=15, pady=(12,0), anchor='w')
improved_text = tk.Text(
improved_frame,
height=widget_height,
width=30,
wrap=tk.WORD,
borderwidth=0,
highlightthickness=0,
selectbackground='#e1e4e8',
selectforeground='#24292e'
)
improved_text.pack(fill='both', expand=True, padx=15, pady=12)
improved_text.config(state='disabled', bg='white', fg='#586069', font=('Segoe UI', 12))
# Add Copy button using tkmacosx
copy_btn = Button(
improved_frame,
text="Copy",
command=lambda: pyperclip.copy(improved_text.get("1.0", "end-1c")),
font=('Segoe UI', 14),
bg='#0066FF',
fg='white',
activebackground='#0052CC',
activeforeground='white',
borderless=True,
focuscolor='',
padx=20,
pady=8,
cursor='hand2'
)
copy_btn.pack(pady=(0, 12))
# Add "Smol Improvement?" button using tkmacosx
improve_btn = Button(
input_frame,
text="Smol Improvement?",
command=lambda: self.generate_improved_text(
text_input.get("1.0", "end-1c"),
improved_text),
font=('Segoe UI', 14),
bg='#0066FF',
fg='white',
activebackground='#0052CC',
activeforeground='white',
borderless=True,
focuscolor='',
padx=20,
pady=8,
cursor='hand2'
)
improve_btn.pack(pady=(0, 12))
# Position window relative to the summary window's position
draft_popup.update_idletasks()
screen_width = draft_popup.winfo_screenwidth()
screen_height = draft_popup.winfo_screenheight()
popup_width = draft_popup.winfo_width()
if popup_width == 1:
popup_width = 800
popup_height = draft_popup.winfo_height()
# Calculate center point of the summary window
summary_center_x = summary_x + summary_width//2
# Center the new window on the same point, but ensure it stays on screen
new_x = max(min(summary_center_x - popup_width//2, screen_width - popup_width), 0)
# Position vertically based on screen space available
if summary_y > screen_height / 2:
new_y = max(summary_y - popup_height - 10, 0) # 10px gap above summary
else:
new_y = min(summary_y + 10, screen_height - popup_height) # 10px gap below summary
draft_popup.geometry(f"+{new_x}+{new_y}")
draft_popup.deiconify()
def generate_improved_text(self, text, improved_text_widget):
# Get reference to the improve button
improve_btn = improved_text_widget.master.master.children['!frame2'].children['!button']
# Disable the button and change text to show processing
improve_btn.configure(
state='disabled',
text="Generating...",
bg='#A8A8A8', # Grayed out color
)
# Update the improve function
improved_text_widget.config(state='normal')
improved_text_widget.delete("1.0", tk.END)
improved_text_widget.insert("1.0", "Generating improvement...")
improved_text_widget.config(state='disabled')
def improve(input_text):
try:
for output in self.rewriter.process(input_text):
self.root.after(0, lambda t=output: self.update_improved_text(improved_text_widget, t))
# Re-enable the button and restore its original label after generation is complete
self.root.after(0, lambda: improve_btn.config(
state='normal',
text="Smol Improvement?",
bg='#0066FF'
))
except Exception as e:
# Make sure to re-enable the button even if there's an error
self.root.after(0, lambda: improve_btn.config(
state='normal',
text="Smol Improvement?",
bg='#0066FF'
))
raise e
threading.Thread(target=lambda: improve(text), daemon=True).start()
def update_improved_text(self, text_widget, new_text):
text_widget.config(state='normal')
text_widget.delete("1.0", tk.END)
text_widget.insert("1.0", new_text)
text_widget.config(state='disabled')
def show_agent_input(self):
# Create new popup for agent input
agent_popup = tk.Toplevel(self.root)
self.active_popups.append(agent_popup)
agent_popup.title("SmolAgent")
# Create input area
input_frame = tk.Frame(agent_popup)
input_frame.pack(padx=10, pady=5, fill='both', expand=True)
tk.Label(input_frame, text="What would you like me to do?").pack()
text_input = tk.Text(input_frame, height=4, width=50, wrap=tk.WORD)
text_input.pack(pady=5)
# Create output area
output_frame = tk.Frame(agent_popup)
output_frame.pack(padx=10, pady=5, fill='both', expand=True)
tk.Label(output_frame, text="Response:").pack()
output_text = tk.Text(output_frame, height=8, width=50, wrap=tk.WORD)
output_text.pack(pady=5)
output_text.config(state='disabled')
def process_agent_request():
query = text_input.get("1.0", "end-1c")
output_text.config(state='normal')
output_text.delete("1.0", tk.END)
output_text.insert("1.0", "Processing request...\n")
output_text.config(state='disabled')
def run_agent():
full_response = []
for response in self.agent.process(query):
full_response.append(response)
self.root.after(0, lambda t="\n".join(full_response): self.update_agent_output(output_text, t))
threading.Thread(target=run_agent, daemon=True).start()
# Add Submit button
submit_btn = tk.Button(agent_popup, text="Submit", command=process_agent_request)
submit_btn.pack(pady=5)
# Position window
agent_popup.update_idletasks()
screen_width = agent_popup.winfo_screenwidth()
screen_height = agent_popup.winfo_screenheight()
popup_width = agent_popup.winfo_width()
popup_height = agent_popup.winfo_height()
x = (screen_width - popup_width) // 2
y = (screen_height - popup_height) // 2
agent_popup.geometry(f"+{x}+{y}")
def update_agent_output(self, text_widget, new_text):
text_widget.config(state='normal')
text_widget.delete("1.0", tk.END)
text_widget.insert("1.0", new_text)
text_widget.config(state='disabled')
def show_chat_window(self):
chat_window = tk.Toplevel(self.root)
self.active_popups.append(chat_window)
chat_window.title("SmolChat")
# Configure the chat window to be resizable
chat_window.geometry("800x800")
chat_window.minsize(600, 600)
# Create split view with history panel
history_panel = tk.Frame(chat_window, width=200, padx=5, pady=10)
history_panel.pack(side=tk.LEFT, fill=tk.Y)
history_panel.pack_propagate(False) # Maintain width
# Create main chat area
main_frame = tk.Frame(chat_window, padx=10, pady=10)
main_frame.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
# Create chat display first
chat_display = tk.Text(main_frame, wrap=tk.WORD)
chat_display.pack(fill=tk.BOTH, expand=True)
chat_display.config(state='disabled')
# Add input field and send button
input_frame = tk.Frame(main_frame)
input_frame.pack(fill=tk.X, pady=(10, 0))
chat_input = tk.Text(input_frame, height=3, wrap=tk.WORD)
chat_input.pack(side=tk.LEFT, fill=tk.X, expand=True)
send_btn = tk.Button(input_frame, text="Send",
command=lambda: self.process_chat_message(
chat_input.get("1.0", "end-1c").strip(),
chat_display))
send_btn.pack(side=tk.RIGHT, padx=(10, 0))
# Bind Enter key to send message
chat_input.bind("<Return>", lambda e: [
self.process_chat_message(
chat_input.get("1.0", "end-1c").strip(),
chat_display),
"break" # Prevent the default newline behavior
][1])
# Now add the New Chat button (after chat_display is created)
new_chat_btn = tk.Button(history_panel, text="New Chat",
command=lambda: self.start_new_chat(chat_display))
new_chat_btn.pack(fill=tk.X, pady=(0, 10))
# Add listbox for chat history
history_label = tk.Label(history_panel, text="Previous Chats")
history_label.pack()
chat_listbox = tk.Listbox(history_panel, height=20)
chat_listbox.pack(fill=tk.BOTH, expand=True)
# Get and sort chats by modification time (newest first)
saved_chats = self.chatter.get_saved_chats()
sorted_chats = sorted(
saved_chats,
key=lambda x: os.path.getmtime(os.path.join("saved_chats", f"chat_{x}.json")),
reverse=True
)
# Populate chat history with sorted chats
for chat_id in sorted_chats:
chat_listbox.insert(tk.END, chat_id)
# Bind selection event
chat_listbox.bind('<<ListboxSelect>>',
lambda e: self.load_selected_chat(chat_listbox, chat_display))
# Add scrollbar to listbox
history_scrollbar = tk.Scrollbar(history_panel, command=chat_listbox.yview)
history_scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
chat_listbox.config(yscrollcommand=history_scrollbar.set)
# Store references to UI elements that need to be disabled during chat
self.chat_controls = {
'listbox': chat_listbox,
'new_chat_btn': new_chat_btn
}
# Add text tags with softer colors
chat_display.tag_configure("assistant_name", foreground="#E57373") # Soft red
chat_display.tag_configure("user_name", foreground="#7986CB") # Soft blue
def load_selected_chat(self, listbox: tk.Listbox, chat_display: tk.Text):
selection = listbox.curselection()
if selection:
chat_id = listbox.get(selection[0])
self.chatter.load_chat(chat_id)
self.display_chat_history(chat_display)
def start_new_chat(self, chat_display):
# Only save the current chat if it has been modified since loading
if self.chatter.has_current_chat() and self.chatter.is_chat_modified():
# Get full chat history as a single string
chat_history = "\n".join([f"{msg.role}: {msg.content}"
for msg in self.chatter.get_chat_history()])
# If we're continuing an existing chat, use its ID
current_chat_id = self.chatter.get_current_chat_id()
if current_chat_id:
# Save using existing ID
self.chatter.save_current_chat(current_chat_id, overwrite=True)
else:
# Generate new title for new chat
summary = ""
for chunk in self.titler.process(chat_history):
summary = chunk
summary_title = summary[:50].strip().replace("/", "-").replace("\\", "-")
self.chatter.save_current_chat(summary_title, overwrite=True)
# Start new chat
self.chatter.start_new_chat()
# Clear and update display
self.display_chat_history(chat_display)
# Update the chat history listbox with sorted chats
listbox = self.chat_controls['listbox']
listbox.delete(0, tk.END)
saved_chats = self.chatter.get_saved_chats()
sorted_chats = sorted(
saved_chats,
key=lambda x: os.path.getmtime(os.path.join("saved_chats", f"chat_{x}.json")),
reverse=True
)
for chat_id in sorted_chats:
listbox.insert(tk.END, chat_id)
def process_chat_message(self, message: str, chat_display: tk.Text):
if not message.strip(): # Skip empty messages
return
# Disable chat controls while processing
self.chat_controls['listbox'].config(state='disabled')
self.chat_controls['new_chat_btn'].config(state='disabled')
chat_display.config(state='normal')
# Add extra newline before user message for spacing
chat_display.insert(tk.END, "\n") # Start new line
chat_display.insert(tk.END, self.username, "user_name") # Add colored username
chat_display.insert(tk.END, f": {message}\n") # Add message
chat_display.insert(tk.END, "\n") # Add spacing
chat_display.insert(tk.END, self.chatter.name, "assistant_name") # Add colored AI name
chat_display.insert(tk.END, ": ") # Add separator
# Clear the input field (get its reference from chat_display's master)
input_frame = chat_display.master.children['!frame']
chat_input = input_frame.children['!text']
chat_input.delete("1.0", tk.END)
# Initialize an empty string to store the full response
self.current_response = ""
chat_display.see(tk.END)
chat_display.config(state='disabled')
def chat_response():
try:
for chunk in self.chatter.process(message):
# Only send the new part of the response
if chunk.startswith(self.current_response):
new_text = chunk[len(self.current_response):]
if new_text: # Only update if there's new text
self.current_response = chunk
self.root.after(0, lambda t=new_text: self.update_chat_display(chat_display, t))
self.root.after(0, lambda t="\n\n": self.update_chat_display(chat_display, t))
finally:
# Re-enable chat controls after response is complete
self.root.after(0, self.enable_chat_controls)
threading.Thread(target=chat_response, daemon=True).start()
def enable_chat_controls(self):
"""Re-enable chat controls after response is complete"""
self.chat_controls['listbox'].config(state='normal')
self.chat_controls['new_chat_btn'].config(state='normal')
def update_chat_display(self, chat_display: tk.Text, new_text: str):
chat_display.config(state='normal')
chat_display.insert(tk.END, new_text)
chat_display.see(tk.END)
chat_display.config(state='disabled')
def display_chat_history(self, chat_display: tk.Text):
chat_display.config(state='normal')
chat_display.delete("1.0", tk.END)
# Configure text tags with softer colors
chat_display.tag_configure("assistant_name", foreground="#E57373") # Soft red
chat_display.tag_configure("user_name", foreground="#7986CB") # Soft blue
for message in self.chatter.get_chat_history():
if message.role == "user":
chat_display.insert(tk.END, "\n") # Add spacing
chat_display.insert(tk.END, self.username, "user_name") # Change "You" to username
chat_display.insert(tk.END, f": {message.content}\n")
else:
chat_display.insert(tk.END, "\n") # Add spacing
chat_display.insert(tk.END, self.chatter.name, "assistant_name")
chat_display.insert(tk.END, f": {message.content}\n")
chat_display.config(state='disabled')
chat_display.see(tk.END)
# Run the app
root = tk.Tk()
# Set default font size for all tkinter widgets
default_font = ('Segoe UI', 14) # Use Segoe UI instead of TkDefaultFont
root.option_add("*Font", default_font)
root.option_add("*Entry.Font", default_font)
root.option_add("*Text.Font", default_font)
root.option_add("*Button.Font", default_font)
root.option_add("*Label.Font", default_font)
app = TextPopupApp(root)
root.mainloop()
tkmacosx>=1.0.5
pynput>=1.7.7
llama-cpp-python>=0.3.1
pyperclip>=1.9.0
transformers>=4.46.2
pygments>=2.18.0
from .base import SmolTool
from typing import Generator, List, Dict, Any, Callable
import json
import re
from datetime import datetime
import random
from transformers import tool, CodeAgent
import requests
import webbrowser
@tool
def get_random_number_between(min: int, max: int) -> int:
"""
Gets a random number between min and max.
Args:
min: The minimum number.
max: The maximum number.
Returns:
A random number between min and max.
"""
return random.randint(min, max)
@tool
def get_weather(city: str) -> str:
"""
Returns the weather forecast for a given city.
Args:
city: The name of the city.
Returns:
A string with the current conditions and temperature fetched from wttr.in.
"""
url = 'https://wttr.in/{}?format=+%C,+%t'.format(city)
res = requests.get(url).text
return f"The weather in {city} is {res.split(',')[0]} with a high of {res.split(',')[1][:-2]} degrees Celsius."
@tool
def get_current_time() -> str:
"""
This is a tool that returns the current time.
It returns the current time as HH:MM.
"""
return f"The current time is {datetime.now().hour}:{datetime.now().minute}."
@tool
def open_webbrowser(url: str) -> str:
"""
This is a tool that opens a web browser to the given website.
If the user asks to open a website or a browser, you should use this tool.
Args:
url: The url to open.
"""
webbrowser.open(url)
return f"I opened {url.replace('https://', '').replace('www.', '')} in the browser."
class SmolToolAgent(SmolTool):
def __init__(self):
self.tools = [get_random_number_between, get_current_time, open_webbrowser, get_weather]
self.toolbox = {tool.name: tool for tool in self.tools}
self.json_code_agent = CodeAgent(tools=self.tools, llm_engine=self.llm_engine, system_prompt=self._get_system_prompt())
super().__init__(
model_repo="andito/SmolLM2-1.7B-Instruct-F16-GGUF",
model_filename="smollm2-1.7b-8k-dpo-f16.gguf",
system_prompt=self._get_system_prompt(),
prefix_text=""
)
def llm_engine(self, messages, stop_sequences=["Task", "<|endoftext|>"]) -> str:
output = ""
for chunk in self.model.create_chat_completion(
messages=messages,
max_tokens=2048,
temperature=0.0,
top_p=1.0,
top_k=50,
repeat_penalty=1.0,
stream=True
):
content = chunk['choices'][0]['delta'].get('content')
if content:
if content in ["<end_action>", "<|endoftext|>"]:
break
output += content
return output
def _get_system_prompt(self) -> str:
return """You are an expert in composing functions. You are given a question and a set of possible functions.
Based on the question, you will need to make one or more function/tool calls to achieve the purpose.
If none of the functions can be used, point it out and refuse to answer.
If the given question lacks the parameters required by the function, also point it out.
You have access to the following tools:
<<tool_descriptions>>
<<managed_agents_descriptions>>
You can use imports in your code, but only from the following list of modules: <<authorized_imports>>
The output MUST strictly adhere to the following format, and NO other text MUST be included.
The example format is as follows. Please make sure the parameter type is correct. If no function call is needed, please make the tool calls an empty list '[]'.
<tool_call>[
{"name": "func_name1", "arguments": {"argument1": "value1", "argument2": "value2"}},
... (more tool calls as required)
]</tool_call>"""
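# Example: for "What's the weather in London?" the model is expected to emit
# <tool_call>[{"name": "get_weather", "arguments": {"city": "London"}}]</tool_call>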
def _parse_response(self, text: str) -> List[Dict[str, Any]]:
pattern = r"<tool_call>(.*?)</tool_call>"
matches = re.findall(pattern, text, re.DOTALL)
if matches:
return json.loads(matches[0])
return text
def _call_tools(self, tool_calls: List[Dict[str, Any]]) -> List[str]:
tool_responses = []
for tool_call in tool_calls:
if tool_call["name"] in self.toolbox:
tool_responses.append(
self.toolbox[tool_call["name"]](**tool_call["arguments"])
)
else:
tool_responses.append(f"Tool {tool_call['name']} not found.")
return tool_responses
def process(self, text: str) -> Generator[str, None, None]:
response = self.json_code_agent.run(text, return_generated_code=True)
# Parse and execute the tool calls
try:
tool_calls = self._parse_response(response)
if tool_calls in [response, [], ""]:
yield response
return
tool_responses = self._call_tools(tool_calls)
except Exception as e:
print("error", e)
yield response
return
# Yield each tool response
for response in tool_responses:
yield str(response)
from abc import ABC, abstractmethod
from typing import Generator, List, Dict, Any, Union, Tuple
from llama_cpp import Llama
class SmolTool(ABC):
# Class-level cache for model instances
_model_cache: Dict[Tuple[str, str], Llama] = {}
def __init__(self, model_repo: str, model_filename: str, system_prompt: str, prefix_text: str = "", n_ctx: int = 8192):
self.system_prompt = system_prompt
self.prefix_text = prefix_text
# Create a cache key from the model repo and filename
cache_key = (model_repo, model_filename)
# Track if this is a new model load
is_new_model = cache_key not in self._model_cache
# Try to get the model from cache, or create and cache a new one
if is_new_model:
print(f"Loading model {model_filename} from {model_repo}...")
self._model_cache[cache_key] = Llama.from_pretrained(
repo_id=model_repo,
filename=model_filename,
n_ctx=n_ctx,
verbose=False
)
self.model = self._model_cache[cache_key]
# Only warm up for newly loaded models
if is_new_model:
self._warm_up()
def _warm_up(self):
"""Warm up the model with a test prompt"""
print(f"Warming up {self.__class__.__name__}...")
test_text = "This is a test message to warm up the model."
# Consume the generator to complete the warm-up
for _ in self.process(test_text):
pass
print(f"{self.__class__.__name__} ready!")
@abstractmethod
def process(self, text: str) -> Generator[str, None, None]:
"""Process the input text and yield results as they're generated"""
pass
def _create_chat_completion(
self,
messages: List[Dict[str, str]],
temperature: float = 0.4,
top_p: float = 0.9,
top_k: int = 50,
repeat_penalty: float = 1.2,
max_tokens: int = 256
) -> Generator[str, None, None]:
"""Helper method to create chat completions with standard parameters"""
output = ""
for chunk in self.model.create_chat_completion(
messages=messages,
max_tokens=max_tokens,
temperature=temperature,
top_p=top_p,
top_k=top_k,
repeat_penalty=repeat_penalty,
stream=True
):
content = chunk['choices'][0]['delta'].get('content')
if content:
if content in ["<end_action>", "<|endoftext|>"]:
break
output += content
yield output
from .base import SmolTool
from typing import Generator, List, Dict
from dataclasses import dataclass
from datetime import datetime
import json
import os
@dataclass
class ChatMessage:
role: str # "user" or "assistant"
content: str
timestamp: datetime
# Add methods to convert to/from dict for JSON serialization
def to_dict(self):
return {
'role': self.role,
'content': self.content,
'timestamp': self.timestamp.isoformat()
}
@classmethod
def from_dict(cls, data):
return cls(
role=data['role'],
content=data['content'],
timestamp=datetime.fromisoformat(data['timestamp'])
)
class SmolChatter(SmolTool):
def __init__(self):
self.chat_history: List[ChatMessage] = []
self.chat_archive: Dict[str, List[ChatMessage]] = {}
self.current_chat_id = None
self.chats_dir = "saved_chats"
self._original_chat_state = None # To track modifications
self.name = "SmolLM2-1.7B"
# Create chats directory if it doesn't exist
if not os.path.exists(self.chats_dir):
os.makedirs(self.chats_dir)
super().__init__(
model_repo="andito/SmolLM2-1.7B-Instruct-F16-GGUF",
model_filename="smollm2-1.7b-8k-dpo-f16.gguf",
system_prompt="You are a helpful AI assistant named SmolLM, trained by Hugging Face..",
)
def start_new_chat(self):
"""Start a new chat with a unique ID"""
self.current_chat_id = datetime.now().strftime("%Y%m%d_%H%M%S")
self.chat_history = []
self._original_chat_state = None
def has_current_chat(self) -> bool:
"""Check if there are any messages in the current chat"""
return len(self.chat_history) > 0
def save_current_chat(self, title: str = None, overwrite: bool = False):
"""Save the current chat to disk if it has any messages"""
if not self.chat_history:
return
if title:
# If overwriting, use existing chat_id if it matches the title
if not overwrite or self.current_chat_id != title:
self.current_chat_id = title
elif not self.current_chat_id:
self.current_chat_id = datetime.now().strftime("%Y%m%d_%H%M%S")
# Convert chat history to serializable format
chat_data = {
'id': self.current_chat_id,
'messages': [msg.to_dict() for msg in self.chat_history]
}
# Save to file
filename = f"{self.chats_dir}/chat_{self.current_chat_id}.json"
with open(filename, 'w') as f:
json.dump(chat_data, f)
# Update original state to reflect saved state
self._original_chat_state = [msg.to_dict() for msg in self.chat_history]
def load_chat(self, chat_id: str):
"""Load a specific chat from disk"""
filename = f"{self.chats_dir}/chat_{chat_id}.json"
try:
with open(filename, 'r') as f:
data = json.load(f)
self.current_chat_id = data['id']
self.chat_history = [ChatMessage.from_dict(msg) for msg in data['messages']]
# Store original state for modification tracking
self._original_chat_state = [msg.to_dict() for msg in self.chat_history]
except FileNotFoundError:
print(f"Chat {chat_id} not found")
def is_chat_modified(self) -> bool:
"""Check if the current chat has been modified since loading"""
if self._original_chat_state is None:
# New chat that hasn't been saved yet
return len(self.chat_history) > 0
current_state = [msg.to_dict() for msg in self.chat_history]
return current_state != self._original_chat_state
def get_saved_chats(self) -> List[str]:
"""Get list of saved chat IDs"""
chats = []
for filename in os.listdir(self.chats_dir):
if filename.startswith('chat_') and filename.endswith('.json'):
chat_id = filename[5:-5] # Remove 'chat_' prefix and '.json' suffix
chats.append(chat_id)
return sorted(chats, reverse=True) # Most recent first
def _warm_up(self):
super()._warm_up()
self.clear_chat_history()
def process(self, text: str) -> Generator[str, None, None]:
# Add user message to history
self.chat_history.append(ChatMessage(
role="user",
content=text,
timestamp=datetime.now()
))
# Build messages including chat history
messages = [{"role": "system", "content": self.system_prompt}]
# Include the full chat history for context
for msg in self.chat_history:
messages.append({"role": msg.role, "content": msg.content})
# Generate response
response = ""
for chunk in self._create_chat_completion(messages, max_tokens=1024):
response = chunk
yield chunk
# Add assistant's response to history
self.chat_history.append(ChatMessage(
role="assistant",
content=response,
timestamp=datetime.now()
))
def get_chat_history(self) -> List[ChatMessage]:
return self.chat_history
def clear_chat_history(self):
self.chat_history = []
def get_current_chat_id(self) -> str:
"""Get the ID of the current chat"""
return self.current_chat_id
from .base import SmolTool
from typing import Generator
class SmolRewriter(SmolTool):
def __init__(self):
super().__init__(
model_repo="andito/SmolLM2-1.7B-Instruct-F16-GGUF",
model_filename="smollm2-1.7b-8k-dpo-f16.gguf",
system_prompt="You are an AI writing assistant. Your task is to rewrite the user's email to make it more professional and approachable while maintaining its main points and key message. Do not return any text other than the rewritten message.",
prefix_text="Rewrite the message below to make it more professional and approachable while maintaining its main points and key message. Do not add any new information or return any text other than the rewritten message\nThe message:"
)
def process(self, text: str) -> Generator[str, None, None]:
messages = [
{"role": "system", "content": self.system_prompt},
{"role": "user", "content": f"{self.prefix_text}\n{text}"}
]
yield from self._create_chat_completion(messages, temperature=0.4, repeat_penalty=1.0, top_k=0, max_tokens=1024)
from .base import SmolTool
from typing import Generator, Optional
from dataclasses import dataclass
from datetime import datetime
from typing import List
@dataclass
class SummaryMessage:
role: str # "user" or "assistant"
content: str
timestamp: datetime
class SmolSummarizer(SmolTool):
def __init__(self):
self.name = "SmolLM2-1.7B"
super().__init__(
model_repo="andito/SmolLM2-1.7B-Instruct-F16-GGUF",
model_filename="smollm2-1.7b-8k-dpo-f16.gguf",
system_prompt="Concisely summarize the main points of the input text in up to three sentences, focusing on key information and events.",
)
def process(self, text: str, question: Optional[str] = None) -> Generator[str, None, None]:
if question is None:
print("Summarizing text")
prompt = f"{self.prefix_text}\n{text}"
messages = [
{"role": "system", "content": self.system_prompt},
{"role": "user", "content": prompt},
{"role": "assistant", "content": "This is a short summary of the text:"}
]
else:
print("Answering question")
prompt = f"Original text:\n{text}\n\nQuestion: {question}"
messages = [
{"role": "user", "content": prompt},
]
for chunk in self._create_chat_completion(messages, max_tokens=1024, temperature=0.1, top_p=0.9):
yield chunk
from .base import SmolTool
from typing import Generator
class SmolTitler(SmolTool):
def __init__(self):
super().__init__(
model_repo="andito/SmolLM2-1.7B-Instruct-F16-GGUF",
model_filename="smollm2-1.7b-8k-dpo-f16.gguf",
system_prompt="",
prefix_text="Create a title for this conversation:",
)
def process(self, text: str) -> Generator[str, None, None]:
messages = [
{"role": "user", "content": f"{self.prefix_text}\n{text}"}
]
yield from self._create_chat_completion(messages, max_tokens=128, temperature=0.6, top_p=0.9, top_k=0, repeat_penalty=1.1)