# General dependencies (not tied to one of the named sections below).
# Version constraints in this list: numpy must stay below 2.0.0, transformers
# below 4.54.0, and gradio must be greater than 5. Everything else is unpinned.
numpy<2.0.0
datasets
scipy
# torch, torchvision and torchaudio are intentionally left commented out, so
# pip will not install them from this file.
# NOTE(review): presumably they have to be installed separately to match the
# local CUDA/CPU build -- confirm and document the expected install command.
#torch
#torchvision
#torchaudio
tqdm
transformers<4.54.0
math_verify
word2number
accelerate
rapidfuzz
colorlog
appdirs
datasketch
modelscope
addict
pytest
rich
docstring_parser
pydantic
nltk
colorama
gradio>5
json5
tiktoken

# text2sql
func_timeout
sqlglot
pymysql

# general text
fasttext-wheel
langkit
openai
sentencepiece
# datasketch is needed by this section as well, but it is already declared
# once in the first list of this file, so it is not repeated here.
# presidio_analyzer is installed with its "transformers" extra.
presidio_analyzer[transformers]
presidio_anonymizer
# vendi-score is the only exactly pinned package in this file.
vendi-score==0.0.3
google-api-core
google-api-python-client
evaluate
contractions
symspellpy
simhash

# knowledge base cleaning
chonkie
trafilatura
lxml_html_clean
pymupdf
# httpx is requested here with its "socks" extra; plain httpx is also listed
# in the dataflow agent section of this file.
httpx[socks]

# dataflow agent
cloudpickle
fastapi
# httpx is also requested with the "socks" extra in the knowledge base
# cleaning section of this file; pip treats both lines as the same package.
httpx
pandas
psutil
pyfiglet
pyyaml
requests
termcolor
uvicorn
sseclient-py

# speech
# Dependencies for the speech features; both are unpinned.
librosa
soundfile

# map visualization
# matplotlib was removed because it is no longer needed; this section currently has no dependencies.

# google vertex ai
# google-cloud-aiplatform has a lower bound of 1.55; the other packages in
# this section are unpinned.
google-cloud-aiplatform>=1.55
google-cloud-bigquery
google-genai
gcsfs
