Commit e1ae9aa0 authored by Ryan McCormick, committed by GitHub
Browse files

refactor: Simplify codespell configuration, allow contractions, add custom dictionary (#28)

parent e159e53f
......@@ -120,7 +120,7 @@ $global:copyright_results = @{
# === end common.ps1 extensions ===
$ignored_files = @('.clang-format', '.gitattributes', '.gitignore', '.gitkeep', '.patch', 'Cargo.lock', 'LICENSE', 'uv.lock', 'rust-toolchain.toml')
$ignored_files = @('.clang-format', '.gitattributes', '.gitignore', '.gitkeep', '.patch', 'Cargo.lock', 'LICENSE', 'uv.lock', 'rust-toolchain.toml', 'codespell.txt')
write-debug "<copyright-check> ignored_files = ['$($ignored_files -join "','")']."
$ignored_paths = @('.github', '.mypy_cache', '.pytest_cache', 'lib/llm/tests/data/sample-models')
write-debug "<copyright-check> ignored_paths = ['$($ignored_paths -join "','")']."
......
......@@ -41,9 +41,8 @@ repos:
rev: v2.2.4
hooks:
- id: codespell
# Use pyproject.toml for codespell configuration
additional_dependencies: [tomli]
args: ["--toml", "pyproject.toml"]
exclude: (?x)^(.*stemmer.*|.*stop_words.*|^CHANGELOG.md$|.*lib/llm/tests/data.*)
# More details about these pre-commit hooks here:
# https://pre-commit.com/hooks.html
......
dynamo->dynemo
dynmo->dynemo
......@@ -435,7 +435,7 @@ impl Decoder {
// return only new bytes after pre_append .. offset+seq.len()
// example: seq = "ox", token = "boxes", return "b"
// note: this changes when we start jailing tokens for partial matches
// on the suffix of teh jail with prefixes of the stop sequences
// on the suffix of the jail with prefixes of the stop sequences
//
// we might have returned a partial match, if so, then offset < pre_append
// in that case, we return the empty string
......
......@@ -129,8 +129,8 @@ pub fn tojson(value: Value, kwargs: Kwargs) -> Result<Value, Error> {
let mut buf = Vec::new();
let repeat = b" ".repeat(indent);
let formatter = serde_json::ser::PrettyFormatter::with_indent(&repeat);
let mut ser = serde_json::Serializer::with_formatter(&mut buf, formatter);
value.serialize(&mut ser).unwrap();
let mut serializer = serde_json::Serializer::with_formatter(&mut buf, formatter);
value.serialize(&mut serializer).unwrap();
String::from_utf8(buf).map_err(|err| {
Error::new(ErrorKind::BadSerialization, "cannot serialize to JSON").with_source(err)
})
......
......@@ -79,7 +79,7 @@ pub struct DistributedRuntime {
// local registry for components
// the registry allows us to share runtime resources across instances of the same component object.
// take fo example two instances of a client to the same remote component. The registry allows us to use
// take for example two instances of a client to the same remote component. The registry allows us to use
// a single endpoint watcher for both clients; this keeps the number of background tasks watching specific
// paths in etcd to a minimum.
component_registry: component::Registry,
......
......@@ -81,7 +81,7 @@ where
.map_err(PipelineError::GenerateError);
// the prologue is sent to the client to indicate that the stream is ready to receive data
// or if teh generate call failed, the error is sent to the client
// or if the generate call failed, the error is sent to the client
let mut stream = match stream {
Ok(stream) => {
tracing::trace!("Successfully generated response stream; sending prologue");
......
......@@ -16,7 +16,7 @@
pub mod events;
use super::{DistributedRuntime, Runtime};
/// A trait for objects taht proivde access to the [Runtime]
/// A trait for objects that provide access to the [Runtime]
pub trait RuntimeProvider {
/// Returns a shared reference to the [Runtime] this object gives access to.
fn rt(&self) -> &Runtime;
}
......
......@@ -50,17 +50,15 @@ build-backend = "maturin"
# this is only to allow you to run codespell interactively
# this also overrides the grpc_generated folder, since it is generated
# TODO add skip files for generated code
# skip = "./.git,./.github,./src/grpc_generated"
skip = "./.git,./.github,./lib/llm/tests/data"
# ignore short words, and typename parameters like OffsetT
ignore-regex = "\\b(.{1,4}|[A-Z]\\w*T)\\b"
# ignore allowed words
# ignoring atleast to avoid testing::AtLeast from getting flagged
ignore-words-list = "atleast,afterall"
# Ignore data files and auto-generated files
skip = "./.git,./.github,./lib/llm/tests/data,*.lock,*.sum"
# ignore allowed words used in code
ignore-words-list = "afterall,ser,ende"
# use the 'clear' dictionary for unambiguous spelling mistakes
builtin = "clear"
# use custom dictionary in addition to the built-in one
dictionary = "./codespell.txt"
# disable warnings about binary files and wrong encoding
quiet-level = 3
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment