"vscode:/vscode.git/clone" did not exist on "af9ee90e98e4089855f9aab7ae56da40e0af16e5"
Commit e1ae9aa0 authored by Ryan McCormick's avatar Ryan McCormick Committed by GitHub
Browse files

refactor: Simplify codespell configuration, allow contractions, add custom dictionary (#28)

parent e159e53f
...@@ -120,7 +120,7 @@ $global:copyright_results = @{ ...@@ -120,7 +120,7 @@ $global:copyright_results = @{
# === end common.ps1 extensions === # === end common.ps1 extensions ===
$ignored_files = @('.clang-format', '.gitattributes', '.gitignore', '.gitkeep', '.patch', 'Cargo.lock', 'LICENSE', 'uv.lock', 'rust-toolchain.toml') $ignored_files = @('.clang-format', '.gitattributes', '.gitignore', '.gitkeep', '.patch', 'Cargo.lock', 'LICENSE', 'uv.lock', 'rust-toolchain.toml', 'codespell.txt')
write-debug "<copyright-check> ignored_files = ['$($ignored_files -join "','")']." write-debug "<copyright-check> ignored_files = ['$($ignored_files -join "','")']."
$ignored_paths = @('.github', '.mypy_cache', '.pytest_cache', 'lib/llm/tests/data/sample-models') $ignored_paths = @('.github', '.mypy_cache', '.pytest_cache', 'lib/llm/tests/data/sample-models')
write-debug "<copyright-check> ignored_paths = ['$($ignored_paths -join "','")']." write-debug "<copyright-check> ignored_paths = ['$($ignored_paths -join "','")']."
......
...@@ -41,9 +41,8 @@ repos: ...@@ -41,9 +41,8 @@ repos:
rev: v2.2.4 rev: v2.2.4
hooks: hooks:
- id: codespell - id: codespell
# Use pyproject.toml for codespell configuration
additional_dependencies: [tomli] additional_dependencies: [tomli]
args: ["--toml", "pyproject.toml"]
exclude: (?x)^(.*stemmer.*|.*stop_words.*|^CHANGELOG.md$|.*lib/llm/tests/data.*)
# More details about these pre-commit hooks here: # More details about these pre-commit hooks here:
# https://pre-commit.com/hooks.html # https://pre-commit.com/hooks.html
......
dynamo->dynemo
dynmo->dynemo
...@@ -435,7 +435,7 @@ impl Decoder { ...@@ -435,7 +435,7 @@ impl Decoder {
// return only new bytes after pre_append .. offset+seq.len() // return only new bytes after pre_append .. offset+seq.len()
// example: seq = "ox", token = "boxes", return "b" // example: seq = "ox", token = "boxes", return "b"
// note: this changes when we start jailing tokens for partial matches // note: this changes when we start jailing tokens for partial matches
// on the suffix of teh jail with prefixes of the stop sequences // on the suffix of the jail with prefixes of the stop sequences
// //
// we might have returned a partial match, if so, then offset < pre_append // we might have returned a partial match, if so, then offset < pre_append
// in that case, we return the empty string // in that case, we return the empty string
......
...@@ -129,8 +129,8 @@ pub fn tojson(value: Value, kwargs: Kwargs) -> Result<Value, Error> { ...@@ -129,8 +129,8 @@ pub fn tojson(value: Value, kwargs: Kwargs) -> Result<Value, Error> {
let mut buf = Vec::new(); let mut buf = Vec::new();
let repeat = b" ".repeat(indent); let repeat = b" ".repeat(indent);
let formatter = serde_json::ser::PrettyFormatter::with_indent(&repeat); let formatter = serde_json::ser::PrettyFormatter::with_indent(&repeat);
let mut ser = serde_json::Serializer::with_formatter(&mut buf, formatter); let mut serializer = serde_json::Serializer::with_formatter(&mut buf, formatter);
value.serialize(&mut ser).unwrap(); value.serialize(&mut serializer).unwrap();
String::from_utf8(buf).map_err(|err| { String::from_utf8(buf).map_err(|err| {
Error::new(ErrorKind::BadSerialization, "cannot serialize to JSON").with_source(err) Error::new(ErrorKind::BadSerialization, "cannot serialize to JSON").with_source(err)
}) })
......
...@@ -79,7 +79,7 @@ pub struct DistributedRuntime { ...@@ -79,7 +79,7 @@ pub struct DistributedRuntime {
// local registry for components // local registry for components
// the registry allows us to use share runtime resources across instances of the same component object. // the registry allows us to use share runtime resources across instances of the same component object.
// take fo example two instances of a client to the same remote component. The registry allows us to use // take for example two instances of a client to the same remote component. The registry allows us to use
// a single endpoint watcher for both clients, this keeps the number background tasking watching specific // a single endpoint watcher for both clients, this keeps the number background tasking watching specific
// paths in etcd to a minimum. // paths in etcd to a minimum.
component_registry: component::Registry, component_registry: component::Registry,
......
...@@ -81,7 +81,7 @@ where ...@@ -81,7 +81,7 @@ where
.map_err(PipelineError::GenerateError); .map_err(PipelineError::GenerateError);
// the prolouge is sent to the client to indicate that the stream is ready to receive data // the prolouge is sent to the client to indicate that the stream is ready to receive data
// or if teh generate call failed, the error is sent to the client // or if the generate call failed, the error is sent to the client
let mut stream = match stream { let mut stream = match stream {
Ok(stream) => { Ok(stream) => {
tracing::trace!("Successfully generated response stream; sending prologue"); tracing::trace!("Successfully generated response stream; sending prologue");
......
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
pub mod events; pub mod events;
use super::{DistributedRuntime, Runtime}; use super::{DistributedRuntime, Runtime};
/// A trait for objects taht proivde access to the [Runtime] /// A trait for objects that proivde access to the [Runtime]
pub trait RuntimeProvider { pub trait RuntimeProvider {
fn rt(&self) -> &Runtime; fn rt(&self) -> &Runtime;
} }
......
...@@ -50,17 +50,15 @@ build-backend = "maturin" ...@@ -50,17 +50,15 @@ build-backend = "maturin"
# this is only to allow you to run codespell interactively # this is only to allow you to run codespell interactively
# this also overrides the grpc_generated folder, since it is generated # this also overrides the grpc_generated folder, since it is generated
# TODO add skip files for generated code # Ignore data files and auto-generated files
# skip = "./.git,./.github,./src/grpc_generated" skip = "./.git,./.github,./lib/llm/tests/data,*.lock,*.sum"
skip = "./.git,./.github,./lib/llm/tests/data"
# ignore allowed words used in code
# ignore short words, and typename parameters like OffsetT ignore-words-list = "afterall,ser,ende"
ignore-regex = "\\b(.{1,4}|[A-Z]\\w*T)\\b"
# ignore allowed words
# ignoring atleast to avoid testing::AtLeast from getting flagged
ignore-words-list = "atleast,afterall"
# use the 'clear' dictionary for unambiguous spelling mistakes # use the 'clear' dictionary for unambiguous spelling mistakes
builtin = "clear" builtin = "clear"
# use custom dictionary in addition to the built-in one
dictionary = "./codespell.txt"
# disable warnings about binary files and wrong encoding # disable warnings about binary files and wrong encoding
quiet-level = 3 quiet-level = 3
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment