Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Lmdeploy
Commits
72869ef8
"src/targets/vscode:/vscode.git/clone" did not exist on "d353641ddbad318ad51319547a0f4ff9e2f0b57b"
Unverified
Commit
72869ef8
authored
Dec 12, 2023
by
Li Zhang
Committed by
GitHub
Dec 12, 2023
Browse files
fix cache verification (#821)
parent
cfa80974
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
6 additions
and
10 deletions
+6
-10
src/turbomind/models/llama/BlockManager.cc
src/turbomind/models/llama/BlockManager.cc
+1
-1
src/turbomind/models/llama/SequenceManager.cc
src/turbomind/models/llama/SequenceManager.cc
+5
-7
src/turbomind/models/llama/SequenceManager.h
src/turbomind/models/llama/SequenceManager.h
+0
-2
No files found.
src/turbomind/models/llama/BlockManager.cc
View file @
72869ef8
...
...
@@ -207,7 +207,7 @@ int BlockManager::Lock(const BlockIds& ids)
for
(
const
auto
&
i
:
ids
)
{
auto
&
b
=
blocks_
[
i
];
FT_CHECK
(
is_cached
(
b
));
FT_CHECK
_WITH_INFO
(
is_cached
(
b
),
to_string
(
b
));
if
(
++
b
.
use_count
==
1
)
{
lock
.
push_back
(
i
);
FT_CHECK
(
is_active
(
b
));
...
...
src/turbomind/models/llama/SequenceManager.cc
View file @
72869ef8
...
...
@@ -93,17 +93,16 @@ void SequenceManager::VerifyAndLockCached(const Sequences& sequences)
continue
;
}
FT_CHECK
(
seq
.
blocks
.
size
()
==
seq
.
block_unique_ids
.
size
());
if
(
need_verify_
)
{
const
int
count
=
block_manager_
->
Verify
(
seq
.
blocks
,
seq
.
block_unique_ids
);
seq
.
blocks
.
resize
(
count
);
seq
.
block_unique_ids
.
resize
(
count
);
}
// Verify cache blocks that may be invalidated
const
int
count
=
block_manager_
->
Verify
(
seq
.
blocks
,
seq
.
block_unique_ids
);
seq
.
blocks
.
resize
(
count
);
seq
.
block_unique_ids
.
resize
(
count
);
blocks
.
insert
(
blocks
.
end
(),
seq
.
blocks
.
begin
(),
seq
.
blocks
.
end
());
seq
.
cache_len
=
std
::
min
<
int
>
(
seq
.
cache_len
,
seq
.
blocks
.
size
()
*
block_seq_len_
);
seq
.
status
=
Sequence
::
kLocked
;
}
block_manager_
->
Lock
(
blocks
);
need_verify_
=
false
;
}
void
SequenceManager
::
CommitUnlockAndFree
()
...
...
@@ -435,7 +434,6 @@ auto SequenceManager::Materialize(Sequences sequences,
// evict cached blocks -> free
if
(
schedule
.
evict
)
{
block_manager_
->
Evict
(
schedule
.
evict
);
need_verify_
=
true
;
}
// allocate & assign blocks
...
...
src/turbomind/models/llama/SequenceManager.h
View file @
72869ef8
...
...
@@ -127,8 +127,6 @@ private:
int
rank_
;
size_t
val_offset_
{};
bool
need_verify_
{};
// Use `std::map` to avoid reference invalidation
std
::
map
<
uint64_t
,
Sequence
>
sequences_
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment