Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
4af26ea9
Commit
4af26ea9
authored
Apr 29, 2024
by
赵小蒙
Browse files
update s3 path join func
parent
0a72b97d
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
4 additions
and
4 deletions
+4
-4
magic_pdf/rw/S3ReaderWriter.py
magic_pdf/rw/S3ReaderWriter.py
+4
-4
No files found.
magic_pdf/rw/S3ReaderWriter.py
View file @
4af26ea9
from
magic_pdf.rw.AbsReaderWriter
import
AbsReaderWriter
from
magic_pdf.rw.AbsReaderWriter
import
AbsReaderWriter
from
magic_pdf.libs.commons
import
parse_aws_param
,
parse_bucket_key
from
magic_pdf.libs.commons
import
parse_aws_param
,
parse_bucket_key
,
join_path
import
boto3
import
boto3
from
loguru
import
logger
from
loguru
import
logger
from
boto3.s3.transfer
import
TransferConfig
from
boto3.s3.transfer
import
TransferConfig
...
@@ -30,7 +30,7 @@ class S3ReaderWriter(AbsReaderWriter):
...
@@ -30,7 +30,7 @@ class S3ReaderWriter(AbsReaderWriter):
if
s3_relative_path
.
startswith
(
"s3://"
):
if
s3_relative_path
.
startswith
(
"s3://"
):
s3_path
=
s3_relative_path
s3_path
=
s3_relative_path
else
:
else
:
s3_path
=
os
.
path
.
join
(
self
.
path
,
s3_relative_path
)
s3_path
=
join_path
(
self
.
path
,
s3_relative_path
)
bucket_name
,
key
=
parse_bucket_key
(
s3_path
)
bucket_name
,
key
=
parse_bucket_key
(
s3_path
)
res
=
self
.
client
.
get_object
(
Bucket
=
bucket_name
,
Key
=
key
)
res
=
self
.
client
.
get_object
(
Bucket
=
bucket_name
,
Key
=
key
)
body
=
res
[
"Body"
].
read
()
body
=
res
[
"Body"
].
read
()
...
@@ -46,7 +46,7 @@ class S3ReaderWriter(AbsReaderWriter):
...
@@ -46,7 +46,7 @@ class S3ReaderWriter(AbsReaderWriter):
if
s3_relative_path
.
startswith
(
"s3://"
):
if
s3_relative_path
.
startswith
(
"s3://"
):
s3_path
=
s3_relative_path
s3_path
=
s3_relative_path
else
:
else
:
s3_path
=
os
.
path
.
join
(
self
.
path
,
s3_relative_path
)
s3_path
=
join_path
(
self
.
path
,
s3_relative_path
)
if
mode
==
MODE_TXT
:
if
mode
==
MODE_TXT
:
body
=
content
.
encode
(
encoding
)
# Encode text data as bytes
body
=
content
.
encode
(
encoding
)
# Encode text data as bytes
elif
mode
==
MODE_BIN
:
elif
mode
==
MODE_BIN
:
...
@@ -61,7 +61,7 @@ class S3ReaderWriter(AbsReaderWriter):
...
@@ -61,7 +61,7 @@ class S3ReaderWriter(AbsReaderWriter):
if
path
.
startswith
(
"s3://"
):
if
path
.
startswith
(
"s3://"
):
s3_path
=
path
s3_path
=
path
else
:
else
:
s3_path
=
os
.
path
.
join
(
self
.
path
,
path
)
s3_path
=
join_path
(
self
.
path
,
path
)
bucket_name
,
key
=
parse_bucket_key
(
s3_path
)
bucket_name
,
key
=
parse_bucket_key
(
s3_path
)
range_header
=
f
'bytes=
{
byte_start
}
-
{
byte_end
}
'
if
byte_end
else
f
'bytes=
{
byte_start
}
-'
range_header
=
f
'bytes=
{
byte_start
}
-
{
byte_end
}
'
if
byte_end
else
f
'bytes=
{
byte_start
}
-'
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment