Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
97a37548
Commit
97a37548
authored
Apr 25, 2020
by
Julien Chaumond
Browse files
rm boto3 dependency
parent
4e817ff4
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
11 additions
and
75 deletions
+11
-75
.github/workflows/github-torch-hub.yml
.github/workflows/github-torch-hub.yml
+1
-1
hubconf.py
hubconf.py
+1
-1
setup.py
setup.py
+0
-2
src/transformers/file_utils.py
src/transformers/file_utils.py
+9
-71
No files found.
.github/workflows/github-torch-hub.yml
View file @
97a37548
...
@@ -21,7 +21,7 @@ jobs:
...
@@ -21,7 +21,7 @@ jobs:
-
name
:
Install dependencies
-
name
:
Install dependencies
run
:
|
run
:
|
pip install torch
pip install torch
pip install numpy tokenizers
boto3
filelock requests tqdm regex sentencepiece sacremoses
pip install numpy tokenizers filelock requests tqdm regex sentencepiece sacremoses
-
name
:
Torch hub list
-
name
:
Torch hub list
run
:
|
run
:
|
...
...
hubconf.py
View file @
97a37548
...
@@ -16,7 +16,7 @@ from transformers import (
...
@@ -16,7 +16,7 @@ from transformers import (
)
)
dependencies
=
[
"torch"
,
"numpy"
,
"tokenizers"
,
"boto3"
,
"filelock"
,
"requests"
,
"tqdm"
,
"regex"
,
"sentencepiece"
,
"sacremoses"
]
dependencies
=
[
"torch"
,
"numpy"
,
"tokenizers"
,
"filelock"
,
"requests"
,
"tqdm"
,
"regex"
,
"sentencepiece"
,
"sacremoses"
]
@
add_start_docstrings
(
AutoConfig
.
__doc__
)
@
add_start_docstrings
(
AutoConfig
.
__doc__
)
...
...
setup.py
View file @
97a37548
...
@@ -99,8 +99,6 @@ setup(
...
@@ -99,8 +99,6 @@ setup(
"tokenizers == 0.7.0"
,
"tokenizers == 0.7.0"
,
# dataclasses for Python versions that don't have it
# dataclasses for Python versions that don't have it
"dataclasses;python_version<'3.7'"
,
"dataclasses;python_version<'3.7'"
,
# accessing files from S3 directly
"boto3"
,
# filesystem locks e.g. to prevent parallel downloads
# filesystem locks e.g. to prevent parallel downloads
"filelock"
,
"filelock"
,
# for downloading models over HTTPS
# for downloading models over HTTPS
...
...
src/transformers/file_utils.py
View file @
97a37548
...
@@ -19,10 +19,7 @@ from typing import Optional
...
@@ -19,10 +19,7 @@ from typing import Optional
from
urllib.parse
import
urlparse
from
urllib.parse
import
urlparse
from
zipfile
import
ZipFile
,
is_zipfile
from
zipfile
import
ZipFile
,
is_zipfile
import
boto3
import
requests
import
requests
from
botocore.config
import
Config
from
botocore.exceptions
import
ClientError
from
filelock
import
FileLock
from
filelock
import
FileLock
from
tqdm.auto
import
tqdm
from
tqdm.auto
import
tqdm
...
@@ -144,7 +141,7 @@ def add_end_docstrings(*docstr):
...
@@ -144,7 +141,7 @@ def add_end_docstrings(*docstr):
def
is_remote_url
(
url_or_filename
):
def
is_remote_url
(
url_or_filename
):
parsed
=
urlparse
(
url_or_filename
)
parsed
=
urlparse
(
url_or_filename
)
return
parsed
.
scheme
in
(
"http"
,
"https"
,
"s3"
)
return
parsed
.
scheme
in
(
"http"
,
"https"
)
def
hf_bucket_url
(
identifier
,
postfix
=
None
,
cdn
=
False
)
->
str
:
def
hf_bucket_url
(
identifier
,
postfix
=
None
,
cdn
=
False
)
->
str
:
...
@@ -297,55 +294,6 @@ def cached_path(
...
@@ -297,55 +294,6 @@ def cached_path(
return
output_path
return
output_path
def split_s3_path(url):
    """Split a full s3 path into the bucket name and path.

    Args:
        url: An S3 URL of the form ``s3://bucket/key``.

    Returns:
        A ``(bucket_name, s3_path)`` tuple, where ``s3_path`` is the URL path
        with its single leading ``/`` removed.

    Raises:
        ValueError: If the URL has no bucket (netloc) or no object path,
            including the case where the path is only ``/``.
    """
    parsed = urlparse(url)
    bucket_name = parsed.netloc
    s3_path = parsed.path
    # Remove '/' at beginning of path (only one: further slashes belong to
    # the key itself).
    if s3_path.startswith("/"):
        s3_path = s3_path[1:]
    # Validate after stripping so that "s3://bucket/" (path == "/") is
    # rejected instead of yielding an empty key.
    if not bucket_name or not s3_path:
        raise ValueError("bad s3 path {}".format(url))
    return bucket_name, s3_path
def s3_request(func):
    """
    Wrapper function for s3 requests in order to create more helpful error
    messages.

    Args:
        func: Callable taking the S3 ``url`` as its first positional argument.

    Returns:
        A wrapped callable with the same signature. A ``ClientError`` whose
        error code is ``404`` is re-raised as ``EnvironmentError``; any other
        ``ClientError`` propagates unchanged.
    """

    @wraps(func)
    def wrapper(url, *args, **kwargs):
        try:
            return func(url, *args, **kwargs)
        except ClientError as exc:
            # Compare as a string: error codes may be non-numeric (e.g.
            # "AccessDenied"), and int() on those would raise ValueError and
            # hide the original ClientError.
            error_code = exc.response.get("Error", {}).get("Code")
            if str(error_code) == "404":
                raise EnvironmentError("file {} not found".format(url)) from exc
            raise

    return wrapper
@s3_request
def s3_etag(url, proxies=None):
    """Check ETag on S3 object.

    Args:
        url: S3 URL, split into bucket and key by ``split_s3_path``.
        proxies: Passed as ``proxies`` to the botocore ``Config`` used to
            build the S3 resource.

    Returns:
        The ``e_tag`` attribute of the S3 object addressed by ``url``.
    """
    client_config = Config(proxies=proxies)
    resource = boto3.resource("s3", config=client_config)
    bucket, key = split_s3_path(url)
    return resource.Object(bucket, key).e_tag
@s3_request
def s3_get(url, temp_file, proxies=None):
    """Pull a file directly from S3.

    Args:
        url: S3 URL, split into bucket and key by ``split_s3_path``.
        temp_file: File object handed to ``download_fileobj`` as the
            destination.
        proxies: Passed as ``proxies`` to the botocore ``Config`` used to
            build the S3 resource.
    """
    client_config = Config(proxies=proxies)
    resource = boto3.resource("s3", config=client_config)
    bucket, key = split_s3_path(url)
    source_bucket = resource.Bucket(bucket)
    source_bucket.download_fileobj(key, temp_file)
def
http_get
(
url
,
temp_file
,
proxies
=
None
,
resume_size
=
0
,
user_agent
=
None
):
def
http_get
(
url
,
temp_file
,
proxies
=
None
,
resume_size
=
0
,
user_agent
=
None
):
ua
=
"transformers/{}; python/{}"
.
format
(
__version__
,
sys
.
version
.
split
()[
0
])
ua
=
"transformers/{}; python/{}"
.
format
(
__version__
,
sys
.
version
.
split
()[
0
])
if
is_torch_available
():
if
is_torch_available
():
...
@@ -406,17 +354,13 @@ def get_from_cache(
...
@@ -406,17 +354,13 @@ def get_from_cache(
etag
=
None
etag
=
None
if
not
local_files_only
:
if
not
local_files_only
:
# Get eTag to add to filename, if it exists.
try
:
if
url
.
startswith
(
"s3://"
):
response
=
requests
.
head
(
url
,
allow_redirects
=
True
,
proxies
=
proxies
,
timeout
=
etag_timeout
)
etag
=
s3_etag
(
url
,
proxies
=
proxies
)
if
response
.
status_code
==
200
:
else
:
etag
=
response
.
headers
.
get
(
"ETag"
)
try
:
except
(
EnvironmentError
,
requests
.
exceptions
.
Timeout
):
response
=
requests
.
head
(
url
,
allow_redirects
=
True
,
proxies
=
proxies
,
timeout
=
etag_timeout
)
# etag is already None
if
response
.
status_code
==
200
:
pass
etag
=
response
.
headers
.
get
(
"ETag"
)
except
(
EnvironmentError
,
requests
.
exceptions
.
Timeout
):
# etag is already None
pass
filename
=
url_to_filename
(
url
,
etag
)
filename
=
url_to_filename
(
url
,
etag
)
...
@@ -483,13 +427,7 @@ def get_from_cache(
...
@@ -483,13 +427,7 @@ def get_from_cache(
with
temp_file_manager
()
as
temp_file
:
with
temp_file_manager
()
as
temp_file
:
logger
.
info
(
"%s not found in cache or force_download set to True, downloading to %s"
,
url
,
temp_file
.
name
)
logger
.
info
(
"%s not found in cache or force_download set to True, downloading to %s"
,
url
,
temp_file
.
name
)
# GET file object
http_get
(
url
,
temp_file
,
proxies
=
proxies
,
resume_size
=
resume_size
,
user_agent
=
user_agent
)
if
url
.
startswith
(
"s3://"
):
if
resume_download
:
logger
.
warn
(
'Warning: resumable downloads are not implemented for "s3://" urls'
)
s3_get
(
url
,
temp_file
,
proxies
=
proxies
)
else
:
http_get
(
url
,
temp_file
,
proxies
=
proxies
,
resume_size
=
resume_size
,
user_agent
=
user_agent
)
logger
.
info
(
"storing %s in cache at %s"
,
url
,
cache_path
)
logger
.
info
(
"storing %s in cache at %s"
,
url
,
cache_path
)
os
.
replace
(
temp_file
.
name
,
cache_path
)
os
.
replace
(
temp_file
.
name
,
cache_path
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment