test_s3.py 1.66 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import json
import os

import pytest

from magic_pdf.data.io.s3 import S3Reader, S3Writer


@pytest.mark.skipif(
    'S3_ACCESS_KEY' not in os.environ, reason='s3 config not found'
)
def test_s3_reader():
    """Check whole-object and ranged reads through ``S3Reader``.

    Connection settings are taken from the environment; export them
    before running::

        export S3_BUCKET=xxx
        export S3_ACCESS_KEY=xxx
        export S3_SECRET_KEY=xxx
        export S3_ENDPOINT=xxx

    The test is skipped when ``S3_ACCESS_KEY`` is not set.
    """
    # Unset variables fall back to the empty string.
    env = os.environ
    s3_reader = S3Reader(
        bucket=env.get('S3_BUCKET', ''),
        ak=env.get('S3_ACCESS_KEY', ''),
        sk=env.get('S3_SECRET_KEY', ''),
        endpoint_url=env.get('S3_ENDPOINT', ''),
    )
    object_key = (
        'meta-index/scihub/v001/scihub/part-66210c190659-000026.jsonl'
    )

    # Full read: the object must come back non-empty.
    whole = s3_reader.read(object_key)
    assert len(whole) > 0

    # Ranged read: 566 and 713 are passed positionally; presumably a byte
    # offset and a length/limit selecting one JSON record -- confirm
    # against S3Reader.read_at.
    piece = s3_reader.read_at(object_key, 566, 713)
    record = json.loads(piece)
    assert len(record) > 0


@pytest.mark.skipif(
    os.getenv('S3_ACCESS_KEY', None) is None, reason='s3 config not found'
)
def test_s3_writer():
    """test s3 writer.

    Writes a small payload with ``S3Writer`` and reads it back with
    ``S3Reader`` to confirm the round trip.

    must config s3 config in the environment:

        export S3_BUCKET=xxx
        export S3_ACCESS_KEY=xxx
        export S3_SECRET_KEY=xxx
        export S3_ENDPOINT=xxx

    The test is skipped when ``S3_ACCESS_KEY`` is not set.
    """
    bucket = os.getenv('S3_BUCKET', '')
    ak = os.getenv('S3_ACCESS_KEY', '')
    sk = os.getenv('S3_SECRET_KEY', '')
    endpoint_url = os.getenv('S3_ENDPOINT', '')

    test_fn = 'unittest/io/test.jsonl'
    writer = S3Writer(bucket=bucket, ak=ak, sk=sk, endpoint_url=endpoint_url)
    writer.write(test_fn, b'123')

    # Read the object back through an independent reader instance.
    # NOTE(review): the key and payload are fixed and nothing here deletes
    # the object, so a copy left by an earlier run could presumably satisfy
    # this assertion even if the write above did nothing -- confirm whether
    # that matters for this bucket.
    reader = S3Reader(bucket=bucket, ak=ak, sk=sk, endpoint_url=endpoint_url)
    bits = reader.read(test_fn)
    assert bits.decode() == '123'