test_s3.py 1.71 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import json
import os

import pytest

from magic_pdf.data.data_reader_writer import S3DataReader, S3DataWriter


@pytest.mark.skipif(
    # The test body needs all four settings, so skip unless every one is set
    # (checking only the access key lets a half-configured run fail oddly).
    any(
        os.getenv(name) is None
        for name in ('S3_BUCKET', 'S3_ACCESS_KEY', 'S3_SECRET_KEY', 'S3_ENDPOINT')
    ),
    reason='need s3 config!',
)
def test_multi_bucket_s3_reader_writer():
    """Exercise S3DataReader and S3DataWriter against a live S3 bucket.

    The S3 connection is configured through environment variables; the test
    is skipped when any of them is unset:

        export S3_BUCKET=xxx
        export S3_ACCESS_KEY=xxx
        export S3_SECRET_KEY=xxx
        export S3_ENDPOINT=xxx

    The bucket must already contain the JSONL object read below. The test
    also writes two small objects under ``unittest/data/data_reader_writer/``.
    """
    # The skipif guard guarantees these are present, so index directly and
    # fail loudly rather than silently falling back to an empty string.
    bucket = os.environ['S3_BUCKET']
    ak = os.environ['S3_ACCESS_KEY']
    sk = os.environ['S3_SECRET_KEY']
    endpoint_url = os.environ['S3_ENDPOINT']

    jsonl_key = 'meta-index/scihub/v001/scihub/part-66210c190659-000026.jsonl'
    data_dir = 'unittest/data/data_reader_writer/multi_bucket_s3_data'

    reader = S3DataReader(bucket=bucket, ak=ak, sk=sk, endpoint_url=endpoint_url)
    writer = S3DataWriter(bucket=bucket, ak=ak, sk=sk, endpoint_url=endpoint_url)

    # A bare key and the fully qualified s3:// URL must return the same bytes.
    whole = reader.read(jsonl_key)
    assert whole == reader.read(f's3://{bucket}/{jsonl_key}')

    # The '?bytes=566,713' suffix on the path must be equivalent to passing
    # the same two numbers to read_at(), and the slice must be non-empty JSON.
    ranged = reader.read(f'{jsonl_key}?bytes=566,713')
    assert ranged == reader.read_at(jsonl_key, 566, 713)
    assert len(json.loads(ranged)) > 0

    # Text written with write_string() is read back as its encoded bytes.
    writer.write_string(f'{data_dir}/test01.txt', 'abc')
    assert reader.read(f'{data_dir}/test01.txt') == b'abc'

    # NOTE(review): this object is written under a key prefixed with the
    # bucket name but read back without that prefix. Confirm S3DataWriter
    # strips a leading bucket segment; otherwise this assertion can only pass
    # against a test02.txt left behind by an earlier run.
    writer.write(f'{bucket}/{data_dir}/test02.txt', b'123')
    assert reader.read(f'{data_dir}/test02.txt') == b'123'