# Copyright 2024 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import List, Tuple

from ..py_functional import is_package_available


if is_package_available("wandb"):
    import wandb  # type: ignore


if is_package_available("swanlab"):
    import swanlab  # type: ignore


@dataclass
class GenerationLogger(ABC):
    """Abstract interface for backends that record generation samples.

    Concrete subclasses implement :meth:`log` to send a batch of samples to a
    specific destination (console, wandb, swanlab, ...).
    """

    @abstractmethod
    def log(self, samples: List[Tuple[str, str, str, float]], step: int) -> None:
        """Record one batch of generation samples.

        Args:
            samples: One ``(input, output, label, score)`` tuple per sample.
            step: Step index the samples belong to.
        """
        ...


@dataclass
class ConsoleGenerationLogger(GenerationLogger):
    """Generation logger that prints every sample to standard output."""

    def log(self, samples: List[Tuple[str, str, str, float]], step: int) -> None:
        """Print each sample as a labelled, blank-line-terminated block.

        Args:
            samples: One ``(input, output, label, score)`` tuple per sample.
            step: Step index of the batch; accepted for interface
                compatibility but not included in the printed text.
        """
        for inp, out, lab, score in samples:
            print(f"[prompt] {inp}\n[output] {out}\n[ground_truth] {lab}\n[score] {score}\n")


@dataclass
class WandbGenerationLogger(GenerationLogger):
    """Generation logger that accumulates samples in a wandb table.

    Each call to :meth:`log` appends a single row holding the step followed by
    every sample's fields, then logs the whole table under ``val/generations``.
    The most recent table is kept on the instance as ``validation_table`` so
    that earlier rows can be carried over to the next call.
    """

    def log(self, samples: List[Tuple[str, str, str, float]], step: int) -> None:
        """Append one row for ``step`` and log the cumulative table to wandb.

        Args:
            samples: One ``(input, output, label, score)`` tuple per sample.
            step: Step index; stored in the first column and passed to
                ``wandb.log`` as the logging step.
        """
        # Create column names for all samples: "step", then one
        # input/output/label/score group per sample, numbered from 1.
        columns = ["step"] + [
            f"{field}_{idx}"
            for idx in range(1, len(samples) + 1)
            for field in ("input", "output", "label", "score")
        ]

        if not hasattr(self, "validation_table"):
            # Initialize the table on first call
            self.validation_table = wandb.Table(columns=columns)

        # Create a new table with same columns and existing data
        # Workaround for https://github.com/wandb/wandb/issues/2981#issuecomment-1997445737
        # NOTE(review): previously logged rows are re-used under the column list
        # built from the current batch, so this presumably expects the same
        # number of samples on every call -- confirm.
        new_table = wandb.Table(columns=columns, data=self.validation_table.data)

        # Add new row with all data: the step followed by each sample's fields.
        row_data = [step]
        for sample in samples:
            row_data.extend(sample)

        new_table.add_data(*row_data)
        wandb.log({"val/generations": new_table}, step=step)
        self.validation_table = new_table


@dataclass
class SwanlabGenerationLogger(GenerationLogger):
    """Generation logger that sends samples to swanlab as text entries."""

    def log(self, samples: List[Tuple[str, str, str, float]], step: int) -> None:
        """Log every sample as a captioned ``swanlab.Text`` under ``val/generations``.

        Args:
            samples: One ``(input, output, label, score)`` tuple per sample.
            step: Step index passed to ``swanlab.log``.
        """
        swanlab_text_list = []
        # Captions are numbered from 1 ("sample 1", "sample 2", ...).
        for idx, sample in enumerate(samples, start=1):
            # The four fields are separated by "---" lines surrounded by blank lines.
            row_text = "\n\n---\n\n".join(
                (f"input: {sample[0]}", f"output: {sample[1]}", f"label: {sample[2]}", f"score: {sample[3]}")
            )
            swanlab_text_list.append(swanlab.Text(row_text, caption=f"sample {idx}"))

        swanlab.log({"val/generations": swanlab_text_list}, step=step)


# Registry mapping a backend name to the generation-logger class that handles it.
# AggregateGenerationsLogger looks requested names up here and instantiates the
# matching class with no arguments.
GEN_LOGGERS = {
    "console": ConsoleGenerationLogger,
    "wandb": WandbGenerationLogger,
    "swanlab": SwanlabGenerationLogger,
}


@dataclass
class AggregateGenerationsLogger:
    """Fan a batch of generation samples out to several logging backends."""

    def __init__(self, loggers: List[str]):
        """Instantiate one generation logger per recognised backend name.

        Args:
            loggers: Backend names. Names that are not keys of ``GEN_LOGGERS``
                are skipped without raising.
        """
        self.loggers: List[GenerationLogger] = [
            GEN_LOGGERS[name]() for name in loggers if name in GEN_LOGGERS
        ]

    def log(self, samples: List[Tuple[str, str, str, float]], step: int) -> None:
        """Forward ``samples`` and ``step`` to every configured logger, in order.

        Args:
            samples: One ``(input, output, label, score)`` tuple per sample.
            step: Step index the samples belong to.
        """
        for logger in self.loggers:
            logger.log(samples, step)