import sys
import re

try:
    import paddle
except ImportError:
    print("PaddlePaddle is not installed.")
    sys.exit(1)


def check_version():
    """Return (major, minor) parsed from paddle.__version__."""
    match = re.match(r"(\d+)\.(\d+)\.(\d+)", paddle.__version__)
    if match is None:
        print(f"Unrecognized PaddlePaddle version: {paddle.__version__}")
        sys.exit(1)
    major, minor, _patch = map(int, match.groups())
    return major, minor


major, minor = check_version()

# PaddlePaddle 2.4 and later
if major >= 3 or (major == 2 and minor >= 4):
    from paddlenlp.transformers import AutoModelForCausalLM, AutoTokenizer

    # Text generation with a pretrained GPT-2 model.
    model = AutoModelForCausalLM.from_pretrained("gpt2")
    tokenizer = AutoTokenizer.from_pretrained("gpt2")

    prompt = "Once upon a time"
    input_ids = tokenizer(prompt, return_tensors="pd")["input_ids"]

    # Sample up to 50 tokens with top-k / top-p (nucleus) sampling.
    output_ids = model.generate(
        input_ids=input_ids,
        max_length=50,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )

    output_ids = output_ids[0].numpy().flatten()
    generated_text = tokenizer.decode(output_ids, skip_special_tokens=True)
    print("Generated Text: ", generated_text)

# PaddlePaddle 2.0 to 2.3
elif major == 2 and minor < 4:
    from paddlenlp.transformers import BertTokenizer, BertForSequenceClassification

    # Binary sentiment classification with a pretrained BERT model.
    model_name = "bert-base-uncased"
    tokenizer = BertTokenizer.from_pretrained(model_name)
    model = BertForSequenceClassification.from_pretrained(model_name, num_classes=2)

    texts = [
        "PaddlePaddle is an awesome deep learning framework!",
        "I don't like the weather today.",
    ]

    input_ids = []
    token_type_ids = []
    for text in texts:
        encoded_inputs = tokenizer.encode(text, max_seq_len=128, pad_to_max_seq_len=True)
        input_ids.append(encoded_inputs["input_ids"])
        token_type_ids.append(encoded_inputs["token_type_ids"])

    input_ids = paddle.to_tensor(input_ids, dtype="int64")
    token_type_ids = paddle.to_tensor(token_type_ids, dtype="int64")

    with paddle.no_grad():
        logits = model(input_ids, token_type_ids=token_type_ids)
        probs = paddle.nn.functional.softmax(logits, axis=-1)
        predictions = paddle.argmax(probs, axis=-1)

    for text, pred in zip(texts, predictions.numpy()):
        label = "Positive" if pred == 1 else "Negative"
        print(f"Text: {text}\nPrediction: {label}\n")

# PaddlePaddle versions below 2.0
else:
    import numpy as np
    import paddle.fluid as fluid
    import paddle.fluid.layers as layers
    from paddle.fluid.dygraph import Embedding, Linear
    from paddle.fluid.dygraph.base import to_variable

    class SimpleTextClassifier(fluid.dygraph.Layer):
        """A minimal GRU-based text classifier using the legacy fluid dygraph API."""

        def __init__(self, vocab_size, embedding_dim, hidden_size, num_classes):
            super(SimpleTextClassifier, self).__init__()
            self.embedding = Embedding(size=[vocab_size, embedding_dim])
            self.gru_cell = layers.GRUCell(
                hidden_size=hidden_size,
                param_attr=fluid.ParamAttr(initializer=fluid.initializer.XavierInitializer()),
                bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant(0.0)),
            )
            self.fc = Linear(hidden_size, num_classes)

        def forward(self, x):
            x = self.embedding(x)
            batch_size, seq_len, _ = x.shape
            # Unroll the GRU cell manually over the time dimension.
            hidden = fluid.layers.zeros([batch_size, self.gru_cell.hidden_size], dtype="float32")
            for t in range(seq_len):
                step_input = x[:, t, :]
                hidden, _ = self.gru_cell(step_input, hidden)
            logits = self.fc(hidden)
            return logits

    def preprocess_text(text, vocab, seq_len=20):
        """Map words to ids and pad/truncate to a fixed sequence length."""
        text_ids = [vocab.get(word, 0) for word in text.split()]
        text_ids = text_ids[:seq_len] + [0] * (seq_len - len(text_ids))
        return np.array([text_ids], dtype="int64")

    def infer(text, model, vocab, label_list):
        """Run a single forward pass and return the predicted label."""
        with fluid.dygraph.guard():
            model.eval()
            text_data = preprocess_text(text, vocab)
            text_var = to_variable(text_data)
            logits = model(text_var)
            prediction = layers.softmax(logits)
            predicted_class = np.argmax(prediction.numpy())
            return label_list[predicted_class]

    vocab = {"hello": 1, "world": 2}
    label_list = ["positive", "negative"]
    vocab_size = len(vocab) + 1
    embedding_dim = 128
    hidden_size = 64
    num_classes = len(label_list)

    # Build the model under a dygraph guard; infer() opens its own guard.
    with fluid.dygraph.guard():
        model = SimpleTextClassifier(vocab_size, embedding_dim, hidden_size, num_classes)
        model.eval()

    text = "hello world"
    predicted_label = infer(text, model, vocab, label_list)
    print(f"Predicted label: {predicted_label}")

print("finish")