import torch
import torch.nn.functional as F
class DPOTrainer:
def __init__(self, mannequin, ref_model, beta=0.1, lr=1e-5):
self.mannequin = mannequin
self.ref_model = ref_model
self.beta = beta
self.optimizer = torch.optim.AdamW(self.mannequin.parameters(),…
