# PyTorch 深度学习指南

## 基础模板

### 数据加载

```python
from torch.utils.data import Dataset, DataLoader


class CustomDataset(Dataset):
    """Map-style dataset over parallel `data` / `labels` sequences.

    Each item is returned as a dict ``{'x': sample, 'y': label}``.
    The optional `transform` is applied to the sample only, never the label.
    """

    def __init__(self, data, labels, transform=None):
        self.data = data
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        x = self.data[idx]
        y = self.labels[idx]
        # Compare against None so a callable that happens to be falsy
        # (e.g. an empty container-like transform) is still applied.
        if self.transform is not None:
            x = self.transform(x)
        return {'x': x, 'y': y}


# DataLoader
# `data` and `labels` are placeholders for your own samples and targets.
dataset = CustomDataset(data, labels)
train_loader = DataLoader(
    dataset,
    batch_size=32,
    shuffle=True,
    num_workers=4,
    pin_memory=True
)
```

### 模型定义

```python
import torch.nn as nn


class MLP(nn.Module):
    """Fully connected network: two hidden Linear+ReLU+Dropout stages
    followed by a final Linear projection to `output_dim`."""

    def __init__(self, input_dim, hidden_dim, output_dim, dropout=0.1):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, output_dim)
        )

    def forward(self, x):
        return self.layers(x)


class CNN(nn.Module):
    """Small convolutional classifier for 3-channel inputs.

    Two Conv-BatchNorm-ReLU-MaxPool stages (3 -> 64 -> 128 channels) feed a
    global average pool, so the Linear head always sees 128 features.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        self.classifier = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        return self.classifier(x)
```

### 训练循环

```python
import torch


def train_epoch(model, loader, optimizer, criterion, device):
    """Run one optimisation pass over `loader`.

    Batches are dicts with keys 'x' and 'y' (as produced by CustomDataset).

    Returns:
        (mean loss per batch, fraction of correctly classified samples)
    """
    model.train()
    total_loss = 0
    correct = 0
    total = 0

    for batch in loader:
        x = batch['x'].to(device)
        y = batch['y'].to(device)

        optimizer.zero_grad()
        outputs = model(x)
        loss = criterion(outputs, y)
        loss.backward()

        # Gradient clipping (optional)
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

        optimizer.step()

        total_loss += loss.item()
        # Predicted class = index of the largest output along dim 1
        _, predicted = outputs.max(1)
        total += y.size(0)
        correct += predicted.eq(y).sum().item()

    return total_loss / len(loader), correct / total


@torch.no_grad()
def evaluate(model, loader, criterion, device):
    """Compute loss and accuracy over `loader` without tracking gradients.

    Returns:
        (mean loss per batch, fraction of correctly classified samples)
    """
    model.eval()
    total_loss = 0
    correct = 0
    total = 0

    for batch in loader:
        x = batch['x'].to(device)
        y = batch['y'].to(device)

        outputs = model(x)
        loss = criterion(outputs, y)

        total_loss += loss.item()
        _, predicted = outputs.max(1)
        total += y.size(0)
        correct += predicted.eq(y).sum().item()

    return total_loss / len(loader), correct / total
```

### 完整训练流程

```python
def train(config):
    """Train a model end to end and checkpoint the best validation accuracy.

    Reads config['lr'], config['weight_decay'] and config['epochs'].
    `Model`, `train_loader` and `val_loader` must be defined by the caller's
    module. The returned model holds the weights of the LAST epoch; the best
    weights are written to 'best_model.pt'.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Model
    model = Model(config).to(device)

    # Optimizer
    optimizer = torch.optim.AdamW(
        model.parameters(),
        lr=config['lr'],
        weight_decay=config['weight_decay']
    )

    # Learning-rate schedule
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=config['epochs']
    )

    # Loss function
    criterion = nn.CrossEntropyLoss()

    best_acc = 0
    for epoch in range(config['epochs']):
        train_loss, train_acc = train_epoch(model, train_loader, optimizer, criterion, device)
        val_loss, val_acc = evaluate(model, val_loader, criterion, device)
        scheduler.step()

        print(f"Epoch {epoch+1}: train_loss={train_loss:.4f}, train_acc={train_acc:.4f}, "
              f"val_loss={val_loss:.4f}, val_acc={val_acc:.4f}")

        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), 'best_model.pt')

    return model
```

## 常用技巧

### 混合精度训练

```python
from torch.cuda.amp import autocast, GradScaler

scaler = GradScaler()

for batch in loader:
    # Move the batch to the model's device, same as in train_epoch
    x = batch['x'].to(device)
    y = batch['y'].to(device)

    optimizer.zero_grad()

    with autocast():
        outputs = model(x)
        loss = criterion(outputs, y)

    # Scale the loss before backward; the scaler then steps the optimizer
    # and updates its own scale factor.
    scaler.scale(loss).backward()
    scaler.step(optimizer)
    scaler.update()
```

### 早停

```python
class EarlyStopping:
    """Signal when validation loss has stopped improving.

    A call counts as an improvement when
    ``val_loss <= best_loss - min_delta``. After `patience` consecutive
    calls without improvement, `early_stop` is set to True.
    """

    def __init__(self, patience=5, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = None
        self.early_stop = False

    def __call__(self, val_loss):
        if self.best_loss is None:
            # First observation becomes the baseline
            self.best_loss = val_loss
        elif val_loss > self.best_loss - self.min_delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_loss = val_loss
            self.counter = 0
```

### 模型保存/加载

```python
# Save
torch.save({
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'epoch': epoch,
    'loss': loss
}, 'checkpoint.pt')

# Load
# map_location remaps stored tensors onto the current device, so a checkpoint
# saved on GPU can still be restored on a CPU-only machine.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
checkpoint = torch.load('checkpoint.pt', map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
```

### 冻结层

```python
# Freeze all layers
for param in model.parameters():
    param.requires_grad = False

# Unfreeze the last few layers
for param in model.classifier.parameters():
    param.requires_grad = True
```

## 分布式训练

```python
import os

import torch
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.data import DataLoader

# Initialization
dist.init_process_group(backend='nccl')
local_rank = int(os.environ['LOCAL_RANK'])
torch.cuda.set_device(local_rank)

# Model
model = Model().cuda()
model = DDP(model, device_ids=[local_rank])

# DataLoader
sampler = torch.utils.data.distributed.DistributedSampler(dataset)
loader = DataLoader(dataset, sampler=sampler, batch_size=32)

# Call set_epoch at the start of every epoch; without it the sampler
# reuses the same shuffle order each epoch.
for epoch in range(num_epochs):
    sampler.set_epoch(epoch)
    for batch in loader:
        ...
```