A few useful functions:
dir(): shows what is inside a package or object.
help(): shows how to use it.
torch.cuda.is_available(): returns True if CUDA is usable.
torch.__version__: shows the installed torch version.
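For example, a quick environment check using only these calls:
import torch

print(torch.__version__)           # installed torch version
print(torch.cuda.is_available())   # True means CUDA can be used
print(dir(torch.cuda))             # what the torch.cuda module contains
help(torch.cuda.is_available)      # how to use a specific function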
dataset
One .jpg image corresponds to one .txt label file; note that the two file names should match.
Example:
from torch.utils.data import Dataset
from PIL import Image
import os

class MyData(Dataset):
    def __init__(self, root_dir, label_dir):
        self.root_dir = root_dir
        self.label_dir = label_dir
        self.path = os.path.join(self.root_dir, self.label_dir)
        self.img_path = os.listdir(self.path)

    def __getitem__(self, idx):
        img_name = self.img_path[idx]
        img_item_path = os.path.join(self.root_dir, self.label_dir, img_name)
        img = Image.open(img_item_path)
        # img.show()
        label = self.label_dir
        return img, label

    def __len__(self):
        return len(self.img_path)
# instantiate
root_dir = "dataset/train"
ants_label_dir = "ants"
bees_label_dir = "bees"
ants_dataset = MyData(root_dir, ants_label_dir)
# items can then be accessed by index, e.g. ants_dataset[0]
bees_dataset = MyData(root_dir, bees_label_dir)
train_dataset = ants_dataset + bees_dataset  # concatenates the two datasets
tensorboard (visualization)
Launch TensorBoard from the terminal: tensorboard --logdir=logs --port=xxxx; the default address is localhost:6006.
To redraw from scratch, delete the logs folder or write to a new folder.
Example:
from torch.utils.tensorboard import SummaryWriter
import numpy as np
from PIL import Image

writer = SummaryWriter("logs")
img_path = "xxx"
img_pil = Image.open(img_path)
img_array = np.array(img_pil)
# add_image()
writer.add_image("test", img_array, 1, dataformats='HWC')
# (tag, img_tensor, global_step); note: an np array from PIL defaults to (height, width, 3 channels), hence dataformats='HWC'
# add_scalar()
for i in range(100):
    writer.add_scalar("y=2x", 2 * i, i)  # (tag, value for the y-axis, step for the x-axis)
writer.close()
Commonly used transforms
A tensor has many of the attributes deep learning needs and is one of the cores of PyTorch.
Small tips:
ctrl + p: show parameter hints; alt + enter: show suggested fixes for an error.
- Pay attention to the types of inputs and outputs, and to the parameters of each function.
- When unsure of a data type, use print(type(...)) often.
totensor
from PIL import Image
from torchvision import transforms
import cv2
from torch.utils.tensorboard import SummaryWriter
img_path = "xxx"
pil_img = Image.open(img_path) # PIL Image
cv_img = cv2.imread(img_path) # numpy.ndarray
tensor_trans = transforms.ToTensor()
tensor_img_pil = tensor_trans(pil_img) # pil -> tensor
tensor_img_cv2 = tensor_trans(cv_img) # numpy.ndarray -> tensor
writer = SummaryWriter("logs")
writer.add_image("Tensor_img", tensor_img_pil)
writer.close()
normalize
trans_norm = transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
# (mean, std), one value per RGB channel
# formula: output[channel] = (input[channel] - mean[channel]) / std[channel]
img_norm = trans_norm(img_tensor)
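A small numeric check of this formula (the pixel values below are made up): with mean = std = 0.5 per channel, a ToTensor output in [0, 1] is mapped to [-1, 1].
import torch
from torchvision import transforms

x = torch.zeros(3, 2, 2)    # pretend image tensor, all pixels 0.0
x[0, 0, 0] = 1.0            # one pixel at the maximum value 1.0
trans_norm = transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
y = trans_norm(x)
print(y[0, 0, 0], y[1, 0, 0])  # tensor(1.) and tensor(-1.)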
resize
trans_resize = transforms.Resize((512, 512))
img_resize = trans_resize(img)  # expects a PIL Image here and returns a PIL Image as well
compose
trans_compose = transforms.Compose([trans_resize, trans_totensor])  # chains the two transforms; note that the order matters
img_compose = trans_compose(img)
randomcrop
trans_random = transforms.RandomCrop((500, 1000))
trans_compose = transforms.Compose([trans_random, trans_totensor])
for i in range(10):
    img_crop = trans_compose(img)
    writer.add_image("RandomCrop", img_crop, i)
torchvision
import torchvision
from torch.utils.tensorboard import SummaryWriter
dataset_transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor()
])
train_set = torchvision.datasets.CIFAR10(root="./dataset", train=True, transform=dataset_transform, download=True)
test_set = torchvision.datasets.CIFAR10(root="./dataset", train=False, transform=dataset_transform, download=True)
writer = SummaryWriter("p10")
for i in range(10):
    img, target = test_set[i]
    writer.add_image("test_set", img, i)
writer.close()
dataloader
import torchvision
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
test_data = torchvision.datasets.CIFAR10("./dataset", train=False, transform=torchvision.transforms.ToTensor())
test_loader = DataLoader(dataset=test_data, batch_size=64, shuffle=True, num_workers=0, drop_last=False)
img, target = test_data[0]
print(img.shape)
print(target)
writer = SummaryWriter("dataloader")
step = 0
for data in test_loader:
    imgs, targets = data
    writer.add_images("test_data", imgs, step)
    step += 1
writer.close()
Neural networks: torch.nn
A minimal build example:
from torch import nn # neural network
import torch
class Module(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, input):
        output = input + 1
        return output

module = Module()
x = torch.tensor(1.0)
output = module(x)
print(output)
Convolution operation
Input image -> convolution kernel (element-wise multiply, then sum) -> convolved output.
Convolution is a powerful mathematical tool: by combining one function (or signal) with another (the kernel) it extracts information, generates new features, or achieves specific effects.
import torch
import torch.nn.functional as F
input = torch.tensor([[]])   # a 2-D 5 x 5 array (values omitted here)
kernel = torch.tensor([[]])  # a 2-D 3 x 3 array (values omitted here)
input = torch.reshape(input, (1, 1, 5, 5))  # conv2d expects a 4-D tensor (N, C, H, W)
kernel = torch.reshape(kernel, (1, 1, 3, 3))
output = F.conv2d(input, kernel, stride=1, padding=1)
print(output)
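Since the tensors above are left empty, here is a runnable variant with made-up numbers (purely illustrative values, not the ones elided above), showing how stride and padding change the output size:
import torch
import torch.nn.functional as F

input = torch.tensor([[1, 2, 0, 3, 1],
                      [0, 1, 2, 3, 1],
                      [1, 2, 1, 0, 0],
                      [5, 2, 3, 1, 1],
                      [2, 1, 0, 1, 1]], dtype=torch.float32)
kernel = torch.tensor([[1, 2, 1],
                       [0, 1, 0],
                       [2, 1, 0]], dtype=torch.float32)
input = torch.reshape(input, (1, 1, 5, 5))    # (N, C, H, W)
kernel = torch.reshape(kernel, (1, 1, 3, 3))  # (out_C, in_C, kH, kW)

print(F.conv2d(input, kernel, stride=1).shape)             # (1, 1, 3, 3)
print(F.conv2d(input, kernel, stride=2).shape)             # (1, 1, 2, 2)
print(F.conv2d(input, kernel, stride=1, padding=1).shape)  # (1, 1, 5, 5)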
Convolution layer
Note: only kernel_size has to be set; the kernel's actual weights are learned by the algorithm during training.
torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0)
# imports omitted
dataset = torchvision.datasets.CIFAR10("../data", train=False, transform=torchvision.transforms.ToTensor(), download=True)
dataloader = DataLoader(dataset, batch_size=64)

class Module(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=1, padding=0)

    def forward(self, x):
        x = self.conv1(x)
        return x

module = Module()
writer = SummaryWriter("../logs")
step = 0
for data in dataloader:
    imgs, targets = data
    output = module(imgs)
    writer.add_images("input", imgs, step)
    output = torch.reshape(output, (-1, 3, 30, 30))  # 6 channels cannot be displayed as an image, so fold them back into 3-channel batches
    writer.add_images("output", output, step)
    step += 1
writer.close()
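Where the 30 in the reshape comes from: the output size follows the formula in the Conv2d docs, H_out = floor((H_in + 2*padding - dilation*(kernel_size - 1) - 1) / stride + 1). A tiny helper (conv_out_size is just a name made up here) to check it:
import math

def conv_out_size(h_in, kernel_size, stride=1, padding=0, dilation=1):
    # output-size formula from the Conv2d / MaxPool2d documentation
    return math.floor((h_in + 2 * padding - dilation * (kernel_size - 1) - 1) / stride + 1)

print(conv_out_size(32, kernel_size=3))             # 30: why the output above is reshaped to 30 x 30
print(conv_out_size(32, kernel_size=5, padding=2))  # 32: the "same size" setting used in the CIFAR10 model later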
Max pooling
torch.nn.MaxPool2d(kernel_size, stride, padding, dilation, ceil_mode)
Takes the maximum value inside each kernel window.
Purpose: keep the data's features while reducing the amount of data, e.g. 1080p -> 720p.
# imports omitted
input = torch.tensor([[]], dtype=torch.float32)  # a 2-D input array (values omitted here)
input = torch.reshape(input, (-1, 1, 5, 5))

class Module(nn.Module):
    def __init__(self):
        super().__init__()
        self.maxpool1 = MaxPool2d(kernel_size=3, ceil_mode=True)

    def forward(self, input):
        output = self.maxpool1(input)
        return output

module = Module()
output = module(input)
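What ceil_mode changes, on a made-up 5 x 5 input: with kernel_size=3 the stride defaults to 3, so the window runs off the edge; ceil_mode=True keeps those partial windows, ceil_mode=False drops them.
import torch
from torch.nn import MaxPool2d

x = torch.arange(25, dtype=torch.float32).reshape(1, 1, 5, 5)  # illustrative values

print(MaxPool2d(kernel_size=3, ceil_mode=True)(x).shape)   # (1, 1, 2, 2): partial windows kept
print(MaxPool2d(kernel_size=3, ceil_mode=False)(x).shape)  # (1, 1, 1, 1): partial windows dropped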
dataset = torchvision.datasets.CIFAR10("../data", train=False, transform=torchvision.transforms.ToTensor(), download=True)
dataloader = DataLoader(dataset, batch_size=64)

class Module(nn.Module):
    def __init__(self):
        super().__init__()
        self.maxpool1 = MaxPool2d(kernel_size=3, ceil_mode=True)

    def forward(self, input):
        output = self.maxpool1(input)
        return output

module = Module()
writer = SummaryWriter("logs_maxpool")
step = 0
for data in dataloader:
    imgs, targets = data
    writer.add_images("input", imgs, step)
    output = module(imgs)
    writer.add_images("output", output, step)
    step += 1
writer.close()
Non-linear activations
ReLU and Sigmoid
input = torch.tensor([[]])  # a 2-D input array (values omitted here)
input = torch.reshape(input, (-1, 1, 2, 2))
dataset = torchvision.datasets.CIFAR10("../data", train=False, download=True, transform=torchvision.transforms.ToTensor())
dataloader = DataLoader(dataset, batch_size=64)

class Module(nn.Module):
    def __init__(self):
        super().__init__()
        self.relu1 = ReLU()
        self.sigmoid1 = Sigmoid()

    def forward(self, input):
        output = self.sigmoid1(input)
        return output

module = Module()
writer = SummaryWriter("../logs_sigmoid")
step = 0
for data in dataloader:
    imgs, targets = data
    writer.add_images("input", imgs, step)
    output = module(imgs)
    writer.add_images("output", output, step)
    step += 1
writer.close()
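What the two activations do, on a couple of made-up values:
import torch
from torch.nn import ReLU, Sigmoid

x = torch.tensor([[1.0, -0.5],
                  [-1.0, 3.0]])
print(ReLU()(x))     # negatives clamped to 0: [[1., 0.], [0., 3.]]
print(Sigmoid()(x))  # every value squashed into (0, 1)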
Linear layer
dataset = torchvision.datasets.CIFAR10("../data", train=False, download=True, transform=torchvision.transforms.ToTensor())
dataloader = DataLoader(dataset, batch_size=64, drop_last=True)  # a smaller last batch would not match the 196608 input features

class Module(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear1 = Linear(196608, 10)

    def forward(self, input):
        output = self.linear1(input)
        return output

module = Module()
for data in dataloader:
    imgs, targets = data
    output = torch.flatten(imgs)  # flatten to 1-D
    output = module(output)
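Where the 196608 in Linear(196608, 10) comes from: one batch of 64 CIFAR10 images flattened end to end.
import torch

imgs = torch.ones((64, 3, 32, 32))  # one full batch of CIFAR10-sized images
print(torch.flatten(imgs).shape)    # torch.Size([196608]), i.e. 64 * 3 * 32 * 32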
Building a network with nn
Sequential
class Module(nn.Module):
    def __init__(self):
        super().__init__()
        self.model1 = Sequential(
            Conv2d(3, 32, 5, padding=2),  # padding and stride have to be worked out from the output-size formula
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),  # check how large the flattened output is before the Linear layer
            Linear(1024, 64),
            Linear(64, 10)
        )

    def forward(self, input):
        output = self.model1(input)
        return output

module = Module()
input = torch.ones((64, 3, 32, 32))
output = module(input)
writer = SummaryWriter("logs_seq")
writer.add_graph(module, input)
writer.close()
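How the 1024 in Linear(1024, 64) arises, stepping layer by layer through the shapes (same layers as above):
import torch
from torch.nn import Conv2d, MaxPool2d, Flatten

x = torch.ones((64, 3, 32, 32))
x = Conv2d(3, 32, 5, padding=2)(x);  print(x.shape)  # (64, 32, 32, 32)
x = MaxPool2d(2)(x);                 print(x.shape)  # (64, 32, 16, 16)
x = Conv2d(32, 32, 5, padding=2)(x); print(x.shape)  # (64, 32, 16, 16)
x = MaxPool2d(2)(x);                 print(x.shape)  # (64, 32, 8, 8)
x = Conv2d(32, 64, 5, padding=2)(x); print(x.shape)  # (64, 64, 8, 8)
x = MaxPool2d(2)(x);                 print(x.shape)  # (64, 64, 4, 4)
x = Flatten()(x);                    print(x.shape)  # (64, 1024) = (64, 64 * 4 * 4)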
Loss functions and backpropagation
L1Loss: mean absolute error.
MSELoss: mean squared error.
CrossEntropyLoss: the cross-entropy loss between input logits and target; used for classification problems.
What a loss function is for:
- It measures the gap between the actual output and the target.
- It provides the basis for updating the parameters (backpropagation): calling loss.backward() computes the gradient of each parameter (stored in .grad).
inputs = torch.tensor([1, 2, 3], dtype=torch.float32)
targets = torch.tensor([1, 2, 5], dtype=torch.float32)
inputs = torch.reshape(inputs, (1, 1, 1, 3))
targets = torch.reshape(targets, (1, 1, 1, 3))
l1loss = L1Loss()
mseloss = MSELoss()
res_l1 = l1loss(inputs, targets)
res_mse = mseloss(inputs, targets)
x = torch.tensor([0.1, 0.2, 0.3])
y = torch.tensor([1])
x = torch.reshape(x, (1, 3))
crossloss = CrossEntropyLoss()
res_cross = crossloss(x, y)  # x holds the predicted logits, y is the index of the target class
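A quick manual check of what CrossEntropyLoss computes here: for unnormalized logits x and target class c, the loss is -x[c] + log(sum_j exp(x[j])).
import torch
from torch.nn import CrossEntropyLoss

x = torch.tensor([[0.1, 0.2, 0.3]])
y = torch.tensor([1])
manual = -x[0, 1] + torch.log(torch.exp(x[0]).sum())
print(manual)                    # ~1.1019
print(CrossEntropyLoss()(x, y))  # same value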
Example: a CIFAR10 classification task
dataset = torchvision.datasets.CIFAR10("../data", train=False, transform=torchvision.transforms.ToTensor(), download=True)
dataloader = DataLoader(dataset, batch_size=1)

class Module(nn.Module):
    def __init__(self):
        super().__init__()
        self.model1 = Sequential(
            Conv2d(3, 32, 5, padding=2),  # padding and stride have to be worked out from the output-size formula
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),  # check how large the flattened output is before the Linear layer
            Linear(1024, 64),
            Linear(64, 10)
        )

    def forward(self, input):
        output = self.model1(input)
        return output

module = Module()
loss = CrossEntropyLoss()
for data in dataloader:
    imgs, targets = data
    outputs = module(imgs)
    res_loss = loss(outputs, targets)
    res_loss.backward()
Optimizer
Optimizers live in torch.optim and are constructed from the model parameters plus a learning rate, e.g. torch.optim.SGD(params, lr=...).
dataset = torchvision.datasets.CIFAR10("../data", train=False, transform=torchvision.transforms.ToTensor(), download=True)
dataloader = DataLoader(dataset, batch_size=1)

class Module(nn.Module):
    def __init__(self):
        super().__init__()
        self.model1 = Sequential(
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10)
        )

    def forward(self, input):
        output = self.model1(input)
        return output

module = Module()
optim = torch.optim.SGD(module.parameters(), lr=0.01)
loss = CrossEntropyLoss()
for epoch in range(20):
    run_loss = 0.0  # total loss for this epoch
    # one epoch of training
    for data in dataloader:
        imgs, targets = data
        outputs = module(imgs)
        res_loss = loss(outputs, targets)
        optim.zero_grad()
        res_loss.backward()
        optim.step()
        run_loss += res_loss.item()  # .item() accumulates a plain number instead of the autograd graph
    print(run_loss)
Network model APIs
PyTorch Domains
# train_data = torchvision.datasets.ImageNet("../data", split="train", download=True, transform=torchvision.transforms.ToTensor())  # if it cannot be downloaded publicly, it has to be obtained manually
vgg16_false = torchvision.models.vgg16(pretrained=False)  # randomly initialized parameters
vgg16_true = torchvision.models.vgg16(pretrained=True)    # parameters already pre-trained (on ImageNet)
train_data = torchvision.datasets.CIFAR10("../data", train=True, transform=torchvision.transforms.ToTensor(), download=True)
vgg16_true.add_module('add_linear', nn.Linear(1000, 10))  # append an extra layer
vgg16_false.classifier[6] = nn.Linear(4096, 10)           # replace an existing layer
Saving and loading models
vgg16 = torchvision.models.vgg16(pretrained=False)
# Save method 1: model structure + parameters; loading later requires the model class to be importable
torch.save(vgg16, "vgg16_method1.pth")
model = torch.load("vgg16_method1.pth")
# Save method 2: parameters only (officially recommended)
torch.save(vgg16.state_dict(), "vgg16_method2.pth")
vgg16 = torchvision.models.vgg16(pretrained=False)
vgg16.load_state_dict(torch.load("vgg16_method2.pth"))
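For a custom network saved the same way (method 2), the class definition still has to be available, and the weights are loaded into a fresh instance; the file name below is just an assumption:
module = Module()  # the custom class defined earlier must be defined or imported
module.load_state_dict(torch.load("Module_state.pth"))  # hypothetical file name
module.eval()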
A complete model-training workflow
train_data = torchvision.datasets.CIFAR10(root='../data', train=True, transform=torchvision.transforms.ToTensor())
test_data = torchvision.datasets.CIFAR10(root='../data', train=False, transform=torchvision.transforms.ToTensor())
train_data_size = len(train_data)
test_data_size = len(test_data)
train_dataloader = DataLoader(train_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

class Module(nn.Module):
    def __init__(self):
        super(Module, self).__init__()
        self.model = Sequential(
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10)
        )

    def forward(self, input):
        output = self.model(input)
        return output

module = Module()
loss_fn = nn.CrossEntropyLoss()
if torch.cuda.is_available():
    module = module.cuda()
    loss_fn = loss_fn.cuda()
learning_rate = 1e-2
optim = torch.optim.SGD(module.parameters(), lr=learning_rate)
total_train_step = 0
total_test_step = 0
epoch = 10
writer = SummaryWriter("../logs_train")
for i in range(epoch):
    print("----- epoch {} starts -----".format(i + 1))
    # training phase
    module.train()
    for data in train_dataloader:
        imgs, targets = data
        if torch.cuda.is_available():
            imgs = imgs.cuda()
            targets = targets.cuda()
        outputs = module(imgs)
        loss = loss_fn(outputs, targets)
        optim.zero_grad()
        loss.backward()
        optim.step()
        total_train_step += 1
        if total_train_step % 100 == 0:
            print("train step: {}, loss: {}".format(total_train_step, loss.item()))
            writer.add_scalar("train_loss", loss.item(), total_train_step)
    # testing phase
    module.eval()
    total_test_loss = 0
    total_acc = 0
    with torch.no_grad():
        for data in test_dataloader:
            imgs, targets = data
            if torch.cuda.is_available():
                imgs = imgs.cuda()
                targets = targets.cuda()
            outputs = module(imgs)
            loss = loss_fn(outputs, targets)
            total_test_loss += loss.item()
            acc = (outputs.argmax(1) == targets).sum()  # argmax(0) works down columns, argmax(1) across each row of class scores
            total_acc += acc
    print("loss on the whole test set: {}".format(total_test_loss))
    print("accuracy on the whole test set: {}".format(total_acc / test_data_size))
    writer.add_scalar("test_loss", total_test_loss, total_test_step)
    writer.add_scalar("test_acc", total_acc / test_data_size, total_test_step)
    total_test_step += 1
    torch.save(module, "Module_{}.pth".format(i))
    # torch.save(module.state_dict(), "Module_{}.pth".format(i))
    print("model saved")
writer.close()
Training with a GPU
In the terminal, nvidia-smi shows GPU information.
Google Colab can also be used.
Apply .cuda() to the network model, the data, and the loss function:
import time
start_time = time.time()
if torch.cuda.is_available():
    xxx = xxx.cuda()  # xxx stands for the model / data / loss function
end_time = time.time()
print(end_time - start_time)  # can show the time improvement from training on the GPU
Or:
device1 = torch.device('cpu')
device2 = torch.device('cuda:0')  # cuda:0, cuda:1, ... selects which GPU
device3 = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # !!!
Module = Module.to(device1)
loss_fn = loss_fn.to(device1)
imgs = imgs.to(device1)
targets = targets.to(device1)
A complete model-validation workflow
Take an already trained model and feed it new input.
img_path = "../imgs/dog.png"
img = Image.open(img_path)
img = img.convert('RGB')  # a PNG is RGB plus an alpha channel, so convert to 3 channels
transform = torchvision.transforms.Compose([torchvision.transforms.Resize((32, 32)), torchvision.transforms.ToTensor()])
img = transform(img)
model = torch.load('Module_0.pth', map_location=torch.device('cpu'))
img = torch.reshape(img, (1, 3, 32, 32))  # note the extra batch_size dimension
model.eval()
with torch.no_grad():
    output = model(img)
print(output.argmax(1))
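To turn the predicted index into a label, look it up in the CIFAR10 class list (in the dataset's own order):
classes = ['airplane', 'automobile', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck']
print(classes[output.argmax(1).item()])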
GitHub open-source projects
When reading the argument definitions, change required=xxx to default=xxx so the script can also run without command-line arguments.
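A minimal sketch of that change in a typical argparse-based script (the argument name here is hypothetical):
import argparse

parser = argparse.ArgumentParser()
# original style: must be supplied on the command line
# parser.add_argument("--dataroot", required=True, help="path to the dataset")
# changed so the script can run (or be stepped through in an IDE) without command-line arguments:
parser.add_argument("--dataroot", default="./data", help="path to the dataset")
opt = parser.parse_args()
print(opt.dataroot)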