From 627c95d48b98f58c3320c562c98908629d6c4a45 Mon Sep 17 00:00:00 2001
From: BaofengZan
Date: Fri, 7 Aug 2020 17:40:15 +0800
Subject: [PATCH 1/3] Replace deconvolution with upsample for easier TensorRT use
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 models/head/DBHead.py | 21 +++++++++++----
 models/model.py       |  4 ++-
 models/neck/FPN.py    | 36 ++++++++++++++++++--------
 tools/predict.py      | 60 ++++++++++++++++++++++++++++++++++++++-----
 tools/train.py        |  2 +-
 5 files changed, 100 insertions(+), 23 deletions(-)

diff --git a/models/head/DBHead.py b/models/head/DBHead.py
index c9986bb..c967e72 100644
--- a/models/head/DBHead.py
+++ b/models/head/DBHead.py
@@ -3,19 +3,27 @@
 # @Author : zhoujun
 import torch
 from torch import nn
+import torch.nn.functional as F


 class DBHead(nn.Module):
-    def __init__(self, in_channels, out_channels, k = 50):
+    def __init__(self, in_channels, out_channels, k = 50):  # debug ==> in_channels=256, out_channels=2, k=50
         super().__init__()
         self.k = k
         self.binarize = nn.Sequential(
             nn.Conv2d(in_channels, in_channels // 4, 3, padding=1),
             nn.BatchNorm2d(in_channels // 4),
             nn.ReLU(inplace=True),
-            nn.ConvTranspose2d(in_channels // 4, in_channels // 4, 2, 2),
+            # nn.Upsample(scale_factor=2, mode='nearest'),
+            # ConvTranspose2d(self, in_channels, out_channels, kernel_size, stride=1,
+            #                 padding=0, output_padding=0, groups=1, bias=True,
+            #                 dilation=1, padding_mode='zeros'):
+            # nn.ConvTranspose2d(in_channels // 4, in_channels // 4, 2, 2),  # upsamples by 2x
+            nn.Upsample(scale_factor=2, mode='nearest'),
             nn.BatchNorm2d(in_channels // 4),
             nn.ReLU(inplace=True),
-            nn.ConvTranspose2d(in_channels // 4, 1, 2, 2),
+            # nn.ConvTranspose2d(in_channels // 4, 1, 2, 2),
+            nn.Upsample(scale_factor=2, mode='nearest'),
+            nn.Conv2d(in_channels // 4, 1, 3, padding=1),  # kernel 3, stride 1, padding 1: spatial size unchanged
             nn.Sigmoid())
         self.binarize.apply(self.weights_init)

@@ -41,9 +49,10 @@ def weights_init(self, m):
             m.bias.data.fill_(1e-4)

     def _init_thresh(self, inner_channels, serial=False, smooth=False, bias=False):
-        in_channels = inner_channels
+        in_channels = inner_channels  # 256
         if serial:
             in_channels += 1
+
         self.thresh = nn.Sequential(
             nn.Conv2d(in_channels, inner_channels // 4, 3, padding=1, bias=bias),
             nn.BatchNorm2d(inner_channels // 4),
@@ -67,7 +76,9 @@ def _init_upsample(self, in_channels, out_channels, smooth=False, bias=False):
                 module_list.append(nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=1, bias=True))
             return nn.Sequential(module_list)
         else:
-            return nn.ConvTranspose2d(in_channels, out_channels, 2, 2)
+            # return nn.ConvTranspose2d(in_channels, out_channels, 2, 2)
+            return nn.Sequential(nn.Upsample(scale_factor=2, mode='nearest'),
+                                 nn.Conv2d(in_channels, out_channels, 3, 1, 1))

     def step_function(self, x, y):
         return torch.reciprocal(1 + torch.exp(-self.k * (x - y)))

diff --git a/models/model.py b/models/model.py
index 9e67f54..4173548 100644
--- a/models/model.py
+++ b/models/model.py
@@ -31,7 +31,9 @@ def forward(self, x):
         backbone_out = self.backbone(x)
         neck_out = self.neck(backbone_out)
         y = self.head(neck_out)
-        y = F.interpolate(y, size=(H, W), mode='bilinear', align_corners=True)
+        # y = F.interpolate(y, size=(H, W), mode='bilinear', align_corners=True)
+        # Note: F.interpolate is a plain function, so it cannot sit in an nn.Sequential() as a layer, while nn.Upsample can.
+        y = F.interpolate(y, size=(H, W))  # defaults to nearest; a model trained this way can be rebuilt with the TensorRT API
         return y


diff --git a/models/neck/FPN.py b/models/neck/FPN.py
index 0d30f5e..96fe9f1 100644
--- a/models/neck/FPN.py
+++ b/models/neck/FPN.py
@@ -11,23 +11,25 @@ class FPN(nn.Module):
     def __init__(self, in_channels, inner_channels=256, **kwargs):
         """
-        :param in_channels: output dimensions of the backbone stages
+        :param in_channels: output dimensions of the backbone stages, [64, 128, 256, 512]
         :param kwargs:
         """
         super().__init__()
         inplace = True
         self.conv_out = inner_channels
-        inner_channels = inner_channels // 4
+        inner_channels = inner_channels // 4  # 256 // 4 = 64
         # reduce layers
         self.reduce_conv_c2 = ConvBnRelu(in_channels[0], inner_channels, kernel_size=1, inplace=inplace)
         self.reduce_conv_c3 = ConvBnRelu(in_channels[1], inner_channels, kernel_size=1, inplace=inplace)
         self.reduce_conv_c4 = ConvBnRelu(in_channels[2], inner_channels, kernel_size=1, inplace=inplace)
         self.reduce_conv_c5 = ConvBnRelu(in_channels[3], inner_channels, kernel_size=1, inplace=inplace)
         # Smooth layers
-        self.smooth_p4 = ConvBnRelu(inner_channels, inner_channels, kernel_size=3, padding=1, inplace=inplace)
+        self.smooth_p4 = ConvBnRelu(inner_channels, inner_channels, kernel_size=3, padding=1, inplace=inplace)  # kernel 3, stride 1, padding 1
         self.smooth_p3 = ConvBnRelu(inner_channels, inner_channels, kernel_size=3, padding=1, inplace=inplace)
         self.smooth_p2 = ConvBnRelu(inner_channels, inner_channels, kernel_size=3, padding=1, inplace=inplace)
+        # self.upsample = nn.Upsample(scale_factor=2, mode='nearest')
+
         self.conv = nn.Sequential(
             nn.Conv2d(self.conv_out, self.conv_out, kernel_size=3, padding=1, stride=1),
             nn.BatchNorm2d(self.conv_out),
@@ -39,11 +41,22 @@ def forward(self, x):
         c2, c3, c4, c5 = x
         # Top-down
         p5 = self.reduce_conv_c5(c5)
-        p4 = self._upsample_add(p5, self.reduce_conv_c4(c4))
+        # p4 = self._upsample_add(p5, self.reduce_conv_c4(c4))
+        c4_1 = self.reduce_conv_c4(c4)
+        p4_1 = F.upsample(p5, size=c4_1.size()[2:])
+        p4 = p4_1 + c4_1
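+        # _upsample_add is inlined here (and for p3/p2 below) so the top-down
+        # path is an explicit nearest-neighbor upsample plus an element-wise
+        # add, two ops that map directly onto TensorRT layers.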
+
         p4 = self.smooth_p4(p4)
-        p3 = self._upsample_add(p4, self.reduce_conv_c3(c3))
+
+        # p3 = self._upsample_add(p4, self.reduce_conv_c3(c3))
+        c3_1 = self.reduce_conv_c3(c3)
+        p3_1 = F.upsample(p4, size=c3_1.size()[2:])
+        p3 = p3_1 + c3_1
         p3 = self.smooth_p3(p3)
-        p2 = self._upsample_add(p3, self.reduce_conv_c2(c2))
+        # p2 = self._upsample_add(p3, self.reduce_conv_c2(c2))
+        c2_1 = self.reduce_conv_c2(c2)
+        p2_1 = F.upsample(p3, size=c2_1.size()[2:])
+        p2 = p2_1 + c2_1
         p2 = self.smooth_p2(p2)

         x = self._upsample_cat(p2, p3, p4, p5)
@@ -51,11 +64,14 @@ def forward(self, x):
         return x

     def _upsample_add(self, x, y):
-        return F.interpolate(x, size=y.size()[2:]) + y
+        return F.upsample(x, size=y.size()[2:]) + y

     def _upsample_cat(self, p2, p3, p4, p5):
         h, w = p2.size()[2:]
+        # F.upsample is a deprecated alias of F.interpolate (nearest by default)
-        p3 = F.interpolate(p3, size=(h, w))
+        # p3 = F.interpolate(p3, size=(h, w))
+        p3 = F.upsample(p3, size=(h, w))
-        p4 = F.interpolate(p4, size=(h, w))
+        # p4 = F.interpolate(p4, size=(h, w))
+        p4 = F.upsample(p4, size=(h, w))
-        p5 = F.interpolate(p5, size=(h, w))
+        # p5 = F.interpolate(p5, size=(h, w))
+        p5 = F.upsample(p5, size=(h, w))
         return torch.cat([p2, p3, p4, p5], dim=1)

diff --git a/tools/predict.py b/tools/predict.py
index c58d262..ffbe0e6 100644
--- a/tools/predict.py
+++ b/tools/predict.py
@@ -5,6 +5,7 @@
 import os
 import sys
 import pathlib
+import struct
 __dir__ = pathlib.Path(os.path.abspath(__file__))
 sys.path.append(str(__dir__))
 sys.path.append(str(__dir__.parent.parent))
@@ -35,7 +36,7 @@ def resize_image(img, short_size):

 class Pytorch_model:
-    def __init__(self, model_path, post_p_thre=0.7, gpu_id=None):
+    def __init__(self, model_path, post_p_thre=0.7, gpu_id=None, save_wts=False):
         '''
         Initialize the PyTorch model
         :param model_path: path to the model (weights only, or weights saved together with the graph)
@@ -59,6 +60,19 @@ def __init__(self, model_path, post_p_thre=0.7, gpu_id=None):
         self.model.load_state_dict(checkpoint['state_dict'])
         self.model.to(self.device)
         self.model.eval()
+        # Save the weights to a tensorrtx-style .wts text file
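+        # Layout written below (following the tensorrtx convention): the first
+        # line holds the number of tensors; each following line holds
+        # "<name> <element count>" and then every element as an
+        # 8-hex-digit big-endian float32.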
+        if save_wts:
+            f = open('DBNet.wts', 'w')
+            f.write('{}\n'.format(len(self.model.state_dict().keys())))
+            for k, v in self.model.state_dict().items():
+                vr = v.reshape(-1).cpu().numpy()
+                f.write('{} {} '.format(k, len(vr)))
+                for vv in vr:
+                    f.write(' ')
+                    f.write(struct.pack('>f', float(vv)).hex())
+                f.write('\n')
+

         self.transform = []
         for t in config['dataset']['train']['dataset']['args']['transforms']:
@@ -75,10 +89,12 @@ def predict(self, img_path: str, is_output_polygon=False, short_size: int = 1024):
         '''
         assert os.path.exists(img_path), 'file is not exists'
         img = cv2.imread(img_path, 1 if self.img_mode != 'GRAY' else 0)
+        # img = cv2.imread("E:\\Datasets\\ICDAR2015\\test\\img\\img_10.jpg", 1 if self.img_mode != 'GRAY' else 0)
         if self.img_mode == 'RGB':
             img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
         h, w = img.shape[:2]
-        img = resize_image(img, short_size)
+        # img = resize_image(img, short_size)
+        img = cv2.resize(img, (640, 640))  # fixed 640x640 input, matching the TensorRT engine
         # reshape the image from (w, h) to (1, img_channel, h, w)
         tensor = self.transform(img)
         tensor = tensor.unsqueeze_(0)
@@ -88,8 +104,10 @@ def predict(self, img_path: str, is_output_polygon=False, short_size: int = 1024):
         with torch.no_grad():
             if str(self.device).__contains__('cuda'):
                 torch.cuda.synchronize(self.device)
-            start = time.time()
+            start = time.perf_counter()  # time.clock() is deprecated and was removed in Python 3.8
             preds = self.model(tensor)
+            t = time.perf_counter() - start
+            print("infer time (ms):", t * 1000)
             if str(self.device).__contains__('cuda'):
                 torch.cuda.synchronize(self.device)
             box_list, score_list = self.post_process(batch, preds, is_output_polygon=is_output_polygon)
@@ -107,6 +125,28 @@ def predict(self, img_path: str, is_output_polygon=False, short_size: int = 1024):
             t = time.time() - start
         return preds[0, 0, :, :].detach().cpu().numpy(), box_list, score_list, t

+    def export_onnx(self):
+        img = torch.zeros((1, 3, 640, 640)).cuda()  # dummy input with the fixed 640x640 inference size
+        # tensor = self.transform(img)
+        # tensor = tensor.unsqueeze_(0)
+        # tensor = img.to(self.device)
+        y = self.model(img)
+        try:
+            import onnx
+
+            print('\nStarting ONNX export with onnx %s...' % onnx.__version__)
+            f = 'model.onnx'  # output filename
+            # self.model.fuse()  # only for ONNX
+            torch.onnx.export(self.model, img, f, verbose=False, opset_version=12, input_names=['images'],
+                              output_names=['classes', 'boxes'] if y is None else ['output'])
+
+            # Checks
+            onnx_model = onnx.load(f)  # load onnx model
+            onnx.checker.check_model(onnx_model)  # check onnx model
+            print(onnx.helper.printable_graph(onnx_model.graph))  # print a human-readable model
+            print('ONNX export success, saved as %s' % f)
+        except Exception as e:
+            print('ONNX export failure: %s' % e)

 def save_depoly(model, input, save_path):
     traced_script_model = torch.jit.trace(model, input)
@@ -116,13 +156,15 @@ def save_depoly(model, input, save_path):
 def init_args():
     import argparse
     parser = argparse.ArgumentParser(description='DBNet.pytorch')
-    parser.add_argument('--model_path', default=r'model_best.pth', type=str)
-    parser.add_argument('--input_folder', default='./test/input', type=str, help='img path for predict')
+    parser.add_argument('--model_path', default=r'E:\LearningCodes\DBNET\DBNet.pytorch\model_best.pth', type=str)
+    parser.add_argument('--input_folder', default=r'E:\Datasets\ICDAR2015\test\img', type=str, help='img path for predict')
     parser.add_argument('--output_folder', default='./test/output', type=str, help='img path for output')
     parser.add_argument('--thre', default=0.3, type=float, help='the thresh of post_processing')
     parser.add_argument('--polygon', action='store_true', help='output polygon or box')
     parser.add_argument('--show', action='store_true', help='show result')
     parser.add_argument('--save_resut', action='store_true', help='save box and score to txt file')
+    parser.add_argument('--save_wts', action='store_true', help='export the weights to DBNet.wts for TensorRT')
+    parser.add_argument('--onnx', action='store_true', help='export the model to ONNX')
     args = parser.parse_args()
     return args

@@ -137,9 +179,15 @@ def init_args():
     print(args)
     os.environ['CUDA_VISIBLE_DEVICES'] = str('0')
     # initialize the network
-    model = Pytorch_model(args.model_path, post_p_thre=args.thre, gpu_id=0)
+    model = Pytorch_model(args.model_path, post_p_thre=args.thre, gpu_id=0, save_wts=args.save_wts)
+    if args.onnx:
+        model.export_onnx()
+    if args.save_wts:
+        exit(0)
+
     img_folder = pathlib.Path(args.input_folder)
     for img_path in tqdm(get_file_list(args.input_folder, p_postfix=['.jpg'])):
+
         preds, boxes_list, score_list, t = model.predict(img_path, is_output_polygon=args.polygon)
         img = draw_bbox(cv2.imread(img_path)[:, :, ::-1], boxes_list)
         if args.show:

diff --git a/tools/train.py b/tools/train.py
index 697c216..71c92e9 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -6,7 +6,7 @@
 import argparse
 import os
-
+os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"  # tolerate duplicate OpenMP runtimes (common with conda on Windows)
 import anyconfig

From 00fe9b3d2a363cfff8086de8e382abfba5d6ecbd Mon Sep 17 00:00:00 2001
From: BaofengZan
Date: Fri, 7 Aug 2020 18:10:56 +0800
Subject: [PATCH 2/3] Update README
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.MD             | 124 +++++++-----------------------------------
 models/head/DBHead.py |   1 -
 2 files changed, 21 insertions(+), 104 deletions(-)

diff --git a/README.MD b/README.MD
index 6e591d9..986b5ce 100644
--- a/README.MD
+++ b/README.MD
@@ -1,128 +1,46 @@
 # Real-time Scene Text Detection with Differentiable Binarization
-**note**: some code is inherited from [MhLiao/DB](https://github.com/MhLiao/DB)
+**note**: original repo: [DBNet.pytorch](https://github.com/WenmuZhou/DBNet.pytorch)

 [中文解读](https://zhuanlan.zhihu.com/p/94677957)

 ![network](imgs/paper/db.jpg)

-## update
-2020-06-07: added grayscale training; when training on grayscale images, remove `dataset.args.transforms.Normalize` from the config
+## Environment

-## Install Using Conda
-```
-conda env create -f environment.yml
-git clone https://github.com/WenmuZhou/DBNet.pytorch.git
-cd DBNet.pytorch/
-```
+Please follow the original repo's [Readme](https://github.com/WenmuZhou/DBNet.pytorch/blob/master/README.MD).

-or
-## Install Manually
-```bash
-conda create -n dbnet python=3.6
-conda activate dbnet
-conda install ipython pip
-# python dependencies
-pip install -r requirement.txt
+## Modifications

-# install PyTorch with cuda-10.1
-# Note that you can change the cudatoolkit version to the version you want.
-conda install pytorch torchvision cudatoolkit=10.1 -c pytorch
-
-# clone repo
-git clone https://github.com/WenmuZhou/DBNet.pytorch.git
-cd DBNet.pytorch/
+So that the network can be accelerated with TensorRT, this repo replaces every deconvolution with an upsample, for example:
+```python
+# original version
+nn.ConvTranspose2d(in_channels // 4, in_channels // 4, 2, 2),  # upsamples by 2x
+# modified version
+nn.Upsample(scale_factor=2, mode='nearest'),
 ```
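+When the deconvolution also changed the channel count, the upsample is followed by a 3x3 convolution that keeps the spatial size, as in `DBHead`:
+```python
+# original version
+nn.ConvTranspose2d(in_channels // 4, 1, 2, 2),
+# modified version
+nn.Upsample(scale_factor=2, mode='nearest'),
+nn.Conv2d(in_channels // 4, 1, 3, padding=1),  # kernel 3, stride 1, padding 1
+```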
-## Requirements
-* pytorch 1.4+
-* torchvision 0.5+
-* gcc 4.9+
-
-## Download
+For the remaining changes, see the code:
-
-TBD
-
-## Data Preparation
-
-Training data: prepare a text `train.txt` in the following format, use '\t' as a separator
 ```
-./datasets/train/img/001.jpg	./datasets/train/gt/001.txt
 ```
+models/head/DBHead.py
+models/model.py
+models/neck/FPN.py
 ```
-Validation data: prepare a text `test.txt` in the following format, use '\t' as a separator
-```
-./datasets/test/img/001.jpg	./datasets/test/gt/001.txt
-```
-- Store images in the `img` folder
-- Store groundtruth in the `gt` folder
-
-The groundtruth can be `.txt` files, with the following format:
-```
-x1, y1, x2, y2, x3, y3, x4, y4, annotation
-```
-
-
-## Train
-1. config the `dataset['train']['dataset'['data_path']'`,`dataset['validate']['dataset'['data_path']` in [config/icdar2015_resnet18_fpn_DBhead_polyLR.yaml](cconfig/icdar2015_resnet18_fpn_DBhead_polyLR.yaml)
-* . single gpu train
-```bash
-bash singlel_gpu_train.sh
-```
-* . Multi-gpu training
-```bash
-bash multi_gpu_train.sh
-```
-## Test
-
-[eval.py](tools/eval.py) is used to test model on test dataset
-
-1. config `model_path` in [eval.sh](eval.sh)
-2. use following script to test
-```bash
-bash eval.sh
-```
-
-## Predict
-[predict.py](tools/predict.py) Can be used to inference on all images in a folder
-1. config `model_path`,`input_folder`,`output_folder` in [predict.sh](predict.sh)
-2. use following script to predict
-```
-bash predict.sh
-```
-You can change the `model_path` in the `predict.sh` file to your model location.
-
-tips: if result is not good, you can change `thre` in [predict.sh](predict.sh)
-
-The project is still under development.
-
-
-Performance
-
-
-### [ICDAR 2015](http://rrc.cvc.uab.es/?ch=4)
-only train on ICDAR2015 dataset
-
-| Method | image size (short size) | learning rate | Precision (%) | Recall (%) | F-measure (%) | FPS |
-|:------:|:-----------------------:|:-------------:|:-------------:|:----------:|:-------------:|:---:|
-| SynthText-Defrom-ResNet-18(paper) | 736 | 0.007 | 86.8 | 78.4 | 82.3 | 48 |
-| ImageNet-resnet18-FPN-DBHead | 736 | 1e-3 | 87.03 | 75.06 | 80.6 | 43 |
-| ImageNet-Defrom-Resnet18-FPN-DBHead | 736 | 1e-3 | 88.61 | 73.84 | 80.56 | 36 |
-| ImageNet-resnet50-FPN-DBHead | 736 | 1e-3 | 88.06 | 77.14 | 82.24 | 27 |
-| ImageNet-resnest50-FPN-DBHead | 736 | 1e-3 | 88.18 | 76.27 | 81.78 | 27 |
+## Model
+A model trained with the modified code: [Baidu cloud, access code: myj4](https://pan.baidu.com/s/10Ff-0AJkkpC9jGWdNSsN6g)

-### examples
-TBD
+Training is not finished yet: compared with the original model (1200 epochs), this one was trained for only 500 epochs. Precision: 90.0, recall: 68.2.
+You can train it further yourself.
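+
+### Exporting for deployment
+`tools/predict.py` gains two flags: `--save_wts` dumps the weights to `DBNet.wts`, and `--onnx` exports the model to `model.onnx`.
+The .wts file is plain text; a minimal reader, shown here only as a sketch of the tensorrtx-style format written by `--save_wts`, looks like:
+```python
+import struct
+
+def load_wts(path):
+    """Parse a tensorrtx-style .wts file into {name: list of floats}."""
+    weights = {}
+    with open(path) as f:
+        for _ in range(int(f.readline())):  # first line: tensor count
+            fields = f.readline().split()
+            name, n = fields[0], int(fields[1])
+            # every remaining field is one float32 encoded as big-endian hex
+            weights[name] = [struct.unpack('>f', bytes.fromhex(h))[0] for h in fields[2:2 + n]]
+    return weights
+```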
-### todo
-- [x] mutil gpu training
+## TensorRT version

-### reference
-1. https://arxiv.org/pdf/1911.08947.pdf
-2. https://github.com/WenmuZhou/PANet.pytorch
-3. https://github.com/MhLiao/DB
+[Link]: https://github.com/BaofengZan/DBNet-TensorRT

-**If this repository helps you,please star it. Thanks.**
+ 
\ No newline at end of file
diff --git a/models/head/DBHead.py b/models/head/DBHead.py
index c967e72..7269dbe 100644
--- a/models/head/DBHead.py
+++ b/models/head/DBHead.py
@@ -13,7 +13,6 @@ def __init__(self, in_channels, out_channels, k = 50):  # debug ==> in_channels=256, out_channels=2, k=50
             nn.Conv2d(in_channels, in_channels // 4, 3, padding=1),
             nn.BatchNorm2d(in_channels // 4),
             nn.ReLU(inplace=True),
-            # nn.Upsample(scale_factor=2, mode='nearest'),
             # ConvTranspose2d(self, in_channels, out_channels, kernel_size, stride=1,
             #                 padding=0, output_padding=0, groups=1, bias=True,
             #                 dilation=1, padding_mode='zeros'):

From 0b607bb66b98a343b2bda0da20af3027ee9ed62d Mon Sep 17 00:00:00 2001
From: BaofengZan
Date: Fri, 7 Aug 2020 18:12:52 +0800
Subject: [PATCH 3/3] Add the address of the TensorRT version
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.MD | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.MD b/README.MD
index 986b5ce..38cb82c 100644
--- a/README.MD
+++ b/README.MD
@@ -41,6 +41,6 @@ models/neck/FPN.py

 ## TensorRT version

-[Link]: https://github.com/BaofengZan/DBNet-TensorRT
+https://github.com/BaofengZan/DBNet-TensorRT
\ No newline at end of file