From 627c95d48b98f58c3320c562c98908629d6c4a45 Mon Sep 17 00:00:00 2001
From: BaofengZan
Date: Fri, 7 Aug 2020 17:40:15 +0800
Subject: [PATCH 1/3] Replace deconvolution with upsample for easier TensorRT use
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 models/head/DBHead.py | 21 +++++++++++----
 models/model.py       |  4 ++-
 models/neck/FPN.py    | 36 ++++++++++++++++++--------
 tools/predict.py      | 60 ++++++++++++++++++++++++++++++++++++++-----
 tools/train.py        |  2 +-
 5 files changed, 100 insertions(+), 23 deletions(-)

diff --git a/models/head/DBHead.py b/models/head/DBHead.py
index c9986bb..c967e72 100644
--- a/models/head/DBHead.py
+++ b/models/head/DBHead.py
@@ -3,19 +3,27 @@
 # @Author : zhoujun
 import torch
 from torch import nn
+import torch.nn.functional as F


 class DBHead(nn.Module):
-    def __init__(self, in_channels, out_channels, k = 50):
+    def __init__(self, in_channels, out_channels, k = 50):  # debug ==> in_channels=256, out_channels=2, k=50
         super().__init__()
         self.k = k
         self.binarize = nn.Sequential(
             nn.Conv2d(in_channels, in_channels // 4, 3, padding=1),
             nn.BatchNorm2d(in_channels // 4),
             nn.ReLU(inplace=True),
-            nn.ConvTranspose2d(in_channels // 4, in_channels // 4, 2, 2),
+            # nn.Upsample(scale_factor=2, mode='nearest'),
+            # ConvTranspose2d(self, in_channels, out_channels, kernel_size, stride=1,
+            #                 padding=0, output_padding=0, groups=1, bias=True,
+            #                 dilation=1, padding_mode='zeros'):
+            # nn.ConvTranspose2d(in_channels // 4, in_channels // 4, 2, 2),  # upsamples by 2x
+            nn.Upsample(scale_factor=2, mode='nearest'),
             nn.BatchNorm2d(in_channels // 4),
             nn.ReLU(inplace=True),
-            nn.ConvTranspose2d(in_channels // 4, 1, 2, 2),
+            # nn.ConvTranspose2d(in_channels // 4, 1, 2, 2),
+            nn.Upsample(scale_factor=2, mode='nearest'),
+            nn.Conv2d(in_channels // 4, 1, 3, padding=1),  # kernel 3, stride 1, padding 1: spatial size unchanged
             nn.Sigmoid())
         self.binarize.apply(self.weights_init)

@@ -41,9 +49,10 @@ def weights_init(self, m):
             m.bias.data.fill_(1e-4)

     def _init_thresh(self, inner_channels, serial=False, smooth=False, bias=False):
-        in_channels = inner_channels
+        in_channels = inner_channels  # 256
         if serial:
             in_channels += 1
+
         self.thresh = nn.Sequential(
             nn.Conv2d(in_channels, inner_channels // 4, 3, padding=1, bias=bias),
             nn.BatchNorm2d(inner_channels // 4),
@@ -67,7 +76,9 @@ def _init_upsample(self, in_channels, out_channels, smooth=False, bias=False):
                 module_list.append(nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=1, bias=True))
             return nn.Sequential(module_list)
         else:
-            return nn.ConvTranspose2d(in_channels, out_channels, 2, 2)
+            # return nn.ConvTranspose2d(in_channels, out_channels, 2, 2)
+            return nn.Sequential(nn.Upsample(scale_factor=2, mode='nearest'),
+                                 nn.Conv2d(in_channels, out_channels, 3, 1, 1))

     def step_function(self, x, y):
         return torch.reciprocal(1 + torch.exp(-self.k * (x - y)))

diff --git a/models/model.py b/models/model.py
index 9e67f54..4173548 100644
--- a/models/model.py
+++ b/models/model.py
@@ -31,7 +31,9 @@ def forward(self, x):
         backbone_out = self.backbone(x)
         neck_out = self.neck(backbone_out)
         y = self.head(neck_out)
-        y = F.interpolate(y, size=(H, W), mode='bilinear', align_corners=True)
+        # y = F.interpolate(y, size=(H, W), mode='bilinear', align_corners=True)
+        # Note: F.interpolate is a plain function, so it cannot sit in an nn.Sequential() as a layer, while nn.Upsample can.
+        y = F.interpolate(y, size=(H, W))  # defaults to nearest; a model trained this way can be rebuilt with the TensorRT API
         return y


diff --git a/models/neck/FPN.py b/models/neck/FPN.py
index 0d30f5e..96fe9f1 100644
--- a/models/neck/FPN.py
+++ b/models/neck/FPN.py
@@ -11,23 +11,25 @@ class FPN(nn.Module):
     def __init__(self, in_channels, inner_channels=256, **kwargs):
         """
-        :param in_channels: output dimensions of the backbone stages
+        :param in_channels: output dimensions of the backbone stages, [64, 128, 256, 512]
         :param kwargs:
         """
         super().__init__()
         inplace = True
         self.conv_out = inner_channels
-        inner_channels = inner_channels // 4
+        inner_channels = inner_channels // 4  # 256 // 4 = 64
         # reduce layers
         self.reduce_conv_c2 = ConvBnRelu(in_channels[0], inner_channels, kernel_size=1, inplace=inplace)
         self.reduce_conv_c3 = ConvBnRelu(in_channels[1], inner_channels, kernel_size=1, inplace=inplace)
         self.reduce_conv_c4 = ConvBnRelu(in_channels[2], inner_channels, kernel_size=1, inplace=inplace)
         self.reduce_conv_c5 = ConvBnRelu(in_channels[3], inner_channels, kernel_size=1, inplace=inplace)
         # Smooth layers
-        self.smooth_p4 = ConvBnRelu(inner_channels, inner_channels, kernel_size=3, padding=1, inplace=inplace)
+        self.smooth_p4 = ConvBnRelu(inner_channels, inner_channels, kernel_size=3, padding=1, inplace=inplace)  # kernel 3, stride 1, padding 1
         self.smooth_p3 = ConvBnRelu(inner_channels, inner_channels, kernel_size=3, padding=1, inplace=inplace)
         self.smooth_p2 = ConvBnRelu(inner_channels, inner_channels, kernel_size=3, padding=1, inplace=inplace)
+        # self.upsample = nn.Upsample(scale_factor=2, mode='nearest')
+
         self.conv = nn.Sequential(
             nn.Conv2d(self.conv_out, self.conv_out, kernel_size=3, padding=1, stride=1),
             nn.BatchNorm2d(self.conv_out),
@@ -39,11 +41,22 @@ def forward(self, x):
         c2, c3, c4, c5 = x
         # Top-down
         p5 = self.reduce_conv_c5(c5)
-        p4 = self._upsample_add(p5, self.reduce_conv_c4(c4))
+        # p4 = self._upsample_add(p5, self.reduce_conv_c4(c4))
+        c4_1 = self.reduce_conv_c4(c4)
+        p4_1 = F.upsample(p5, size=c4_1.size()[2:])
+        p4 = p4_1 + c4_1
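+        # _upsample_add is inlined here (and for p3/p2 below) so the top-down
+        # path is an explicit nearest-neighbor upsample plus an element-wise
+        # add, two ops that map directly onto TensorRT layers.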
+
         p4 = self.smooth_p4(p4)
-        p3 = self._upsample_add(p4, self.reduce_conv_c3(c3))
+
+        # p3 = self._upsample_add(p4, self.reduce_conv_c3(c3))
+        c3_1 = self.reduce_conv_c3(c3)
+        p3_1 = F.upsample(p4, size=c3_1.size()[2:])
+        p3 = p3_1 + c3_1
         p3 = self.smooth_p3(p3)
-        p2 = self._upsample_add(p3, self.reduce_conv_c2(c2))
+        # p2 = self._upsample_add(p3, self.reduce_conv_c2(c2))
+        c2_1 = self.reduce_conv_c2(c2)
+        p2_1 = F.upsample(p3, size=c2_1.size()[2:])
+        p2 = p2_1 + c2_1
         p2 = self.smooth_p2(p2)

         x = self._upsample_cat(p2, p3, p4, p5)
@@ -51,11 +64,14 @@ def forward(self, x):
         return x

     def _upsample_add(self, x, y):
-        return F.interpolate(x, size=y.size()[2:]) + y
+        return F.upsample(x, size=y.size()[2:]) + y

     def _upsample_cat(self, p2, p3, p4, p5):
         h, w = p2.size()[2:]
+        # F.upsample is a deprecated alias of F.interpolate (nearest by default)
-        p3 = F.interpolate(p3, size=(h, w))
+        # p3 = F.interpolate(p3, size=(h, w))
+        p3 = F.upsample(p3, size=(h, w))
-        p4 = F.interpolate(p4, size=(h, w))
+        # p4 = F.interpolate(p4, size=(h, w))
+        p4 = F.upsample(p4, size=(h, w))
-        p5 = F.interpolate(p5, size=(h, w))
+        # p5 = F.interpolate(p5, size=(h, w))
+        p5 = F.upsample(p5, size=(h, w))
         return torch.cat([p2, p3, p4, p5], dim=1)

diff --git a/tools/predict.py b/tools/predict.py
index c58d262..ffbe0e6 100644
--- a/tools/predict.py
+++ b/tools/predict.py
@@ -5,6 +5,7 @@
 import os
 import sys
 import pathlib
+import struct
 __dir__ = pathlib.Path(os.path.abspath(__file__))
 sys.path.append(str(__dir__))
 sys.path.append(str(__dir__.parent.parent))
@@ -35,7 +36,7 @@ def resize_image(img, short_size):

 class Pytorch_model:
-    def __init__(self, model_path, post_p_thre=0.7, gpu_id=None):
+    def __init__(self, model_path, post_p_thre=0.7, gpu_id=None, save_wts=False):
         '''
         Initialize the PyTorch model
         :param model_path: path to the model (weights only, or weights saved together with the graph)
@@ -59,6 +60,19 @@ def __init__(self, model_path, post_p_thre=0.7, gpu_id=None):
         self.model.load_state_dict(checkpoint['state_dict'])
         self.model.to(self.device)
         self.model.eval()
+        # Save the weights to a tensorrtx-style .wts text file
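+        # Layout written below (following the tensorrtx convention): the first
+        # line holds the number of tensors; each following line holds
+        # "<name> <element count>" and then every element as an
+        # 8-hex-digit big-endian float32.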
+        if save_wts:
+            f = open('DBNet.wts', 'w')
+            f.write('{}\n'.format(len(self.model.state_dict().keys())))
+            for k, v in self.model.state_dict().items():
+                vr = v.reshape(-1).cpu().numpy()
+                f.write('{} {} '.format(k, len(vr)))
+                for vv in vr:
+                    f.write(' ')
+                    f.write(struct.pack('>f', float(vv)).hex())
+                f.write('\n')
+

         self.transform = []
         for t in config['dataset']['train']['dataset']['args']['transforms']:
@@ -75,10 +89,12 @@ def predict(self, img_path: str, is_output_polygon=False, short_size: int = 1024):
         '''
         assert os.path.exists(img_path), 'file is not exists'
         img = cv2.imread(img_path, 1 if self.img_mode != 'GRAY' else 0)
+        # img = cv2.imread("E:\\Datasets\\ICDAR2015\\test\\img\\img_10.jpg", 1 if self.img_mode != 'GRAY' else 0)
         if self.img_mode == 'RGB':
             img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
         h, w = img.shape[:2]
-        img = resize_image(img, short_size)
+        # img = resize_image(img, short_size)
+        img = cv2.resize(img, (640, 640))  # fixed 640x640 input, matching the TensorRT engine
         # reshape the image from (w, h) to (1, img_channel, h, w)
         tensor = self.transform(img)
         tensor = tensor.unsqueeze_(0)
@@ -88,8 +104,10 @@ def predict(self, img_path: str, is_output_polygon=False, short_size: int = 1024):
         with torch.no_grad():
             if str(self.device).__contains__('cuda'):
                 torch.cuda.synchronize(self.device)
-            start = time.time()
+            start = time.perf_counter()  # time.clock() is deprecated and was removed in Python 3.8
             preds = self.model(tensor)
+            t = time.perf_counter() - start
+            print("infer time (ms):", t * 1000)
             if str(self.device).__contains__('cuda'):
                 torch.cuda.synchronize(self.device)
             box_list, score_list = self.post_process(batch, preds, is_output_polygon=is_output_polygon)
@@ -107,6 +125,28 @@ def predict(self, img_path: str, is_output_polygon=False, short_size: int = 1024):
             t = time.time() - start
         return preds[0, 0, :, :].detach().cpu().numpy(), box_list, score_list, t

+    def export_onnx(self):
+        img = torch.zeros((1, 3, 640, 640)).cuda()  # dummy input with the fixed 640x640 inference size
+        # tensor = self.transform(img)
+        # tensor = tensor.unsqueeze_(0)
+        # tensor = img.to(self.device)
+        y = self.model(img)
+        try:
+            import onnx
+
+            print('\nStarting ONNX export with onnx %s...' % onnx.__version__)
+            f = 'model.onnx'  # output filename
+            # self.model.fuse()  # only for ONNX
+            torch.onnx.export(self.model, img, f, verbose=False, opset_version=12, input_names=['images'],
+                              output_names=['classes', 'boxes'] if y is None else ['output'])
+
+            # Checks
+            onnx_model = onnx.load(f)  # load onnx model
+            onnx.checker.check_model(onnx_model)  # check onnx model
+            print(onnx.helper.printable_graph(onnx_model.graph))  # print a human-readable model
+            print('ONNX export success, saved as %s' % f)
+        except Exception as e:
+            print('ONNX export failure: %s' % e)

 def save_depoly(model, input, save_path):
     traced_script_model = torch.jit.trace(model, input)
@@ -116,13 +156,15 @@ def save_depoly(model, input, save_path):
 def init_args():
     import argparse
     parser = argparse.ArgumentParser(description='DBNet.pytorch')
-    parser.add_argument('--model_path', default=r'model_best.pth', type=str)
-    parser.add_argument('--input_folder', default='./test/input', type=str, help='img path for predict')
+    parser.add_argument('--model_path', default=r'E:\LearningCodes\DBNET\DBNet.pytorch\model_best.pth', type=str)
+    parser.add_argument('--input_folder', default=r'E:\Datasets\ICDAR2015\test\img', type=str, help='img path for predict')
     parser.add_argument('--output_folder', default='./test/output', type=str, help='img path for output')
     parser.add_argument('--thre', default=0.3, type=float, help='the thresh of post_processing')
     parser.add_argument('--polygon', action='store_true', help='output polygon or box')
     parser.add_argument('--show', action='store_true', help='show result')
     parser.add_argument('--save_resut', action='store_true', help='save box and score to txt file')
+    parser.add_argument('--save_wts', action='store_true', help='export the weights to DBNet.wts for TensorRT')
+    parser.add_argument('--onnx', action='store_true', help='export the model to ONNX')
     args = parser.parse_args()
     return args

@@ -137,9 +179,15 @@ def init_args():
     print(args)
     os.environ['CUDA_VISIBLE_DEVICES'] = str('0')
     # initialize the network
-    model = Pytorch_model(args.model_path, post_p_thre=args.thre, gpu_id=0)
+    model = Pytorch_model(args.model_path, post_p_thre=args.thre, gpu_id=0, save_wts=args.save_wts)
+    if args.onnx:
+        model.export_onnx()
+    if args.save_wts:
+        exit(0)
+
     img_folder = pathlib.Path(args.input_folder)
     for img_path in tqdm(get_file_list(args.input_folder, p_postfix=['.jpg'])):
+
         preds, boxes_list, score_list, t = model.predict(img_path, is_output_polygon=args.polygon)
         img = draw_bbox(cv2.imread(img_path)[:, :, ::-1], boxes_list)
         if args.show:

diff --git a/tools/train.py b/tools/train.py
index 697c216..71c92e9 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -6,7 +6,7 @@
 import argparse
 import os
-
+os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"  # tolerate duplicate OpenMP runtimes (common with conda on Windows)
 import anyconfig

From 00fe9b3d2a363cfff8086de8e382abfba5d6ecbd Mon Sep 17 00:00:00 2001
From: BaofengZan
Date: Fri, 7 Aug 2020 18:10:56 +0800
Subject: [PATCH 2/3] Update README
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.MD             | 124 +++++++-----------------------------------
 models/head/DBHead.py |   1 -
 2 files changed, 21 insertions(+), 104 deletions(-)

diff --git a/README.MD b/README.MD
index 6e591d9..986b5ce 100644
--- a/README.MD
+++ b/README.MD
@@ -1,128 +1,46 @@
 # Real-time Scene Text Detection with Differentiable Binarization
-**note**: some code is inherited from [MhLiao/DB](https://github.com/MhLiao/DB)
+**note**: original repo: [DBNet.pytorch](https://github.com/WenmuZhou/DBNet.pytorch)

 [中文解读](https://zhuanlan.zhihu.com/p/94677957)

 ![network](imgs/paper/db.jpg)

-## update
-2020-06-07: added grayscale training; when training on grayscale images, remove `dataset.args.transforms.Normalize` from the config
+## Environment

-## Install Using Conda
-```
-conda env create -f environment.yml
-git clone https://github.com/WenmuZhou/DBNet.pytorch.git
-cd DBNet.pytorch/
-```
+Please follow the original repo's [Readme](https://github.com/WenmuZhou/DBNet.pytorch/blob/master/README.MD).

-or
-## Install Manually
-```bash
-conda create -n dbnet python=3.6
-conda activate dbnet
-conda install ipython pip
-# python dependencies
-pip install -r requirement.txt
+## Modifications

-# install PyTorch with cuda-10.1
-# Note that you can change the cudatoolkit version to the version you want.
-conda install pytorch torchvision cudatoolkit=10.1 -c pytorch
-
-# clone repo
-git clone https://github.com/WenmuZhou/DBNet.pytorch.git
-cd DBNet.pytorch/
+So that the network can be accelerated with TensorRT, this repo replaces every deconvolution with an upsample, for example:
+```python
+# original version
+nn.ConvTranspose2d(in_channels // 4, in_channels // 4, 2, 2),  # upsamples by 2x
+# modified version
+nn.Upsample(scale_factor=2, mode='nearest'),
 ```
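+When the deconvolution also changed the channel count, the upsample is followed by a 3x3 convolution that keeps the spatial size, as in `DBHead`:
+```python
+# original version
+nn.ConvTranspose2d(in_channels // 4, 1, 2, 2),
+# modified version
+nn.Upsample(scale_factor=2, mode='nearest'),
+nn.Conv2d(in_channels // 4, 1, 3, padding=1),  # kernel 3, stride 1, padding 1
+```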
-## Requirements
-* pytorch 1.4+
-* torchvision 0.5+
-* gcc 4.9+
-
-## Download
+For the remaining changes, see the code:
-
-TBD
-
-## Data Preparation
-
-Training data: prepare a text `train.txt` in the following format, use '\t' as a separator
 ```
-./datasets/train/img/001.jpg	./datasets/train/gt/001.txt
 ```
+models/head/DBHead.py
+models/model.py
+models/neck/FPN.py
 ```
-Validation data: prepare a text `test.txt` in the following format, use '\t' as a separator
-```
-./datasets/test/img/001.jpg	./datasets/test/gt/001.txt
-```
-- Store images in the `img` folder
-- Store groundtruth in the `gt` folder
-
-The groundtruth can be `.txt` files, with the following format:
-```
-x1, y1, x2, y2, x3, y3, x4, y4, annotation
-```
-
-
-## Train
-1. config the `dataset['train']['dataset'['data_path']'`,`dataset['validate']['dataset'['data_path']` in [config/icdar2015_resnet18_fpn_DBhead_polyLR.yaml](cconfig/icdar2015_resnet18_fpn_DBhead_polyLR.yaml)
-* . single gpu train
-```bash
-bash singlel_gpu_train.sh
-```
-* . Multi-gpu training
-```bash
-bash multi_gpu_train.sh
-```
-## Test
-
-[eval.py](tools/eval.py) is used to test model on test dataset
-
-1. config `model_path` in [eval.sh](eval.sh)
-2. use following script to test
-```bash
-bash eval.sh
-```
-
-## Predict
-[predict.py](tools/predict.py) Can be used to inference on all images in a folder
-1. config `model_path`,`input_folder`,`output_folder` in [predict.sh](predict.sh)
-2. use following script to predict
-```
-bash predict.sh
-```
-You can change the `model_path` in the `predict.sh` file to your model location.
-
-tips: if result is not good, you can change `thre` in [predict.sh](predict.sh)
-
-The project is still under development.
-
-
-Performance
-
-
-### [ICDAR 2015](http://rrc.cvc.uab.es/?ch=4)
-only train on ICDAR2015 dataset
-
-| Method | image size (short size) | learning rate | Precision (%) | Recall (%) | F-measure (%) | FPS |
-|:------:|:-----------------------:|:-------------:|:-------------:|:----------:|:-------------:|:---:|
-| SynthText-Defrom-ResNet-18(paper) | 736 | 0.007 | 86.8 | 78.4 | 82.3 | 48 |
-| ImageNet-resnet18-FPN-DBHead | 736 | 1e-3 | 87.03 | 75.06 | 80.6 | 43 |
-| ImageNet-Defrom-Resnet18-FPN-DBHead | 736 | 1e-3 | 88.61 | 73.84 | 80.56 | 36 |
-| ImageNet-resnet50-FPN-DBHead | 736 | 1e-3 | 88.06 | 77.14 | 82.24 | 27 |
-| ImageNet-resnest50-FPN-DBHead | 736 | 1e-3 | 88.18 | 76.27 | 81.78 | 27 |
+## Model
+A model trained with the modified code: [Baidu cloud, access code: myj4](https://pan.baidu.com/s/10Ff-0AJkkpC9jGWdNSsN6g)

-### examples
-TBD
+Training is not finished yet: compared with the original model (1200 epochs), this one was trained for only 500 epochs. Precision: 90.0, recall: 68.2.
+You can train it further yourself.
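+
+### Exporting for deployment
+`tools/predict.py` gains two flags: `--save_wts` dumps the weights to `DBNet.wts`, and `--onnx` exports the model to `model.onnx`.
+The .wts file is plain text; a minimal reader, shown here only as a sketch of the tensorrtx-style format written by `--save_wts`, looks like:
+```python
+import struct
+
+def load_wts(path):
+    """Parse a tensorrtx-style .wts file into {name: list of floats}."""
+    weights = {}
+    with open(path) as f:
+        for _ in range(int(f.readline())):  # first line: tensor count
+            fields = f.readline().split()
+            name, n = fields[0], int(fields[1])
+            # every remaining field is one float32 encoded as big-endian hex
+            weights[name] = [struct.unpack('>f', bytes.fromhex(h))[0] for h in fields[2:2 + n]]
+    return weights
+```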
-### todo
-- [x] mutil gpu training
+## TensorRT version

-### reference
-1. https://arxiv.org/pdf/1911.08947.pdf
-2. https://github.com/WenmuZhou/PANet.pytorch
-3. https://github.com/MhLiao/DB
+[Link]: https://github.com/BaofengZan/DBNet-TensorRT

-**If this repository helps you,please star it. Thanks.**
+ 
\ No newline at end of file
diff --git a/models/head/DBHead.py b/models/head/DBHead.py
index c967e72..7269dbe 100644
--- a/models/head/DBHead.py
+++ b/models/head/DBHead.py
@@ -13,7 +13,6 @@ def __init__(self, in_channels, out_channels, k = 50):  # debug ==> in_channels=256, out_channels=2, k=50
             nn.Conv2d(in_channels, in_channels // 4, 3, padding=1),
             nn.BatchNorm2d(in_channels // 4),
             nn.ReLU(inplace=True),
-            # nn.Upsample(scale_factor=2, mode='nearest'),
             # ConvTranspose2d(self, in_channels, out_channels, kernel_size, stride=1,
             #                 padding=0, output_padding=0, groups=1, bias=True,
             #                 dilation=1, padding_mode='zeros'):

From 0b607bb66b98a343b2bda0da20af3027ee9ed62d Mon Sep 17 00:00:00 2001
From: BaofengZan
Date: Fri, 7 Aug 2020 18:12:52 +0800
Subject: [PATCH 3/3] Add the address of the TensorRT version
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.MD | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.MD b/README.MD
index 986b5ce..38cb82c 100644
--- a/README.MD
+++ b/README.MD
@@ -41,6 +41,6 @@ models/neck/FPN.py

 ## TensorRT version

-[Link]: https://github.com/BaofengZan/DBNet-TensorRT
+https://github.com/BaofengZan/DBNet-TensorRT
\ No newline at end of file