SSD框架訓練自己的資料集

SSD框架訓練自己的資料集
SSD demo中詳細介紹瞭如何在VOC資料集上使用SSD進行物體檢測的訓練和驗證。
本文介紹如何使用SSD實現對自己資料集的訓練和驗證過程,內容包括:
1 資料集的標註
2 資料集的轉換
3 使用SSD如何訓練
4 使用SSD如何測試
1 資料集的標註 
  資料的標註使用BBox-Label-Tool工具,該工具使用python實現,使用簡單方便。修改後的工具支援多label的標籤標註。
該工具生成的標籤格式是:
object_number
className x1min y1min x1max y1max
classname x2min y2min x2max y2max
...
1.1 labelTool工具的使用說明
  BBox-Label-Tool工具實現較簡單,原始的git版本使用起來有一些小問題,進行了簡單的修改,修改後的版本

複製程式碼
#-------------------------------------------------------------------------------
# Name:        Object bounding box label tool
# Purpose:     Label object bboxes for ImageNet Detection data
# Author:      Qiushi
# Created:     06/06/2014
#
#-------------------------------------------------------------------------------
from __future__ import division
from Tkinter import *
import tkMessageBox
from PIL import Image, ImageTk
import os
import glob
import random
# colors for the bboxes
COLORS = ['red', 'blue', 'yellow', 'pink', 'cyan', 'green', 'black']
# image sizes for the examples
SIZE = 256, 256
classLabels=['mat', 'door', 'sofa', 'chair', 'table', 'bed', 'ashcan', 'shoe']
class LabelTool():
def __init__(self, master):
# set up the main frame
self.parent = master
self.parent.title("LabelTool")
self.frame = Frame(self.parent)
self.frame.pack(fill=BOTH, expand=1)
self.parent.resizable(width = False, height = False)
# initialize global state
self.imageDir = ''
self.imageList= []
self.egDir = ''
self.egList = []
self.outDir = ''
self.cur = 0
self.total = 0
self.category = 0
self.imagename = ''
self.labelfilename = ''
self.tkimg = None
# initialize mouse state
self.STATE = {}
self.STATE['click'] = 0
self.STATE['x'], self.STATE['y'] = 0, 0
# reference to bbox
self.bboxIdList = []
self.bboxId = None
self.bboxList = []
self.hl = None
self.vl = None
self.currentClass = ''
# ----------------- GUI stuff ---------------------
# dir entry & load
self.label = Label(self.frame, text = "Image Dir:")
self.label.grid(row = 0, column = 0, sticky = E)
self.entry = Entry(self.frame)
self.entry.grid(row = 0, column = 1, sticky = W+E)
self.ldBtn = Button(self.frame, text = "Load", command = self.loadDir)
self.ldBtn.grid(row = 0, column = 2, sticky = W+E)
# main panel for labeling
self.mainPanel = Canvas(self.frame, cursor='tcross')
self.mainPanel.bind("<Button-1>", self.mouseClick)
self.mainPanel.bind("<Motion>", self.mouseMove)
self.parent.bind("<Escape>", self.cancelBBox)  # press <Espace> to cancel current bbox
self.parent.bind("s", self.cancelBBox)
self.parent.bind("a", self.prevImage) # press 'a' to go backforward
self.parent.bind("d", self.nextImage) # press 'd' to go forward
self.mainPanel.grid(row = 1, column = 1, rowspan = 4, sticky = W+N)
# showing bbox info & delete bbox
self.lb1 = Label(self.frame, text = 'Bounding boxes:')
self.lb1.grid(row = 1, column = 2,  sticky = W+N)
self.listbox = Listbox(self.frame, width = 22, height = 12)
self.listbox.grid(row = 2, column = 2, sticky = N)
self.btnDel = Button(self.frame, text = 'Delete', command = self.delBBox)
self.btnDel.grid(row = 3, column = 2, sticky = W+E+N)
self.btnClear = Button(self.frame, text = 'ClearAll', command = self.clearBBox)
self.btnClear.grid(row = 4, column = 2, sticky = W+E+N)
#select class type
self.classPanel = Frame(self.frame)
self.classPanel.grid(row = 5, column = 1, columnspan = 10, sticky = W+E)
label = Label(self.classPanel, text = 'class:')
label.grid(row = 5, column = 1,  sticky = W+N)
self.classbox = Listbox(self.classPanel,  width = 4, height = 2)
self.classbox.grid(row = 5,column = 2)
for each in range(len(classLabels)):
function = 'select' + classLabels[each]
print classLabels[each]
btnMat = Button(self.classPanel, text = classLabels[each], command = getattr(self, function))
btnMat.grid(row = 5, column = each + 3)
# control panel for image navigation
self.ctrPanel = Frame(self.frame)
self.ctrPanel.grid(row = 6, column = 1, columnspan = 2, sticky = W+E)
self.prevBtn = Button(self.ctrPanel, text='<< Prev', width = 10, command = self.prevImage)
self.prevBtn.pack(side = LEFT, padx = 5, pady = 3)
self.nextBtn = Button(self.ctrPanel, text='Next >>', width = 10, command = self.nextImage)
self.nextBtn.pack(side = LEFT, padx = 5, pady = 3)
self.progLabel = Label(self.ctrPanel, text = "Progress:     /    ")
self.progLabel.pack(side = LEFT, padx = 5)
self.tmpLabel = Label(self.ctrPanel, text = "Go to Image No.")
self.tmpLabel.pack(side = LEFT, padx = 5)
self.idxEntry = Entry(self.ctrPanel, width = 5)
self.idxEntry.pack(side = LEFT)
self.goBtn = Button(self.ctrPanel, text = 'Go', command = self.gotoImage)
self.goBtn.pack(side = LEFT)
# example pannel for illustration
self.egPanel = Frame(self.frame, border = 10)
self.egPanel.grid(row = 1, column = 0, rowspan = 5, sticky = N)
self.tmpLabel2 = Label(self.egPanel, text = "Examples:")
self.tmpLabel2.pack(side = TOP, pady = 5)
self.egLabels = []
for i in range(3):
self.egLabels.append(Label(self.egPanel))
self.egLabels[-1].pack(side = TOP)
# display mouse position
self.disp = Label(self.ctrPanel, text='')
self.disp.pack(side = RIGHT)
self.frame.columnconfigure(1, weight = 1)
self.frame.rowconfigure(10, weight = 1)
# for debugging
##        self.setImage()
##        self.loadDir()
def loadDir(self, dbg = False):
if not dbg:
s = self.entry.get()
self.parent.focus()
self.category = int(s)
else:
s = r'D:\workspace\python\labelGUI'
##        if not os.path.isdir(s):
##            tkMessageBox.showerror("Error!", message = "The specified dir doesn't exist!")
##            return
# get image list
self.imageDir = os.path.join(r'./Images', '%d' %(self.category))
self.imageList = glob.glob(os.path.join(self.imageDir, '*.jpg'))
if len(self.imageList) == 0:
print 'No .JPEG images found in the specified dir!'
return   
# set up output dir
self.outDir = os.path.join(r'./Labels', '%d' %(self.category))
if not os.path.exists(self.outDir):
os.mkdir(self.outDir)
labeledPicList = glob.glob(os.path.join(self.outDir, '*.txt'))
for label in labeledPicList:
data = open(label, 'r')
if '0\n' == data.read():
data.close()
continue
data.close()
picture = label.replace('Labels', 'Images').replace('.txt', '.jpg')
if picture in self.imageList:
self.imageList.remove(picture)
# default to the 1st image in the collection
self.cur = 1
self.total = len(self.imageList)
self.loadImage()
print '%d images loaded from %s' %(self.total, s)
def loadImage(self):
# load image
imagepath = self.imageList[self.cur - 1]
self.img = Image.open(imagepath)
self.imgSize = self.img.size
self.tkimg = ImageTk.PhotoImage(self.img)
self.mainPanel.config(width = max(self.tkimg.width(), 400), height = max(self.tkimg.height(), 400))
self.mainPanel.create_image(0, 0, image = self.tkimg, anchor=NW)
self.progLabel.config(text = "%04d/%04d" %(self.cur, self.total))
# load labels
self.clearBBox()
self.imagename = os.path.split(imagepath)[-1].split('.')[0]
labelname = self.imagename + '.txt'
self.labelfilename = os.path.join(self.outDir, labelname)
bbox_cnt = 0
if os.path.exists(self.labelfilename):
with open(self.labelfilename) as f:
for (i, line) in enumerate(f):
if i == 0:
bbox_cnt = int(line.strip())
continue
tmp = [int(t.strip()) for t in line.split()]
##                    print tmp
self.bboxList.append(tuple(tmp))
tmpId = self.mainPanel.create_rectangle(tmp[0], tmp[1], \
tmp[2], tmp[3], \
width = 2, \
outline = COLORS[(len(self.bboxList)-1) % len(COLORS)])
self.bboxIdList.append(tmpId)
self.listbox.insert(END, '(%d, %d) -> (%d, %d)' %(tmp[0], tmp[1], tmp[2], tmp[3]))
self.listbox.itemconfig(len(self.bboxIdList) - 1, fg = COLORS[(len(self.bboxIdList) - 1) % len(COLORS)])
def saveImage(self):
with open(self.labelfilename, 'w') as f:
f.write('%d\n' %len(self.bboxList))
for bbox in self.bboxList:
f.write(' '.join(map(str, bbox)) + '\n')
print 'Image No. %d saved' %(self.cur)
def mouseClick(self, event):
if self.STATE['click'] == 0:
self.STATE['x'], self.STATE['y'] = event.x, event.y
#self.STATE['x'], self.STATE['y'] = self.imgSize[0], self.imgSize[1]
else:
x1, x2 = min(self.STATE['x'], event.x), max(self.STATE['x'], event.x)
y1, y2 = min(self.STATE['y'], event.y), max(self.STATE['y'], event.y)
if x2 > self.imgSize[0]:
x2 = self.imgSize[0]
if y2 > self.imgSize[1]:
y2 = self.imgSize[1]                
self.bboxList.append((self.currentClass, x1, y1, x2, y2))
self.bboxIdList.append(self.bboxId)
self.bboxId = None
self.listbox.insert(END, '(%d, %d) -> (%d, %d)' %(x1, y1, x2, y2))
self.listbox.itemconfig(len(self.bboxIdList) - 1, fg = COLORS[(len(self.bboxIdList) - 1) % len(COLORS)])
self.STATE['click'] = 1 - self.STATE['click']
def mouseMove(self, event):
self.disp.config(text = 'x: %d, y: %d' %(event.x, event.y))
if self.tkimg:
if self.hl:
self.mainPanel.delete(self.hl)
self.hl = self.mainPanel.create_line(0, event.y, self.tkimg.width(), event.y, width = 2)
if self.vl:
self.mainPanel.delete(self.vl)
self.vl = self.mainPanel.create_line(event.x, 0, event.x, self.tkimg.height(), width = 2)
if 1 == self.STATE['click']:
if self.bboxId:
self.mainPanel.delete(self.bboxId)
self.bboxId = self.mainPanel.create_rectangle(self.STATE['x'], self.STATE['y'], \
event.x, event.y, \
width = 2, \
outline = COLORS[len(self.bboxList) % len(COLORS)])
def cancelBBox(self, event):
if 1 == self.STATE['click']:
if self.bboxId:
self.mainPanel.delete(self.bboxId)
self.bboxId = None
self.STATE['click'] = 0
def delBBox(self):
sel = self.listbox.curselection()
if len(sel) != 1 :
return
idx = int(sel[0])
self.mainPanel.delete(self.bboxIdList[idx])
self.bboxIdList.pop(idx)
self.bboxList.pop(idx)
self.listbox.delete(idx)
def clearBBox(self):
for idx in range(len(self.bboxIdList)):
self.mainPanel.delete(self.bboxIdList[idx])
self.listbox.delete(0, len(self.bboxList))
self.bboxIdList = []
self.bboxList = []
def selectmat(self):
self.currentClass = 'mat'
self.classbox.delete(0,END)
self.classbox.insert(0, 'mat')
self.classbox.itemconfig(0,fg = COLORS[0])
def selectdoor(self):
self.currentClass = 'door'    
self.classbox.delete(0,END)    
self.classbox.insert(0, 'door')
self.classbox.itemconfig(0,fg = COLORS[0])
def selectsofa(self):
self.currentClass = 'sofa'    
self.classbox.delete(0,END)    
self.classbox.insert(0, 'sofa')
self.classbox.itemconfig(0,fg = COLORS[0])
def selectchair(self):
self.currentClass = 'chair'    
self.classbox.delete(0,END)    
self.classbox.insert(0, 'chair')
self.classbox.itemconfig(0,fg = COLORS[0])
def selecttable(self):
self.currentClass = 'table'    
self.classbox.delete(0,END)    
self.classbox.insert(0, 'table')
self.classbox.itemconfig(0,fg = COLORS[0])
def selectbed(self):
self.currentClass = 'bed'
self.classbox.delete(0,END)    
self.classbox.insert(0, 'bed')
self.classbox.itemconfig(0,fg = COLORS[0])
def selectashcan(self):
self.currentClass = 'ashcan'    
self.classbox.delete(0,END)    
self.classbox.insert(0, 'ashcan')
self.classbox.itemconfig(0,fg = COLORS[0])
def selectshoe(self):
self.currentClass = 'shoe'    
self.classbox.delete(0,END)    
self.classbox.insert(0, 'shoe')
self.classbox.itemconfig(0,fg = COLORS[0])    
def prevImage(self, event = None):
self.saveImage()
if self.cur > 1:
self.cur -= 1
self.loadImage()
def nextImage(self, event = None):
self.saveImage()
if self.cur < self.total:
self.cur += 1
self.loadImage()
def gotoImage(self):
idx = int(self.idxEntry.get())
if 1 <= idx and idx <= self.total:
self.saveImage()
self.cur = idx
self.loadImage()
##    def setImage(self, imagepath = r'test2.png'):
##        self.img = Image.open(imagepath)
##        self.tkimg = ImageTk.PhotoImage(self.img)
##        self.mainPanel.config(width = self.tkimg.width())
##        self.mainPanel.config(height = self.tkimg.height())
##        self.mainPanel.create_image(0, 0, image = self.tkimg, anchor=NW)
if __name__ == '__main__':
root = Tk()
tool = LabelTool(root)
root.mainloop()
複製程式碼

  使用方法:   

     (1) 在BBox-Label-Tool/Images目錄下建立儲存圖片的目錄, 目錄以數字命名(BBox-Label-Tool/Images/1), 然後將待標註的圖片copy到1這個目錄下;

     (2) 在BBox-Label-Tool目錄下執行命令   python main.py

     (3) 在工具介面上, Image Dir 框中輸入需要標記的目錄名(比如 1), 然後點選load按鈕, 工具自動將Images/1目錄下的圖片載入進來;

      需要說明一下, 如果目錄中的圖片已經標註過,點選load時不會被重新載入進來.

     (4) 該工具支援多類別標註, 畫bounding boxs框標定之前,需要先選定類別,然後再畫框.

     (5) 一張圖片標註完後, 點選Next>>按鈕, 標註下一張圖片,  圖片label成功後,會在BBox-Label-Tool/Labels對應的目錄下生成與圖片檔名對應的label檔案.

2 資料集的轉換

  caffe訓練使用LMDB格式的資料,ssd框架中提供了voc資料格式轉換成LMDB格式的指令碼。
所以實踐中先將BBox-Label-Tool標註的資料轉換成voc資料格式,然後再轉換成LMDB格式。

2.1 voc資料格式

(1)Annotations中儲存的是xml格式的label資訊

複製程式碼
<?xml version="1.0" ?>
<annotation>
<folder>VOC2007</folder>
<filename>1.jpg</filename>
<source>
<database>My Database</database>
<annotation>VOC2007</annotation>
<image>flickr</image>
<flickrid>NULL</flickrid>
</source>
<owner>
<flickrid>NULL</flickrid>
<name>idaneel</name>
</owner>
<size>
<width>320</width>
<height>240</height>
<depth>3</depth>
</size>
<segmented>0</segmented>
<object>
<name>door</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>109</xmin>
<ymin>3</ymin>
<xmax>199</xmax>
<ymax>204</ymax>
</bndbox>
</object>
</annotation>
複製程式碼

(2)ImageSet目錄下的Main目錄裡存放的是用於表示訓練的圖片集和測試的圖片集

(3)JPEGImages目錄下存放所有圖片集

(4)label目錄下儲存的是BBox-Label-Tool工具標註好的bounding box座標檔案,
該目錄下的檔案就是待轉換的label標籤檔案。

2.2 Label轉換成VOC資料格式
BBox-Label-Tool工具標註好的bounding box座標檔案轉換成VOC資料格式的形式.
具體的轉換過程包括了兩個步驟:
(1)將BBox-Label-Tool下的txt格式儲存的bounding box資訊轉換成VOC資料格式下以xml方式表示;
(2)生成用於訓練的資料集和用於測試的資料集。
用python實現了上述兩個步驟的換轉。
createXml.py 完成txt到xml的轉換; 執行指令碼./createXml.py

複製程式碼
#!/usr/bin/env python
import os
import sys
import cv2
from itertools import islice
from xml.dom.minidom import Document
labels='label'
imgpath='JPEGImages/'
xmlpath_new='Annotations/'
foldername='VOC2007'
def insertObject(doc, datas):
obj = doc.createElement('object')
name = doc.createElement('name')
name.appendChild(doc.createTextNode(datas[0]))
obj.appendChild(name)
pose = doc.createElement('pose')
pose.appendChild(doc.createTextNode('Unspecified'))
obj.appendChild(pose)
truncated = doc.createElement('truncated')
truncated.appendChild(doc.createTextNode(str(0)))
obj.appendChild(truncated)
difficult = doc.createElement('difficult')
difficult.appendChild(doc.createTextNode(str(0)))
obj.appendChild(difficult)
bndbox = doc.createElement('bndbox')
xmin = doc.createElement('xmin')
xmin.appendChild(doc.createTextNode(str(datas[1])))
bndbox.appendChild(xmin)
ymin = doc.createElement('ymin')                
ymin.appendChild(doc.createTextNode(str(datas[2])))
bndbox.appendChild(ymin)                
xmax = doc.createElement('xmax')                
xmax.appendChild(doc.createTextNode(str(datas[3])))
bndbox.appendChild(xmax)                
ymax = doc.createElement('ymax')    
if  '\r' == str(datas[4])[-1] or '\n' == str(datas[4])[-1]:
data = str(datas[4])[0:-1]
else:
data = str(datas[4])
ymax.appendChild(doc.createTextNode(data))
bndbox.appendChild(ymax)
obj.appendChild(bndbox)                
return obj
def create():
for walk in os.walk(labels):
for each in walk[2]:
fidin=open(walk[0] + '/'+ each,'r')
objIndex = 0
for data in islice(fidin, 1, None):        
objIndex += 1
data=data.strip('\n')
datas = data.split(' ')
if 5 != len(datas):
print 'bounding box information error'
continue
pictureName = each.replace('.txt', '.jpg')
imageFile = imgpath + pictureName
img = cv2.imread(imageFile)
imgSize = img.shape
if 1 == objIndex:
xmlName = each.replace('.txt', '.xml')
f = open(xmlpath_new + xmlName, "w")
doc = Document()
annotation = doc.createElement('annotation')
doc.appendChild(annotation)
folder = doc.createElement('folder')
folder.appendChild(doc.createTextNode(foldername))
annotation.appendChild(folder)
filename = doc.createElement('filename')
filename.appendChild(doc.createTextNode(pictureName))
annotation.appendChild(filename)
source = doc.createElement('source')                
database = doc.createElement('database')
database.appendChild(doc.createTextNode('My Database'))
source.appendChild(database)
source_annotation = doc.createElement('annotation')
source_annotation.appendChild(doc.createTextNode(foldername))
source.appendChild(source_annotation)
image = doc.createElement('image')
image.appendChild(doc.createTextNode('flickr'))
source.appendChild(image)
flickrid = doc.createElement('flickrid')
flickrid.appendChild(doc.createTextNode('NULL'))
source.appendChild(flickrid)
annotation.appendChild(source)
owner = doc.createElement('owner')
flickrid = doc.createElement('flickrid')
flickrid.appendChild(doc.createTextNode('NULL'))
owner.appendChild(flickrid)
name = doc.createElement('name')
name.appendChild(doc.createTextNode('idaneel'))
owner.appendChild(name)
annotation.appendChild(owner)
size = doc.createElement('size')
width = doc.createElement('width')
width.appendChild(doc.createTextNode(str(imgSize[1])))
size.appendChild(width)
height = doc.createElement('height')
height.appendChild(doc.createTextNode(str(imgSize[0])))
size.appendChild(height)
depth = doc.createElement('depth')
depth.appendChild(doc.createTextNode(str(imgSize[2])))
size.appendChild(depth)
annotation.appendChild(size)
segmented = doc.createElement('segmented')
segmented.appendChild(doc.createTextNode(str(0)))
annotation.appendChild(segmented)            
annotation.appendChild(insertObject(doc, datas))
else:
annotation.appendChild(insertObject(doc, datas))
try:
f.write(doc.toprettyxml(indent = '    '))
f.close()
fidin.close()
except:
pass
if __name__ == '__main__':
create()
複製程式碼

  createTest.py 生成訓練集和測試集標識檔案; 執行指令碼

  ./createTest.py %startID% %endID% %testNumber%

複製程式碼
#!/usr/bin/env python
import os
import sys
import random
try:
start = int(sys.argv[1])
end = int(sys.argv[2])
test = int(sys.argv[3])
allNum = end-start+1
except:
print 'Please input picture range'
print './createTest.py 1 1500 500'
os._exit(0)
b_list = range(start,end)
blist_webId = random.sample(b_list, test)
blist_webId = sorted(blist_webId) 
allFile = []
testFile = open('ImageSets/Main/test.txt', 'w')
trainFile = open('ImageSets/Main/trainval.txt', 'w')
for i in range(allNum):
allFile.append(i+1)
for test in blist_webId:
allFile.remove(test)
testFile.write(str(test) + '\n')   
for train in allFile:
trainFile.write(str(train) + '\n')
testFile.close()
trainFile.close()
複製程式碼

說明: 由於BBox-Label-Tool實現相對簡單,該工具每次只能對一個類別進行打標籤,所以轉換指令碼

每一次也是對一個類別進行資料的轉換,這個問題後續需要優化改進。

優化後的BBox-Label-Tool工具,支援多類別標定,生成的label檔案中增加了類別名稱資訊。

使用時修改classLabels,改寫成自己的類別, 修改後的工具程式碼參見1.1中的main.py 

2.3  VOC資料轉換成LMDB資料

  SSD提供了VOC資料到LMDB資料的轉換指令碼 data/VOC0712/create_list.sh 和 ./data/VOC0712/create_data.sh,這兩個指令碼是完全針對VOC0712目錄下的資料進行的轉換。
  實現中為了不破壞VOC0712目錄下的資料內容,針對我們自己的資料集,修改了上面這兩個指令碼,
將指令碼中涉及到VOC0712的資訊替換成我們自己的目錄資訊。
在處理我們的資料集時,將VOC0712替換成indoor。
具體的步驟如下:
  (1) 在 $HOME/data/VOCdevkit目錄下建立indoor目錄,該目錄中存放自己轉換完成的VOC資料集;
  (2) $CAFFE_ROOT/examples目錄下建立indoor目錄;
(3) $CAFFE_ROOT/data目錄下建立indoor目錄,同時將data/VOC0712下的create_list.sh,create_data.sh,labelmap_voc.prototxt
這三個檔案copy到indoor目錄下,分別重新命名為create_list_indoor.sh,create_data_indoor.sh, labelmap_indoor.prototxt
  (4)對上面新生成的兩個create檔案進行修改,主要修改是將VOC0712相關的資訊替換成indoor
  修改後的這兩個檔案分別為:  

複製程式碼
#!/bin/bash
root_dir=$HOME/data/VOCdevkit/
sub_dir=ImageSets/Main
bash_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
for dataset in trainval test    
do
dst_file=$bash_dir/$dataset.txt
if [ -f $dst_file ]
then
rm -f $dst_file
fi
for name in indoor
do
if [[ $dataset == "test" && $name == "VOC2012" ]]
then
continue
fi
echo "Create list for $name $dataset..."
dataset_file=$root_dir/$name/$sub_dir/$dataset.txt
img_file=$bash_dir/$dataset"_img.txt"
cp $dataset_file $img_file
sed -i "s/^/$name\/JPEGImages\//g" $img_file
sed -i "s/$/.jpg/g" $img_file
label_file=$bash_dir/$dataset"_label.txt"
cp $dataset_file $label_file
sed -i "s/^/$name\/Annotations\//g" $label_file
sed -i "s/$/.xml/g" $label_file
paste -d' ' $img_file $label_file >> $dst_file
rm -f $label_file
rm -f $img_file
done
# Generate image name and size infomation.
if [ $dataset == "test" ]
then
$bash_dir/../../build/tools/get_image_size $root_dir $dst_file $bash_dir/$dataset"_name_size.txt"
fi
# Shuffle trainval file.
if [ $dataset == "trainval" ]
then
rand_file=$dst_file.random
cat $dst_file | perl -MList::Util=shuffle -e 'print shuffle(<STDIN>);' > $rand_file
mv $rand_file $dst_file
fi
done
複製程式碼

複製程式碼
cur_dir=$(cd $( dirname ${BASH_SOURCE[0]} ) && pwd )
root_dir=$cur_dir/../..
cd $root_dir
redo=1
data_root_dir="$HOME/data/VOCdevkit"
dataset_name="indoor"
mapfile="$root_dir/data/$dataset_name/labelmap_indoor.prototxt"
anno_type="detection"
db="lmdb"
min_dim=0
max_dim=0
width=0
height=0
extra_cmd="--encode-type=jpg --encoded"
if [ $redo ]
then
extra_cmd="$extra_cmd --redo"
fi
for subset in test trainval
do
python $root_dir/scripts/create_annoset.py --anno-type=$anno_type --label-map-file=$mapfile --min-dim=$min_dim --max-dim=$max_dim --resize-width=$width --resize-height=$height --check-label $extra_cmd $data_root_dir $root_dir/data/$dataset_name/$subset.txt $data_root_dir/$dataset_name/$db/$dataset_name"_"$subset"_"$db examples/$dataset_name
done
複製程式碼
        (5)修改labelmap_indoor.prototxt,將該檔案中的類別修改成和自己的資料集相匹配,注意需要保留一個label 0 , background類別

複製程式碼
item {
name: "none_of_the_above"
label: 0
display_name: "background"
}
item {
name: "door"
label: 1
display_name: "door"
}
複製程式碼

  完成上面步驟的修改後,可以開始LMDB資料資料的製作,在$CAFFE_ROOT目錄下分別執行:

  ./data/indoor/create_list_indoor.sh

  ./data/indoor/create_data_indoor.sh

  命令執行完畢後,可以在$CAFFE_ROOT/indoor目錄下檢視轉換完成的LMDB資料資料。

3 使用SSD進行自己資料集的訓練

訓練時使用ssd demo中提供的預訓練好的VGGnet model : VGG_ILSVRC_16_layers_fc_reduced.caffemodel
將該模型儲存到$CAFFE_ROOT/models/VGGNet下。
將ssd_pascal.py copy一份 ssd_pascal_indoor.py檔案, 根據自己的資料集修改ssd_pascal_indoor.py
主要修改點:
(1)train_data和test_data修改成指向自己的資料集LMDB
   train_data = "examples/indoor/indoor_trainval_lmdb"
test_data = "examples/indoor/indoor_test_lmdb"
(2) num_test_image該變數修改成自己資料集中測試資料的數量
(3)num_classes 該變數修改成自己資料集中 標籤類別數量數 + 1

針對我的資料集,ssd_pascal_indoor.py的內容為:

複製程式碼
from __future__ import print_function
import caffe
from caffe.model_libs import *
from google.protobuf import text_format
import math
import os
import shutil
import stat
import subprocess
import sys
# Add extra layers on top of a "base" network (e.g. VGGNet or Inception).
def AddExtraLayers(net, use_batchnorm=True):
use_relu = True
# Add additional convolutional layers.
from_layer = net.keys()[-1]
# TODO(weiliu89): Construct the name using the last layer to avoid duplication.
out_layer = "conv6_1"
ConvBNLayer(net, from_layer, out_layer, use_batchnorm, use_relu, 256, 1, 0, 1)
from_layer = out_layer
out_layer = "conv6_2"
ConvBNLayer(net, from_layer, out_layer, use_batchnorm, use_relu, 512, 3, 1, 2)
for i in xrange(7, 9):
from_layer = out_layer
out_layer = "conv{}_1".format(i)
ConvBNLayer(net, from_layer, out_layer, use_batchnorm, use_relu, 128, 1, 0, 1)
from_layer = out_layer
out_layer = "conv{}_2".format(i)
ConvBNLayer(net, from_layer, out_layer, use_batchnorm, use_relu, 256, 3, 1, 2)
# Add global pooling layer.
name = net.keys()[-1]
net.pool6 = L.Pooling(net[name], pool=P.Pooling.AVE, global_pooling=True)
return net
### Modify the following parameters accordingly ###
# The directory which contains the caffe code.
# We assume you are running the script at the CAFFE_ROOT.
caffe_root = os.getcwd()
# Set true if you want to start training right after generating all files.
run_soon = True
# Set true if you want to load from most recently saved snapshot.
# Otherwise, we will load from the pretrain_model defined below.
resume_training = True
# If true, Remove old model files.
remove_old_models = False
# The database file for training data. Created by data/VOC0712/create_data.sh
train_data = "examples/indoor/indoor_trainval_lmdb"
# The database file for testing data. Created by data/VOC0712/create_data.sh
test_data = "examples/indoor/indoor_test_lmdb"
# Specify the batch sampler.
resize_width = 300
resize_height = 300
resize = "{}x{}".format(resize_width, resize_height)
batch_sampler = [
{
'sampler': {
},
'max_trials': 1,
'max_sample': 1,
},
{
'sampler': {
'min_scale': 0.3,
'max_scale': 1.0,
'min_aspect_ratio': 0.5,
'max_aspect_ratio': 2.0,
},
'sample_constraint': {
'min_jaccard_overlap': 0.1,
},
'max_trials': 50,
'max_sample': 1,
},
{
'sampler': {
'min_scale': 0.3,
'max_scale': 1.0,
'min_aspect_ratio': 0.5,
'max_aspect_ratio': 2.0,
},
'sample_constraint': {
'min_jaccard_overlap': 0.3,
},
'max_trials': 50,
'max_sample': 1,
},
{
'sampler': {
'min_scale': 0.3,
'max_scale': 1.0,
'min_aspect_ratio': 0.5,
'max_aspect_ratio': 2.0,
},
'sample_constraint': {
'min_jaccard_overlap': 0.5,
},
'max_trials': 50,
'max_sample': 1,
},
{
'sampler': {
'min_scale': 0.3,
'max_scale': 1.0,
'min_aspect_ratio': 0.5,
'max_aspect_ratio': 2.0,
},
'sample_constraint': {
'min_jaccard_overlap': 0.7,
},
'max_trials': 50,
'max_sample': 1,
},
{
'sampler': {
'min_scale': 0.3,
'max_scale': 1.0,
'min_aspect_ratio': 0.5,
'max_aspect_ratio': 2.0,
},
'sample_constraint': {
'min_jaccard_overlap': 0.9,
},
'max_trials': 50,
'max_sample': 1,
},
{
'sampler': {
'min_scale': 0.3,
'max_scale': 1.0,
'min_aspect_ratio': 0.5,
'max_aspect_ratio': 2.0,
},
'sample_constraint': {
'max_jaccard_overlap': 1.0,
},
'max_trials': 50,
'max_sample': 1,
},
]
train_transform_param = {
'mirror': True,
'mean_value': [104, 117, 123],
'resize_param': {
'prob': 1,
'resize_mode': P.Resize.WARP,
'height': resize_height,
'width': resize_width,
'interp_mode': [
P.Resize.LINEAR,
P.Resize.AREA,
P.Resize.NEAREST,
P.Resize.CUBIC,
P.Resize.LANCZOS4,
],
},
'emit_constraint': {
'emit_type': caffe_pb2.EmitConstraint.CENTER,
}
}
test_transform_param = {
'mean_value': [104, 117, 123],
'resize_param': {
'prob': 1,
'resize_mode': P.Resize.WARP,
'height': resize_height,
'width': resize_width,
'interp_mode': [P.Resize.LINEAR],
},
}
# If true, use batch norm for all newly added layers.
# Currently only the non batch norm version has been tested.
use_batchnorm = False
# Use different initial learning rate.
if use_batchnorm:
base_lr = 0.0004
else:
# A learning rate for batch_size = 1, num_gpus = 1.
base_lr = 0.00004
# Modify the job name if you want.
job_name = "SSD_{}".format(resize)
# The name of the model. Modify it if you want.
model_name = "VGG_VOC0712_{}".format(job_name)
# Directory which stores the model .prototxt file.
save_dir = "models/VGGNet/VOC0712/{}".format(job_name)
# Directory which stores the snapshot of models.
snapshot_dir = "models/VGGNet/VOC0712/{}".format(job_name)
# Directory which stores the job script and log file.
job_dir = "jobs/VGGNet/VOC0712/{}".format(job_name)
# Directory which stores the detection results.
output_result_dir = "{}/data/VOCdevkit/results/VOC2007/{}/Main".format(os.environ['HOME'], job_name)
# model definition files.
train_net_file = "{}/train.prototxt".format(save_dir)
test_net_file = "{}/test.prototxt".format(save_dir)
deploy_net_file = "{}/deploy.prototxt".format(save_dir)
solver_file = "{}/solver.prototxt".format(save_dir)
# snapshot prefix.
snapshot_prefix = "{}/{}".format(snapshot_dir, model_name)
# job script path.
job_file = "{}/{}.sh".format(job_dir, model_name)
# Stores the test image names and sizes. Created by data/VOC0712/create_list.sh
name_size_file = "data/indoor/test_name_size.txt"
# The pretrained model. We use the Fully convolutional reduced (atrous) VGGNet.
pretrain_model = "models/VGGNet/VGG_ILSVRC_16_layers_fc_reduced.caffemodel"
# Stores LabelMapItem.
label_map_file = "data/indoor/labelmap_indoor.prototxt"
# MultiBoxLoss parameters.
num_classes = 2
share_location = True
background_label_id=0
train_on_diff_gt = True
normalization_mode = P.Loss.VALID
code_type = P.PriorBox.CENTER_SIZE
neg_pos_ratio = 3.
loc_weight = (neg_pos_ratio + 1.) / 4.
multibox_loss_param = {
'loc_loss_type': P.MultiBoxLoss.SMOOTH_L1,
'conf_loss_type': P.MultiBoxLoss.SOFTMAX,
'loc_weight': loc_weight,
'num_classes': num_classes,
'share_location': share_location,
'match_type': P.MultiBoxLoss.PER_PREDICTION,
'overlap_threshold': 0.5,
'use_prior_for_matching': True,
'background_label_id': background_label_id,
'use_difficult_gt': train_on_diff_gt,
'do_neg_mining': True,
'neg_pos_ratio': neg_pos_ratio,
'neg_overlap': 0.5,
'code_type': code_type,
}
loss_param = {
'normalization': normalization_mode,
}
# parameters for generating priors.
# minimum dimension of input image
min_dim = 300
# conv4_3 ==> 38 x 38
# fc7 ==> 19 x 19
# conv6_2 ==> 10 x 10
# conv7_2 ==> 5 x 5
# conv8_2 ==> 3 x 3
# pool6 ==> 1 x 1
mbox_source_layers = ['conv4_3', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2', 'pool6']
# in percent %
min_ratio = 20
max_ratio = 95
step = int(math.floor((max_ratio - min_ratio) / (len(mbox_source_layers) - 2)))
min_sizes = []
max_sizes = []
for ratio in xrange(min_ratio, max_ratio + 1, step):
min_sizes.append(min_dim * ratio / 100.)
max_sizes.append(min_dim * (ratio + step) / 100.)
min_sizes = [min_dim * 10 / 100.] + min_sizes
max_sizes = [[]] + max_sizes
aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2, 3], [2, 3]]
# L2 normalize conv4_3.
normalizations = [20, -1, -1, -1, -1, -1]
# variance used to encode/decode prior bboxes.
if code_type == P.PriorBox.CENTER_SIZE:
prior_variance = [0.1, 0.1, 0.2, 0.2]
else:
prior_variance = [0.1]
flip = True
clip = True
# Solver parameters.
# Defining which GPUs to use.
gpus = "0"
gpulist = gpus.split(",")
num_gpus = len(gpulist)
# Divide the mini-batch to different GPUs.
batch_size = 4
accum_batch_size = 32
iter_size = accum_batch_size / batch_size
solver_mode = P.Solver.CPU
device_id = 0
batch_size_per_device = batch_size
if num_gpus > 0:
batch_size_per_device = int(math.ceil(float(batch_size) / num_gpus))
iter_size = int(math.ceil(float(accum_batch_size) / (batch_size_per_device * num_gpus)))
solver_mode = P.Solver.GPU
device_id = int(gpulist[0])
if normalization_mode == P.Loss.NONE:
base_lr /= batch_size_per_device
elif normalization_mode == P.Loss.VALID:
base_lr *= 25. / loc_weight
elif normalization_mode == P.Loss.FULL:
# Roughly there are 2000 prior bboxes per image.
# TODO(weiliu89): Estimate the exact # of priors.
base_lr *= 2000.
# Which layers to freeze (no backward) during training.
freeze_layers = ['conv1_1', 'conv1_2', 'conv2_1', 'conv2_2']
# Evaluate on whole test set.
num_test_image = 800
test_batch_size = 1
test_iter = num_test_image / test_batch_size
solver_param = {
# Train parameters
'base_lr': base_lr,
'weight_decay': 0.0005,
'lr_policy': "step",
'stepsize': 40000,
'gamma': 0.1,
'momentum': 0.9,
'iter_size': iter_size,
'max_iter': 60000,
'snapshot': 40000,
'display': 10,
'average_loss': 10,
'type': "SGD",
'solver_mode': solver_mode,
'device_id': device_id,
'debug_info': False,
'snapshot_after_train': True,
# Test parameters
'test_iter': [test_iter],
'test_interval': 10000,
'eval_type': "detection",
'ap_version': "11point",
'test_initialization': False,
}
# parameters for generating detection output.
det_out_param = {
'num_classes': num_classes,
'share_location': share_location,
'background_label_id': background_label_id,
'nms_param': {'nms_threshold': 0.45, 'top_k': 400},
'save_output_param': {
'output_directory': output_result_dir,
'output_name_prefix': "comp4_det_test_",
'output_format': "VOC",
'label_map_file': label_map_file,
'name_size_file': name_size_file,
'num_test_image': num_test_image,
},
'keep_top_k': 200,
'confidence_threshold': 0.01,
'code_type': code_type,
}
# parameters for evaluating detection results.
det_eval_param = {
'num_classes': num_classes,
'background_label_id': background_label_id,
'overlap_threshold': 0.5,
'evaluate_difficult_gt': False,
'name_size_file': name_size_file,
}
### Hopefully you don't need to change the following ###
# Check file.
check_if_exist(train_data)
check_if_exist(test_data)
check_if_exist(label_map_file)
check_if_exist(pretrain_model)
make_if_not_exist(save_dir)
make_if_not_exist(job_dir)
make_if_not_exist(snapshot_dir)
# Create train net.
net = caffe.NetSpec()
net.data, net.label = CreateAnnotatedDataLayer(train_data, batch_size=batch_size_per_device,
train=True, output_label=True, label_map_file=label_map_file,
transform_param=train_transform_param, batch_sampler=batch_sampler)
VGGNetBody(net, from_layer='data', fully_conv=True, reduced=True, dilated=True,
dropout=False, freeze_layers=freeze_layers)
AddExtraLayers(net, use_batchnorm)
mbox_layers = CreateMultiBoxHead(net, data_layer='data', from_layers=mbox_source_layers,
use_batchnorm=use_batchnorm, min_sizes=min_sizes, max_sizes=max_sizes,
aspect_ratios=aspect_ratios, normalizations=normalizations,
num_classes=num_classes, share_location=share_location, flip=flip, clip=clip,
prior_variance=prior_variance, kernel_size=3, pad=1)
# Create the MultiBoxLossLayer.
name = "mbox_loss"
mbox_layers.append(net.label)
net[name] = L.MultiBoxLoss(*mbox_layers, multibox_loss_param=multibox_loss_param,
loss_param=loss_param, include=dict(phase=caffe_pb2.Phase.Value('TRAIN')),
propagate_down=[True, True, False, False])
with open(train_net_file, 'w') as f:
print('name: "{}_train"'.format(model_name), file=f)
print(net.to_proto(), file=f)
shutil.copy(train_net_file, job_dir)
# Create test net.
net = caffe.NetSpec()
net.data, net.label = CreateAnnotatedDataLayer(test_data, batch_size=test_batch_size,
train=False, output_label=True, label_map_file=label_map_file,
transform_param=test_transform_param)
VGGNetBody(net, from_layer='data', fully_conv=True, reduced=True, dilated=True,
dropout=False, freeze_layers=freeze_layers)
AddExtraLayers(net, use_batchnorm)
mbox_layers = CreateMultiBoxHead(net, data_layer='data', from_layers=mbox_source_layers,
use_batchnorm=use_batchnorm, min_sizes=min_sizes, max_sizes=max_sizes,
aspect_ratios=aspect_ratios, normalizations=normalizations,
num_classes=num_classes, share_location=share_location, flip=flip, clip=clip,
prior_variance=prior_variance, kernel_size=3, pad=1)
conf_name = "mbox_conf"
if multibox_loss_param["conf_loss_type"] == P.MultiBoxLoss.SOFTMAX:
reshape_name = "{}_reshape".format(conf_name)
net[reshape_name] = L.Reshape(net[conf_name], shape=dict(dim=[0, -1, num_classes]))
softmax_name = "{}_softmax".format(conf_name)
net[softmax_name] = L.Softmax(net[reshape_name], axis=2)
flatten_name = "{}_flatten".format(conf_name)
net[flatten_name] = L.Flatten(net[softmax_name], axis=1)
mbox_layers[1] = net[flatten_name]
elif multibox_loss_param["conf_loss_type"] == P.MultiBoxLoss.LOGISTIC:
sigmoid_name = "{}_sigmoid".format(conf_name)
net[sigmoid_name] = L.Sigmoid(net[conf_name])
mbox_layers[1] = net[sigmoid_name]
net.detection_out = L.DetectionOutput(*mbox_layers,
detection_output_param=det_out_param,
include=dict(phase=caffe_pb2.Phase.Value('TEST')))
net.detection_eval = L.DetectionEvaluate(net.detection_out, net.label,
detection_evaluate_param=det_eval_param,
include=dict(phase=caffe_pb2.Phase.Value('TEST')))
with open(test_net_file, 'w') as f:
print('name: "{}_test"'.format(model_name), file=f)
print(net.to_proto(), file=f)
shutil.copy(test_net_file, job_dir)
# Create deploy net.
# Remove the first and last layer from test net.
deploy_net = net
with open(deploy_net_file, 'w') as f:
net_param = deploy_net.to_proto()
# Remove the first (AnnotatedData) and last (DetectionEvaluate) layer from test net.
del net_param.layer[0]
del net_param.layer[-1]
net_param.name = '{}_deploy'.format(model_name)
net_param.input.extend(['data'])
net_param.input_shape.extend([
caffe_pb2.BlobShape(dim=[1, 3, resize_height, resize_width])])
print(net_param, file=f)
shutil.copy(deploy_net_file, job_dir)
# Create solver.
solver = caffe_pb2.SolverParameter(
train_net=train_net_file,
test_net=[test_net_file],
snapshot_prefix=snapshot_prefix,
**solver_param)
with open(solver_file, 'w') as f:
print(solver, file=f)
shutil.copy(solver_file, job_dir)
max_iter = 0
# Find most recent snapshot.
for file in os.listdir(snapshot_dir):
if file.endswith(".solverstate"):
basename = os.path.splitext(file)[0]
iter = int(basename.split("{}_iter_".format(model_name))[1])
if iter > max_iter:
max_iter = iter
train_src_param = '--weights="{}" \\\n'.format(pretrain_model)
if resume_training:
if max_iter > 0:
train_src_param = '--snapshot="{}_iter_{}.solverstate" \\\n'.format(snapshot_prefix, max_iter)
if remove_old_models:
# Remove any snapshots smaller than max_iter.
for file in os.listdir(snapshot_dir):
if file.endswith(".solverstate"):
basename = os.path.splitext(file)[0]
iter = int(basename.split("{}_iter_".format(model_name))[1])
if max_iter > iter:
os.remove("{}/{}".format(snapshot_dir, file))
if file.endswith(".caffemodel"):
basename = os.path.splitext(file)[0]
iter = int(basename.split("{}_iter_".format(model_name))[1])
if max_iter > iter:
os.remove("{}/{}".format(snapshot_dir, file))
# Create job file.
with open(job_file, 'w') as f:
f.write('cd {}\n'.format(caffe_root))
f.write('./build/tools/caffe train \\\n')
f.write('--solver="{}" \\\n'.format(solver_file))
f.write(train_src_param)
if solver_param['solver_mode'] == P.Solver.GPU:
f.write('--gpu {} 2>&1 | tee {}/{}.log\n'.format(gpus, job_dir, model_name))
else:
f.write('2>&1 | tee {}/{}.log\n'.format(job_dir, model_name))
# Copy the python script to job_dir.
py_file = os.path.abspath(__file__)
shutil.copy(py_file, job_dir)
# Run the job.
os.chmod(job_file, stat.S_IRWXU)
if run_soon:
subprocess.call(job_file, shell=True)
複製程式碼
訓練命令:
python examples/ssd/ssd_pascal_indoor.py

4 測試

SSD框架中提供了測試程式碼,有C++版本和python版本

 4.1 c++版本

編譯完SSD後,C++版本的的可執行檔案存放目錄: .build_release/examples/ssd/ssd_detect.bin

測試命令  ./
.build_release/examples/ssd/ssd_detect.bin models/VGGNet/indoor/deploy.prototxt   models/VGGNet/indoor/VGG_VOC0712_SSD_300x300_iter_60000.caffemodel pictures.txt

其中pictures.txt中儲存的是待測試圖片的list

 4.2 python版本

    python 版本的測試過程參見examples/detection.ipynb

參考:
 1 Faster RCNN 訓練自己的資料集(Matlab,python版本)及製作VOC2007格式資料集
2 SSD的配置及執行