Object Detection using Tensorflow: adhoc functions

home > Machine Learning > Image Processing

Object Detection using Tensorflow: bee and butterflies

  1. Part 1: set up tensorflow in a virtual environment
  2. adhoc functions
  3. Part 2: preparing annotation in PASCAL VOC format
  4. Part 3: preparing tfrecord files
  5. more scripts
  6. Part 4: start training our machine learning algorithm!
  7. COCO API for Windows
  8. Part 5: perform object detection

The following script contains ad hoc, adjustable helper functions. It was written for convenience to accompany this object detection tutorial; of course, the same steps can be done manually or in a different way.

adhoc_functions.py

import cv2, os, sys

def move_ALL_to_train(folder_name_CLONE, folder_name_ANNOT,rotate_angle_set, folder_target):
    """Copy images and XML annotations into one folder, adding rotated image copies.

    Steps, in order:
      1. Every regular file in ``folder_name_CLONE`` is copied unchanged into
         ``folder_target``.
      2. For each of those files and each angle in ``rotate_angle_set``, the
         image is rotated about its centre (scale 1, same canvas size) and
         written to ``folder_target`` as ``R<k>_<original name>``, where ``k``
         is the 0-based position of the angle in ``rotate_angle_set``.
      3. Every file in ``folder_name_ANNOT`` whose name ends with ``"xml"`` is
         copied unchanged into ``folder_target``.

    The annotation files are only copied; nothing in this function creates
    annotations for the rotated copies.

    Args:
        folder_name_CLONE: folder holding the source images.
        folder_name_ANNOT: folder holding the XML annotation files.
        rotate_angle_set: iterable of rotation angles in degrees (passed to
            ``cv2.getRotationMatrix2D``). May be empty, in which case no
            rotated copies are produced and no image is decoded.
        folder_target: existing destination folder.

    Returns:
        None.
    """
    # Sub-directories cannot be copied with copyfile, so keep regular files only.
    img_in_CLONE = [name for name in os.listdir(folder_name_CLONE)
                    if os.path.isfile(os.path.join(folder_name_CLONE, name))]
    xml_in_ANNOT = [name for name in os.listdir(folder_name_ANNOT) if name.endswith("xml")]
    angles = list(rotate_angle_set)

    # os.path.join instead of a hard-coded "\\" keeps this working off Windows.
    for name in img_in_CLONE:
        copyfile(os.path.join(folder_name_CLONE, name), os.path.join(folder_target, name))

    if angles:
        for name in img_in_CLONE:
            # Decode each image once, not once per angle.
            img_to_rotate = cv2.imread(os.path.join(folder_name_CLONE, name))
            if img_to_rotate is None:
                # cv2.imread signals an unreadable / non-image file with None.
                print("move_ALL_to_train(). Cannot read as image, skipping rotation: ", name)
                continue
            rows, cols = img_to_rotate.shape[:2]
            for num_label, angle_in_deg in enumerate(angles):
                rotated_name = "R" + str(num_label) + "_" + name
                M = cv2.getRotationMatrix2D((cols/2, rows/2), angle_in_deg, 1)
                dst = cv2.warpAffine(img_to_rotate, M, (cols, rows))
                cv2.imwrite(os.path.join(folder_target, rotated_name), dst)

    for name in xml_in_ANNOT:
        copyfile(os.path.join(folder_name_ANNOT, name), os.path.join(folder_target, name))
    return

def mass_convert_to_PASCAL_VOC_xml(annot_foldername,annot_filetype ,
    img_foldername,img_filetype):
    """Convert every annotation text file in a folder to a PASCAL VOC XML file.

    Annotation files (names ending with ``annot_filetype``) and image files
    (names ending with ``img_filetype``) are paired one-to-one after sorting
    both lists by file name without the extension. For each pair whose names
    match, ``convert_to_PASCAL_VOC_xml`` is called. The first non-matching pair
    stops the whole run.

    Args:
        annot_foldername: folder containing the annotation text files.
        annot_filetype: annotation file suffix including the dot, e.g. ".txt".
        img_foldername: folder containing the image files.
        img_filetype: image file suffix including the dot, e.g. ".jpg".

    Returns:
        None. Progress and the number of converted files are printed.
    """
    # os.listdir returns names in arbitrary order; sort the extension-less
    # names so that both lists line up deterministically before zipping.
    annot_names = sorted(
        name[0:len(name)-len(annot_filetype)]
        for name in os.listdir(annot_foldername)
        if os.path.isfile(os.path.join(annot_foldername, name)) and name.endswith(annot_filetype)
    )
    img_names = sorted(
        name[0:len(name)-len(img_filetype)]
        for name in os.listdir(img_foldername)
        if os.path.isfile(os.path.join(img_foldername, name)) and name.endswith(img_filetype)
    )

    if len(annot_names) != len(img_names):
        # zip() below stops at the shorter list; make the dropped files visible.
        print("mass_convert_to_PASCAL_VOC_xml(). Warning: file counts differ: ",
              len(annot_names), len(img_names))

    count = 0
    for annot_filename, img_filename in zip(annot_names, img_names):
        # ******************************************* #
        # PERFORM THE FILE NAME MATCHING ACCORDINGLY
        # The following Boolean formula is the formula for corresponding matching strings
        formula = (annot_filename == img_filename)
        # ******************************************* #

        if not formula:
            print("mass_convert_to_PASCAL_VOC_xml(). Files not matching: ", annot_filename, img_filename)
            print(" + terminating...")
            break

        convert_to_PASCAL_VOC_xml(annot_foldername, annot_filename, annot_filetype,
            img_foldername, img_filename, img_filetype)
        count = count + 1
    print("mass_convert_to_PASCAL_VOC_xml(). Number of converted files = ", count)
    return

def convert_to_PASCAL_VOC_xml(annot_foldername,annot_filename,annot_filetype, 
    img_foldername,img_filename,img_filetype):
    """Write a PASCAL VOC style XML annotation from a tab-separated text file.

    The input file ``<annot_foldername>/<annot_filename><annot_filetype>`` is
    expected to hold one object per line with seven tab-separated fields:
    label, height, width, xmin, ymin, xmax, ymax. Blank lines are ignored.
    The output is written next to it as ``<annot_filename>.xml``.

    The image size is taken from the first object line. When the file holds no
    object lines, the XML contains only the folder/filename/path/source header
    (no ``size`` and no ``object`` elements).

    Args:
        annot_foldername: folder of the annotation text file; the XML is
            written into the same folder.
        annot_filename: annotation file name without extension.
        annot_filetype: annotation file extension including the dot.
        img_foldername: image folder, written into ``<folder>`` and ``<path>``.
        img_filename: image file name without extension.
        img_filetype: image file extension including the dot.

    Returns:
        None.

    Raises:
        IndexError: if a non-blank line has fewer than seven fields. The XML
            file is not created in that case.
    """
    from xml.sax.saxutils import escape

    annot_path = os.path.join(annot_foldername, annot_filename + annot_filetype)
    with open(annot_path, 'r') as annot_txt:
        # Skip empty / whitespace-only lines (e.g. the trailing newline).
        object_list = [row.split('\t') for row in annot_txt.read().split("\n") if row.strip()]

    image_name = img_filename + img_filetype
    # Text values are escaped so that '&', '<' and '>' cannot break the XML.
    pieces = [
        "<annotation>\n",
        "\t<folder>" + escape(img_foldername) + "</folder>\n",
        "\t<filename>" + escape(image_name) + "</filename>\n",
        "\t<path>" + escape(img_foldername + "/" + image_name) + "</path>\n",
        "\t<source>\n\t\t<database>Unknown</database>\n\t</source>\n",
    ]

    if object_list:
        # Height and width are repeated on every line; use the first one.
        img_height = object_list[0][1]
        img_width = object_list[0][2]
        pieces.append("\t<size>\n")
        pieces.append("\t\t<width>" + escape(str(img_width)) + "</width>\n")
        pieces.append("\t\t<height>" + escape(str(img_height)) + "</height>\n")
        pieces.append("\t\t<depth>3</depth>\n")  # depth is hard-coded to 3 (marked "not sure" in the original script)
        pieces.append("\t</size>\n")
        pieces.append("\t\t<segmented>0</segmented>\n")

        for one_annot_line in object_list:
            label = one_annot_line[0]
            xmin = one_annot_line[3]
            ymin = one_annot_line[4]
            xmax = one_annot_line[5]
            ymax = one_annot_line[6]
            pieces.append("\t<object>\n")
            pieces.append("\t\t<name>" + escape(label) + "</name>\n")
            pieces.append("\t\t<pose>Unspecified</pose>\n")
            pieces.append("\t\t<truncated>0</truncated>\n")
            pieces.append("\t\t<difficult>0</difficult>\n")
            pieces.append("\t\t\t<bndbox>\n")
            pieces.append("\t\t\t\t<xmin>" + escape(str(xmin)) + "</xmin>\n")
            pieces.append("\t\t\t\t<ymin>" + escape(str(ymin)) + "</ymin>\n")
            pieces.append("\t\t\t\t<xmax>" + escape(str(xmax)) + "</xmax>\n")
            pieces.append("\t\t\t\t<ymax>" + escape(str(ymax)) + "</ymax>\n")
            pieces.append("\t\t\t</bndbox>\n")
            pieces.append("\t</object>\n")

    pieces.append("</annotation>")

    # The document is assembled in memory first, so a malformed input line
    # fails before any (partial) XML file is created.
    xml_path = os.path.join(annot_foldername, annot_filename + ".xml")
    with open(xml_path, 'w') as xml_file:
        xml_file.write(''.join(pieces))

    return

from os import listdir
import random
from shutil import copyfile

def move_some_percent(src,tgt):
    """Move a random subset of annotated images from ``src`` to ``tgt``.

    Each ``.png`` / ``.jpg`` file in ``src`` is selected independently with
    probability 1/10 (``random.randint(1, 10) == 1``). A selected image is
    moved to ``tgt`` together with the ``.xml`` file of the same base name.
    A selected image that has no such ``.xml`` file is reported and left
    untouched in ``src``.

    Args:
        src: folder holding the images and their XML annotations.
        tgt: existing destination folder.

    Returns:
        None. The name of each moved XML file is printed.
    """
    #! change the format here accordingly!!!!
    all_files = [f for f in listdir(src) if f.endswith((".png", ".jpg"))]

    for image_name in all_files:
        coin = random.randint(1, 10)
        if coin != 1:
            continue

        # splitext instead of dropping a fixed 4 characters.
        correspxml = os.path.splitext(image_name)[0] + ".xml"
        src_img = os.path.join(src, image_name)
        src_xml = os.path.join(src, correspxml)

        # Check before touching anything so an image is never moved without
        # its annotation.
        if not os.path.isfile(src_xml):
            print("move_some_percent(). No annotation found, not moved: ", image_name)
            continue

        print(correspxml)
        copyfile(src_img, os.path.join(tgt, image_name))
        copyfile(src_xml, os.path.join(tgt, correspxml))
        os.remove(src_img)
        os.remove(src_xml)
    return

Update Log:

20181009: updated convert_to_PASCAL_VOC_xml() so that it can handle images without labels.