最近想训练一个通用点选模型,样本不可能自己手动去打标,于是写了一个点选样本生成脚本。
需要调整的参数
# Font file name; the scripts below load it from ./fonts/ at size 45.
getOneFont = '铁蒺藜体.ttf'
# Pixels added to both the width and the height of every label box.
pos = 10
# Horizontal offset in pixels, subtracted from the box centre x.
xpos = 0
# Vertical offset in pixels, subtracted from the box centre y
# (a negative value therefore increases y).
ypos = -12
pos 用于调整标注框的大小,xpos 用于左右调整,ypos 用于上下调整。不同字体需要先手动逐个调整这三个参数,直到标注框与文字完全对齐后再批量生成;下面附有调试位置用的代码。
生成代码
import os
# import cv2
from PIL import ImageFont, ImageDraw, Image
import random
import uuid
# ---------------------------------------------------------------------------
# Click-captcha sample generator.
#
# For every sample: pick a random background from ./background, resize it,
# draw four random strings from ./text.txt in random colours (one per
# horizontal band), and write the image plus a YOLO label file
# ("class x_center y_center width height", all normalised) under
# ./datasets/images/<model> and ./datasets/labels/<model>.
# ---------------------------------------------------------------------------

# Candidate background file names.
background = os.listdir('./background')

with open('./text.txt', encoding='utf-8', mode='r') as f:
    # One candidate string per line. Blank lines are dropped: an empty string
    # has no glyph mask, so getbbox() would return None and crash below.
    text = [line.replace('\n', '') for line in f if line.strip()]

if not background or not text:
    raise SystemExit('./background and ./text.txt must both be non-empty')

# Output image size in pixels.
resize_width = 300
resize_height = 200

# Font file (loaded from ./fonts/) and the hand-tuned box adjustments:
# pos grows the box, xpos / ypos shift its centre.
getOneFont = '铁蒺藜体.ttf'
pos = 10
xpos = 0
ypos = -12

# Number of samples to generate and the dataset split they belong to.
need = 5000
model = 'train'

font_loader = ImageFont.truetype('./fonts/' + getOneFont, 45)
fontname = getOneFont.split('.')[0]

# Create both output directories up front; exist_ok covers the case where
# only one of them already exists.
os.makedirs('./datasets/images/' + model, exist_ok=True)
os.makedirs('./datasets/labels/' + model, exist_ok=True)

# Inclusive x ranges for the four strings, so they spread left to right.
floating_x1 = [
    [10, 60],
    [70, 130],
    [140, 190],
    [200, 250],
]


def _clipped_yolo_box(x1, y1, text_width, text_height):
    """Return the normalised (x, y, w, h) label box for text drawn at (x1, y1).

    The box is the text extent shifted by xpos / ypos and enlarged by pos,
    then clipped to the image so every coordinate stays within [0, 1];
    text drawn near the bottom edge would otherwise produce values above 1.

    Raises:
        ValueError: if the box lies completely outside the image.
    """
    center_x = x1 - xpos + text_width / 2
    center_y = y1 - ypos + text_height / 2
    half_w = (text_width + pos) / 2
    half_h = (text_height + pos) / 2

    left = max(center_x - half_w, 0)
    top = max(center_y - half_h, 0)
    right = min(center_x + half_w, resize_width)
    bottom = min(center_y + half_h, resize_height)
    if right <= left or bottom <= top:
        raise ValueError('label box falls outside the image')

    return (
        (left + right) / 2 / resize_width,
        (top + bottom) / 2 / resize_height,
        (right - left) / resize_width,
        (bottom - top) / resize_height,
    )


# range(1, need + 1) makes exactly `need` attempts (the previous
# "check, then increment" loop ran need + 1 times).
for i in range(1, need + 1):
    # Reset per attempt so the error handler never touches an unassigned
    # name or a path left over from an earlier sample.
    img_path = None
    txt_path = None
    try:
        getOneBackground = random.choice(background)
        # Close the source file promptly; convert to RGB so palette / alpha
        # backgrounds can be drawn on with RGB colours and saved as JPEG.
        with Image.open('./background/' + getOneBackground) as source:
            img = source.convert('RGB').resize((resize_width, resize_height))
        draw = ImageDraw.Draw(img)

        data = []
        wenzi = ''
        for x_low, x_high in floating_x1:
            x1 = random.randint(x_low, x_high)
            y1 = random.randint(15, 170)
            r = random.randint(0, 255)
            g = random.randint(0, 255)
            b = random.randint(0, 255)
            getOne = random.choice(text)

            # Size of the rendered glyphs, taken from the font mask.
            bbox = font_loader.getmask(getOne).getbbox()
            text_width = bbox[2] - bbox[0]
            text_height = bbox[3] - bbox[1]

            draw.text((x1, y1), getOne, (r, g, b), font=font_loader)
            wenzi += getOne

            x, y, w, h = _clipped_yolo_box(x1, y1, text_width, text_height)
            data.append([0, x, y, w, h])

        # File name = random UUID followed by the drawn strings.
        uuid4 = str(uuid.uuid4()) + wenzi
        img_path = './datasets/images/' + model + '/' + uuid4 + '.jpg'
        txt_path = './datasets/labels/' + model + '/' + uuid4 + '.txt'
        img.save(img_path)
        with open(txt_path, encoding='utf-8', mode='w') as f:
            for n in data:
                f.write(' '.join([str(j) for j in n]) + '\n')
    except Exception as e:
        # Best effort: report the failure, discard any half-written sample
        # and continue with the next attempt.
        print(e)
        for leftover in (img_path, txt_path):
            if leftover is not None and os.path.exists(leftover):
                os.remove(leftover)
    print(f"generate {i} images", end='\r')
调试位置代码
import os
import cv2
from PIL import ImageFont, ImageDraw, Image
import random
import uuid
# ---------------------------------------------------------------------------
# Position-debugging script.
#
# Generates ONE sample with the same drawing / labelling steps as the
# generator, writes it to ./testGenerate/, then reloads it with OpenCV and
# draws the label boxes so pos / xpos / ypos can be tuned by eye.
# ---------------------------------------------------------------------------

background = os.listdir('./background')
# Fonts live in ./fonts (the directory the font is loaded from below).
font = os.listdir('./fonts')

with open('./text.txt', encoding='utf-8', mode='r') as f:
    # One candidate string per line. Blank lines are dropped: an empty string
    # has no glyph mask, so getbbox() would return None and crash below.
    text = [line.replace('\n', '') for line in f if line.strip()]

if not background or not text:
    raise SystemExit('./background and ./text.txt must both be non-empty')

# Output image size in pixels.
resize_width = 300
resize_height = 200

# Font file and the box adjustments being tuned:
# pos grows the box, xpos / ypos shift its centre.
getOneFont = '铁蒺藜体.ttf'
pos = 10
xpos = 0
ypos = -12

# Fixed output paths, defined before the try block so the error handler can
# always refer to them.
uuid4 = 'testGenerate'
img_path = './testGenerate/' + uuid4 + '.jpg'
txt_path = './testGenerate/' + uuid4 + '.txt'
os.makedirs('./testGenerate', exist_ok=True)

# Inclusive x ranges for the four strings, so they spread left to right.
floating_x1 = [
    [10, 60],
    [70, 130],
    [140, 190],
    [200, 250],
]


def _clipped_yolo_box(x1, y1, text_width, text_height):
    """Return the normalised (x, y, w, h) label box for text drawn at (x1, y1).

    The box is the text extent shifted by xpos / ypos and enlarged by pos,
    then clipped to the image so every coordinate stays within [0, 1].

    Raises:
        ValueError: if the box lies completely outside the image.
    """
    center_x = x1 - xpos + text_width / 2
    center_y = y1 - ypos + text_height / 2
    half_w = (text_width + pos) / 2
    half_h = (text_height + pos) / 2

    left = max(center_x - half_w, 0)
    top = max(center_y - half_h, 0)
    right = min(center_x + half_w, resize_width)
    bottom = min(center_y + half_h, resize_height)
    if right <= left or bottom <= top:
        raise ValueError('label box falls outside the image')

    return (
        (left + right) / 2 / resize_width,
        (top + bottom) / 2 / resize_height,
        (right - left) / resize_width,
        (bottom - top) / resize_height,
    )


try:
    # getOneFont = random.choice(font)
    font_loader = ImageFont.truetype('./fonts/' + getOneFont, 45)
    fontname = getOneFont.split('.')[0]

    getOneBackground = random.choice(background)
    # Convert to RGB so palette / alpha backgrounds can be saved as JPEG.
    with Image.open('./background/' + getOneBackground) as source:
        img = source.convert('RGB').resize((resize_width, resize_height))
    draw = ImageDraw.Draw(img)

    data = []
    wenzi = ''
    for x_low, x_high in floating_x1:
        x1 = random.randint(x_low, x_high)
        y1 = random.randint(15, 170)
        r = random.randint(0, 255)
        g = random.randint(0, 255)
        b = random.randint(0, 255)
        getOne = random.choice(text)

        # Size of the rendered glyphs, taken from the font mask.
        bbox = font_loader.getmask(getOne).getbbox()
        text_width = bbox[2] - bbox[0]
        text_height = bbox[3] - bbox[1]

        draw.text((x1, y1), getOne, (r, g, b), font=font_loader)
        wenzi += getOne

        x, y, w, h = _clipped_yolo_box(x1, y1, text_width, text_height)
        data.append([0, x, y, w, h])

    img.save(img_path)
    with open(txt_path, encoding='utf-8', mode='w') as f:
        for n in data:
            f.write(' '.join([str(j) for j in n]) + '\n')
except Exception as e:
    print(e)
    for leftover in (img_path, txt_path):
        if os.path.exists(leftover):
            os.remove(leftover)
    # Nothing to preview: stop instead of reading the files just removed.
    raise SystemExit(1)

# Reload the sample with OpenCV and draw every label box on top of it.
img = cv2.imread(img_path)
if img is None:
    raise SystemExit('could not read ' + img_path)

with open(txt_path, encoding='utf-8', mode='r') as f:
    for line in f:
        line = line.replace('\n', '')
        line = line.split(' ')
        x = float(line[1])
        y = float(line[2])
        w = float(line[3])
        h = float(line[4])
        # Convert the normalised centre / size back to pixel corners.
        x1 = int((x - w / 2) * resize_width)
        y1 = int((y - h / 2) * resize_height)
        x2 = int((x + w / 2) * resize_width)
        y2 = int((y + h / 2) * resize_height)
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2, 1)
        cv2.putText(img, line[0], (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)

cv2.imshow('img', img)
cv2.waitKey(0)
cv2.destroyAllWindows()
HACKED BY H4VK