最近想训练一个通用点选模型,样本不可能全靠手工打标,于是自己写了一个点选样本生成脚本。

需要调整的参数

# Font file name; the scripts below load it from the ./fonts/ folder.
getOneFont = '铁蒺藜体.ttf'
# Extra pixels added to the measured text width and height so the label box
# is slightly larger than the glyph.
pos = 10
# Pixel amount subtracted from the box centre's x coordinate
# (horizontal adjustment of the label box).
xpos = 0
# Pixel amount subtracted from the box centre's y coordinate
# (vertical adjustment); a negative value moves the box down.
ypos = -12

pos 用于调整框的大小,xpos 用于左右调整,ypos 用于上下调整。每种字体都需要先手动逐个调整这些参数,直到标注框的位置合适后再批量生成;下面附有用于调试位置的代码。

生成代码

import os
# import cv2
from PIL import ImageFont, ImageDraw, Image
import random
import uuid

# ---------------------------------------------------------------------------
# Generator configuration and shared resources.
# ---------------------------------------------------------------------------

# File names of the candidate background images.
background = os.listdir('./background')
if not background:
    raise ValueError("no background images found in './background'")

# Candidate texts, one per line of text.txt.
# 'utf-8-sig' reads plain UTF-8 as usual but also strips a leading BOM, which
# would otherwise stay glued to the first entry.
with open('./text.txt', encoding='utf-8-sig', mode='r') as f:
    text = [line.replace('\n', '') for line in f]
# Blank lines render no pixels, so their mask has no bounding box and the
# label computation would fail; drop them here.
text = [entry for entry in text if entry.strip()]
if not text:
    raise ValueError("'./text.txt' contains no usable text")

# Every background is resized to this size (pixels) before drawing.
resize_width = 300
resize_height = 200

# Font file (inside ./fonts/) and the manual label-box tuning values:
#   pos  - pixels added to the measured text width/height,
#   xpos - pixels subtracted from the box centre x,
#   ypos - pixels subtracted from the box centre y.
getOneFont = '铁蒺藜体.ttf'
pos = 10
xpos = 0
ypos = -12

# Number of samples to generate.
need = 5000

# Dataset split name; used as the sub-folder under datasets/images and
# datasets/labels.
model = 'train'

# Font rendered at size 45.
font_loader = ImageFont.truetype('./fonts/' + getOneFont, 45)
fontname = getOneFont.split('.')[0]

# ---------------------------------------------------------------------------
# Sample generation loop.
#
# Each iteration picks a random background, draws four random texts on it and
# writes the image plus a YOLO label file ("class x_center y_center w h",
# normalised by the image size) under ./datasets/{images,labels}/<model>/.
# ---------------------------------------------------------------------------

# Allowed range [min, max] for the left x coordinate of each of the four
# texts, one range per text slot.
floating_x1 = [
    [10, 60],
    [70, 130],
    [140, 190],
    [200, 250],
]

# Create both output folders up front. exist_ok=True also covers the case
# where only one of the two folders already exists.
image_dir = './datasets/images/' + model
label_dir = './datasets/labels/' + model
os.makedirs(image_dir, exist_ok=True)
os.makedirs(label_dir, exist_ok=True)

# `i` counts attempts (failed samples are counted too, so the loop always
# terminates). Exactly `need` attempts are made.
i = 0
for i in range(1, need + 1):
    # Reset the paths for every sample so that the cleanup below can only
    # ever touch files belonging to the sample that just failed.
    img_path = None
    txt_path = None
    try:
        getOneBackground = random.choice(background)
        # Load the background; convert to RGB so coloured text can be drawn
        # and the result can always be saved as JPEG (RGBA / palette images
        # cannot). The context manager closes the source file.
        with Image.open('./background/' + getOneBackground) as src:
            img = src.convert('RGB').resize((resize_width, resize_height))
        draw = ImageDraw.Draw(img)

        data = []
        chars = []
        for x_min, x_max in floating_x1:
            # Top-left drawing position of this text.
            x1 = random.randint(x_min, x_max)
            y1 = random.randint(15, 170)

            # Random text colour.
            color = (
                random.randint(0, 255),
                random.randint(0, 255),
                random.randint(0, 255),
            )

            # Random text to draw.
            getOne = random.choice(text)

            # Measure the rendered glyph.
            bbox = font_loader.getmask(getOne).getbbox()
            if bbox is None:
                raise ValueError(f'text {getOne!r} renders no visible pixels')
            text_width = bbox[2] - bbox[0]
            text_height = bbox[3] - bbox[1]

            draw.text((x1, y1), getOne, color, font=font_loader)
            chars.append(getOne)

            # YOLO box: centre shifted by the manual offsets, size enlarged
            # by `pos`, everything normalised by the image size.
            # NOTE(review): with y1 up to 170 and a negative ypos the centre
            # may land at or beyond the bottom edge (y >= 1.0); confirm the
            # training pipeline accepts such labels.
            x = (x1 - xpos + text_width / 2) / resize_width
            y = (y1 - ypos + text_height / 2) / resize_height
            w = (text_width + pos) / resize_width
            h = (text_height + pos) / resize_height

            data.append([0, x, y, w, h])

        # File name: random UUID followed by the drawn texts.
        sample_name = str(uuid.uuid4()) + ''.join(chars)
        img_path = image_dir + '/' + sample_name + '.jpg'
        txt_path = label_dir + '/' + sample_name + '.txt'

        img.save(img_path)
        with open(txt_path, encoding='utf-8', mode='w') as f:
            f.writelines(' '.join(str(j) for j in n) + '\n' for n in data)

    except Exception as e:
        print(e)
        # Remove whatever part of the failed sample was already written.
        for path in (img_path, txt_path):
            if path is not None and os.path.exists(path):
                os.remove(path)

    print(f"generate {i} images", end='\r')

调试位置代码

import os
import cv2
from PIL import ImageFont, ImageDraw, Image
import random
import uuid

# ---------------------------------------------------------------------------
# Debug run: generate ONE sample into ./testGenerate/ so the label boxes can
# be inspected while tuning pos / xpos / ypos.
# ---------------------------------------------------------------------------

background = os.listdir('./background')
# List the folder the font is actually loaded from ('./fonts/').
font = os.listdir('./fonts')
# 'utf-8-sig' reads plain UTF-8 as usual but also strips a leading BOM.
with open('./text.txt', encoding='utf-8-sig', mode='r') as f:
    text = [line.replace('\n', '') for line in f]
# Blank lines render no pixels and cannot be measured; drop them.
text = [entry for entry in text if entry.strip()]

# Size every background is resized to (pixels).
resize_width = 300
resize_height = 200

# Font under test and its manual label-box tuning values:
#   pos  - pixels added to the measured text width/height,
#   xpos - pixels subtracted from the box centre x,
#   ypos - pixels subtracted from the box centre y.
getOneFont = '铁蒺藜体.ttf'
pos = 10
xpos = 0
ypos = -12

# Output paths are defined before the try block so that both the cleanup
# handler and the code after this block can always refer to them.
uuid4 = 'testGenerate'
img_path = './testGenerate/' + uuid4 + '.jpg'
txt_path = './testGenerate/' + uuid4 + '.txt'
os.makedirs('./testGenerate', exist_ok=True)

# Allowed range [min, max] for the left x coordinate of each of the four
# texts, one range per text slot.
floating_x1 = [
    [10, 60],
    [70, 130],
    [140, 190],
    [200, 250],
]

try:
    # To test a random font instead: getOneFont = random.choice(font)
    font_loader = ImageFont.truetype('./fonts/' + getOneFont, 45)
    fontname = getOneFont.split('.')[0]

    getOneBackground = random.choice(background)
    # Load the background; convert to RGB so coloured text can be drawn and
    # the result can always be saved as JPEG. The context manager closes the
    # source file.
    with Image.open('./background/' + getOneBackground) as src:
        img = src.convert('RGB').resize((resize_width, resize_height))
    draw = ImageDraw.Draw(img)

    data = []
    for x_min, x_max in floating_x1:
        # Top-left drawing position of this text.
        x1 = random.randint(x_min, x_max)
        y1 = random.randint(15, 170)

        # Random text colour.
        color = (
            random.randint(0, 255),
            random.randint(0, 255),
            random.randint(0, 255),
        )

        # Random text to draw.
        getOne = random.choice(text)

        # Measure the rendered glyph.
        bbox = font_loader.getmask(getOne).getbbox()
        if bbox is None:
            raise ValueError(f'text {getOne!r} renders no visible pixels')
        text_width = bbox[2] - bbox[0]
        text_height = bbox[3] - bbox[1]

        draw.text((x1, y1), getOne, color, font=font_loader)

        # YOLO box: centre shifted by the manual offsets, size enlarged by
        # `pos`, everything normalised by the image size.
        x = (x1 - xpos + text_width / 2) / resize_width
        y = (y1 - ypos + text_height / 2) / resize_height
        w = (text_width + pos) / resize_width
        h = (text_height + pos) / resize_height

        data.append([0, x, y, w, h])

    img.save(img_path)
    with open(txt_path, encoding='utf-8', mode='w') as f:
        f.writelines(' '.join(str(j) for j in n) + '\n' for n in data)

except Exception as e:
    print(e)
    # Remove any partial output, then stop: the code that follows needs both
    # files and cannot do anything useful without them.
    for path in (img_path, txt_path):
        if os.path.exists(path):
            os.remove(path)
    raise


# Load the generated image with OpenCV and overlay every label from the
# YOLO text file: a green rectangle for the box and the class id in red at
# its top-left corner. The window stays open until a key is pressed.
img = cv2.imread(img_path)
with open(txt_path, encoding='utf-8', mode='r') as f:
    for raw in f:
        fields = raw.replace('\n', '').split(' ')
        # fields: class id, x centre, y centre, width, height (normalised)
        x, y, w, h = (float(value) for value in fields[1:5])
        # Convert centre/size back to pixel corner coordinates.
        top_left = (
            int((x - w / 2) * resize_width),
            int((y - h / 2) * resize_height),
        )
        bottom_right = (
            int((x + w / 2) * resize_width),
            int((y + h / 2) * resize_height),
        )
        cv2.rectangle(img, top_left, bottom_right, (0, 255, 0), 2, 1)
        cv2.putText(img, fields[0], top_left, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)
cv2.imshow('img', img)
cv2.waitKey(0)
cv2.destroyAllWindows()