In this post I will walk you through, step by step, how to split a CAPTCHA into individual characters and then recognize them.
1. Download the CAPTCHA images
CAPTCHA analysis: the image contains interference polylines, the CAPTCHA mixes digits with upper- and lower-case English letters (so classification needs a larger sample set), the characters are colored, and the image is sprinkled with snowflake-like noise. All of this makes this CAPTCHA fairly hard to recognize.
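Before choosing a binarization threshold it helps to look at one sample up close. A minimal sketch (my addition, assuming a sample has already been saved as 1__get_image/1.png, the directory used by the download script below):

from PIL import Image

# Inspect one downloaded sample (assumed path) to see how dark the character
# strokes are compared with the background before picking a threshold.
img = Image.open("1__get_image/1.png").convert("L")
print(img.size, img.mode)
print(img.histogram())  # 256 grey-level counts; the valley between the two peaks is a reasonable threshold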
2. Binarization and denoising
3. Segmentation
4. Classification
5. Testing the recognition rate
6. Summary
The overall recognition rate is around 70%, which I think is quite good given how hard this CAPTCHA is to recognize.
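As a rough sanity check on that figure (my own back-of-the-envelope estimate, not something measured in the post): with four characters per CAPTCHA, an overall rate near 70% corresponds to each character being recognized correctly roughly 91% of the time, assuming the four predictions are independent.

# Back-of-the-envelope check (assumed per-character accuracy, not measured here)
per_char = 0.91
whole_captcha = per_char ** 4   # all four characters must be correct
print(round(whole_captcha, 2))  # ~0.69, in line with the ~70% overall rate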
Code:
1. Download the images:
# -*- coding: utf-8 -*-
import requests

def spider():
    url = "https://www.epailive.com/basic/captcha?ran=0.22070346581876787"
    for i in range(1, 101):
        print("Downloading image number:", i)
        with open("./1__get_image/{}.png".format(i), "wb") as f:
            f.write(requests.get(url).content)

spider()
2. Binarize and denoise the CAPTCHA:
# -*- coding: utf-8 -*-
from PIL import Image, ImageDraw

# Binary pixel map shared by the functions below
t2val = {}

# Binarize: pixels brighter than the threshold G become 1 (background),
# everything else becomes 0 (character stroke).
# G: integer, binarization threshold
def twoValue(image, G):
    for y in range(0, image.size[1]):
        for x in range(0, image.size[0]):
            g = image.getpixel((x, y))
            if g > G:
                t2val[(x, y)] = 1
            else:
                t2val[(x, y)] = 0

# Denoise: compare each pixel A with its 8 neighbours. Given a threshold
# N (0 < N < 8), if fewer than N neighbours share A's value, treat A as an
# isolated noise dot and clear it to background.
# N: integer, denoising threshold, 0 < N < 8
# Z: integer, number of denoising passes
def clearNoise(image, N, Z):
    for i in range(0, Z):
        t2val[(0, 0)] = 1
        t2val[(image.size[0] - 1, image.size[1] - 1)] = 1
        for x in range(1, image.size[0] - 1):
            for y in range(1, image.size[1] - 1):
                nearDots = 0
                L = t2val[(x, y)]
                if L == t2val[(x - 1, y - 1)]:
                    nearDots += 1
                if L == t2val[(x - 1, y)]:
                    nearDots += 1
                if L == t2val[(x - 1, y + 1)]:
                    nearDots += 1
                if L == t2val[(x, y - 1)]:
                    nearDots += 1
                if L == t2val[(x, y + 1)]:
                    nearDots += 1
                if L == t2val[(x + 1, y - 1)]:
                    nearDots += 1
                if L == t2val[(x + 1, y)]:
                    nearDots += 1
                if L == t2val[(x + 1, y + 1)]:
                    nearDots += 1
                if nearDots < N:
                    t2val[(x, y)] = 1

# Write the binary pixel map out as a 1-bit image.
def saveImage(filename, size):
    image = Image.new("1", size)
    draw = ImageDraw.Draw(image)
    for x in range(0, size[0]):
        for y in range(0, size[1]):
            draw.point((x, y), t2val[(x, y)])
    image.save(filename)

for i in range(1, 101):
    path = "1__get_image/" + str(i) + ".png"
    image = Image.open(path)
    image = image.convert('L')
    twoValue(image, 198)
    clearNoise(image, 3, 1)
    path1 = "2__erzhihua_jiangzao/" + str(i) + ".jpg"
    saveImage(path1, image.size)
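For comparison only: the same global threshold can be applied in one step with Pillow's point() method. This is just a sketch using the same assumed threshold of 198; the script above keeps its own pixel map so that it can also run the neighbour-based denoising pass.

from PIL import Image

# Equivalent one-step binarization sketch (assumed input path and threshold);
# pixels brighter than 198 become white background, the rest become black.
img = Image.open("1__get_image/1.png").convert("L")
binary = img.point(lambda g: 255 if g > 198 else 0, mode="1")
binary.save("binary_preview.png")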
3. Split the CAPTCHA:
# -*- coding: utf-8 -*-
from PIL import Image

def smartSliceImg(img, outDir, ii, count=4, p_w=3):
    '''
    :param img: binarized image to slice
    :param outDir: output directory for the character images
    :param count: how many characters the image contains
    :param p_w: how many pixels around each nominal cut line to search
    :return:
    '''
    w, h = img.size
    pixdata = img.load()
    eachWidth = int(w / count)
    beforeX = 0
    for i in range(count):
        allBCount = []
        nextXOri = (i + 1) * eachWidth
        # Around each nominal boundary, count the black pixels per column and
        # cut at the column with the fewest, i.e. the widest gap.
        for x in range(nextXOri - p_w, nextXOri + p_w):
            if x >= w:
                x = w - 1
            if x < 0:
                x = 0
            b_count = 0
            for y in range(h):
                if pixdata[x, y] == 0:
                    b_count += 1
            allBCount.append({'x_pos': x, 'count': b_count})
        sort = sorted(allBCount, key=lambda e: e.get('count'))
        nextX = sort[0]['x_pos']
        box = (beforeX, 0, nextX, h)
        img.crop(box).save(outDir + str(ii) + "_" + str(i) + ".png")
        beforeX = nextX

for ii in range(1, 101):
    path = "2__erzhihua_jiangzao/" + str(ii) + ".jpg"
    img = Image.open(path)
    outDir = '3__qiege/'
    smartSliceImg(img, outDir, ii, count=4, p_w=3)
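If a cut lands in the wrong place, a quick way to see why is to print the vertical projection (dark pixels per column) of one binarized image. A minimal sketch, assuming the output of step 2:

from PIL import Image

# Count dark pixels in every column of one binarized image (assumed path);
# columns with low counts are the gaps where the slicer prefers to cut.
img = Image.open("2__erzhihua_jiangzao/1.jpg").convert("L")
w, h = img.size
pix = img.load()
# Treat anything darker than mid-grey as a stroke pixel (JPEG compression keeps
# values close to, but not exactly, 0 and 255).
projection = [sum(1 for y in range(h) if pix[x, y] < 128) for x in range(w)]
print(projection)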
4. Training:
# -*- coding: utf-8 -*-
import os

import numpy as np
from PIL import Image
from sklearn.externals import joblib
from sklearn.neighbors import KNeighborsClassifier

# Load the labelled 25x30 character images, one directory per character.
def load_dataset():
    X = []
    y = []
    for i in "23456789ABCDEFGHKMNPRSTUVWXYZ":
        target_path = "fenlei/" + i
        print(target_path)
        for title in os.listdir(target_path):
            pix = np.asarray(Image.open(os.path.join(target_path, title)).convert('L'))
            X.append(pix.reshape(25 * 30))
            y.append(target_path.split('/')[-1])
    X = np.asarray(X)
    y = np.asarray(y)
    return X, y

# Evaluate the model on a held-out set laid out the same way ("part/<char>/").
def check_everyone(model):
    pre_list = []
    y_list = []
    for i in "23456789ABCDEFGHKMNPRSTUVWXYZ":
        part_path = "part/" + i
        for title in os.listdir(part_path):
            pix = np.asarray(Image.open(os.path.join(part_path, title)).convert('L'))
            pix = pix.reshape(25 * 30)
            pre_list.append(pix)
            y_list.append(part_path.split('/')[-1])
    pre_list = np.asarray(pre_list)
    y_list = np.asarray(y_list)
    result_list = model.predict(pre_list)
    acc = np.sum(result_list == y_list)
    print(acc, acc / len(result_list))

X, y = load_dataset()
knn = KNeighborsClassifier()
knn.fit(X, y)
joblib.dump(knn, 'yipai.model')
check_everyone(knn)
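One practical note: sklearn.externals.joblib has been removed from recent scikit-learn releases, so on a current environment you would install joblib separately and replace that import with a plain "import joblib"; the rest of the script stays the same.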
5. Model testing:
# -*- coding: utf-8 -*-
import os

import numpy as np
from PIL import Image
from sklearn.externals import joblib

# Collect the original file names (without the .png extension)
target_path = "1__get_image/"
source_result = []
for title in os.listdir(target_path):
    source_result.append(title.replace('.png', ''))

def predict(model):
    predict_result = []
    for q in range(1, 101):
        pre_list = []
        y_list = []
        for i in range(0, 4):
            part_path = "part1/" + str(q) + "_" + str(i) + ".png"
            pix = np.asarray(Image.open(os.path.join(part_path)))
            pix = pix.reshape(25 * 30)
            pre_list.append(pix)
            y_list.append(part_path.split('/')[-1])
        pre_list = np.asarray(pre_list)
        y_list = np.asarray(y_list)
        result_list = model.predict(pre_list)
        print(result_list, q)
        # Join the four single-character predictions into one string
        predict_result.append(str(result_list[0] + result_list[1] + result_list[2] + result_list[3]))
    return predict_result

model = joblib.load('yipai.model')
predict_result = predict(model)
# print(source_result)
# print(predict_result)