font = TTFont('match_07.ttf') # 打开文件 code_list = font.getGlyphOrder()[1:] # 返回的是数字对于的编码 print(code_list) # 创建图片 im = Image.new("RGB", (1800, 1000), (255, 255, 255)) dr = ImageDraw.Draw(im) font = ImageFont.truetype('match_07.ttf', 40)
# 将字体写入图片 count = 3 array_list = numpy.array_split(code_list, count) # 将列表切分成15份,以便于在图片上分行显示 for t inrange(count): new_list = [i.replace("uni", "\\u") for i in array_list[t]] text = "".join(new_list) text = text.encode('utf-8').decode('unicode_escape') dr.text((0, 50 * t), text, font=font, fill="#000000") im.save("sss.jpg")
result = pytesseract.image_to_string(im, lang="chi_sim") result = result.replace(" ", "").replace("\n", "") code_list = [i.replace("uni", "&#x") for i in code_list] code_dict = dict(zip(code_list, list(result))) return code_dict
解析完成后得到的是一个字典(字体编码 → 识别出的字符),接下来用它把接口返回的 data 中每个 value 的编码逐个替换成对应的数字即可。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
# Fetch one page of the match-7 API and decode its obfuscated values.
# Relies on names defined outside this fragment: `i` (page number),
# `cookies`, `headers`, `max_lp` (running maximum) and `write_img()`.
response = requests.get(
    'https://match.yuanrenxue.cn/api/match/7?page={}'.format(i),
    cookies=cookies,
    headers=headers,
).json()
data_dict = response['data']

# The response carries a base64-encoded font under 'woff'; write it to disk
# before calling write_img(), which presumably reads 'match_07.ttf' to
# build the code -> character mapping (confirm against its definition).
font_file = base64.b64decode(response['woff'])
with open('match_07.ttf', 'wb') as f:
    f.write(font_file)

code_dict = write_img()
for data in data_dict:
    # The last piece after splitting on spaces is discarded -- presumably
    # the value string ends with a trailing space (verify against the API).
    value_list = data['value'].split(' ')[:-1]
    # Translate every glyph code and concatenate into one digit string.
    lp = ''.join(code_dict[value] for value in value_list)
    lp_number = int(lp)
    if lp_number > max_lp:
        max_lp = lp_number
    print(lp)
    # print(value_list)