tfse-app-api-v0.2/common/text_recognition.py

109 lines
3.0 KiB
Python

"""
图片文字识别工具
使用外部接口,用于营业执照、身份证图片的文字识别
"""
import base64
import json
import logging
from urllib.request import Request, urlopen

import requests
def get_img(img_file):
    """Read a binary file-like object and return its content as Base64 text.

    Args:
        img_file: Object exposing ``read()`` that returns bytes (for example
            a file opened in binary mode).

    Returns:
        str: The Base64 encoding of everything ``read()`` returned.

    Raises:
        TypeError: If ``read()`` returns something that is not bytes-like
            (for example a file opened in text mode).
    """
    data = img_file.read()
    # b64encode always yields ASCII bytes on Python 3, so a plain decode is
    # sufficient; the former Python 2 fallback could only repeat the same
    # failing call.
    return base64.b64encode(data).decode('utf-8')
def date_trans_1(_time):
    """Rebuild a compact date string from its year, month and day slices.

    Characters 0-3, 4-5 and 6-7 of ``_time`` are concatenated with empty
    separators, so an 8-character ``YYYYMMDD`` string comes back unchanged
    and anything beyond the eighth character is dropped.

    NOTE(review): the previous comment on this function said it appends
    year/month/day markers to the parsed date, yet the joining strings are
    empty - confirm whether unit suffixes were intended.
    """
    pieces = (_time[:4], _time[4:6], _time[6:8])
    return ''.join(pieces)
def date_trans_2(_time):
    """Format a compact ``YYYYMMDD`` string as ``YYYY.MM.DD``.

    The year is taken from characters 0-3, the month from 4-5 and the day
    from 6-7; the three slices are joined with dots. Shorter inputs simply
    yield empty slices, so the two dots are always present.
    """
    pieces = (_time[:4], _time[4:6], _time[6:8])
    return '.'.join(pieces)
def id_card_recognition(**kwargs):
    """Recognise the text on one side of an ID card image via the Aliyun OCR API.

    Keyword Args:
        side: Side of the card shown in the image; forwarded to the service
            in the ``configure`` section. ``'face'`` selects the front-side
            fields, any other value is parsed as the back side.
        image: Binary file-like object (``read()`` returning bytes) holding
            the picture.

    Returns:
        dict: For ``side == 'face'`` the keys '姓名', '性别', '民族', '出生',
            '住址' and '身份证号码'; otherwise '签发机关' and '有效期限'.
        str: '伪造身份证' when the service sets ``is_fake``; '识别错误' when
            the request fails or the response cannot be parsed.

    Raises:
        KeyError: If ``side`` or ``image`` is not supplied.
    """
    # HTTPS keeps the ID card image and the APPCODE out of clear text on the
    # wire, matching the scheme used for the business-license endpoint.
    url = 'https://dm-51.data.aliyun.com/rest/160601/ocr/ocr_idcard.json'
    # NOTE(review): this credential is hard-coded in source control; it should
    # be moved to configuration / secret storage and rotated.
    app_code = "6d6c3146677c40bf9e3e1e2aa73404cd"
    # Without a timeout a stalled connection would block the caller forever.
    timeout_seconds = 30

    # Request headers
    headers = {
        'Authorization': 'APPCODE %s' % app_code,
        'Content-Type': 'application/json; charset=UTF-8'
    }
    # Request body
    body = {
        "configure": {'side': kwargs['side']},
        "image": get_img(kwargs['image'])
    }
    try:
        params = json.dumps(body).encode(encoding='UTF8')
        req = Request(url, params, headers)
        # The context manager closes the HTTP response even if read() fails.
        with urlopen(req, timeout=timeout_seconds) as r:
            html = r.read()
        res = json.loads(html.decode("utf8"))
        if res['is_fake']:
            return '伪造身份证'
        data = dict()
        if kwargs['side'] == 'face':
            data['姓名'] = res['name']
            data['性别'] = res['sex']
            data['民族'] = res['nationality']
            # The birth date slices are re-joined with no separator.
            data['出生'] = "{}{}{}".format(res['birth'][:4], res['birth'][4:6], res['birth'][6:])
            data['住址'] = res['address']
            data['身份证号码'] = res['num']
        else:
            data['签发机关'] = res['issue']
            end_date = res['end_date']
            # NOTE(review): Aliyun's API docs describe a non-numeric marker in
            # end_date for cards without an expiry - confirm against live
            # responses. Only numeric values are reformatted so such a marker
            # is passed through instead of being turned into "xx..".
            if end_date.isdigit():
                end_date = date_trans_2(end_date)
            data['有效期限'] = '{}-{}'.format(date_trans_2(res['start_date']), end_date)
        return data
    except Exception:
        # Boundary handler: callers expect the error string, but the cause is
        # logged so failures can still be diagnosed.
        logging.getLogger(__name__).exception('ID card recognition failed')
        return '识别错误'
def business_license_recognition(**kwargs):
    """Recognise the text on a business license image via the Aliyun OCR API.

    Keyword Args:
        image: Binary file-like object (``read()`` returning bytes) holding
            the picture.

    Returns:
        dict: Keys '统一社会信用代码', '名称', '注册资本', '类型', '成立日期',
            '法定代表人', '经营范围' and '住所' filled from the service response.
        str: '识别错误' when the request fails or the response cannot be
            parsed.

    Raises:
        KeyError: If ``image`` is not supplied.
    """
    url = 'https://dm-58.data.aliyun.com/rest/160601/ocr/ocr_business_license.json'
    # NOTE(review): this credential is hard-coded in source control; it should
    # be moved to configuration / secret storage and rotated.
    app_code = "6d6c3146677c40bf9e3e1e2aa73404cd"
    # requests never times out by default; bound the wait explicitly.
    timeout_seconds = 30

    headers = {
        'Authorization': 'APPCODE %s' % app_code,
        'Content-Type': 'application/json; charset=UTF-8'
    }
    body = {
        "image": get_img(kwargs['image'])
    }
    try:
        # The HTTP call lives inside the try block so that network failures
        # yield the same error string as parsing failures, consistent with
        # id_card_recognition.
        response = requests.post(
            url=url,
            data=json.dumps(body),
            headers=headers,
            timeout=timeout_seconds,
        )
        res = json.loads(response.text)
        return {
            '统一社会信用代码': res['reg_num'],
            '名称': res['name'],
            '注册资本': res['capital'],
            '类型': res['type'],
            '成立日期': date_trans_1(res['establish_date']),
            '法定代表人': res['person'],
            '经营范围': res['business'],
            '住所': res['address'],
        }
    except Exception:
        # Boundary handler: callers expect the error string, but the cause is
        # logged so failures can still be diagnosed.
        logging.getLogger(__name__).exception('Business license recognition failed')
        return '识别错误'