2021-12-01 01:18:23 +08:00
|
|
|
|
"""
|
|
|
|
|
图片文字识别工具
|
|
|
|
|
使用外部接口,用于营业执照、身份证图片的文字识别
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
import json
|
|
|
|
|
import base64
|
|
|
|
|
import requests
|
|
|
|
|
|
|
|
|
|
from urllib.request import Request, urlopen
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_img(img_file):
    """Read an image file object and return its content as Base64 text.

    Args:
        img_file: A file-like object whose ``read()`` returns the raw image
            bytes (for example a file opened in binary mode).

    Returns:
        str: The Base64 encoding of everything ``read()`` returned.

    Raises:
        TypeError: If ``read()`` does not return a bytes-like object.
    """
    # Base64 output is pure ASCII, so the decode step cannot fail.  The
    # previous ``except TypeError`` fallback only mattered on Python 2; on
    # Python 3 it could merely repeat the same failing call.
    return base64.b64encode(img_file.read()).decode('ascii')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Add the 年/月/日 (year/month/day) suffixes to a parsed date string.
def date_trans_1(_time):
    """Format a compact ``YYYYMMDD`` string as ``YYYY年MM月DD日``.

    Args:
        _time: Date text whose first eight characters are digits laid out as
            year (4), month (2) and day (2).  Characters after the eighth
            are ignored.

    Returns:
        str: The formatted date.  Text that does not start with eight digits
        (an empty string, a textual marker, a truncated value) is returned
        unchanged instead of being sliced into a meaningless "date".

    Raises:
        TypeError: If ``_time`` does not support slicing.
    """
    compact = _time[:8]
    # Guard against non-date values so they are not chopped into pieces.
    if len(compact) < 8 or not compact.isdigit():
        return _time
    return '{}年{}月{}日'.format(compact[:4], compact[4:6], compact[6:])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Join the parsed date digits with dots (YYYY.MM.DD).
def date_trans_2(_time):
    """Format a compact ``YYYYMMDD`` string as ``YYYY.MM.DD``.

    Args:
        _time: Date text whose first eight characters are digits laid out as
            year (4), month (2) and day (2).  Characters after the eighth
            are ignored.

    Returns:
        str: The dotted date.  Text that does not start with eight digits is
        returned unchanged, so a textual value such as ``'长期'`` is not
        mangled into ``'长期..'``.
        NOTE(review): presumably the OCR service can return such a marker
        for cards without an expiry date - confirm against the API docs.

    Raises:
        TypeError: If ``_time`` does not support slicing.
    """
    compact = _time[:8]
    # Guard against non-date values so they are not chopped into pieces.
    if len(compact) < 8 or not compact.isdigit():
        return _time
    return '.'.join((compact[:4], compact[4:6], compact[6:]))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def id_card_recognition(**kwargs):
    """Recognise the text on an ID-card image via the remote OCR endpoint.

    Keyword Args:
        side: Which side of the card the image shows.  ``'face'`` selects the
            personal-detail fields; any other value is handled as the reverse
            side (issuing authority and validity period).
        image: File-like object providing the image bytes; it is handed to
            ``get_img`` for Base64 encoding.

    Returns:
        dict | str: A dict of recognised fields keyed by their Chinese
        labels, the string ``'伪造身份证'`` when the response's ``is_fake``
        flag is truthy, or ``'识别错误'`` when the request or the handling of
        the response raises.

    Raises:
        KeyError: If ``side`` or ``image`` is missing.  The request body is
            built before the ``try`` block, so this error (and any error
            raised while reading the image) propagates to the caller.
    """
    # NOTE(review): plain HTTP, unlike the business-license endpoint in this
    # file - the image and the APPCODE are sent unencrypted.  Confirm whether
    # the endpoint can be switched to HTTPS.
    url = 'http://dm-51.data.aliyun.com/rest/160601/ocr/ocr_idcard.json'
    # NOTE(review): credential is hard-coded in source; consider loading it
    # from configuration instead.
    app_code = "6d6c3146677c40bf9e3e1e2aa73404cd"

    # Request headers
    headers = {
        'Authorization': 'APPCODE %s' % app_code,
        # NOTE(review): unusual media type for a JSON payload - confirm that
        # it is intentional.
        'Content-Type': 'application/template; charset=UTF-8'
    }

    # Request body
    body = {
        "configure": {'side': kwargs['side']},
        "image": get_img(kwargs['image'])
    }

    try:
        params = json.dumps(body).encode(encoding='UTF8')
        req = Request(url, params, headers)
        # Use the response as a context manager so the connection is closed
        # deterministically, including when read() raises.
        with urlopen(req) as r:
            html = r.read()
        res = json.loads(html.decode("utf8"))

        if res['is_fake']:
            return '伪造身份证'

        data = dict()
        if kwargs['side'] == 'face':
            data['姓名'] = res['name']
            data['性别'] = res['sex']
            data['民族'] = res['nationality']
            data['出生'] = "{}年{}月{}日".format(res['birth'][:4], res['birth'][4:6], res['birth'][6:])
            data['住址'] = res['address']
            data['身份证号码'] = res['num']
        else:
            data['签发机关'] = res['issue']
            data['有效期限'] = '{}-{}'.format(date_trans_2(res['start_date']), date_trans_2(res['end_date']))
        return data
    except Exception:
        # Best-effort contract: any failure is reported as a fixed string.
        return '识别错误'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def business_license_recognition(**kwargs):
    """Recognise the text on a business-license image via the remote OCR endpoint.

    Keyword Args:
        image: File-like object providing the image bytes; it is handed to
            ``get_img`` for Base64 encoding.

    Returns:
        dict | str: A dict of recognised fields keyed by their Chinese
        labels (the establishment date is reformatted by ``date_trans_1``),
        or the string ``'识别错误'`` when the request or the handling of the
        response raises.

    Raises:
        KeyError: If ``image`` is missing.  The request body is built before
            the ``try`` block, so this error (and any error raised while
            reading the image) propagates to the caller.
    """
    url = 'https://dm-58.data.aliyun.com/rest/160601/ocr/ocr_business_license.json'
    # NOTE(review): credential is hard-coded in source; consider loading it
    # from configuration instead.
    app_code = "6d6c3146677c40bf9e3e1e2aa73404cd"

    headers = {
        'Authorization': 'APPCODE %s' % app_code,
        # NOTE(review): unusual media type for a JSON payload - confirm that
        # it is intentional.
        'Content-Type': 'application/template; charset=UTF-8'
    }

    body = {
        "image": get_img(kwargs['image'])
    }

    try:
        # The HTTP call lives inside the try so that a network failure gives
        # the same '识别错误' result as a malformed response, matching the
        # behaviour of id_card_recognition.
        response = requests.post(url=url, data=json.dumps(body), headers=headers)
        res = json.loads(response.text)
        data = dict()
        data['统一社会信用代码'] = res['reg_num']
        data['名称'] = res['name']
        data['注册资本'] = res['capital']
        data['类型'] = res['type']
        data['成立日期'] = date_trans_1(res['establish_date'])
        data['法定代表人'] = res['person']
        data['经营范围'] = res['business']
        data['住所'] = res['address']
        return data
    except Exception:
        # Best-effort contract: any failure is reported as a fixed string.
        return '识别错误'
|