2022-05-24 17:15:02 +08:00
|
|
|
|
import json
|
2022-05-25 02:58:06 +08:00
|
|
|
|
import random
|
|
|
|
|
|
|
|
|
|
from datetime import datetime, timedelta
|
|
|
|
|
|
|
|
|
|
from DBHelper.MongoHelperInstance import DB_GUA
|
2022-05-24 17:15:02 +08:00
|
|
|
|
|
|
|
|
|
|
2022-05-25 14:41:58 +08:00
|
|
|
|
class CommonUtils(object):
    """General-purpose helpers that are not tied to Excel parsing."""

    @staticmethod
    def make_new_cid():
        """Generate an enterprise ID that is not already stored in the database.

        Candidates are 8 characters drawn at random from ASCII letters and
        digits.  A candidate counts as taken when
        ``DB_GUA.find_single_column`` returns anything other than ``None``
        for it; in that case a new candidate is generated and checked again.

        Returns:
            str: an 8-character ID for which the lookup returned ``None``.
        """
        choices = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'

        def random_cid(num):
            """Return a random string of ``num`` characters from ``choices``."""
            return ''.join(random.choice(choices) for _ in range(num))

        def cid_exists(cid):
            """Return True when the lookup for ``cid`` yields a result."""
            # NOTE(review): the positional arguments look like
            # (database, collection, filter, column) -- confirm against
            # DB_GUA.find_single_column.
            return DB_GUA.find_single_column(
                "管理端",
                "企业数据",
                {"企业ID": cid},
                "企业ID"
            ) is not None

        new_cid = random_cid(8)
        # The existence check must be repeated for every new candidate;
        # checking only the first one would loop forever on a collision.
        while cid_exists(new_cid):
            new_cid = random_cid(8)
        return new_cid
|
|
|
|
|
|
|
|
|
|
|
2022-05-24 17:15:02 +08:00
|
|
|
|
class ExcelParserUtil(object):
    """Stateless helpers for cleaning up cell values read from Excel sheets."""

    # Day zero for the serial-date conversion: 1899-12-30 plus the serial
    # number of days gives the calendar date.
    _EXCEL_EPOCH = datetime(1899, 12, 30)

    @staticmethod
    def parse_date(days):
        """Convert an Excel serial date into a ``YYYY年MM月DD日`` string.

        Args:
            days: number of days since 1899-12-30 (int or float).  ``None``
                and ``''`` are treated as "no date".

        Returns:
            str | None: the formatted date, or ``None`` for a blank input.

        Raises:
            TypeError: if ``days`` is some other non-numeric value.
        """
        # Blank cells reach this function as '' or, after list_to_none, as
        # None; report them as "no date" instead of failing inside timedelta.
        if days is None or days == '':
            return None
        _date = ExcelParserUtil._EXCEL_EPOCH + timedelta(days=days)
        # Build the string from the date parts directly: strftime's handling
        # of non-ASCII characters in the format string is platform dependent.
        return '{:04d}年{:02d}月{:02d}日'.format(_date.year, _date.month, _date.day)

    @staticmethod
    def list_remove_blank(_list):
        """Remove every ``''`` element from ``_list`` in place.

        Args:
            _list: the list to clean; it is modified in place.

        Returns:
            list: the same list object that was passed in.
        """
        # Single pass with slice assignment keeps the in-place contract while
        # avoiding a rescan of the list for each blank removed.
        _list[:] = [item for item in _list if item != '']
        return _list

    @staticmethod
    def list_to_none(_list):
        """Return a new list in which every ``''`` is replaced by ``None``."""
        return [None if item == '' else item for item in _list]

    @staticmethod
    def list_decimal(_list):
        """Return a new list with every ``float`` rounded to 2 decimal places.

        Non-float elements are passed through unchanged.
        """
        return [round(item, 2) if isinstance(item, float) else item for item in _list]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ExcelSheetParser(object):
    """Parse one worksheet of the enterprise workbook into dicts and lists.

    The wrapped ``sheet`` only needs two methods, ``row_values(index)`` and
    ``col_values(index)``, each returning a list of cell values
    (presumably an xlrd sheet -- confirm against the caller).  Which
    ``parse_sheetN`` method applies to which worksheet is decided by the
    caller.
    """

    # Position, within the value column of sheet 1 (after dropping the first
    # cell), of the insured-headcount field.
    _INSURED_COUNT_INDEX = 16

    # Column indexes holding the reporting periods on the column-oriented
    # sheets (3, 7 and 8).
    _PERIOD_COLUMNS = range(2, 6)

    # Output keys for sheet 6: the year followed by three figures for each
    # amount bracket, in the order the rows are read.
    _SHEET6_TAGS = (
        '年度', '担保户数(少于100w)', '担保笔数(少于100w)', '担保责任余额(少于100w)',
        '担保户数(少于500w)', '担保笔数(少于500w)', '担保责任余额(少于500w)',
        '担保户数(500w-1000w)', '担保笔数(500w-1000w)', '担保责任余额(500w-1000w)',
        '担保户数(1000w-3000w)', '担保笔数(1000w-3000w)', '担保责任余额(1000w-3000w)',
        '担保户数(3000w-5000w)', '担保笔数(3000w-5000w)', '担保责任余额(3000w-5000w)',
        '担保户数(5000w-8000w)', '担保笔数(5000w-8000w)', '担保责任余额(5000w-8000w)',
        '担保户数(8000w-10000w)', '担保笔数(8000w-10000w)', '担保责任余额(8000w-10000w)',
        '担保户数(大于10000w)', '担保笔数(大于10000w)', '担保责任余额(大于10000w)',
        '担保户数合计', '担保笔数合计', '担保责任余额合计',
    )

    # Section-heading rows of the balance sheet that carry no figures and are
    # dropped from the parsed result.
    _SHEET7_SECTION_HEADERS = ('流动资产:', '非流动资产:', '流动负债:', '非流动负债:', '所有者权益:')

    def __init__(self, sheet):
        """Remember the worksheet to parse."""
        self._sheet = sheet

    @staticmethod
    def _clean(values):
        """Turn blank strings into None and round floats to 2 decimals."""
        return ExcelParserUtil.list_decimal(ExcelParserUtil.list_to_none(values))

    @staticmethod
    def _to_record(tags, values):
        """Pair ``tags`` with ``values`` and return the result as a dict.

        The JSON round trip is kept from the original implementation; among
        other things it turns non-string keys (e.g. numeric header cells)
        into strings.
        """
        return json.loads(json.dumps(dict(zip(tags, values))))

    def _count_filled(self, col):
        """Count the non-blank cells of column ``col`` below the two header rows."""
        return len(ExcelParserUtil.list_remove_blank(self._sheet.col_values(col)[2:]))

    def _parse_rows(self, tags, count, start, stop=None):
        """Build one record per data row from columns ``start:stop``.

        Data rows begin at row index 2; ``count`` rows are read.
        """
        records = []
        for row in range(2, count + 2):
            values = self._clean(self._sheet.row_values(row)[start:stop])
            records.append(self._to_record(tags, values))
        return records

    def _parse_period_columns(self, tags):
        """Build one record per reporting-period column, keyed by ``tags``."""
        records = []
        for col in self._PERIOD_COLUMNS:
            values = self._clean(self._sheet.col_values(col)[1:])
            records.append(self._to_record(tags, values))
        return records

    # Parse enterprise information
    def parse_sheet1(self):
        """Parse the enterprise-information sheet into a single dict.

        Column 1 holds the field names and column 2 the values; the first
        cell of each is skipped.  The establishment date (``成立日期``) is
        converted from an Excel serial number to a formatted date string.

        Returns:
            dict: field name -> cleaned value.
        """
        # Field names
        tags = self._sheet.col_values(1)[1:]
        # Field values
        values = self._sheet.col_values(2)[1:]
        # Insured headcount: Excel yields whole numbers as floats, store an int.
        insured = values[self._INSURED_COUNT_INDEX]
        if isinstance(insured, float):
            values[self._INSURED_COUNT_INDEX] = int(insured)
        data = self._to_record(tags, self._clean(values))
        # Convert the date
        data['成立日期'] = ExcelParserUtil.parse_date(data['成立日期'])
        return data

    # Parse executive information and shareholder information
    def parse_sheet2(self):
        """Parse the executives table and the shareholders table.

        Executives occupy columns 1-6 and shareholders columns 8 onwards;
        row 1 holds the headers and data starts at row 2.  The number of
        rows read for each table is the number of non-blank cells in its
        first column.

        Returns:
            tuple[list[dict], list[dict]]: (executives, shareholders).
        """
        header = self._sheet.row_values(1)

        # Executives
        exec_tags = header[1:7]
        executives = []
        for row in range(2, self._count_filled(1) + 2):
            values = self._clean(self._sheet.row_values(row)[1:7])
            # Birth date
            values[3] = ExcelParserUtil.parse_date(values[3])
            executives.append(self._to_record(exec_tags, values))

        # Shareholders
        shareholders = self._parse_rows(header[8:], self._count_filled(8), 8)
        return executives, shareholders

    # Parse operating status
    def parse_sheet3(self):
        """Parse the operating-status sheet, one entry per period column.

        Column 1 holds the row labels; columns 2-5 each hold one reporting
        period.  Each column is split into fixed row ranges for the
        balance sheet, income statement and supplementary data.

        Returns:
            list[dict]: one dict per period with the keys ``报告期``,
            ``资产负债表``, ``利润表`` and ``补充数据表``.
        """
        tags = self._sheet.col_values(1)[1:]
        reports = []
        for col in self._PERIOD_COLUMNS:
            values = self._clean(self._sheet.col_values(col)[1:])
            reports.append({
                # Reporting period
                "报告期": values[0],
                # Balance sheet
                "资产负债表": self._to_record(tags[43:67], values[43:67]),
                # Income statement
                "利润表": self._to_record(tags[28:43], values[28:43]),
                # Supplementary data
                "补充数据表": self._to_record(tags[1:28], values[1:28]),
            })
        return reports

    # Parse customer information
    def parse_sheet4(self):
        """Parse the customer-information sheet, one dict per data row.

        Row 1 holds the headers (from column 1 on).  The guarantee start
        and end dates (``担保起始日`` / ``担保截止日``) are converted from
        Excel serial numbers to formatted date strings.

        Returns:
            list[dict]: header -> cleaned value for every data row.
        """
        tags = self._sheet.row_values(1)[1:]
        records = self._parse_rows(tags, self._count_filled(1), 1)
        for data in records:
            data['担保起始日'] = ExcelParserUtil.parse_date(data['担保起始日'])
            data['担保截止日'] = ExcelParserUtil.parse_date(data['担保截止日'])
        return records

    # Parse regional distribution / industry distribution
    def parse_sheet5(self):
        """Parse a distribution sheet, one dict per data row.

        Row 1 holds the headers (from column 1 on); data starts at row 2.

        Returns:
            list[dict]: header -> cleaned value for every data row.
        """
        tags = self._sheet.row_values(1)[1:]
        return self._parse_rows(tags, self._count_filled(1), 1)

    # Parse guarantee-amount distribution
    def parse_sheet6(self):
        """Parse the guarantee-amount distribution sheet, one dict per year.

        Row 1 carries the four years at columns 2, 5, 8 and 11; each year
        owns three adjacent columns.  Rows 3-11 each contribute those three
        figures to the year's record.

        Returns:
            list[dict]: four dicts keyed by the fixed sheet-6 tag names.
        """
        years = self._sheet.row_values(1)
        groups = [[years[2]], [years[5]], [years[8]], [years[11]]]

        for row in range(3, 12):
            cells = self._sheet.row_values(row)
            for index, group in enumerate(groups):
                first = 2 + 3 * index
                group.extend(cells[first:first + 3])

        return [dict(zip(self._SHEET6_TAGS, self._clean(group))) for group in groups]

    # Parse balance sheet
    def parse_sheet7(self):
        """Parse the balance sheet, one dict per period column.

        Column 1 holds the row labels; columns 2-5 each hold one period.
        The section-heading entries are removed from every dict.

        Returns:
            list[dict]: label -> cleaned value for every period.

        Raises:
            KeyError: if an expected section heading is missing.
        """
        tags = self._sheet.col_values(1)[1:]
        records = self._parse_period_columns(tags)
        for data in records:
            for heading in self._SHEET7_SECTION_HEADERS:
                data.pop(heading)
        return records

    # Parse income statement
    def parse_sheet8(self):
        """Parse the income statement, one dict per period column.

        Column 1 holds the row labels, which are stripped of surrounding
        whitespace and of the ``减:`` / ``加:`` prefixes; columns 2-5 each
        hold one period.

        Returns:
            list[dict]: normalised label -> cleaned value for every period.
        """
        tags = [
            label.strip().replace('减:', '').replace('加:', '')
            for label in self._sheet.col_values(1)[1:]
        ]
        return self._parse_period_columns(tags)
|