Merge remote-tracking branch 'origin/ps' into wsc

This commit is contained in:
王思川 2022-06-07 13:30:45 +08:00
commit 8e291c49a0
7 changed files with 568 additions and 187 deletions

View File

@ -5,3 +5,4 @@ DB_TYC = MongoHelper("tyc")
# 股交项目数据库
DB_GUA = MongoHelper("guarantee")

View File

@ -1,9 +1,16 @@
import xlrd
import json
import os
import re
from DBHelper.MongoHelperInstance import DB_GUA
import pandas as pd
import requests
import xlrd
import cpca
from DBHelper.MongoHelperInstance import DB_GUA, DB_TYC
from Modules.Company.CompanyObject import BasicBusinessInfo, FinancialData, CustomerInfo, RegionalDistribution, \
IndustryDistribution, GuaranteedBalanceDistribution
IndustryDistribution, GuaranteedBalanceDistribution, GuaranteeInfo
from Modules.Company.CompanyUtils import CompanyUtils, ExcelSheetParser
from Modules.Company.static.province_map import p_map
from Utils.ObjUtil import SpecObject
@ -14,25 +21,169 @@ class BasicBusinessInfoImpl(object):
"""解析excel模板数据 创建公司信息"""
new_cid = CompanyUtils.make_new_cid()
work_book = xlrd.open_workbook(file_contents=file.read())
# work_book = xlrd.open_workbook(file_contents=file.read())
work_book = xlrd.open_workbook(file)
basic_business_info = BasicBusinessInfo()
financial_data_list = list()
customer_info_list = list()
guarantee_info_list = list()
records_dict = dict()
region_distribution_list = list()
industry_distribution_list = list()
guarantee_distribution_list = list()
for sheet in work_book.sheets():
basic = ExcelSheetParser(sheet=work_book.sheet_by_name('企业信息')).parse_sheet1()
if sheet.name == '企业信息':
data = ExcelSheetParser(sheet=sheet).parse_sheet1()
def get_basic(name):
    """Return the Tianyancha (tyc) business-registration record for ``name``.

    The record is looked up in ``DB_TYC`` first. On a miss the remote
    basic-info endpoint is called once and, if it answers with HTTP 200, the
    same lookup is repeated (presumably the endpoint stores the record in
    ``DB_TYC`` -- confirm against the API service).

    Args:
        name: Company name, matched against the ``企业名称`` field.

    Returns:
        The result of ``DB_TYC.find_single_data`` restricted to the
        ``企业名称`` and ``基本信息`` fields; a falsy value when nothing was
        found and the remote call did not answer with HTTP 200.

    Raises:
        requests.exceptions.RequestException: if the remote call fails or
            does not answer within the timeout.
    """

    def find_cached():
        """Run the (single) lookup used both before and after the API call."""
        return DB_TYC.find_single_data(
            '公司背景',
            '基本信息',
            {"企业名称": name},
            ['企业名称', '基本信息']
        )

    def basic_info_api():
        """Call the remote business-registration endpoint for ``name``."""
        url = "http://api.fecribd.com/api/tyc/basic_info"
        # NOTE(review): credential is hard-coded in source; it should come
        # from configuration / secret storage instead.
        headers = {'token': 'uzdq51N4!I0%HY4sCaQ!aeCSIDIVIdAM'}
        parameter = {"企业名称": name}
        # Without a timeout a stalled endpoint would block the import forever.
        return requests.post(url=url, headers=headers, data=json.dumps(parameter), timeout=60)

    record = find_cached()
    if not record:
        res_ = basic_info_api()
        if res_.status_code == 200:
            record = find_cached()
    return record
basic_data = get_basic(basic['企业名称'])
basic_business_info.cid = new_cid
basic_business_info.set_instance(data=data)
# Enrich the Excel-provided basic info with the Tianyancha (tyc) record.
tyc_info = basic_data['基本信息']
basic['企业类型'] = tyc_info['companyOrgType']

# Numeric Excel cells arrive as floats; str(13812345678.0) would store
# '13812345678.0', so whole-number floats are rendered without the '.0'.
phone = basic['联系电话']
if isinstance(phone, float):
    phone = str(int(phone)) if phone.is_integer() else str(phone)
basic['联系电话'] = phone

# Fix: the credit code used to be copied from 'companyOrgType' (the company
# type). NOTE(review): 'creditCode' is the tyc basic-info field for the
# unified social credit code -- confirm against the stored documents.
basic['统一社会信用代码'] = tyc_info.get('creditCode') or basic.get('统一社会信用代码')

# 'tags' may be missing or None; treat that as "no state-owned tag".
basic['企业性质'] = '国有企业' if '国企' in (tyc_info.get('tags') or '') else '民营企业'

# Prefer the Excel value, fall back to the tyc value. Each capital is handled
# on its own so that one unparsable value no longer wipes out the other.
for capital_key, tyc_key in (('注册资本(万元)', 'regCapital'), ('实缴资本(万元)', 'actualCapital')):
    try:
        basic[capital_key] = float(basic[capital_key]) if basic[capital_key] else float(
            tyc_info[tyc_key].replace('万人民币', ''))
    except (AttributeError, ValueError):
        # AttributeError: tyc value is None; ValueError: not a plain number
        # (e.g. another currency suffix).
        basic[capital_key] = None

basic['参保人数'] = int(basic['参保人数']) if basic['参保人数'] else basic['参保人数']
basic_business_info.set_instance(data=basic)
elif sheet.name == '高管股东信息':
def get_shareholders(name):
    """Return the shareholders of ``name`` built from Tianyancha (tyc) data.

    The company's ``基本信息`` record decides which collection is used: when
    its ``companyOrgType`` contains ``上市`` the ``十大股东`` collection is
    read, otherwise ``企业股东``. On a miss the remote shareholders endpoint
    is called once and, on HTTP 200, the lookup is repeated.

    Missing records no longer raise ``TypeError``: a company without cached
    basic info is treated as not listed, and a company without shareholder
    data yields an empty list.

    Args:
        name: Company name, matched against the ``企业名称`` field.

    Returns:
        list[dict]: one dict per shareholder with the keys ``股东名称``,
        ``持股比例``, ``股东性质`` (always ``'-'``) and ``持股数量(股)``.

    Raises:
        requests.exceptions.RequestException: if the remote call fails or
            does not answer within the timeout.
    """

    def shareholders_info_api():
        """Call the remote shareholders endpoint for ``name``."""
        url = "http://api.fecribd.com/api/tyc/shareholders_info"
        # NOTE(review): credential is hard-coded in source; it should come
        # from configuration / secret storage instead.
        headers = {'token': 'uzdq51N4!I0%HY4sCaQ!aeCSIDIVIdAM'}
        parameter = {"企业名称": name}
        # Without a timeout a stalled endpoint would block the import forever.
        return requests.post(url=url, headers=headers, data=json.dumps(parameter), timeout=60)

    def find_cached(collection):
        """Look up ``name`` in ``collection``, returning only that field."""
        return DB_TYC.find_single_data(
            '公司背景',
            collection,
            {"企业名称": name},
            [collection]
        )

    def find_or_fetch(collection):
        """Return the cached record, triggering one remote fetch on a miss."""
        info = find_cached(collection)
        if not info:
            res_ = shareholders_info_api()
            if res_.status_code == 200:
                info = find_cached(collection)
        return info

    # NOTE(review): records are assumed to be dict-like (``.get``), as the
    # original subscripting already implied.
    basic_info = find_cached('基本信息')
    org_type = ((basic_info or {}).get('基本信息') or {}).get('companyOrgType') or ''

    share_list = list()
    if '上市' in org_type:
        top_holders = (find_or_fetch('十大股东') or {}).get('十大股东') or {}
        # Reshape each entry into the shareholder layout used by the caller.
        for share in top_holders.get('holderList') or []:
            share_list.append({
                '股东名称': share['name'],
                '持股比例': share['proportion'],
                '股东性质': '-',
                '持股数量(股)': share['holdingNum'],
            })
    else:
        holders = (find_or_fetch('企业股东') or {}).get('企业股东') or {}
        for share in holders.get('result') or []:
            # Only the first capital entry is used; an empty list yields None
            # values instead of an IndexError. 'amomon' is the key the source
            # data uses and is kept verbatim.
            capital = (share.get('capital') or [{}])[0]
            share_list.append({
                '股东名称': share['name'],
                '持股比例': capital.get('percent'),
                '股东性质': '-',
                '持股数量(股)': capital.get('amomon'),
            })
    return share_list
def get_education(name, base_dir=r'E:\Project\guarantee-admin-api-v0.2\Modules\Company\补充数据'):
    """Read the employee education head counts for company ``name``.

    Steps:
      1. Find the sub-folder of ``base_dir`` whose name, with its leading
         numeric prefix removed, equals ``name``.
      2. Pick the last workbook of that folder in sorted name order
         (assumes file names sort chronologically -- TODO confirm).
      3. Read the ``机构人员情况`` sheet.
      4. Extract the three education rows.

    Args:
        name: Company name to look for.
        base_dir: Folder holding one sub-folder per company. Defaults to the
            path that used to be hard-coded.

    Returns:
        dict: ``{'本科': int, '研究生': int, '大专及以下': int}``, or an empty
        dict when no matching, non-empty company folder exists.
    """
    edu_dict = dict()
    for entry in os.listdir(base_dir):
        # Folder names carry a numeric prefix plus one separator character.
        company = re.sub(r'[0-9]+.', '', entry)
        if company != name:
            continue
        company_dir = os.path.join(base_dir, entry)
        # os.listdir order is arbitrary, so sort before taking the last file.
        workbooks = sorted(os.listdir(company_dir))
        if not workbooks:
            continue
        file_path = os.path.join(company_dir, workbooks[-1])
        edu_data = pd.read_excel(file_path, sheet_name='机构人员情况')
        # Drop fully empty columns, then every row that has any empty cell.
        edu_data = edu_data.dropna(axis=1, how='all')
        edu_data = edu_data.dropna(axis=0, how='any')
        edu_data.loc[:, '融资担保公司及人员情况'] = edu_data['融资担保公司及人员情况'].str.strip()
        edu_data = edu_data.set_index('融资担保公司及人员情况')
        edu_dict['本科'] = int(edu_data.loc['本科', 'Unnamed: 2'])
        edu_dict['研究生'] = int(edu_data.loc['其中:研究生', 'Unnamed: 2'])
        edu_dict['大专及以下'] = int(edu_data.loc['大专及以下', 'Unnamed: 2'])
    return edu_dict
tyc_shareholders = get_shareholders(basic['企业名称'])
employee_education = get_education(basic['企业名称'])
executives, shareholders = ExcelSheetParser(sheet=sheet).parse_sheet2()
shareholders = shareholders if shareholders else tyc_shareholders
basic_business_info.shareholder_information = list()
basic_business_info.executive_information = list()
@ -50,98 +201,180 @@ class BasicBusinessInfoImpl(object):
)
basic_business_info.shareholder_information.append(shareholder)
elif sheet.name == '经营情况':
items = ExcelSheetParser(sheet=sheet).parse_sheet3()
for item in items:
financial_data = FinancialData()
financial_data.cid = new_cid
financial_data.company_name = basic_business_info.company_name
financial_data.report_period = item["报告期"]
financial_data.appendix_sheet = SpecObject.set_specify_instance(
instance=FinancialData.AppendixSheet,
data=item["补充数据表"]
)
financial_data_list.append(financial_data)
basic_business_info.employee_education = SpecObject.set_specify_instance(
instance=BasicBusinessInfo.EmployeeEducation,
data=employee_education
)
elif sheet.name == '客户信息':
items = ExcelSheetParser(sheet=sheet).parse_sheet4()
# 企业信息
data = ExcelSheetParser(sheet=work_book.sheet_by_name('企业信息')).parse_sheet1()
# 客户信息
client_info = ExcelSheetParser(sheet=work_book.sheet_by_name('客户信息')).parse_sheet4()
# 区域分布
area_info = ExcelSheetParser(sheet=work_book.sheet_by_name('区域分布')).parse_sheet5()
# 行业分布
industry_info = ExcelSheetParser(sheet=work_book.sheet_by_name('行业分布')).parse_sheet5()
# 担保金额分布
guarantee_amount = ExcelSheetParser(sheet=work_book.sheet_by_name('担保金额分布')).parse_sheet6()
for item in items:
customer_info = CustomerInfo()
customer_info.cid = new_cid
customer_info.company_name = basic_business_info.company_name
customer_info.set_instance(data=item)
customer_info_list.append(customer_info)
def tyc_basic_info():
"""调用天眼查api保存工商信息"""
elif sheet.name == '区域分布':
items = ExcelSheetParser(sheet=sheet).parse_sheet5()
def tyc_api(name):
"""天眼查接口调用"""
url = "http://api.fecribd.com/api/tyc/basic_info"
headers = {'token': 'uzdq51N4!I0%HY4sCaQ!aeCSIDIVIdAM'}
parameter = {"企业名称": name}
res = requests.post(url=url, headers=headers, data=json.dumps(parameter))
return res
for item in items:
region_distribution = RegionalDistribution()
region_distribution.cid = new_cid
region_distribution.company_name = basic_business_info.company_name
region_distribution.set_instance(data=item)
region_distribution_list.append(region_distribution)
for client in client_info:
company = client['客户名称']
# 查询数据是否存在
record_ = DB_TYC.find_single_data(
'公司背景',
'基本信息',
{"企业名称": company},
['企业名称', '基本信息']
)
if not record_:
# 不存在则调用接口抓取
result = tyc_api(company)
if result.status_code == 200:
record = DB_TYC.find_single_data(
'公司背景',
'基本信息',
{"企业名称": company},
['企业名称', '基本信息']
)
records_dict[record['企业名称']] = record['基本信息']
else:
records_dict[record_['企业名称']] = record_['基本信息']
elif sheet.name == '行业分布':
items = ExcelSheetParser(sheet=sheet).parse_sheet5()
def history_data():
    """Build the historical-data objects from the parsed sheets.

    Fills ``customer_info_list``, ``region_distribution_list``,
    ``industry_distribution_list`` and ``guarantee_distribution_list`` from
    ``client_info``, ``area_info``, ``industry_info`` and
    ``guarantee_amount``. Every object is stamped with ``new_cid`` and the
    company name of ``basic_business_info``.
    """

    def stamped(obj):
        """Set the owning company's id and name on ``obj`` and return it."""
        obj.cid = new_cid
        obj.company_name = basic_business_info.company_name
        return obj

    balance_cls = GuaranteedBalanceDistribution.GuaranteeBalance

    # Customer info: a textual balance is stored as None.
    for row in client_info:
        if isinstance(row['在保余额'], str):
            row['在保余额'] = None
        customer = stamped(CustomerInfo())
        customer.set_instance(data=row)
        customer_info_list.append(customer)

    # Regional distribution
    for row in area_info:
        region = stamped(RegionalDistribution())
        region.set_instance(data=row)
        region_distribution_list.append(region)

    # Industry distribution
    for row in industry_info:
        branch = stamped(IndustryDistribution())
        branch.set_instance(data=row)
        industry_distribution_list.append(branch)

    # Guarantee amount distribution: three sub-objects per reporting period.
    for row in guarantee_amount:
        dist = stamped(GuaranteedBalanceDistribution())
        dist.report_period = row["报告期"]
        dist.guarantee_account = SpecObject.set_specify_instance(
            instance=balance_cls, data=row["担保户数"])
        dist.guarantee_number = SpecObject.set_specify_instance(
            instance=balance_cls, data=row["担保笔数"])
        dist.guarantee_liability_balance = SpecObject.set_specify_instance(
            instance=balance_cls, data=row["担保责任余额"])
        guarantee_distribution_list.append(dist)
for item in items:
industry_distribution = IndustryDistribution()
industry_distribution.cid = new_cid
industry_distribution.company_name = basic_business_info.company_name
industry_distribution.set_instance(data=item)
industry_distribution_list.append(industry_distribution)
def guarantee_data():
"""担保信息"""
for info in client_info:
guarantee_info = GuaranteeInfo()
guarantee_info.cid = new_cid
guarantee_info.guarantee_id = CompanyUtils.make_new_cid()
guarantee_info.warrantee = info['客户名称']
guarantee_info.guarantee_type = '融资担保'
guarantee_info.guarantee_method = '银行借款担保'
guarantee_info.guarantee_balance = None if isinstance(info['在保余额'], str) else info['在保余额']
guarantee_info.release_amount = 0
guarantee_info.liability_guarantee_balance = info['融资担保责任余额']
guarantee_info.guarantee_start_date = info['担保起始日']
guarantee_info.guarantee_end_date = info['担保截止日']
guarantee_info.remark = None
try:
guarantee_info.project_name = records_dict[info['客户名称']]['alias'] + (
info['担保起始日'].replace('-', ''))
guarantee_info.industry = records_dict[info['客户名称']]['industry']
if '公司' in info['客户名称']:
loc_list = list()
loc_list.append(info['客户名称'])
province = records_dict[info['客户名称']]['base']
if province == '':
guarantee_info.province = None
else:
guarantee_info.province = [v for k, v in p_map.items() if k == province][0]
guarantee_info.city = records_dict[info['客户名称']]['city']
else:
guarantee_info.province = data['所在省份']
guarantee_info.city = data['所在城市']
except KeyError:
guarantee_info.project_name = info['客户名称'] + (
info['担保起始日'].replace('-', ''))
elif sheet.name == '担保金额分布':
items = ExcelSheetParser(sheet=sheet).parse_sheet6()
if '公司' in info['客户名称']:
loc_list = list()
loc_list.append(info['客户名称'])
df = cpca.transform(loc_list)
guarantee_info.province = df.loc[0, '']
guarantee_info.city = df.loc[0, '']
else:
guarantee_info.province = data['所在省份']
guarantee_info.city = data['所在城市']
guarantee_info_list.append(guarantee_info)
for item in items:
guarantee_distribution = GuaranteedBalanceDistribution()
guarantee_distribution.cid = new_cid
guarantee_distribution.company_name = basic_business_info.company_name
guarantee_distribution.report_period = item["报告期"]
guarantee_distribution.guarantee_account = SpecObject.set_specify_instance(
instance=GuaranteedBalanceDistribution.GuaranteeBalance,
data=item["担保户数"]
)
guarantee_distribution.guarantee_number = SpecObject.set_specify_instance(
instance=GuaranteedBalanceDistribution.GuaranteeBalance,
data=item["担保笔数"]
)
guarantee_distribution.guarantee_liability_balance = SpecObject.set_specify_instance(
instance=GuaranteedBalanceDistribution.GuaranteeBalance,
data=item["担保责任余额"]
)
guarantee_distribution_list.append(guarantee_distribution)
def __main__():
tyc_basic_info()
history_data()
guarantee_data()
__main__()
elif sheet.name == '资产负债表':
items_ = ExcelSheetParser(sheet=work_book.sheets()[2]).parse_sheet3()
items = ExcelSheetParser(sheet=sheet).parse_sheet7()
# 数据源
balance = ExcelSheetParser(sheet=work_book.sheet_by_name('资产负债表')).parse_sheet7()
income = ExcelSheetParser(sheet=work_book.sheet_by_name('利润表')).parse_sheet8()
appendix = ExcelSheetParser(sheet=work_book.sheet_by_name('补充数据表')).parse_sheet3()
length = len(items)
length = len(balance)
balance_sheet = FinancialData().BalanceSheet()
current_assets_dict = [item for item in balance_sheet.CurrentAssets.fields_map.values()]
non_current_assets_dict = [item for item in balance_sheet.NonCurrentAssets.fields_map.values()]
current_liabilities_dict = [item for item in balance_sheet.CurrentLiabilities.fields_map.values()]
non_current_liabilities_dict = [item for item in balance_sheet.NonCurrentLiabilities.fields_map.values()]
owner_equity_dict = [item for item in balance_sheet.OwnerEquity.fields_map.values()]
def balance_script(sheet_data):
"""资产负债表"""
# 各类对象key
bs_obj = financial_data.BalanceSheet()
current_assets_dict = [_item for _item in bs_obj.CurrentAssets.fields_map.values()]
non_current_assets_dict = [_item for _item in bs_obj.NonCurrentAssets.fields_map.values()]
current_liabilities_dict = [_item for _item in bs_obj.CurrentLiabilities.fields_map.values()]
non_current_liabilities_dict = [_item for _item in bs_obj.NonCurrentLiabilities.fields_map.values()]
owner_equity_dict = [_item for _item in bs_obj.OwnerEquity.fields_map.values()]
for num in range(length):
year = items[num]['年度']
# 资产负债表分组
current_assets_dict_ = dict()
non_current_assets_dict_ = dict()
current_liabilities_dict_ = dict()
non_current_liabilities_dict_ = dict()
owner_equity_dict_ = dict()
balance = items_[num]['资产负债表']
for k, v in balance.items():
if v:
items[num][k] = v
for key, value in items[num].items():
for key, value in sheet_data.items():
if key in current_assets_dict:
current_assets_dict_[key] = value
elif key in non_current_assets_dict:
@ -153,54 +386,58 @@ class BasicBusinessInfoImpl(object):
elif key in owner_equity_dict:
owner_equity_dict_[key] = value
balance_sheet.current_assets = SpecObject.set_specify_instance(
instance=balance_sheet.CurrentAssets,
bs_obj.current_assets = SpecObject.set_specify_instance(
instance=bs_obj.CurrentAssets,
data=current_assets_dict_
)
balance_sheet.non_current_assets = SpecObject.set_specify_instance(
instance=balance_sheet.NonCurrentAssets,
bs_obj.non_current_assets = SpecObject.set_specify_instance(
instance=bs_obj.NonCurrentAssets,
data=non_current_assets_dict_
)
balance_sheet.current_liabilities = SpecObject.set_specify_instance(
instance=balance_sheet.CurrentLiabilities,
bs_obj.total_assets = sheet_data['资产总计']
bs_obj.current_liabilities = SpecObject.set_specify_instance(
instance=bs_obj.CurrentLiabilities,
data=current_liabilities_dict_
)
balance_sheet.non_current_liabilities = SpecObject.set_specify_instance(
instance=balance_sheet.NonCurrentLiabilities,
bs_obj.non_current_liabilities = SpecObject.set_specify_instance(
instance=bs_obj.NonCurrentLiabilities,
data=non_current_liabilities_dict_
)
balance_sheet.owner_equity = SpecObject.set_specify_instance(
instance=balance_sheet.OwnerEquity,
bs_obj.total_liabilities = sheet_data['负债合计']
bs_obj.owner_equity = SpecObject.set_specify_instance(
instance=bs_obj.OwnerEquity,
data=owner_equity_dict_
)
return bs_obj
for financial in financial_data_list:
if year == financial.report_period:
financial.balance_sheet = balance_sheet
def income_script(sheet_data):
    """Build the income-statement object from one parsed column.

    ``sheet_data`` is modified in place: its ``报告期`` entry is removed
    before the remaining items are handed to ``SpecObject``.
    """
    del sheet_data['报告期']
    return SpecObject.set_specify_instance(
        instance=financial_data.IncomeSheet,
        data=sheet_data,
    )
elif sheet.name == '利润表':
items_ = ExcelSheetParser(sheet=work_book.sheets()[2]).parse_sheet3()
items = ExcelSheetParser(sheet=sheet).parse_sheet8()
length = len(items)
def appendix_script(sheet_data):
    """Build the supplementary-data object from one parsed column.

    ``sheet_data`` is modified in place: its ``报告期`` entry is removed
    before the remaining items are handed to ``SpecObject``.
    """
    del sheet_data['报告期']
    return SpecObject.set_specify_instance(
        instance=financial_data.AppendixSheet,
        data=sheet_data,
    )
for num in range(length):
year = items[num]['年度']
items[num].pop('年度')
income = items_[num]['利润表']
income['营业总收入'] = income['营业收入']
income.pop('营业收入')
for k, v in income.items():
if v:
items[num][k] = v
income_sheet = SpecObject.set_specify_instance(
instance=FinancialData.IncomeSheet,
data=items[num]
)
for financial in financial_data_list:
if year == financial.report_period:
financial.income_sheet = income_sheet
# 财务数据obj
financial_data = FinancialData()
financial_data.cid = new_cid
financial_data.company_name = basic_business_info.company_name
financial_data.report_period = balance[num]['报告期']
financial_data.balance_sheet = balance_script(balance[num])
financial_data.income_sheet = income_script(income[num])
financial_data.appendix_sheet = appendix_script(appendix[num])
financial_data_list.append(financial_data)
DB_GUA.insert_single_data(
"企业数据",
@ -214,26 +451,68 @@ class BasicBusinessInfoImpl(object):
[item.fields_toggle() for item in financial_data_list]
)
DB_GUA.insert_many_data(
"企业数据",
"客户信息",
[item.fields_toggle() for item in customer_info_list]
)
if guarantee_info_list:
DB_GUA.insert_many_data(
"企业数据",
"担保数据",
[item.fields_toggle() for item in guarantee_info_list]
)
DB_GUA.insert_many_data(
"企业数据",
"担保区域分布",
[item.fields_toggle() for item in region_distribution_list]
)
if customer_info_list:
DB_GUA.insert_many_data(
"历史数据",
"客户信息",
[item.fields_toggle() for item in customer_info_list]
)
DB_GUA.insert_many_data(
"企业数据",
"担保行业分布",
[item.fields_toggle() for item in industry_distribution_list]
)
if region_distribution_list:
DB_GUA.insert_many_data(
"历史数据",
"担保区域分布",
[item.fields_toggle() for item in region_distribution_list]
)
DB_GUA.insert_many_data(
"企业数据",
"担保金额分布",
[item.fields_toggle() for item in guarantee_distribution_list]
)
if industry_distribution_list:
DB_GUA.insert_many_data(
"历史数据",
"担保行业分布",
[item.fields_toggle() for item in industry_distribution_list]
)
if guarantee_distribution_list:
DB_GUA.insert_many_data(
"历史数据",
"担保金额分布",
[item.fields_toggle() for item in guarantee_distribution_list]
)
if __name__ == '__main__':
    # Batch import: walk the folder, derive the company name from each .xlsx
    # file name and import every workbook whose company is not stored yet.
    for root_dir, sub_dir, files in os.walk(r"E:\Project\text"):
        for file in files:
            if not file.endswith(".xlsx"):
                continue

            # Strip the numeric prefix (plus one separator) and the extension.
            company_name = re.sub(r'[0-9]+.', '', file).replace('.xlsx', '')

            # The derived name may be either the full name or the short name.
            record_name = DB_GUA.find_single_data(
                '企业数据', '工商信息', {'企业名称': company_name}, ['企业ID'])
            short_name = DB_GUA.find_single_data(
                '企业数据', '工商信息', {'企业简称': company_name}, ['企业ID'])

            if record_name or short_name:
                print('数据已存在', file)
                continue

            file_name = os.path.join(root_dir, file)
            impl = BasicBusinessInfoImpl()
            impl.parse_excel_and_create_company(file_name)
            print('储存完成', file)

View File

@ -9,9 +9,9 @@ class BasicBusinessInfo(SpecObject):
"""股东信息"""
name = ValidateAttr(field="name", type=str)
ratio = ValidateAttr(field="ratio", type=str)
nature = ValidateAttr(field="nature", type=str)
quantity = ValidateAttr(field="quantity", type=int, default=None)
ratio = ValidateAttr(field="ratio", type=[str, int, float], default=None)
nature = ValidateAttr(field="nature", type=str, default=None)
quantity = ValidateAttr(field="quantity", type=[int, float, str], default=None)
fields_map = {
"name": "股东名称",
@ -24,7 +24,7 @@ class BasicBusinessInfo(SpecObject):
"""高管信息"""
name = ValidateAttr(field="name", type=str)
position = ValidateAttr(field="position", type=str)
position = ValidateAttr(field="position", type=str, default=None)
education = ValidateAttr(field="education", type=str, default=None)
birth_data = ValidateAttr(field="birth_data", type=str, default=None)
political_status = ValidateAttr(field="political_status", type=str, default=None)
@ -39,35 +39,50 @@ class BasicBusinessInfo(SpecObject):
"industry_experience": "行业经验"
}
class EmployeeEducation(SpecObject):
"""Employee education breakdown of a company."""
# Three attributes, each declared with type=int and no default.
college_degree = ValidateAttr(field='college_degree', type=int)
bachelor_degree = ValidateAttr(field='bachelor_degree', type=int)
postgraduate_degree = ValidateAttr(field='postgraduate_degree', type=int)
# Attribute name -> Chinese label ("junior college and below", "bachelor", "postgraduate").
fields_map = {
"college_degree": "大专及以下",
"bachelor_degree": "本科",
"postgraduate_degree": "研究生"
}
cid = ValidateAttr(field="cid", type=str)
company_name = ValidateAttr(field="company_name", type=str)
enterprise_abbreviation = ValidateAttr(field="enterprise_abbreviation", type=str)
type_of_enterprise = ValidateAttr(field="type_of_enterprise", type=str)
warranty_type = ValidateAttr(field="warranty_type", type=str)
unified_social_credit_code = ValidateAttr(field="unified_social_credit_code", type=str)
organization_code = ValidateAttr(field="organization_code", type=str)
date_of_establishment = ValidateAttr(field="date_of_establishment", type=str)
registered_capital = ValidateAttr(field="registered_capital", type=[float, int])
paid_in_capital = ValidateAttr(field="paid_in_capital", type=[float, int])
business_scope = ValidateAttr(field="business_scope", type=str)
province = ValidateAttr(field="province", type=str)
city = ValidateAttr(field="city", type=str)
registered_address = ValidateAttr(field="registered_address", type=str)
contact_address = ValidateAttr(field="contact_address", type=str)
e_mail = ValidateAttr(field="e_mail", type=str)
enterprise_abbreviation = ValidateAttr(field="enterprise_abbreviation", type=str, default=None)
type_of_enterprise = ValidateAttr(field="type_of_enterprise", type=str, default=None)
nature_of_enterprise = ValidateAttr(field="nature_of_enterprise", type=str, default=None)
warranty_type = ValidateAttr(field="warranty_type", type=str, default=None)
unified_social_credit_code = ValidateAttr(field="unified_social_credit_code", type=str, default=None)
organization_code = ValidateAttr(field="organization_code", type=str, default=None)
date_of_establishment = ValidateAttr(field="date_of_establishment", type=str, default=None)
registered_capital = ValidateAttr(field="registered_capital", type=[float, int], default=None)
paid_in_capital = ValidateAttr(field="paid_in_capital", type=[float, int], default=None)
business_scope = ValidateAttr(field="business_scope", type=str, default=None)
province = ValidateAttr(field="province", type=str, default=None)
city = ValidateAttr(field="city", type=str, default=None)
registered_address = ValidateAttr(field="registered_address", type=str, default=None)
contact_address = ValidateAttr(field="contact_address", type=str, default=None)
e_mail = ValidateAttr(field="e_mail", type=str, default=None)
contact_number = ValidateAttr(field="contact_number", type=str, default=None)
number_of_participants = ValidateAttr(field="number_of_participants", type=int)
number_of_participants = ValidateAttr(field="number_of_participants", type=int, default=None)
company_positioning = ValidateAttr(field="company_positioning", type=str, default=None)
legal_representative = ValidateAttr(field="legal_representative", type=str)
the_actual_controller = ValidateAttr(field="the_actual_controller", type=str)
legal_representative = ValidateAttr(field="legal_representative", type=str, default=None)
the_actual_controller = ValidateAttr(field="the_actual_controller", type=str, default=None)
shareholder_information = ValidateAttr(field="shareholder_information", instance_list=ShareholderInformation)
executive_information = ValidateAttr(field="executive_information", instance_list=ExecutiveInformation)
employee_education = ValidateAttr(field="employee_education", type=EmployeeEducation)
fields_map = {
"cid": "企业ID",
"company_name": "企业名称",
"enterprise_abbreviation": "企业简称",
"type_of_enterprise": "企业类型",
"nature_of_enterprise": "企业性质",
"warranty_type": "担保类型",
"unified_social_credit_code": "统一社会信用代码",
"organization_code": "组织代码",
@ -86,7 +101,8 @@ class BasicBusinessInfo(SpecObject):
"legal_representative": "法定代表人",
"the_actual_controller": "实际控制人",
"shareholder_information": "股东信息",
"executive_information": "高管信息"
"executive_information": "高管信息",
"employee_education": "员工学历",
}
@ -182,7 +198,6 @@ class FinancialData(SpecObject):
deferred_tax_assets = ValidateAttr(field="deferred_tax_assets", type=[float, int], default=None),
other_non_current_assets = ValidateAttr(field="other_non_current_assets", type=[float, int], default=None),
total_non_current_assets = ValidateAttr(field="total_non_current_assets", type=[float, int], default=None),
total_assets = ValidateAttr(field="total_assets", type=[float, int], default=None)
fields_map = {
"available_for_sale_financial_assets": "可供出售金融资产",
@ -201,8 +216,7 @@ class FinancialData(SpecObject):
"long_term_prepaid_expenses": "长期待摊费用",
"deferred_tax_assets": "递延所得税资产",
"other_non_current_assets": "其他非流动资产",
"total_non_current_assets": "非流动资产合计",
"total_assets": "资产总计"
"total_non_current_assets": "非流动资产合计"
}
class CurrentLiabilities(SpecObject):
@ -256,8 +270,7 @@ class FinancialData(SpecObject):
deferred_income = ValidateAttr(field="deferred_income", type=[float, int], default=None),
deferred_tax_liabilities = ValidateAttr(field="deferred_tax_liabilities", type=[float, int], default=None),
other_non_current_liabilities = ValidateAttr(field="other_non_current_liabilities", type=[float, int], default=None),
total_non_current_liabilities = ValidateAttr(field="total_non_current_liabilities", type=[float, int], default=None),
total_liabilities = ValidateAttr(field="total_liabilities", type=[float, int], default=None)
total_non_current_liabilities = ValidateAttr(field="total_non_current_liabilities", type=[float, int], default=None)
fields_map = {
"long_term_loan": "长期借款",
@ -272,7 +285,6 @@ class FinancialData(SpecObject):
"deferred_tax_liabilities": "递延所得税负债",
"other_non_current_liabilities": "其他非流动负债",
"total_non_current_liabilities": "非流动负债合计",
"total_liabilities": "负债合计"
}
class OwnerEquity(SpecObject):
@ -303,15 +315,19 @@ class FinancialData(SpecObject):
current_assets = ValidateAttr(field="current_assets", type=CurrentAssets)
non_current_assets = ValidateAttr(field="non_current_assets", type=NonCurrentAssets)
total_assets = ValidateAttr(field="total_assets", type=[float, int], default=None),
current_liabilities = ValidateAttr(field="current_liabilities", type=CurrentLiabilities)
non_current_liabilities = ValidateAttr(field="non_current_liabilities", type=NonCurrentLiabilities)
total_liabilities = ValidateAttr(field="total_liabilities", type=[float, int], default=None),
owner_equity = ValidateAttr(field="owner_equity", type=OwnerEquity)
fields_map = {
"current_assets": "流动资产",
"non_current_assets": "非流动资产",
"total_assets": "资产总计",
"current_liabilities": "流动负债",
"non_current_liabilities": "非流动负债",
"total_liabilities": "负债合计",
"owner_equity": "所有者权益"
}
@ -688,3 +704,57 @@ class RatingRecord(SpecObject):
"rating_date": "评级日期",
"rating_report": "评级报告"
}
class GuaranteeInfo(SpecObject):
"""Guarantee information: one guarantee record of a company."""
# Identification and descriptive fields; declared without a default.
cid = ValidateAttr(field="cid", type=str)
guarantee_id = ValidateAttr(field="guarantee_id", type=str)
warrantee = ValidateAttr(field="warrantee", type=str)
guarantee_type = ValidateAttr(field="guarantee_type", type=str)
project_name = ValidateAttr(field="project_name", type=str)
guarantee_method = ValidateAttr(field="guarantee_method", type=str)
# Amounts (labelled in units of 10,000 yuan in fields_map); default to None.
guarantee_balance = ValidateAttr(field="guarantee_balance", type=[float, int], default=None)
release_amount = ValidateAttr(field="release_amount", type=[float, int], default=None)
liability_guarantee_balance = ValidateAttr(field="liability_guarantee_balance", type=[float, int], default=None)
# Dates are checked by Validate.date_format rather than by a type.
guarantee_start_date = ValidateAttr(field="guarantee_start_date", func=Validate.date_format)
guarantee_end_date = ValidateAttr(field="guarantee_end_date", func=Validate.date_format)
# Optional location, industry and remark fields; default to None.
province = ValidateAttr(field="province", type=str, default=None)
city = ValidateAttr(field="city", type=str, default=None)
industry = ValidateAttr(field="industry", type=str, default=None)
remark = ValidateAttr(field="remark", type=str, default=None)
# Attribute name -> Chinese label.
fields_map = {
"cid": "企业ID",
"guarantee_id": "担保ID",
"warrantee": "被担保人",
"guarantee_type": "担保类型",
"project_name": "项目名称",
"guarantee_method": "担保方式",
"guarantee_balance": "担保余额(万元)",
"release_amount": "解除金额(万元)",
"liability_guarantee_balance": "责任担保余额(万元)",
"guarantee_start_date": "担保开始日期",
"guarantee_end_date": "担保结束日期",
"province": "省份",
"city": "地级市",
"industry": "行业",
"remark": "备注"
}
class CompensationInfo(SpecObject):
    """Compensation information: one compensation record of a project."""

    compensation_id = ValidateAttr(field="compensation_id", type=str)
    project_name = ValidateAttr(field="project_name", type=str)
    # Fix: ``field`` now equals the attribute name, as in every other
    # ValidateAttr declaration; the former "project_amount" /
    # "project_recovery_amount" had no entry in ``fields_map``.
    compensation_amount = ValidateAttr(field="compensation_amount", type=[int, float])
    compensation_recovery_amount = ValidateAttr(field="compensation_recovery_amount", type=[int, float])
    # The date is checked by Validate.date_format rather than by a type.
    compensation_date = ValidateAttr(field="compensation_date", func=Validate.date_format)

    # Attribute name -> Chinese label.
    # NOTE(review): "compensation_id" is labelled "担保ID" (guarantee ID), the
    # same label GuaranteeInfo uses for guarantee_id -- confirm it is intended.
    fields_map = {
        "compensation_id": "担保ID",
        "project_name": "项目名称",
        "compensation_amount": "代偿金额(万元)",
        "compensation_recovery_amount": "代偿回收金额(万元)",
        "compensation_date": "代偿日期"
    }

View File

@ -12,7 +12,6 @@ company_route = Blueprint('company', __name__)
def create_route(**kwargs):
"""新增公司"""
try:
company_name = request.form['company_name']
file = request.files['file']
impl = BasicBusinessInfoImpl()
impl.parse_excel_and_create_company(file)

View File

@ -32,9 +32,12 @@ class ExcelParserUtil(object):
if type(days) == str: # 特殊情况:比如某些高管信息出生日期只有年份,则返回年份
return days
else:
delta = timedelta(days=days) # 将1899-12-30转化为可以计算的时间格式并加上要转化的日期戳
_date = datetime.strptime('1899/12/30', '%Y/%m/%d') + delta
return datetime.strftime(_date, '%Y年%m月%d')
try:
delta = timedelta(days=days) # 将1899-12-30转化为可以计算的时间格式并加上要转化的日期戳
_date = datetime.strptime('1899/12/30', '%Y/%m/%d') + delta
return datetime.strftime(_date, '%Y年%m月%d')
except TypeError:
return '-'
# 转换excel日期
@staticmethod
@ -123,24 +126,15 @@ class ExcelSheetParser(object):
return_data2.append(data)
return return_data1, return_data2
# 解析经营情况
# 解析补充数据表
def parse_sheet3(self):
cols_tag = self._sheet.col_values(1)[1:]
cols_tag = self._sheet.col_values(0)
return_data = []
for i in range(2, 6):
list_0 = ExcelParserUtil.list_to_none(self._sheet.col_values(i)[1:])
for i in range(1, 5):
list_0 = ExcelParserUtil.list_to_none(self._sheet.col_values(i))
list_1 = ExcelParserUtil.list_decimal(list_0)
# 解析返回数据
parsed_data = dict()
# 报告期
parsed_data["报告期"] = list_1[0]
# 资产负债表
parsed_data["资产负债表"] = json.loads(json.dumps(dict(zip(cols_tag[43:67], list_1[43:67]))))
# 利润表
parsed_data["利润表"] = json.loads(json.dumps(dict(zip(cols_tag[28:43], list_1[28:43]))))
# 补充数据表
parsed_data["补充数据表"] = json.loads(json.dumps(dict(zip(cols_tag[1:28], list_1[1:28]))))
return_data.append(parsed_data)
data = json.loads(json.dumps(dict(zip(cols_tag, list_1))))
return_data.append(data)
return return_data
# 解析客户信息
@ -191,10 +185,10 @@ class ExcelSheetParser(object):
# 解析资产负债表
def parse_sheet7(self):
cols_tag = self._sheet.col_values(1)[1:]
cols_tag = self._sheet.col_values(0)
return_data = []
for i in range(2, 6):
list_0 = ExcelParserUtil.list_to_none(self._sheet.col_values(i)[1:])
for i in range(1, 5):
list_0 = ExcelParserUtil.list_to_none(self._sheet.col_values(i))
list_1 = ExcelParserUtil.list_decimal(list_0)
data = json.loads(json.dumps(dict(zip(cols_tag, list_1))))
data.pop('流动资产:')
@ -207,10 +201,10 @@ class ExcelSheetParser(object):
# 解析利润表
def parse_sheet8(self):
cols_tag = list(map(lambda x: x.strip().replace('减:', '').replace('加:', ''), self._sheet.col_values(1)[1:]))
cols_tag = list(map(lambda x: x.strip().replace('减:', '').replace('加:', ''), self._sheet.col_values(0)))
return_data = []
for i in range(2, 6):
list_0 = ExcelParserUtil.list_to_none(self._sheet.col_values(i)[1:])
for i in range(1, 5):
list_0 = ExcelParserUtil.list_to_none(self._sheet.col_values(i))
list_1 = ExcelParserUtil.list_decimal(list_0)
data = json.loads(json.dumps(dict(zip(cols_tag, list_1))))
return_data.append(data)

View File

@ -0,0 +1,36 @@
# Lookup table: short region code -> full Chinese name of the province-level
# region (provinces, autonomous regions, municipalities and special
# administrative regions; 34 entries).
# NOTE(review): the codes appear to be the values of the 'base' field of the
# tyc basic-info record, which the importer looks up here -- confirm.
p_map = {
"heb": "河北省",
"sx": "山西省",
"ln": "辽宁省",
"jl": "吉林省",
"hlj": "黑龙江省",
"js": "江苏省",
"zj": "浙江省",
"ah": "安徽省",
"fj": "福建省",
"jx": "江西省",
"sd": "山东省",
"hen": "河南省",
"hub": "湖北省",
"hun": "湖南省",
"gd": "广东省",
"han": "海南省",
"sc": "四川省",
"gz": "贵州省",
"yn": "云南省",
"snx": "陕西省",
"gs": "甘肃省",
"qh": "青海省",
"tw": "台湾省",
"nmg": "内蒙古自治区",
"gx": "广西壮族自治区",
"xz": "西藏自治区",
"nx": "宁夏回族自治区",
"xj": "新疆维吾尔自治区",
"bj": "北京市",
"tj": "天津市",
"sh": "上海市",
"cq": "重庆市",
"xg": "香港特别行政区",
"am": "澳门特别行政区"
}

View File

@ -1,6 +1,6 @@
markupsafe==2.0.1
itsdangerous==2.0.1
flask
flask~=2.1.2
flask_cors
gunicorn
gevent
@ -8,4 +8,6 @@ pymongo~=3.11.0
requests~=2.25.1
pandas~=1.3.5
pycryptodome
xlrd~=1.2.0
xlrd~=1.2.0
Werkzeug~=2.1.2
cpca~=0.5.5