工商信息 股东信息 数据清洗

This commit is contained in:
王思川 2022-04-11 04:57:20 +08:00
parent 409e51765f
commit 9f0bd4e463
6 changed files with 247 additions and 28 deletions

View File

@ -0,0 +1,107 @@
import json
import requests
from CompanyData.CompanyDataObj import CompanyData, BasicInfo, ShareHolder
from DBHelper.MongoHelper import MongoHelper
from Utils.ErrorUtil import LogConditionCheckFailed
class CompanyDataImpl(CompanyData):
    """Company data implementation that pulls and cleans Tianyancha (TYC) data.

    Class attributes:
        db_tyc: MongoHelper created with the "tyc" configuration key; the
            raw TYC documents are read from it.
        db_tfse: MongoHelper created with the "tfse_v0.21" configuration key;
            cleaned company data is upserted into it.
    """

    db_tyc = MongoHelper("tyc")
    db_tfse = MongoHelper("tfse_v0.21")

    def drag_data_from_tyc_api(self, timeout=30):
        """Ask the TYC proxy API to pull data for this company.

        Args:
            timeout: Seconds to wait for the HTTP request. ``requests`` never
                times out by default, so an explicit limit keeps an
                unresponsive endpoint from blocking the caller forever.

        Returns:
            True when the API answers with HTTP 200, otherwise False.
        """
        url = "http://api.fecribd.com/api/tyc/drag_data"
        # NOTE(review): the API token is hard-coded in source; consider
        # loading it from configuration instead.
        headers = {'token': "uzdq51N4!I0%HY4sCaQ!aeCSIDIVIdAM"}
        data = {"企业名称": self.name}
        res = requests.post(
            url=url,
            headers=headers,
            data=json.dumps(data),
            timeout=timeout,
        )
        return res.status_code == 200

    def prepare_company_init_data(self):
        """Prepare the initial company data from the raw TYC collections.

        Only the shareholder step runs at the moment; the business
        registration step is defined but intentionally left disabled.

        Raises:
            LogConditionCheckFailed: If the shareholder document cannot be
                subscripted or has no 'result' entry.
        """

        def blank_to_none(value):
            """Map the empty string to None and return anything else as is."""
            return None if value == '' else value

        def prepare_business_data():
            """Fill ``self.basic_info`` from the TYC basic-info document and
            upsert the serialized company data."""
            data = self.db_tyc.find_single_column(
                "公司背景",
                "基本信息",
                {"企业名称": self.name},
                "基本信息"
            )
            # (BasicInfo attribute, key in the TYC document), in assignment order.
            field_sources = (
                ("status", 'regStatus'),
                ("legal_person", 'legalPersonName'),
                ("company_type", 'companyOrgType'),
                ("taxpayer_id", 'taxNumber'),
                ("business_scope", 'businessScope'),
                ("registered_capital", 'regCapital'),
                ("paid_capital", 'actualCapital'),
                ("registered_address", 'regLocation'),
                ("registration_authority", 'regInstitute'),
                ("industry", 'industry'),
                ("staff_size", 'staffNumRange'),
                ("people_insured_num", 'socialStaffNum'),
            )
            basic_info = BasicInfo()
            for attr_name, source_key in field_sources:
                setattr(basic_info, attr_name, data[source_key])
            # The previous conditional yielded "" on both branches, so the
            # value is a plain placeholder until a real rule exists.
            basic_info.micro_company = ""
            self.basic_info = basic_info
            self.db_tfse.upsert_single_data(
                "企业数据",
                "企业数据_更新汇总",
                {"企业ID": self.cid},
                self.dict_to_save()
            )

        def prepare_share_holders():
            """Rebuild ``self.share_holder`` from the TYC shareholder document."""
            self.share_holder = []
            data = self.db_tyc.find_single_column(
                "公司背景",
                "企业股东",
                {"企业名称": self.name},
                "企业股东"
            )
            try:
                results = data['result']
            except (KeyError, TypeError):
                # TypeError covers a lookup result that is not subscriptable
                # (presumably None when no document matches - TODO confirm),
                # so that case is reported the same way as a missing key.
                raise LogConditionCheckFailed("企业股东数据异常", 200)

            for result in results:
                share_holder = ShareHolder()
                share_holder.name = result['name']

                holder_type = result['type']
                if holder_type == 1:
                    share_holder.share_holder_type = "公司"
                elif holder_type == 2:
                    share_holder.share_holder_type = "个人"
                else:
                    share_holder.share_holder_type = "其他"

                # Only the first subscribed-capital entry is used; empty
                # strings are stored as None.
                if result['capital']:
                    subscribed = result['capital'][0]
                    share_holder.share_holding_ratio = blank_to_none(subscribed['percent'])
                    share_holder.subscription_amount = blank_to_none(subscribed['amomon'])
                    share_holder.subscription_date = blank_to_none(subscribed['time'])

                # Every paid-in capital entry contributes one element to each list.
                paid_entries = result['capitalActl']
                if paid_entries:
                    share_holder.paid_amount = [entry['amomon'] for entry in paid_entries]
                    share_holder.payment_method = [entry['paymet'] for entry in paid_entries]
                    share_holder.payment_time = [entry['time'] for entry in paid_entries]

                self.share_holder.append(share_holder)

        # prepare_business_data() is intentionally not executed yet.
        prepare_share_holders()
if __name__ == '__main__':
    # Manual smoke run: clean the data of one sample company and print the
    # dictionary that would be saved.
    sample_company = CompanyDataImpl()
    sample_company.cid = "qqqqqqqq"
    sample_company.name = "远光软件股份有限公司"
    sample_company.prepare_company_init_data()
    serialized = sample_company.dict_to_save()
    print(serialized)

View File

@ -41,12 +41,12 @@ class ShareHolder(SpecObject):
name = ValidateAttr(field='name', type=str) name = ValidateAttr(field='name', type=str)
share_holder_type = ValidateAttr(field='share_holder_type', type=str) share_holder_type = ValidateAttr(field='share_holder_type', type=str)
share_holding_ratio = ValidateAttr(field="share_holding_ratio", type=str) share_holding_ratio = ValidateAttr(field="share_holding_ratio", type=str, default=None)
subscription_amount = ValidateAttr(field="subscription_amount", type=str) subscription_amount = ValidateAttr(field="subscription_amount", type=str, default=None)
subscription_date = ValidateAttr(field="subscription_date", type=str) subscription_date = ValidateAttr(field="subscription_date", type=str, default=None)
paid_amount = ValidateAttr(field="paid_amount", type=list) paid_amount = ValidateAttr(field="paid_amount", type=list, default=[])
payment_method = ValidateAttr(field="payment_method", type=list) payment_method = ValidateAttr(field="payment_method", type=list, default=[])
payment_time = ValidateAttr(field="payment_time", type=list) payment_time = ValidateAttr(field="payment_time", type=list, default=[])
fields_map = { fields_map = {
"name": "股东", "name": "股东",
@ -60,6 +60,102 @@ class ShareHolder(SpecObject):
} }
class MainMembers(SpecObject):
    """Key member of a company (name plus job titles)."""

    # Member name.
    name = ValidateAttr(field="name", type=str)
    # Job titles, declared as a list.
    job_title = ValidateAttr(field="job_title", type=list)

    # Attribute name -> label used as the key in the saved dictionary.
    fields_map = {
        "name": "姓名",
        "job_title": "职务",
    }
class BalanceSheet(SpecObject):
    """Balance sheet figures; every item is declared as a float."""

    # Asset items.
    accounts_receivable = ValidateAttr(field="accounts_receivable", type=float)
    stock = ValidateAttr(field="stock", type=float)
    total_current_assets = ValidateAttr(field="total_current_assets", type=float)
    total_assets = ValidateAttr(field="total_assets", type=float)

    # Liability items.
    short_loan = ValidateAttr(field="short_loan", type=float)
    one_year_liabilities = ValidateAttr(field="one_year_liabilities", type=float)
    total_current_liabilities = ValidateAttr(field="total_current_liabilities", type=float)
    long_term_loan = ValidateAttr(field="long_term_loan", type=float)
    total_liabilities = ValidateAttr(field="total_liabilities", type=float)

    # Owners' equity.
    total_owners_equity = ValidateAttr(field="total_owners_equity", type=float)

    # Attribute name -> label used as the key in the saved dictionary.
    fields_map = {
        "accounts_receivable": "应收账款",
        "stock": "存货",
        "total_current_assets": "流动资产合计",
        "total_assets": "资产总计",
        "short_loan": "短期借款",
        "one_year_liabilities": "一年内到期非流动负债",
        "total_current_liabilities": "流动负债合计",
        "long_term_loan": "长期借款",
        "total_liabilities": "负债合计",
        "total_owners_equity": "所有者权益合计",
    }
class ProfitSheet(SpecObject):
    """Profit statement figures; every item is declared as a float."""

    operating_income = ValidateAttr(field="operating_income", type=float)
    operating_cost = ValidateAttr(field="operating_cost", type=float)
    total_profit = ValidateAttr(field="total_profit", type=float)
    net_profit = ValidateAttr(field="net_profit", type=float)

    # Attribute name -> label used as the key in the saved dictionary.
    fields_map = {
        "operating_income": "营业收入",
        "operating_cost": "营业成本",
        "total_profit": "利润总额",
        "net_profit": "净利润",
    }
class AppendixDataSheet(SpecObject):
    """Supplementary data sheet; every item is declared as a float."""

    rd_expenses = ValidateAttr(field="rd_expenses", type=float)
    interest_disbursement = ValidateAttr(field="interest_disbursement", type=float)
    interest_expense = ValidateAttr(field="interest_expense", type=float)

    # Attribute name -> label used as the key in the saved dictionary.
    # NOTE(review): unlike the other sheets, every label here ends with
    # "异常" ("abnormal") - confirm these are the intended stored keys.
    fields_map = {
        "rd_expenses": "研发费用异常",
        "interest_disbursement": "计入财务费的利息支出异常",
        "interest_expense": "资本化利息支出异常",
    }
class FinancialIndex(SpecObject):
    """Financial indicators; every indicator is declared as a float."""

    roe = ValidateAttr(field="roe", type=float)
    inventory_turnover = ValidateAttr(field="inventory_turnover", type=float)
    interest_multiple = ValidateAttr(field="interest_multiple", type=float)
    accounts_receivable_turnover = ValidateAttr(
        field="accounts_receivable_turnover", type=float
    )
    total_asset_turnover = ValidateAttr(field="total_asset_turnover", type=float)
    total_asset_growth_rate = ValidateAttr(field="total_asset_growth_rate", type=float)
    roa = ValidateAttr(field="roa", type=float)
    technology_investment_ratio = ValidateAttr(
        field="technology_investment_ratio", type=float
    )
    operating_growth_rate = ValidateAttr(field="operating_growth_rate", type=float)
    assets_and_liabilities = ValidateAttr(field="assets_and_liabilities", type=float)
    quick_ratio = ValidateAttr(field="quick_ratio", type=float)

    # Attribute name -> label used as the key in the saved dictionary.
    fields_map = {
        "roe": "净资产收益率",
        "inventory_turnover": "存货周转率",
        "interest_multiple": "已获利息倍数",
        "accounts_receivable_turnover": "应收账款周转率",
        "total_asset_turnover": "总资产周转率",
        "total_asset_growth_rate": "总资产增长率",
        "roa": "总资产报酬率",
        "technology_investment_ratio": "技术投入比率",
        "operating_growth_rate": "营业增长率",
        "assets_and_liabilities": "资产负债率",
        "quick_ratio": "速动比率",
    }
class CompanyData(SpecObject): class CompanyData(SpecObject):
"""企业数据""" """企业数据"""
@ -69,15 +165,15 @@ class CompanyData(SpecObject):
industry_l2 = ValidateAttr(field='industry_l2', type=str) industry_l2 = ValidateAttr(field='industry_l2', type=str)
basic_info = ValidateAttr(field='basic_info', type=BasicInfo) basic_info = ValidateAttr(field='basic_info', type=BasicInfo)
share_holder = ValidateAttr(field='share_holders', instance_list=ShareHolder) share_holder = ValidateAttr(field='share_holders', instance_list=ShareHolder)
main_members = [] main_members = ValidateAttr(field="main_members", type=MainMembers)
balance_sheet = {} balance_sheet = ValidateAttr(field='balance_sheet', type=BalanceSheet)
profit_sheet = {} profit_sheet = ValidateAttr(field='profit_sheet', type=ProfitSheet)
income_sheet = {} # income_sheet = {}
appendix_sheet = {} appendix_sheet = ValidateAttr(field='appendix_sheet', type=AppendixDataSheet)
fin_index = {} fin_index = ValidateAttr(field='fin_index', type=FinancialIndex)
cc_rating_result = {} cc_rating_result = ValidateAttr(field='cc_rating_result', type=dict)
esg_rating_result = {} esg_rating_result = ValidateAttr(field='esg_rating_result', type=dict)
update_time = {} update_time = ValidateAttr(field='update_time', type=dict)
fields_map = { fields_map = {
"cid": "企业ID", "cid": "企业ID",
@ -97,13 +193,8 @@ class CompanyData(SpecObject):
"update_time": "更新时间" "update_time": "更新时间"
} }
def drag_data_from_tyc_api(self):
"""拉取天眼查接口数据"""
if __name__ == '__main__': def prepare_company_init_data(self):
company_data = CompanyData() """准备企业初始化数据"""
basic_info = BasicInfo()
share_holder = ShareHolder()
basic_info.status = "存续"
share_holder.name = '123'
company_data.basic_info = basic_info
company_data.share_holders = [share_holder, share_holder]
print(company_data.dict_to_save())

View File

@ -1,6 +1,7 @@
{ {
"MongoDB": { "MongoDB": {
"tfse_v0.21": "root:UTlC9cCoglD1cI1*@116.63.130.34:27021" "tfse_v0.21": "root:UTlC9cCoglD1cI1*@116.63.130.34:27021",
"tyc": "root:gP@DwMSVd5Sh6EiH@116.63.130.34:27019"
}, },
"Mysql": { "Mysql": {

View File

@ -1,8 +1,11 @@
import re
import os import os
import json import json
import gridfs import gridfs
import pymongo import pymongo
from urllib import parse
from bson import ObjectId from bson import ObjectId
from gridfs import GridFS from gridfs import GridFS
@ -18,7 +21,9 @@ class MongoHelper:
with open(os.path.abspath(os.path.dirname(__file__) + '/DBConfig.json')) as f: with open(os.path.abspath(os.path.dirname(__file__) + '/DBConfig.json')) as f:
db_configs = json.load(f) db_configs = json.load(f)
this_mongo_cfg = db_configs['MongoDB'][param] this_mongo_cfg = db_configs['MongoDB'][param]
self.client = pymongo.MongoClient('mongodb://{}'.format(this_mongo_cfg)) m = re.match('([\s\S].*?):([\s\S].*)@([\s\S].*)', this_mongo_cfg)
parsed_mongo_config = "{}:{}@{}".format(parse.quote_plus(m.group(1)), parse.quote_plus(m.group(2)), m.group(3))
self.client = pymongo.MongoClient('mongodb://{}'.format(parsed_mongo_config))
def find_single_column(self, param1, param2, param3, param4): def find_single_column(self, param1, param2, param3, param4):
""" """

View File

@ -6,8 +6,8 @@ from flask import request
from DBHelper.MongoHelper import MongoHelper from DBHelper.MongoHelper import MongoHelper
class ReturnConditionCheckFailed(RuntimeError): class CheckFailed(RuntimeError):
"""条件检查失败 抛出异常 接口返回失败原因和状态码""" """检查异常"""
def __init__(self, failed_info, status_code): def __init__(self, failed_info, status_code):
self.failed_info = failed_info # 失败信息 self.failed_info = failed_info # 失败信息
@ -39,3 +39,16 @@ class ReturnConditionCheckFailed(RuntimeError):
"异常日志", "异常日志",
info info
) )
class LogConditionCheckFailed(CheckFailed):
    """Check failure that calls ``log_error()`` as soon as it is constructed."""

    def __init__(self, failed_info, status_code):
        """Store the failure details, then log the error.

        Args:
            failed_info: Failure message.
            status_code: Status code associated with the failure.
        """
        self.failed_info, self.status_code = failed_info, status_code
        self.log_error()
class ReturnConditionCheckFailed(CheckFailed):
    """Condition check failed: raised so that the API returns the failure
    reason and status code."""

View File

@ -22,6 +22,8 @@ class SpecObject(object):
_dict_[self.fields_map[key]] = self.__dict__[key] _dict_[self.fields_map[key]] = self.__dict__[key]
else: else:
_dict_[self.fields_map[key]] = [item.dict_to_save() for item in self.__dict__[key]] _dict_[self.fields_map[key]] = [item.dict_to_save() for item in self.__dict__[key]]
elif self.__dict__[key] is None:
_dict_[self.fields_map[key]] = self.__dict__[key]
else: else:
_dict_[self.fields_map[key]] = self.__dict__[key].dict_to_save() _dict_[self.fields_map[key]] = self.__dict__[key].dict_to_save()