def role():
    new_role = False
    try:
        logger.info('finding role')
        iam('get_role', RoleName='gimel')
    except ClientError:
        logger.info('role not found. creating')
        iam('create_role', RoleName='gimel',
            AssumeRolePolicyDocument=ASSUMED_ROLE_POLICY)
        new_role = True
    role_arn = iam('get_role', RoleName='gimel', query='Role.Arn')
    logger.debug('role_arn={}'.format(role_arn))
    logger.info('updating role policy')
    iam('put_role_policy', RoleName='gimel', PolicyName='gimel',
        PolicyDocument=POLICY)
    if new_role:
        from time import sleep
        logger.info('waiting for role policy propagation')
        sleep(5)
    return role_arn
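role() and the other gimel deployment helpers below (get_create_api(), function_uri(), create_update_lambda()) call AWS through thin service wrappers such as iam(...), apigateway(...) and aws_lambda(...), which take a boto3 operation name plus an optional query. A minimal sketch of such a wrapper, assuming boto3 and jmespath are available and that query is a JMESPath expression (inferred from values like 'Role.Arn' above; the real gimel helper may differ):

# Sketch only: `query` as a JMESPath expression is an assumption based on
# the call sites above; the real project helper may be implemented differently.
import boto3
import jmespath

def _aws(service, operation, query=None, **kwargs):
    # Look up the boto3 client method by name and optionally filter the result.
    client = boto3.client(service)
    result = getattr(client, operation)(**kwargs)
    return jmespath.search(query, result) if query else result

def iam(operation, query=None, **kwargs):
    return _aws('iam', operation, query=query, **kwargs)

def aws_lambda(operation, query=None, **kwargs):
    return _aws('lambda', operation, query=query, **kwargs)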
Python debug() usage examples
def getCachedFile(siteCachePath, cacheId):
    mascara = os.path.join(siteCachePath, cacheId[:2], cacheId[2:], "*.cache")
    logger.debug("[scrapertools.py] mascara="+mascara)
    import glob
    ficheros = glob.glob(mascara)
    logger.debug("[scrapertools.py] Hay %d ficheros con ese id" % len(ficheros))
    cachedFile = ""
    # If there is more than one, delete them all (leftovers from development) and download again
    if len(ficheros) > 1:
        logger.debug("[scrapertools.py] Cache inválida")
        for fichero in ficheros:
            logger.debug("[scrapertools.py] Borrando "+fichero)
            os.remove(fichero)
        cachedFile = ""
    # Exactly one: the cached file
    elif len(ficheros) == 1:
        cachedFile = ficheros[0]
    return cachedFile
def add_rule(self, id_, database_name, schema_name, table_name,
             column_name, description, rule, param, param2=None):
    assert isinstance(id_, int)
    if self.schema_name != schema_name or self.table_name != table_name:
        return False
    label = id_
    log.debug("add_rule: label = %s" % label)
    assert param
    if rule == 'regexp':
        self.add_rule_regexp(label, column_name, param)
    elif rule == 'eval':
        self.add_rule_eval(label, column_name, param)
    elif rule == 'columnstat':
        self.add_rule_columnstat(label, column_name, param)
    elif rule == 'sql':
        assert param2
        self.add_rule_sql(label, column_name, param, param2)
    else:
        raise InternalError(_("Unsupported validation rule: %s") % rule)
    self.descriptions[label] = description
    return True
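As a hedged illustration of how a rule might be registered (the validator instance, table and column names below are made up; only the rule keywords 'regexp', 'eval', 'columnstat' and 'sql' come from the code above):

# Hypothetical example call; 'validator' stands for an already-constructed
# validator whose schema_name/table_name match the rule being added.
validator.add_rule(1, 'orders_db', 'public', 'orders',
                   'order_id', 'order id must be numeric',
                   'regexp', r'^\d+$')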
def thread_func(downloader, thread_id, exporter):
    logger.debug("Starting thread %d" % thread_id)
    mutual_fund_id_count = len(downloader.mutual_fund_id_list)
    # Each of the 20 worker threads handles one contiguous slice of the id list
    # (Python 2 integer division).
    start_pos = mutual_fund_id_count/20 * thread_id
    end_pos = mutual_fund_id_count/20 * (thread_id+1)
    for i in range(start_pos, end_pos):
        if i >= len(downloader.mutual_fund_id_list):
            return
        mutual_fund_inst = downloader.save_mutual_fund_info(downloader.mutual_fund_id_list[i])
        if mutual_fund_inst is None:
            continue
        exporter.write_mutual_fund_to_file(mutual_fund_inst, thread_id, i-start_pos)
        downloader.lock.acquire()
        downloader.current_finish_count += 1
        sys.stdout.write("\rNow processing #%d Mutual Fund." % downloader.current_finish_count)
        sys.stdout.flush()
        downloader.lock.release()
def save_all_mutual_fund_info(self, exporter):
    if os.path.exists(self.mutual_fund_id_list_file_name) and os.path.isfile(self.mutual_fund_id_list_file_name):
        logger.debug("Read mutual fund id list from %s" % self.mutual_fund_id_list_file_name)
        file_hdlr = open(self.mutual_fund_id_list_file_name, 'r')
        self.mutual_fund_id_list = json.load(file_hdlr)
        file_hdlr.close()
    else:
        logger.debug("%s file does not exist, need to read from Internet and write to file" % self.mutual_fund_id_list_file_name)
        page_count = self.__get_mutual_fund_page_count()
        for i in range(page_count):
            if i > 0:
                self.__save_mutual_fund_id_list_per_page(i)
        self.__write_mutual_fund_id_list_to_file()
    print "Total %d Mutual Fund need to be downloaded." % len(self.mutual_fund_id_list)
    thread_inst_list = []
    for i in range(20):
        thread_inst = threading.Thread(name=str(i), target=thread_func, args=(self, i, exporter))
        thread_inst_list.append(thread_inst)
        thread_inst.setDaemon(True)
        thread_inst.start()
    for inst in thread_inst_list:
        inst.join()
def _cleanup_old_versions(name):
    logger.info('cleaning up old versions of {0}. Keeping {1}'.format(
        name, REVISIONS))
    versions = _versions(name)
    for version in versions[0:(len(versions) - REVISIONS)]:
        logger.debug('deleting {} version {}'.format(name, version))
        aws_lambda('delete_function',
                   FunctionName=name,
                   Qualifier=version)
def get_create_api():
    api_id = apigateway('get_rest_apis',
                        query='items[?name==`gimel`] | [0].id')
    if not api_id:
        api_id = apigateway('create_rest_api', name='gimel',
                            description='Gimel API', query='id')
    logger.debug("api_id={}".format(api_id))
    return api_id
def function_uri(function_arn, region):
    uri = ('arn:aws:apigateway:{0}:lambda:path/2015-03-31/functions'
           '/{1}/invocations').format(region, function_arn)
    logger.debug("uri={0}".format(uri))
    return uri
def create_update_lambda(role_arn, wiring):
    name, handler, memory, timeout = (wiring[k] for k in ('FunctionName',
                                                          'Handler',
                                                          'MemorySize',
                                                          'Timeout'))
    try:
        logger.info('finding lambda function')
        function_arn = aws_lambda('get_function',
                                  FunctionName=name,
                                  query='Configuration.FunctionArn')
    except ClientError:
        function_arn = None
    if not function_arn:
        logger.info('creating new lambda function {}'.format(name))
        with open('gimel.zip', 'rb') as zf:
            function_arn, version = aws_lambda('create_function',
                                               FunctionName=name,
                                               Runtime='python2.7',
                                               Role=role_arn,
                                               Handler=handler,
                                               MemorySize=memory,
                                               Timeout=timeout,
                                               Publish=True,
                                               Code={'ZipFile': zf.read()},
                                               query='[FunctionArn, Version]')
    else:
        logger.info('updating lambda function {}'.format(name))
        with open('gimel.zip', 'rb') as zf:
            function_arn, version = aws_lambda('update_function_code',
                                               FunctionName=name,
                                               Publish=True,
                                               ZipFile=zf.read(),
                                               query='[FunctionArn, Version]')
    function_arn = _function_alias(name, version)
    _cleanup_old_versions(name)
    logger.debug('function_arn={} ; version={}'.format(function_arn, version))
    return function_arn
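The wiring argument is expected to carry at least the four keys unpacked at the top of create_update_lambda(). A hypothetical entry (the values below are placeholders, not the project's real configuration):

# Placeholder values for illustration only.
wiring = {'FunctionName': 'gimel-track',   # hypothetical function name
          'Handler': 'gimel.handler',      # hypothetical module.function
          'MemorySize': 128,
          'Timeout': 10}
function_arn = create_update_lambda(role(), wiring)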
def getCacheFileNames(url):
    # Get the cache directory for this url
    siteCachePath = getSiteCachePath(url)
    # Get the cache ID (md5 of the URL)
    cacheId = get_md5(url)
    logger.debug("[scrapertools.py] cacheId="+cacheId)
    # Current timestamp
    nowtimestamp = time.strftime("%Y%m%d%H%M%S", time.localtime())
    logger.debug("[scrapertools.py] nowtimestamp="+nowtimestamp)
    # File name
    # The cache is stored in a CACHE + URL structure
    ruta = os.path.join(siteCachePath, cacheId[:2], cacheId[2:])
    newFile = os.path.join(ruta, nowtimestamp + ".cache")
    logger.debug("[scrapertools.py] newFile="+newFile)
    if not os.path.exists(ruta):
        os.makedirs(ruta)
    # Look for that file in the cache
    cachedFile = getCachedFile(siteCachePath, cacheId)
    return cachedFile, newFile
def getSiteCachePath(url):
    # Get the main domain of the URL
    dominio = urlparse.urlparse(url)[1]
    logger.debug("[scrapertools.py] dominio="+dominio)
    nombres = dominio.split(".")
    if len(nombres) > 1:
        dominio = nombres[len(nombres)-2] + "." + nombres[len(nombres)-1]
    else:
        dominio = nombres[0]
    logger.debug("[scrapertools.py] dominio="+dominio)
    # Create a cache directory for addresses of that domain
    siteCachePath = os.path.join(CACHE_PATH, dominio)
    if not os.path.exists(CACHE_PATH):
        try:
            os.mkdir(CACHE_PATH)
        except:
            logger.error("[scrapertools.py] Error al crear directorio "+CACHE_PATH)
    if not os.path.exists(siteCachePath):
        try:
            os.mkdir(siteCachePath)
        except:
            logger.error("[scrapertools.py] Error al crear directorio "+siteCachePath)
    logger.debug("[scrapertools.py] siteCachePath="+siteCachePath)
    return siteCachePath
def get_node_from_data_json(name_file, node, path=None):
    """
    Gets a node from a JSON file
    @param name_file: Can be the name of a channel or server (without extension)
        or the name of a json file (with extension)
    @type name_file: str
    @param node: name of the node to get
    @type node: str
    @param path: Base path of the json file. Defaults to the settings_channels path.
    @return: dict with the node to return
    @rtype: dict
    """
    logger.info()
    from core import config
    from core import filetools
    dict_node = {}
    if not name_file.endswith(".json"):
        name_file += "_data.json"
    if not path:
        path = filetools.join(config.get_data_path(), "settings_channels")
    fname = filetools.join(path, name_file)
    if filetools.isfile(fname):
        data = filetools.read(fname)
        dict_data = load_json(data)
        check_json_file(data, fname, dict_data)
        if node in dict_data:
            dict_node = dict_data[node]
    logger.debug("dict_node: %s" % dict_node)
    return dict_node
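A short, hypothetical usage sketch (the channel name and node below are placeholders, not real data files):

# Reads <data path>/settings_channels/mychannel_data.json and returns its
# "settings" node, or {} if the file or node does not exist.
settings = get_node_from_data_json("mychannel", "settings")
logger.debug("settings: %s" % settings)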
def check_json_file(data, fname, dict_data):
    """
    Checks that, if dict_data (the JSON file converted to a dict) is not a
    dictionary, a backup file named fname.bk is written with data.
    @param data: contents of the file fname
    @type data: str
    @param fname: name of the file read
    @type fname: str
    @param dict_data: name of the dictionary
    @type dict_data: dict
    """
    logger.info()
    if not dict_data:
        logger.error("Error al cargar el json del fichero %s" % fname)
        if data != "":
            # a new file is created
            from core import filetools
            title = filetools.write("%s.bk" % fname, data)
            if title != "":
                logger.error("Ha habido un error al guardar el fichero: %s.bk" % fname)
            else:
                logger.debug("Se ha guardado una copia con el nombre: %s.bk" % fname)
        else:
            logger.debug("Está vacío el fichero: %s" % fname)
def makedic(self):
    dic = {}
    dic['database_name'] = self.database_name
    dic['schema_name'] = self.schema_name
    dic['table_name'] = self.table_name
    dic['table_name_nls'] = self.table_name_nls
    dic['timestamp'] = self.timestamp
    dic['row_count'] = self.row_count
    dic['columns'] = []
    for c in self.columns:
        dic['columns'].append(c.makedic())
    dic['comment'] = self.comment
    dic['sample_rows'] = self.sample_rows
    log.debug('dic: %s' % dic)
    return dic
def __init__(self, host, port, dbname, dbuser, dbpass, debug=False):
    self.host = host
    self.port = port
    self.dbname = dbname
    self.dbuser = dbuser
    self.dbpass = dbpass
    log.debug_enabled = debug
def get_schemas(self):
    """Get a list of database names and the number of tables in each schema.

    Returns:
        list: a list of lists: [[dbname, schemaname, num of tables], ...]
    """
    log.trace("get_schemas: start")
    query = """
SELECT database_name,
       schema_name,
       COUNT(DISTINCT table_name)
  FROM repo
 GROUP BY
       database_name,
       schema_name
 ORDER BY
       database_name,
       schema_name
"""
    schemas = []
    try:
        cursor = self._conn.cursor()
        log.debug("get_schemas: query = %s" % query)
        cursor.execute(query)
        for r in cursor.fetchall():
            r2 = [_s2u(x) for x in r]
            schemas.append(r2)
    except Exception as e:
        log.trace("get_schemas: " + unicode(e))
        raise InternalError(_("Could not get schema names: "),
                            query=query, source=e)
    log.trace("get_schemas: end")
    return schemas
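A minimal, hypothetical consumer of the returned list (`repo` stands for an already-constructed repository object exposing get_schemas(); the variable name is illustrative only):

# Iterate the [database_name, schema_name, table count] rows.
for database_name, schema_name, num_tables in repo.get_schemas():
    print "%s.%s: %d tables" % (database_name, schema_name, num_tables)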
def get_tags(self):
    """Get a list of tag names and number of tags associated with tables.

    Returns:
        list: a list of lists: [[tag, num of tables], ...]
    """
    log.trace("get_tags: start")
    query = """
SELECT tag_label,
       COUNT(*)
  FROM tags
 WHERE tag_label <> ''
 GROUP BY
       tag_label
 ORDER BY
       COUNT(*) DESC
"""
    tags = []
    try:
        cursor = self._conn.cursor()
        log.debug("get_tags: query = %s" % query)
        for r in cursor.execute(query):
            tags.append([r[0], r[1]])
    except Exception as e:
        log.trace("get_tags: " + unicode(e))
        raise InternalError(_("Could not get tag info: "),
                            query=query, source=e)
    log.trace("get_tags: end")
    return tags
def has_table_record(self, tab):
    """
    Check if the table record exists in the repository.

    Args:
        tab: a dictionary of table record.

    Returns:
        True if the table record exists, otherwise False.
    """
    assert (tab['database_name'] and tab['schema_name'] and
            tab['table_name'])
    log.trace("has_table_record: start %s.%s.%s" %
              (tab['database_name'], tab['schema_name'],
               tab['table_name']))
    query = """
SELECT COUNT(*)
  FROM repo
 WHERE database_name = '{database_name}'
   AND schema_name = '{schema_name}'
   AND table_name = '{table_name}'
   AND created_at = datetime('{timestamp}')
""".format(**tab)
    try:
        cursor = self._conn.cursor()
        log.debug("has_table_record: query = %s" % query)
        cursor.execute(query)
        r = cursor.fetchone()
        log.debug("has_table_record: r = %s" % unicode(r))
        if r[0] > 0:
            return True
    except Exception as e:
        log.trace("has_table_record: " + unicode(e))
        raise InternalError(_("Could not get table info: "),
                            query=query, source=e)
    log.trace("has_table_record: end")
    return False
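The query above is built with str.format(); on DB-API drivers that accept qmark placeholders (as SQLite does), the same existence check could be written with bound parameters. This is a sketch under those assumptions, not the project's actual code:

def _has_table_record_parameterized(conn, tab):
    # Same check as above, but with bound parameters instead of string
    # formatting; assumes a sqlite3-style DB-API connection.
    query = ("SELECT COUNT(*) FROM repo"
             " WHERE database_name = ? AND schema_name = ?"
             " AND table_name = ? AND created_at = datetime(?)")
    cursor = conn.cursor()
    cursor.execute(query, (tab['database_name'], tab['schema_name'],
                           tab['table_name'], tab['timestamp']))
    return cursor.fetchone()[0] > 0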
def __init__(self, schema_name, table_name, caller=None,
             validation_rules=None):
    self.schema_name = schema_name
    self.table_name = table_name
    self.caller = caller
    self._column_counter = (
        ColumnValidationCounter.ColumnValidationCounter())
    self.record_validators = {}
    self.statistics_validators = {}
    self.sql_validators = {}
    self.descriptions = {}
    assert validation_rules is None or isinstance(validation_rules, list)
    num_rules = 0
    if validation_rules:
        for r in validation_rules:
            log.trace("DbProfilerValidator: " + str(r))
            assert len(r) == 9
            if self.add_rule(r[0], r[1], r[2], r[3], r[4], r[5], r[6],
                             r[7], r[8]):
                num_rules += 1
    log.debug(
        u"DbProfilerValidator: initialized with %d validation rules" %
        num_rules)

# -----------------------------
# on-the-fly validation
# -----------------------------
def __get_mutual_fund_page_count(self):
    logger.info("__get_mutual_fund_page_count() function entry")
    # Add post parameters
    query_args = {"page": "1",
                  "rp": str(self.mutualFundCountPerPage),
                  "sortname": "StandardName",
                  "sortorder": "asc",
                  "query": "",
                  "qtype": "StandardName",
                  "myFilter": "",
                  "FundIds": ""}
    encoded_args = urllib.urlencode(query_args)
    request = urllib2.Request(self.init_url, encoded_args)
    # Add headers
    request.add_header("Referer",
                       "http://www2.morningstar.ca/Components/FundTable/FundTable2.aspx?CobrandId=0&Culture=en-CA")
    request.add_header("User-Agent",
                       "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36")
    request.add_header("X-Requested-With",
                       "XMLHttpRequest")
    logger.debug("Http request: %s" % request.get_full_url())
    # Get http response and decode the json
    response = urllib2.urlopen(request)
    json_data = response.read()
    decoded_json = json.loads(json_data)
    self.totalMutualFundCount = int(decoded_json[u"total"])
    logger.debug("Total mutual fund count is %d" % self.totalMutualFundCount)
    return self.totalMutualFundCount / self.mutualFundCountPerPage
def __save_mutual_fund_id_list_per_page(self, page_number):
    logger.info("__save_mutual_fund_id_list_per_page() function entry. page_number=%d" % page_number)
    # Add post parameters
    query_args = {"page": str(page_number),
                  "rp": str(self.mutualFundCountPerPage),
                  "sortname": "StandardName",
                  "sortorder": "asc",
                  "query": "",
                  "qtype": "StandardName",
                  "myFilter": "",
                  "FundIds": ""}
    encoded_args = urllib.urlencode(query_args)
    request = urllib2.Request(self.init_url, encoded_args)
    # Add headers
    request.add_header("Referer",
                       "http://www2.morningstar.ca/Components/FundTable/FundTable2.aspx?CobrandId=0&Culture=en-CA")
    request.add_header("User-Agent",
                       "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36")
    request.add_header("X-Requested-With",
                       "XMLHttpRequest")
    logger.debug("Http request: %s" % request.get_full_url())
    # Get http response and decode the json
    response = urllib2.urlopen(request)
    json_data = response.read()
    decoded_json = json.loads(json_data)
    if type(decoded_json[u"rows"]) == types.ListType:
        for row in decoded_json[u"rows"]:
            mutual_fund_id = row[u"id"]
            self.mutual_fund_id_list.append(mutual_fund_id)
            logger.debug("Save mutual fund id %s" % mutual_fund_id)
def update_json_data(dict_node, name_file, node, path=None):
    """
    Updates the json_data of a file with the given dictionary
    @param dict_node: dictionary with the node
    @type dict_node: dict
    @param name_file: Can be the name of a channel or server (without extension)
        or the name of a json file (with extension)
    @type name_file: str
    @param node: node to update
    @param path: Base path of the json file. Defaults to the settings_channels path.
    @return result: Returns True if it was written correctly, or False on error
    @rtype: bool
    @return json_data
    @rtype: dict
    """
    logger.info()
    from core import config
    from core import filetools
    json_data = {}
    result = False
    if not name_file.endswith(".json"):
        name_file += "_data.json"
    if not path:
        path = filetools.join(config.get_data_path(), "settings_channels")
    fname = filetools.join(path, name_file)
    try:
        data = filetools.read(fname)
        dict_data = load_json(data)
        # it is a dict
        if dict_data:
            if node in dict_data:
                logger.debug(" existe el key %s" % node)
                dict_data[node] = dict_node
            else:
                logger.debug(" NO existe el key %s" % node)
                new_dict = {node: dict_node}
                dict_data.update(new_dict)
        else:
            logger.debug(" NO es un dict")
            dict_data = {node: dict_node}
        json_data = dump_json(dict_data)
        result = filetools.write(fname, json_data)
    except:
        logger.error("No se ha podido actualizar %s" % fname)
    return result, json_data
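A hypothetical round trip with get_node_from_data_json() (the channel and node names are placeholders):

# Read a node, modify it and persist it again; names are illustrative only.
settings = get_node_from_data_json("mychannel", "settings")
settings["enabled"] = True
result, json_data = update_json_data(settings, "mychannel", "settings")
if not result:
    logger.error("could not update mychannel settings")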
def get_setting(name, channel="", server=""):
    """
    Returns the configuration value of the requested parameter.

    Returns the value of the parameter 'name' from the global configuration or
    from the configuration of the channel 'channel'.

    If the channel name is given, it looks in the path
    \addon_data\plugin.video.pelisalacarta\settings_channels for the file
    channel_data.json and reads the value of the parameter 'name'. If
    channel_data.json does not exist, it looks in the channels folder for the
    file channel.xml and creates a channel_data.json file before returning the
    requested value.

    If the parameter 'name' does not exist in channel_data.json, it is looked
    up in the global configuration and, if it does not exist there either, an
    empty str is returned.

    Parameters:
    name -- parameter name
    channel [optional] -- channel name

    Returns:
    value -- The value of the parameter 'name'
    """
    # Specific channel setting
    if channel:
        # logger.info("config.get_setting reading channel setting '"+name+"' from channel xml")
        from core import channeltools
        value = channeltools.get_channel_setting(name, channel)
        # logger.info("config.get_setting -> '"+repr(value)+"'")
        return value
    elif server:
        # logger.info("config.get_setting reading server setting '"+name+"' from server xml")
        from core import servertools
        value = servertools.get_server_setting(name, server)
        # logger.info("config.get_setting -> '"+repr(value)+"'")
        return value
    # Global setting
    else:
        try:
            if name in overrides:
                dev = overrides[name]
                # print "Overrides: ", name, "=", dev
            # elif name == "debug":
            #     return "true"
            else:
                dev = configfile.get("General", name)
                # print "Config file: ", name, "=", dev
            # print "get_setting", name, dev
            return dev
        except:
            # print "get_setting", name, "(empty)"
            return ""
def get_table(self, database_name, schema_name, table_name):
    """
    Get a table record from the repository by object name.

    Args:
        database_name(str): database name
        schema_name(str): schema name
        table_name(str): table name

    Returns:
        a dictionary of table record. {table_record}
    """
    assert database_name and schema_name and table_name
    table = None
    log.trace('get_table: start %s.%s.%s' %
              (database_name, schema_name, table_name))
    query = """
SELECT data
  FROM repo
 WHERE database_name = '{0}'
   AND schema_name = '{1}'
   AND table_name = '{2}'
 ORDER BY created_at DESC
 LIMIT 1
""".format(database_name, schema_name, table_name)
    log.debug("get_table: query = %s" % query)
    try:
        cursor = self._conn.cursor()
        cursor.execute(query)
        r = cursor.fetchone()
        if r:
            table = json.loads(unicode(r[0]))
    except Exception as e:
        raise InternalError(_("Could not get table data: "),
                            query=query, source=e)
    log.trace('get_table: end')
    return table
def write_mutual_fund_to_file(self, mutual_fund_inst, sheet_id, row_id):
    data_list = [mutual_fund_inst.fund_id,
                 mutual_fund_inst.fund_name,
                 mutual_fund_inst.fund_size,
                 mutual_fund_inst.mer,
                 mutual_fund_inst.status,
                 mutual_fund_inst.min_inve_initial,
                 mutual_fund_inst.category,
                 mutual_fund_inst.inve_style,
                 mutual_fund_inst.inve_objective_strategy,
                 mutual_fund_inst.growth_of_ten_thousand_YTD,
                 mutual_fund_inst.growth_of_ten_thousand_1month,
                 mutual_fund_inst.growth_of_ten_thousand_1year,
                 mutual_fund_inst.growth_of_ten_thousand_3year,
                 mutual_fund_inst.growth_of_ten_thousand_5year,
                 mutual_fund_inst.growth_of_ten_thousand_10year,
                 mutual_fund_inst.growth_fund_YTD,
                 mutual_fund_inst.growth_fund_1month,
                 mutual_fund_inst.growth_fund_1year,
                 mutual_fund_inst.growth_fund_3year,
                 mutual_fund_inst.growth_fund_5year,
                 mutual_fund_inst.growth_fund_10year,
                 mutual_fund_inst.growth_comp_index_YTD,
                 mutual_fund_inst.growth_comp_index_1month,
                 mutual_fund_inst.growth_comp_index_1year,
                 mutual_fund_inst.growth_comp_index_3year,
                 mutual_fund_inst.growth_comp_index_5year,
                 mutual_fund_inst.growth_comp_index_10year,
                 mutual_fund_inst.growth_comp_category_YTD,
                 mutual_fund_inst.growth_comp_category_1month,
                 mutual_fund_inst.growth_comp_category_1year,
                 mutual_fund_inst.growth_comp_category_3year,
                 mutual_fund_inst.growth_comp_category_5year,
                 mutual_fund_inst.growth_comp_category_10year]
    self.xlsx_file_lock.acquire()
    for i in range(len(data_list)):
        cell = self.xlsx_file_inst.get_sheet_by_name(str(sheet_id)).cell(row=row_id+1, column=i+1)
        cell.value = data_list[i]
    self.xlsx_file_lock.release()
    logger.debug("Write data: %s", data_list)
def save_mutual_fund_info(self, mutual_fund_id):
    logger.info("__save_mutual_fund_info() function entry. {'mutual_fund_id': %s}" % mutual_fund_id)
    # Add GET parameters
    query_args = {"t": mutual_fund_id,
                  "region": "CAN",
                  "culture": "en-CA"}
    request = urllib2.Request(self.mutual_fund_url + "?" + urllib.urlencode(query_args))
    # Add headers
    request.add_header("Referer",
                       "http://www2.morningstar.ca/Components/FundTable/FundTable2.aspx?CobrandId=0&Culture=en-CA")
    request.add_header("User-Agent",
                       "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36")
    logger.debug("Http request: %s" % request.get_full_url())
    # Get http response and extract the url of mutual fund
    response = urllib2.urlopen(request)
    soup = BeautifulSoup(response.read(), 'html.parser')
    script_list = soup.find_all("script")
    pattern = r"t=[a-zA-Z0-9]+&region=CAN&culture=en-CA&cur=CAD&productCode=CAN"
    for script in script_list:
        match = re.search(pattern, unicode(script.string))
        if match:
            url_get_parameter_str = script.string[match.start():match.end()]
            # Split url GET parameter string
            get_parameter_str_list = url_get_parameter_str.split("&")
            get_parameter_dict = {}
            for get_parameter_str in get_parameter_str_list:
                get_parameter_pair = get_parameter_str.split("=")
                get_parameter_dict[get_parameter_pair[0]] = get_parameter_pair[1]
            # Create Mutual Fund Instance
            mutual_fund_inst = MutualFund()
            mutual_fund_inst.fund_id = mutual_fund_id
            # save Mutual Fund Head Portion
            self.__save_mutual_fund_head_portion(mutual_fund_inst, get_parameter_dict)
            if mutual_fund_inst.fund_name == "":
                return None
            # save Mutual Fund Objective and Strategy Portion
            self.__save_mutual_fund_obj_strategy_portion(mutual_fund_inst, get_parameter_dict)
            # save Mutual Fund Performance Portion
            self.__save_mutual_fund_performance_portion(mutual_fund_inst, get_parameter_dict)
            # save Mutual Fund Annual Performance Portion
            #self.__save_mutual_fund_annual_performance_portion(mutual_fund_inst, get_parameter_dict)
            return mutual_fund_inst