initial commit
This commit is contained in:
parent
30458acfd8
commit
1ac6d0bb9c
7
.gitignore
vendored
Normal file
7
.gitignore
vendored
Normal file
@ -0,0 +1,7 @@
|
||||
*.pyc
|
||||
__pycache__/
|
||||
cache/
|
||||
feeds/
|
||||
config/email_session.xml
|
||||
config/subscriber.xml
|
||||
config.py
|
||||
22
README.md
22
README.md
@ -1,2 +1,22 @@
|
||||
# ArxivRobot
|
||||
# What is This?
|
||||
This is a naive and simple arXiv robot: it fetches today's updated papers from arXiv in a specified topic, filters them by given keywords, and sends the result to a given email address.
|
||||
|
||||
# What do I need to do to run this code?
|
||||
|
||||
## package requirements:
|
||||
It seems you only need to install croniter: ```pip install croniter``` will do it.
|
||||
|
||||
## configuration:
|
||||
|
||||
1. create config.py, a sample is given in config-examples.py
|
||||
2. create config/subscriber.xml, a sample is also given in /config/subscriber_example.xml
|
||||
|
||||
## run this code.
|
||||
```python main.py```
|
||||
|
||||
If everything goes okay, you will see a shell interface, type help for more information.
|
||||
|
||||
# PS
|
||||
I am really not a great coder and not good at writing documentation and comments. If you run into any trouble, feel free to open an issue and I will try my best to fix the problem.
|
||||
|
||||
The code was pushed in a hurry; I will add a document explaining it when I have free time.
|
||||
|
||||
65
analysis_paper.py
Normal file
65
analysis_paper.py
Normal file
@ -0,0 +1,65 @@
|
||||
from arxiv_spider import arxiv_paper
|
||||
import utils
|
||||
import numpy as np
|
||||
|
||||
authors = {}
|
||||
|
||||
years = ['2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019']
|
||||
|
||||
for year in years:
|
||||
print('Analysising year:', year)
|
||||
papers = utils.load_python_object('./feeds/' + year)
|
||||
for paper in papers:
|
||||
author_this_paper = paper.info['authors']
|
||||
for author in author_this_paper:
|
||||
author = utils.delete_n(author)
|
||||
if author in authors:
|
||||
authors[author] += 1
|
||||
else:
|
||||
authors[author] = 1
|
||||
|
||||
freq = []
|
||||
names = []
|
||||
for author in authors:
|
||||
freq.append(authors[author])
|
||||
names.append(author)
|
||||
|
||||
freq = np.asarray(freq, dtype=np.int32)
|
||||
|
||||
freq_sort = np.argsort(freq)
|
||||
|
||||
num_authors = len(names)
|
||||
|
||||
for i in range(num_authors):
|
||||
aid = freq_sort[num_authors - i - 1]
|
||||
print('Name: {0} | papers: {1}'.format(names[aid], freq[aid]).encode('utf-8'))
|
||||
|
||||
|
||||
# keywords = {}
|
||||
|
||||
# years = ['2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019']
|
||||
|
||||
# for year in years:
|
||||
# print('Analysising year:', year)
|
||||
# papers = utils.load_python_object('./feeds/' + year)
|
||||
# for paper in papers:
|
||||
# keyword_this_paper = paper.info['title'].split(' ')
|
||||
# for keyword in keyword_this_paper:
|
||||
# keyword = utils.delete_n(keyword).lower()
|
||||
# if keyword in keywords:
|
||||
# keywords[keyword] += 1
|
||||
# else:
|
||||
# keywords[keyword] = 1
|
||||
|
||||
# freq = []
|
||||
# names = []
|
||||
# for keyword in keywords:
|
||||
# freq.append(keywords[keyword])
|
||||
# names.append(keyword)
|
||||
# freq = np.asarray(freq, dtype=np.int32)
|
||||
# freq_sort = np.argsort(freq)
|
||||
# num_keywords = len(names)
|
||||
|
||||
# for i in range(num_keywords):
|
||||
# aid = freq_sort[num_keywords - i - 1]
|
||||
# print('Keyword: {0} | papers: {1}'.format(names[aid], freq[aid]).encode('utf-8'))
|
||||
85
arxiv_bot.py
Normal file
85
arxiv_bot.py
Normal file
@ -0,0 +1,85 @@
|
||||
import arxiv_spider
|
||||
import os
|
||||
import time
|
||||
from lib import utils
|
||||
|
||||
# cache tree:
# cache_root
#   - topic-caches
#       - feed_$(time).arxiv_feed
#       - feed_year_$(year).arxiv_feed

class arxiv_bot():
    """Fetches and caches daily arXiv feeds for a set of topics.

    One arxiv_spider is kept per topic; downloaded daily feeds are pickled
    under cache_dir/<topic>/ so repeated runs on the same day hit the cache.
    """

    def __init__(self, topics, cache_dir='./cache', arxiv_site='https://arxiv.org', log=False):
        self.log = log
        self.site = arxiv_site
        self.topics = []
        self.spiders = {}
        self.cache_dir = cache_dir
        self.topic_caches = {}
        # Fix: these were previously never initialized, so the first call to
        # get_interested_paper raised AttributeError.
        self.today_feed = None
        self.today = None
        if not os.path.isdir(self.cache_dir):
            os.makedirs(self.cache_dir)
        self.update_topics(topics)

    def update_topics(self, topics):
        """Register new topics: create their cache dirs and spiders.

        Topics already known are silently skipped.
        """
        for topic in topics:
            if topic not in self.topics:
                self.topics.append(topic)
                if self.log:
                    print('Adding topic {0}.'.format(topic))
                topic_cache = os.path.join(self.cache_dir, topic)
                self.topic_caches[topic] = topic_cache
                if not os.path.isdir(topic_cache):
                    if self.log:
                        print('creating topic dir:', topic_cache)
                    os.makedirs(topic_cache)
                self.spiders[topic] = arxiv_spider.arxiv_spider(topic, self.site)

    def get_today_feed(self):
        """Return {topic: [papers]} for today.

        Loads the pickled feed if it is already downloaded; otherwise uses
        the topic's spider to fetch it (including abstracts) and caches it.
        """
        today_feed = {}
        today = utils.str_day()
        for topic in self.topics:
            today_feed_name = 'feed_' + today + '.arxiv_daily_feed'
            today_feed_path = os.path.join(self.cache_dir, topic, today_feed_name)
            if os.path.exists(today_feed_path):
                topic_feed = utils.load_python_object(today_feed_path)
            else:
                topic_feed = self.spiders[topic].get_today_paper()
                print('Fetching topic {0} papers...'.format(topic))
                for paper in topic_feed:
                    if self.log:
                        print('download abstract for paper', paper.info['title'])
                    paper.download_abstract()
                utils.save_python_object(topic_feed, today_feed_path)
            today_feed[topic] = topic_feed
        return today_feed

    def get_interested_paper(self, topic, keywords):
        """Split today's papers for `topic` into (strong, weak) matches.

        A keyword hit in the title is a strong match; a hit only in the
        abstract is a weak match. Keywords are assumed lowercase.
        """
        # Fix: use `!=`, not `is not` -- string identity is an
        # implementation detail and made this comparison unreliable.
        if self.today_feed is None or utils.str_day() != self.today:
            self.today_feed = self.get_today_feed()
            self.today = utils.str_day()
            print('Updating daily feed.')

        topic_feed = self.today_feed[topic]
        # get_today_feed stores a flat list per topic, while the older
        # recent-papers cache format stored {day: [papers]}; accept both.
        if isinstance(topic_feed, dict):
            topic_papers = []
            for day in topic_feed:
                topic_papers += topic_feed[day]
        else:
            topic_papers = list(topic_feed)

        strong = []
        weak = []
        for paper in topic_papers:
            strong_match = False
            weak_match = False
            # abstract may still be None when it was never downloaded
            abstract = paper.info['abstract'] or ''
            for keyword in keywords:
                if paper.info['title'].lower().find(keyword) != -1:
                    strong_match = True
                    break
                elif abstract.lower().find(keyword) != -1:
                    weak_match = True
            if strong_match:
                strong.append(paper)
            elif weak_match:
                weak.append(paper)
        return strong, weak
|
||||
31
arxiv_service.py
Normal file
31
arxiv_service.py
Normal file
@ -0,0 +1,31 @@
|
||||
import arxiv_bot
|
||||
import feeds
|
||||
import email_sender
|
||||
import time
|
||||
import subscriber_utils
|
||||
import utils
|
||||
from croniter import croniter
|
||||
import threading
|
||||
|
||||
class test_service():
    """Trivial scheduled job used to exercise the cron runner: it simply
    prints a line each time it fires."""

    def __init__(self, name):
        # Only the job's display name is stored.
        self.name = name

    def do(self):
        # Entry point invoked by the scheduler.
        print('Job {0} run!'.format(self.name))
|
||||
|
||||
class mail_service():
    """Scheduled job that triggers the emailer's daily send."""

    def __init__(self, emailer):
        # The emailer owns all sending logic; this class only adapts it
        # to the scheduler's do() protocol.
        self.emailer = emailer

    def do(self):
        self.emailer.send_daily_email()
|
||||
|
||||
class reload_subscriber():
    """Scheduled job that re-reads the subscriber configuration, so edits
    to the subscriber file are picked up without a restart."""

    def __init__(self, subscriber_mgr):
        self.mgr = subscriber_mgr

    def do(self):
        # Delegate to the manager's loader; it re-parses its config file.
        self.mgr.load()
|
||||
|
||||
369
arxiv_spider.py
Normal file
369
arxiv_spider.py
Normal file
@ -0,0 +1,369 @@
|
||||
import requests
|
||||
import pickle
|
||||
import time
|
||||
from lib import utils
|
||||
from lib.parser import dom_node, simple_parser
|
||||
|
||||
import socket
|
||||
import socks
|
||||
|
||||
# Optional SOCKS5 proxy support (e.g. a local shadowsocks client).
# When enabled this monkey-patches socket.socket globally, so EVERY
# connection made by this process -- not just arXiv requests -- goes
# through the proxy.
use_proxy = False
if use_proxy:
    SOCKS5_PROXY_HOST = '127.0.0.1'
    SOCKS5_PROXY_PORT = 1080
    # keep a reference to the original socket class (not currently restored)
    default_socket = socket.socket
    socks.set_default_proxy(socks.SOCKS5, SOCKS5_PROXY_HOST, SOCKS5_PROXY_PORT)
    socket.socket = socks.socksocket
|
||||
|
||||
class arxiv_paper():
    """A single arXiv paper: its id plus a metadata dict.

    The info dict uses keys 'title', 'authors' (list of str), 'comments',
    'subjects' and 'abstract'; any value may be None until downloaded.
    """

    def __init__(self, arxiv_id=None, paper_info=None):
        self.arxiv_id = arxiv_id
        self.info = paper_info

    def add_author(self, author):
        """Append one author name to the paper's author list."""
        # Fix: previously appended the undefined name `authors`, which
        # raised NameError whenever this method was called.
        self.info['authors'].append(author)

    def title(self):
        """Return the paper's title string."""
        return self.info['title']

    def describe(self):
        """Return a human-readable multi-line summary of the paper."""
        information = ''
        information += 'ID: {0} (https://arxiv.org/abs/{0})\n'.format(self.arxiv_id)
        for key in self.info:
            if self.info[key] is not None:
                info = utils.formal_text(self.info[key])
                information += ('\t' + key + ':' + str(info) + '\n')
        return information

    def show(self):
        """Print the summary produced by describe()."""
        print(self.describe())

    def to_html(self):
        """Render the paper as an HTML fragment (for the email body)."""
        dom_tree = dom_node(name='paper-section')
        paper_title = None
        paper_link = None
        paper_authors = None
        paper_comments = None
        paper_subjects = None
        paper_abstract = None
        for key in self.info:
            if self.info[key] is not None:
                if key == 'title':
                    # Title links to the abstract page; a separate node
                    # carries the direct pdf link.
                    paper_title = dom_node('paper-title')
                    link_attr = {'href': 'https://arxiv.org/abs/{0}'.format(self.arxiv_id)}
                    link_node = dom_node('a', link_attr)
                    link_node.data = self.info[key]
                    paper_title.add_child(link_node)
                    paper_link = dom_node('paper-pdf-link')
                    pdf_link_attr = {'href': 'https://arxiv.org/pdf/{0}'.format(self.arxiv_id)}
                    pdf_link = dom_node('a', pdf_link_attr)
                    pdf_link.data = '{0} | [pdf]'.format(self.arxiv_id)
                    paper_link.add_child(pdf_link)
                elif key == 'authors':
                    paper_authors = dom_node('paper-authors')
                    # comma-join without a trailing separator
                    paper_authors.data = ', '.join(self.info[key])
                elif key == 'comments':
                    paper_comments = dom_node('paper-comments')
                    paper_comments.data = self.info[key]
                elif key == 'subjects':
                    paper_subjects = dom_node('paper-subjects')
                    paper_subjects.data = self.info[key]
                elif key == 'abstract':
                    paper_abstract = dom_node('paper-abstract')
                    paper_abstract.data = self.info[key]
        dom_tree.add_child(paper_title)
        dom_tree.add_child(paper_link)
        dom_tree.add_child(paper_authors)
        dom_tree.add_child(paper_abstract)
        dom_tree.add_child(paper_comments)
        dom_tree.add_child(paper_subjects)
        return dom_tree.to_string()

    def download_abstract(self, forcemode=False):
        """Fetch the abstract from the paper's arXiv page.

        Skips the network round-trip when the abstract is already present,
        unless forcemode is True. The abstract is read from the page's
        og:description meta tag.
        """
        if not forcemode:
            if self.info['abstract'] is not None:
                return
        r = requests.get('https://arxiv.org/abs/' + self.arxiv_id)
        parser = simple_parser()
        parser.feed(r.text)
        tree = parser.root
        meta_nodes = tree.search('meta')
        for meta_node in meta_nodes:
            meta_attr = meta_node.attributes
            if 'property' in meta_attr:
                if meta_attr['property'] == 'og:description':
                    self.info['abstract'] = utils.formal_text(meta_attr['content'])
                    return
|
||||
|
||||
class arxiv_list_parser():
    """Parses an arXiv /list/ HTML page (yearly or /recent listing) into
    arxiv_paper objects.

    The parsing relies on the listing page structure as of when this was
    written: each paper is a <dt> (id + links) followed by a <dd>
    (title/authors/comments/subjects). If arXiv changes its markup, the
    child-index accesses below will break.
    """

    def __init__(self, html_page):
        # Feed the raw HTML through the project's simple_parser and keep
        # the resulting DOM tree root for the get_* methods.
        self.html_page = html_page
        self.parser = simple_parser()
        self.parser.feed(html_page)
        self.tree = self.parser.root

    def get_arxiv_id(self, dt_node):
        """Extract the arXiv id from one <dt> node, or None if empty.

        Assumes the second child's first child is the abstract link whose
        href ends in the id -- TODO confirm against current arXiv markup.
        """
        if len(dt_node.childs) == 0:
            return None
        else:
            arxiv_id = dt_node.childs[1].childs[0].attributes['href']
            arxiv_id = arxiv_id.split('/')[-1]
            return arxiv_id

    def get_paper_info(self, dd_node):
        """Extract a paper-info dict from one <dd> node, or None if empty.

        The abstract is always None here; listing pages do not include it
        (see arxiv_paper.download_abstract).
        """
        title = None
        authors = []
        comments = None
        subjects = None
        if len(dd_node.childs) == 0:
            return None
        else:
            # Fields are identified by the CSS class arXiv puts on each
            # sub-element of the meta block.
            elements = dd_node.childs[0].childs
            for element in elements:
                if 'class' in element.attributes:
                    element_class = element.attributes['class']
                    if element_class == 'list-title mathjax':
                        title = utils.formal_text(element.data)
                    elif element_class == 'list-authors':
                        # each author is an <a> link inside the authors div
                        for child in element.childs:
                            if child.name == 'a':
                                authors.append(utils.formal_text(child.data))
                    elif element_class == 'list-comments mathjax':
                        comments = utils.formal_text(element.data)
                    elif element_class == 'list-subjects':
                        subjects = utils.formal_text(element.data)
            paper_info = {
                'title':title,
                'authors':authors,
                'comments':comments,
                'subjects':subjects,
                'abstract':None
            }
            return paper_info

    def get_papers(self):
        """Pair up <dt>/<dd> nodes and build the list of arxiv_paper."""
        dts = self.tree.search('dt')
        dds = self.tree.search('dd')
        papers = []
        for dt, dd in zip(dts, dds):
            arxiv_id = self.get_arxiv_id(dt)
            if arxiv_id == None:
                continue;
            paper_info = self.get_paper_info(dd)
            if paper_info == None:
                continue;
            paper = arxiv_paper(arxiv_id, paper_info)
            papers.append(paper)
        return papers

    def get_paper_num(self):
        """Return the total entry count advertised by the listing page.

        Reads the first <small> node (e.g. "Total of N entries ...") and
        returns the first whole-word integer found in it, or 0 if none.
        """
        totally_paper_node = self.tree.search('small')[0].data
        total_num_split = totally_paper_node.split(' ')
        num_total = 0
        for split in total_num_split:
            if split.isdigit():
                num_total = int(split)
                break;
        return num_total

    def get_recent_info(self):
        """Map each day shown on a /recent page to its paging info.

        Returns {day_label: {'start': first_index, 'num': paper_count}}.
        Day boundaries come from the page's <ul> of day links; the last
        day's count is derived from the page's total entry count.
        """
        # get each day start id and day_name
        day_name = []
        day_start = []
        li_nodes = self.tree.search('ul')[0].childs
        for li in li_nodes:
            link = li.childs[0].attributes['href']
            start = None
            if link.find('#item') != -1:
                # in-page anchor of the form "...#itemNNN"
                start = link.split('#')[-1][4:]
            else:
                # paging link of the form "...?skip=NNN&show=MM"
                start = link.split('=')[-2].split('&')[0]
            day_name.append(li.childs[0].data)
            day_start.append(int(start))
        # get total paper num
        num_total = self.get_paper_num()
        # get each day num: difference between consecutive start indices,
        # with the final day bounded by the total count.
        num_days = len(day_start)
        day_num = []
        for i in range(num_days):
            if i < num_days - 1:
                day_num.append(day_start[i+1] - day_start[i])
            else:
                day_num.append(num_total - day_start[i])

        # generate final info.
        recent_papers_info = {}
        for day, start, num in zip(day_name, day_start, day_num):
            current_day_info = {}
            current_day_info['start'] = start
            current_day_info['num'] = num
            recent_papers_info[day] = current_day_info
        return recent_papers_info
|
||||
|
||||
class arxiv_spider():
    """Downloads paper metadata for a single arXiv topic (e.g. 'cs.CV').

    Two data sources are used: the HTML listing pages (yearly / recent)
    parsed with arxiv_list_parser, and the export.arxiv.org RSS + API
    endpoints for today's papers.
    """

    def __init__(self, topic, arxiv_url = 'https://arxiv.org'):
        self.link = arxiv_url
        self.topic = topic
        # all listing URLs are built from this prefix
        self.base_url = self.link + '/list/' + self.topic


    def get_yearly_papers(self, year, log=False):
        """Download every paper listed for `year` (a string like '2018').

        First fetches the listing page to learn the total count, then
        re-requests the page with show=<total> to get all entries at once.
        """
        yearly_url = self.base_url + '/' + year
        if log:
            print('visiting url [{0}] for basic information'.format(yearly_url))
        r = requests.get(yearly_url)
        list_parser = arxiv_list_parser(r.text)
        total_num = list_parser.get_paper_num()
        print('Total Number for this year:', total_num)
        yearly_url_all = yearly_url + '?skip={0}&show={1}'.format(0, total_num)
        if log:
            print('visiting url [{0}] for all papers'.format(yearly_url_all))
        r = requests.get(yearly_url_all)
        list_parser = arxiv_list_parser(r.text)
        yearly_papers = list_parser.get_papers()
        return yearly_papers

    # papers:
    # papers = {
    #   'key is day string': [content is a list of arxiv_paper class]
    # }

    def get_papers_on_search_list(self, search_url, log=True):
        """Parse an export.arxiv.org API (Atom) result into arxiv_paper objects.

        Each <entry> node yields one paper; the abstract comes from the
        <summary> element, so papers from this path need no extra download.
        """
        if log:
            print('visiting url [{0}] for today papers.'.format(search_url))
        search_content = requests.get(search_url)
        search_content = search_content.text
        parser = simple_parser()
        parser.feed(search_content)
        tree = parser.root
        paper_nodes = tree.search('entry')
        print('num_searched_nodes:', len(paper_nodes))
        papers = []
        for node in paper_nodes:
            # entry <id> is a URL whose last path segment is the arXiv id
            arxiv_id = node.search('id')[0].data.split('/')[-1]
            title = node.search('title')[0].data
            author_nodes = node.search('name')
            authors = [item.data for item in author_nodes]
            category_nodes = node.search('category')
            categories = [item.attributes['term'] for item in category_nodes]
            # join category terms with commas (trailing comma trimmed)
            subjects = ''
            for cat in categories:
                subjects += cat + ','
            subjects = subjects[:-1]
            comments_node = node.search('arxiv:comment')
            if len(comments_node) == 0:
                # comment element is optional in the API response
                comments = ''
            else:
                comments = node.search('arxiv:comment')[0].data
            abstract = node.search('summary')[0].data

            title = utils.formal_text(title)
            subjects = utils.formal_text(subjects)
            comments = utils.formal_text(comments)
            abstract = utils.formal_text(abstract)


            paper_info = {
                'title':title,
                'authors':authors,
                'comments':comments,
                'subjects':subjects,
                'abstract':abstract
            }

            paper = arxiv_paper(arxiv_id, paper_info)
            papers.append(paper)
        return papers

    def get_papers_by_ids(self, ids, log=True):
        """Fetch paper metadata for a list of arXiv ids via the API.

        Ids are queried in batches of 10 to keep the id_list URL short.
        """
        # ceil(len(ids) / 10); the 9.1 offset makes int() round up for any
        # non-multiple of 10 without importing math
        num_groups = int((len(ids) + 9.1)/10)
        if log:
            print('spliting into {0} groups.'.format(num_groups))
        papers = []
        for i in range(num_groups):
            this_batch = ids[i * 10:(i+1)*10]
            id_list = ''
            for paper_id in this_batch:
                id_list += paper_id + ','
            id_list = id_list[:-1]
            search_url = 'http://export.arxiv.org/api/query?id_list=' + id_list
            batch_papers = self.get_papers_on_search_list(search_url, log)
            papers += batch_papers
        return papers


    def get_today_ids(self, log=True):
        """Return today's paper ids from the topic's RSS feed.

        Each <rdf:li> item's rdf:resource link ends in the arXiv id.
        """
        rss_url = 'http://export.arxiv.org/rss/{0}'.format(self.topic)
        if log:
            print('visiting url [{0}] for today papers id.'.format(rss_url))
        rss_content = requests.get(rss_url)
        rss_content = rss_content.text
        parser = simple_parser()
        parser.feed(rss_content)
        rss = parser.root
        id_nodes = rss.search('rdf:li')
        paper_ids = []
        for node in id_nodes:
            paper_link = node.attributes['rdf:resource']
            paper_id = paper_link.split('/')[-1]
            paper_ids.append(paper_id)
        print('num_paper_ids:', len(paper_ids))
        return paper_ids

    def get_today_paper(self, return_day_name=False, log=True):
        """Return today's papers (RSS ids + API metadata) as a flat list.

        NOTE(review): return_day_name is accepted but ignored here; only
        get_today_paper_backup honors it.
        """
        today_ids = self.get_today_ids(log)
        papers = self.get_papers_by_ids(today_ids)
        print('num of papers:', len(papers))
        return papers



    def get_today_paper_backup(self, return_day_name=False):
        """Fallback path: scrape today's papers from the /recent HTML page.

        Returns the most recent day's paper list, optionally with that
        day's label.
        """
        papers = self.get_recent_papers(recent_days=[1])
        today = None
        paper = None
        # single-entry dict: the loop just unpacks its only key/value
        for day in papers:
            today = day
            paper = papers[day]
        if return_day_name:
            return paper, today
        else:
            return paper


    def get_recent_papers(self, recent_days=[1, 2, 3, 4, 5], log=False):
        """Scrape the /recent listing and return {day_label: [papers]}.

        recent_days selects which of the listed days to download, counted
        from 1 (most recent). NOTE(review): mutable default argument --
        harmless here because it is never mutated, but worth changing.
        """
        recent_url = self.base_url + '/recent'
        if log:
            print('visiting url [{0}] for basic information'.format(recent_url))
        r = requests.get(recent_url)
        list_parser = arxiv_list_parser(r.text)
        recent_papers_info = list_parser.get_recent_info()
        print('paper info:', recent_papers_info)

        day_id = 1
        papers = {}
        for day in recent_papers_info:
            if day_id in recent_days:
                # page through /pastweek with this day's offset and count
                today_start = recent_papers_info[day]['start']
                today_num = recent_papers_info[day]['num']
                page_url = '/pastweek?skip={0}&show={1}'.format(today_start, today_num)
                day_url = self.base_url + page_url
                if log:
                    print('visiting url [{0}] for paper on day {1}'.format(day_url, day))
                r = requests.get(day_url)
                list_parser = arxiv_list_parser(r.text)
                today_papers = list_parser.get_papers()
                papers[day] = today_papers
            day_id += 1
        return papers
|
||||
9
config-examples.py
Normal file
9
config-examples.py
Normal file
@ -0,0 +1,9 @@
|
||||
|
||||
#### email related config ####
username = 'email@email.com' # send email using this account
password = 'yourpassword' # your email login password
sender_name = 'ArxivRobot' # the name of your robot
replyto = 'yourmail@mail.com' # all reply email will be forwarded to this email address

smtp_ssl_addr = 'smtp.smtp.com'
# smtp server, only ssl supported. you can support more by editing function send_mail in email_sender.py
|
||||
109
config/style.css
Normal file
109
config/style.css
Normal file
@ -0,0 +1,109 @@
|
||||
<head>
|
||||
<style type="text/css">
|
||||
paper-group
|
||||
{
|
||||
float: left;
|
||||
width: 100%;
|
||||
padding-top: 10px;
|
||||
padding-bottom: 10px;
|
||||
text-align: center;
|
||||
color: white;
|
||||
background-color: #3f3f3f;
|
||||
font-family:Arial,Helvetica,sans-serif;
|
||||
font-size: 2.0em;
|
||||
border-radius:10px;
|
||||
}
|
||||
/*abstract
|
||||
{
|
||||
color:black;
|
||||
text-align:center;
|
||||
background-color:#d0e4fe;
|
||||
}
|
||||
*/
|
||||
paper-section {
|
||||
float: left;
|
||||
margin-top: 20px;
|
||||
margin-bottom: 20px;
|
||||
margin-left: 10%;
|
||||
margin-right: 10%;
|
||||
width: 80%;
|
||||
/*border: 1px solid green;
|
||||
background-color: lightgrey;*/
|
||||
}
|
||||
|
||||
paper-title {
|
||||
float: left;
|
||||
width: 100%;
|
||||
/*border: 1px solid green;
|
||||
background-color: lightgrey;*/
|
||||
font-family:Arial,sans-serif;
|
||||
font-size: 1.5em;
|
||||
font-weight: bold;
|
||||
color: black;
|
||||
}
|
||||
|
||||
paper-authors {
|
||||
float: left;
|
||||
width: 90%;
|
||||
/*border: 1px solid green;
|
||||
background-color: lightgrey;*/
|
||||
font-family:Arial,sans-serif;
|
||||
font-size: 1.0em;
|
||||
color: #900;
|
||||
}
|
||||
|
||||
paper-pdf-link {
|
||||
float: right;
|
||||
width: 10%;
|
||||
text-align: right;
|
||||
/*border: 1px solid green;
|
||||
background-color: lightgrey;*/
|
||||
font-family:Arial,sans-serif;
|
||||
font-size: 1.0em;
|
||||
color: #900;
|
||||
}
|
||||
|
||||
paper-abstract {
|
||||
float: left;
|
||||
width: 100%;
|
||||
padding-top: 10px;
|
||||
padding-bottom: 10px;
|
||||
padding-left: 10px;
|
||||
padding-right: 10px;
|
||||
background-color: #EEFFEE;
|
||||
font-family:Arial,sans-serif;
|
||||
font-size: 1.2em;
|
||||
color: black;
|
||||
text-align: justify;
|
||||
border-radius:10px;
|
||||
}
|
||||
|
||||
paper-comments {
|
||||
float: left;
|
||||
width: 50%;
|
||||
font-family:Arial,sans-serif;
|
||||
font-size: 1.0em;
|
||||
color: black;
|
||||
word-break:break-all;
|
||||
}
|
||||
|
||||
paper-subjects {
|
||||
float: right;
|
||||
text-align: right;
|
||||
width: 50%;
|
||||
font-family:Arial,sans-serif;
|
||||
font-size: 1.0em;
|
||||
color: black;
|
||||
word-break:break-all;
|
||||
}
|
||||
|
||||
|
||||
a:link { text-decoration: none}
|
||||
a:active { text-decoration:blink}
|
||||
a:hover { text-decoration:underline}
|
||||
a:visited { text-decoration: none}
|
||||
a {
|
||||
color: black;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
27
config/subscriber_example.xml
Normal file
27
config/subscriber_example.xml
Normal file
@ -0,0 +1,27 @@
|
||||
<subscriber>
|
||||
<name>name1</name>
|
||||
<email>mail1@mail.com</email>
|
||||
<topics>
|
||||
<topic>cs.CV</topic>
|
||||
<topic>cs.LG</topic>
|
||||
<topic>stat.ML</topic>
|
||||
</topics>
|
||||
<keywords>
|
||||
<keyword>keyword1</keyword>
|
||||
<keyword>keyword2</keyword>
|
||||
</keywords>
|
||||
</subscriber>
|
||||
|
||||
<subscriber>
|
||||
<name>name2</name>
|
||||
<email>mail2@mail.com</email>
|
||||
<topics>
|
||||
<topic>cs.CV</topic>
|
||||
<topic>cs.LG</topic>
|
||||
<topic>stat.ML</topic>
|
||||
</topics>
|
||||
<keywords>
|
||||
<keyword>keyword1</keyword>
|
||||
<keyword>keyword2</keyword>
|
||||
</keywords>
|
||||
</subscriber>
|
||||
10
download_html.py
Normal file
10
download_html.py
Normal file
@ -0,0 +1,10 @@
|
||||
"""Tiny debugging helper: fetch one arXiv listing page and dump its HTML."""

import requests
from html.parser import HTMLParser

# Other URLs used during development:
#   https://arxiv.org/list/cs.CV/pastweek?skip=25&show=25
#   http://xxx.itp.ac.cn/list/cs.CV/recent   (mirror)
response = requests.get('https://arxiv.org/list/cs.CV/recent')
print(response.text)
|
||||
138
email_sender.py
Normal file
138
email_sender.py
Normal file
@ -0,0 +1,138 @@
|
||||
from lib.parser import dom_node, simple_parser
|
||||
from lib import parser
|
||||
from lib import utils
|
||||
import os
|
||||
import config
|
||||
|
||||
import smtplib
|
||||
import email
|
||||
from email.mime.multipart import MIMEMultipart
|
||||
from email.mime.text import MIMEText
|
||||
from email.mime.image import MIMEImage
|
||||
from email.mime.base import MIMEBase
|
||||
from email.mime.application import MIMEApplication
|
||||
from email.header import Header
|
||||
from email import generator
|
||||
def send_mail(reciver, title, content):
    """Send an HTML email over SMTP-SSL using the credentials in config.

    reciver: destination address; title: subject line; content: HTML body.
    Returns True when the server accepted the message, False on any
    failure (the error is printed, never raised).
    """
    username = config.username
    password = config.password
    replyto = config.replyto
    msg = MIMEMultipart('alternative')
    msg['Subject'] = Header(title)
    msg['From'] = '%s <%s>' % (Header(config.sender_name), username)
    msg['To'] = reciver
    msg['Reply-to'] = replyto
    msg['Message-id'] = email.utils.make_msgid()
    msg['Date'] = email.utils.formatdate()
    texthtml = MIMEText(content, _subtype='html', _charset='UTF-8')
    msg.attach(texthtml)

    try:
        client = smtplib.SMTP_SSL(config.smtp_ssl_addr)
        client.set_debuglevel(0)
        client.login(username, password)
        client.sendmail(username, reciver, msg.as_string())
        client.quit()
        print ('Email send to {0} success!'.format(reciver))
        return True
    except smtplib.SMTPConnectError as e:
        print ('Connection Error:', e.smtp_code, e.smtp_error)
    except smtplib.SMTPAuthenticationError as e:
        print ('Authentication Error:', e.smtp_code, e.smtp_error)
    except smtplib.SMTPSenderRefused as e:
        print ('Sender Refused:', e.smtp_code, e.smtp_error)
    except smtplib.SMTPRecipientsRefused as e:
        # Fix: SMTPRecipientsRefused carries a .recipients dict, not
        # smtp_code/smtp_error; the old attribute access itself raised.
        print ('SMTPRecipients Refused:', e.recipients)
    except smtplib.SMTPDataError as e:
        print ('Data Error:', e.smtp_code, e.smtp_error)
    except smtplib.SMTPException as e:
        # Fix: SMTPException has no .message attribute in Python 3.
        print ('SMTPException:', str(e))
    except Exception as e:
        print ('Unknown error:', str(e))
    # Fix: the unknown-error branch previously returned True, making the
    # caller mark the email as sent even though sending failed.
    return False
|
||||
|
||||
class arxiv_emailer():
    """Sends each subscriber's daily feed email.

    Last-send dates are tracked per user in a small XML session file so a
    subscriber receives at most one email per day, even across restarts.
    """

    def __init__(self, arxiv_bot, feeds_generator, session_file = './config/email_session.xml', debug=False):
        self.debug = debug
        self.email_info = dom_node()
        self.session_file = session_file
        # Fix: default to an empty session dict so send_daily_email works
        # even without a session file (previously self.sessions stayed
        # None and membership tests raised TypeError).
        self.sessions = {}
        if self.session_file is not None:
            self.load_session()

        self.bot = arxiv_bot
        self.feeds = feeds_generator

    def send_daily_email(self):
        """Generate today's per-user emails and send the unsent ones.

        In debug mode nothing is actually sent (and the session is not
        updated, since send_mail is never called).
        """
        emails = self.feeds.generate_daily_emails()
        today = utils.str_day()
        for name in emails:
            email = emails[name]
            send = False
            if name not in self.sessions:
                print('New user found!')
                self.sessions[name] = {}
                self.sessions[name]['last-send'] = today
                send = True

            if self.sessions[name]['last-send'] != today:
                send = True

            if send:
                print('Sending email to user {0} [{1}]'.format(name, email['reciver']))
                print('reciver:', email['reciver'])
                print('title:', email['title'])
                print('content:', len(email['content']))
                success = False
                if not self.debug:
                    success = send_mail(email['reciver'], email['title'], email['content'])
                if success:
                    # persist immediately so a crash doesn't resend
                    self.sessions[name]['last-send'] = today
                    self.save_session()
            else:
                print('skipping user {0} since already sent!'.format(name))

    def load_session(self, session_file=None):
        """Load the session dict from the XML session file.

        Falls back to self.session_file; with no file at all, resets the
        sessions to an empty dict instead of crashing on open(None).
        """
        if session_file is None:
            session_file = self.session_file
        if session_file is None:
            # Fix: previously open(None) raised TypeError here.
            self.sessions = {}
            return self.sessions
        with open(session_file, 'r') as f:
            xml = f.read()
        xmlparser = simple_parser()
        xmlparser.feed(xml)
        tree = xmlparser.root
        sessions = parser.dom2dict(tree)
        # dom2dict wraps everything under a 'root' key when present
        if 'root' in sessions:
            sessions = sessions['root']
        else:
            sessions = {}
        self.sessions = sessions
        print(self.sessions)
        return sessions

    def save_session(self, session_file=None):
        """Write the session dict back as XML; no-op without a file path.

        Returns the XML string, or None when there is nowhere to save.
        """
        if session_file is None:
            session_file = self.session_file
        if session_file is None:
            return None
        xml = parser.dict2dom(self.sessions).to_string()
        with open(session_file, 'w') as f:
            f.write(xml)
        return xml
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test: requires config.py with valid SMTP credentials.
    # NOTE(review): session_file is passed as None, so the emailer runs
    # without a backing session file here.
    emailer = arxiv_emailer(None, None, None)
    emailer.send_daily_email()
    print(emailer.load_session())
    print(emailer.save_session())
|
||||
103
feeds.py
Normal file
103
feeds.py
Normal file
@ -0,0 +1,103 @@
|
||||
from lib.parser import dom_node
|
||||
from lib import utils
|
||||
|
||||
|
||||
class feed_manager():
|
||||
def __init__(self, submgr, arxivbot, style='./config/style.css'):
|
||||
self.style_path = style
|
||||
self.style = ''
|
||||
self.bot = arxivbot
|
||||
self.submgr = submgr
|
||||
self.update_style()
|
||||
|
||||
def update_style(self, path = None):
|
||||
if path is None:
|
||||
path = self.style_path
|
||||
print('loading style from:', path)
|
||||
with open(path, 'r') as f:
|
||||
self.style = f.read()
|
||||
self.style += '\n'
|
||||
|
||||
def fetch_today_feed(self):
|
||||
self.today_feed = self.bot.get_today_feed()
|
||||
|
||||
def filter_papers_for_user(self, subscriber):
|
||||
strong_papers = []
|
||||
weak_papers = []
|
||||
keywords = subscriber['keywords']
|
||||
papers = []
|
||||
for topic in subscriber['topics']:
|
||||
if topic in self.today_feed:
|
||||
papers += self.today_feed[topic]
|
||||
else:
|
||||
print('Warning: topic {0} is subscribed but not downloaded!'.format(topic))
|
||||
known_ids = []
|
||||
unique_papers = []
|
||||
for paper in papers:
|
||||
paper_id = paper.arxiv_id
|
||||
if paper_id not in known_ids:
|
||||
unique_papers.append(paper)
|
||||
known_ids.append(paper_id)
|
||||
print('removing {0} repeated papers.'.format(len(papers) - len(unique_papers)))
|
||||
papers = unique_papers
|
||||
for paper in papers:
|
||||
strong = False
|
||||
weak = False
|
||||
for keyword in keywords:
|
||||
if paper.info['title'].lower().find(keyword) != -1:
|
||||
strong = True
|
||||
break;
|
||||
elif paper.info['abstract'].lower().find(keyword) != -1:
|
||||
weak = True
|
||||
if strong:
|
||||
strong_papers.append(paper)
|
||||
elif weak:
|
||||
weak_papers.append(paper)
|
||||
return strong_papers, weak_papers
|
||||
|
||||
def generate_group_feed(self, paper_groups):
|
||||
group_html = ''
|
||||
for key in paper_groups:
|
||||
header = dom_node('paper-group')
|
||||
header.data = key
|
||||
group_html += header.to_string() + '\n'
|
||||
for paper in paper_groups[key]:
|
||||
group_html += paper.to_html() + '\n'
|
||||
return group_html
|
||||
|
||||
def generate_daily_feed_by_matched_paper(self, strong_interested, weak_interested):
    """Group the matched papers under their headings and render one HTML feed."""
    feeds = {}
    if strong_interested:
        feeds['Strong Interested Paper'] = strong_interested
    if weak_interested:
        feeds['Weak Interested Paper'] = weak_interested
    return self.generate_group_feed(feeds)
|
||||
|
||||
def generate_daily_email_by_matched_paper(self, strong_interested, weak_interested):
    """Return the full email body (stylesheet + feed), or '' when nothing matched."""
    body = self.generate_daily_feed_by_matched_paper(strong_interested, weak_interested)
    if not body:
        return ''
    return self.style + body
|
||||
|
||||
def generate_daily_emails(self):
    """Build one email dict (title/reciver/content) per subscriber with matches."""
    self.fetch_today_feed()
    emails = {}
    today = utils.str_day()
    for name in self.submgr.subscribers:
        subscriber = self.submgr.subscribers[name]
        strong, weak = self.filter_papers_for_user(subscriber)
        content = self.generate_daily_email_by_matched_paper(strong, weak)
        reciver = subscriber['email']
        if content == '':
            print('Skipping user {0} [{1}] since no paper matched.'.format(name, reciver))
            continue
        # NOTE(review): 'reciver' is a long-standing misspelling of 'receiver';
        # the key is part of the email dict contract, so it is kept as-is.
        emails[name] = {
            'reciver': reciver,
            'title': "Your Interested Paper On Arxiv Today ({0})".format(today),
            'content': content,
        }
    return emails
|
||||
304
lib/console.py
Normal file
304
lib/console.py
Normal file
@ -0,0 +1,304 @@
|
||||
from . import utils
|
||||
import os
|
||||
import traceback
|
||||
|
||||
class console():
    """A small interactive command shell with aliases and nestable sub-shells.

    Commands are registered through `regist`; a registered action may itself
    be another `console` instance, making it a sub-command namespace.
    `interactive()` runs the prompt loop until `exit_flag` is set.
    """

    def __init__(self, name='base'):
        self.name = name
        self.hint = '$ '
        self.exit_cmd = ['exit', 'quit', 'bye']
        self.exit_info = 'Bye~'
        self.commands = {}    # command -> {'action', 'help', 'kind'}
        self.alias = {}       # alias -> command (or another alias)
        self.warn_level = 4
        self.exit_flag = False
        self.debug = True
        self.platform = utils.detect_platform()
        self.is_child = False   # True when registered inside another console
        self.father = None

        self.regist_internal_command()

    def get_hint(self):
        """Prompt string; ANSI-colored on Linux, plain elsewhere."""
        if self.platform == 'Linux':
            hint = '\033[0;33m({0})\033[0;31m{1}\033[0m'.format(self.name, self.hint)
        else:
            hint = '({0}){1}'.format(self.name, self.hint)
        return hint

    def regist_internal_command(self):
        """Register the built-in commands (kind='sys' hides them in nested help)."""
        self.regist(
            'help',
            action=self.command_help,
            alias=['h'],
            help_info='display this help info.',
            kind='sys'
        )
        self.regist(
            'exit',
            action=self.command_exit_console,
            alias=['quit', 'bye'],
            help_info='exit current console.',
            kind='sys'
        )
        self.regist(
            'cls',
            action=self.command_clear_screen,
            alias=['clear', 'clc'],
            help_info='clear screen.',
            kind='sys'
        )
        self.regist(
            'alias',
            action=self.command_alias,
            help_info='display alias info or create new alias.',
            kind='sys'
        )
        self.regist(
            'os',
            action=self.command_os,
            help_info='run a system command.',
            kind='sys'
        )

    def translate_command(self, command):
        """Follow alias links until a real command name is reached."""
        while command in self.alias and command not in self.commands:
            command = self.alias[command]
        return command

    def find_equal_command(self, command, ret_type=str, ignored=None):
        """Collect every name (command + aliases) equivalent to `command`.

        `ignored` is accepted for interface compatibility but unused; its
        default was changed from a shared mutable `[]` to None.
        """
        finished = []
        cmds = [command]
        while len(finished) != len(cmds):
            # Follow the alias this name points to...
            if command in self.alias:
                if self.alias[command] not in cmds:
                    cmds.append(self.alias[command])
            # ...and every alias pointing at this name.
            for al in self.alias:
                if self.alias[al] == command:
                    if al not in cmds:
                        cmds.append(al)
            finished.append(command)
            # Pick any discovered-but-unvisited name for the next round.
            for cmd in cmds:
                if cmd not in finished:
                    command = cmd

        if ret_type is str:
            finished = utils.list2csv(finished)
        return finished

    def get_alias(self, command, ret_type=str):
        """All aliases that point directly at `command` (csv string by default)."""
        alias = []
        for al in self.alias:
            if self.alias[al] == command:
                alias.append(al)
        if ret_type is str:
            alias = utils.list2csv(alias)
        return alias

    def command_exist(self, command):
        """True when `command` is a registered command or a known alias."""
        if command in self.commands or command in self.alias:
            return True
        else:
            return False

    def add_alias(self, command, alias):
        """Map `alias` -> `command` unless the name is already taken."""
        if self.command_exist(alias):
            # BUG FIX: the original referenced the bare names `warn_level`
            # and `al`, raising NameError whenever this branch was hit.
            if self.warn_level >= 3:
                print('Alias {0} will not be added since already used'.format(alias))
        else:
            self.alias[alias] = command

    # kind: standard or shared
    # standard: help info will be displayed
    # shared: help info will not be displayed in sub command.
    def regist(self, command, action, alias=None, help_info='no help provided.', kind='standard'):
        """Register `command` to run `action` (a callable taking the argument
        string, or another console to nest as a sub-shell)."""
        if type(action) == console:
            action.is_child = True
            action.father = self
        exist = self.command_exist(command)
        if exist:
            if self.warn_level >= 3:
                print('Command {0} will not be added sinece already exist.'.format(command))
            return

        if type(alias) is list:
            for al in alias:
                self.add_alias(command, al)
        elif type(alias) is str:
            self.add_alias(command, alias)
        elif alias is None:
            pass
        else:
            if self.warn_level > 3:
                print('Unknown alias type, no alias will be added.')
        self.commands[command] = {}
        self.commands[command]['action'] = action
        self.commands[command]['help'] = help_info
        self.commands[command]['kind'] = kind

    def handle_command(self, command, args):
        """Dispatch a resolved command; action exceptions are reported, not raised."""
        if command in self.commands:
            act = self.commands[command]['action']
            try:
                act(args)
            except KeyboardInterrupt:
                pass
            except:
                print('Exception occured while processing command \"{0} {1}\".'.format(command, args))
                print('More information are shown below.\n', traceback.format_exc())
        else:
            print('Unknown command \"{0}\"'.format(command))

    # seperate command and its args.
    def parse_command(self, string):
        """Split `string` into (first word, remainder), both whitespace-trimmed."""
        string += ' '
        length = len(string)
        command_end = 0
        parse_start = False
        for i in range(length):
            blank = utils.is_blank(string[i])
            if not blank:
                parse_start = True
            if parse_start and blank:
                command_end = i
                break

        command = string[:command_end]
        command = utils.remove_blank_in_endpoint(command)
        args = utils.remove_blank_in_endpoint(string[command_end:])
        return command, args

    def parse(self, string):
        """Like parse_command, but resolves aliases first (cycle-safe)."""
        command, args = self.parse_command(string)
        exitsted_commands = []
        while command in self.alias:
            if command not in exitsted_commands:
                exitsted_commands.append(command)
                command = self.alias[command]
                string = command + ' ' + args
                command, args = self.parse_command(string)
            else:
                break   # alias cycle detected

        return command, args

    def show_help_info(self, command, prefix, indent, depth=0):
        """Print help for one command; recurses into sub-console actions."""
        command = self.translate_command(command)
        action = self.commands[command]['action']
        kind = self.commands[command]['kind']
        if kind == 'sys' and depth > 0:
            return   # hide built-ins inside nested help listings
        alias = self.get_alias(command, ret_type=str)
        if alias != '':
            print('{0}{1}({2}):'.format(prefix, command, alias))
        else:
            print('{0}{1}:'.format(prefix, command))
        print('{0}{1}{2}'.format(prefix, indent, self.commands[command]['help']))
        if type(action) == console:
            action.command_help('', prefix=prefix+indent, indent=indent, depth=depth+1)

    def debug_log(self, command, args):
        if self.debug:
            print('command:[{0}] args:[{1}]'.format(command, args))

    def command_exit_console(self, args):
        # A nested console exits silently; only the root prints the farewell.
        if not self.is_child:
            print(self.exit_info)
        self.exit_flag = True

    def command_clear_screen(self, args):
        if self.platform == 'Windows':
            os.system('cls')
        elif self.platform == 'Linux':
            os.system('clear')
        return False

    def command_help(self, args, prefix='', indent=' ', depth=0):
        """`help [command]`: show help for one command or for all of them."""
        command, args = self.parse_command(args)
        # BUG FIX: the original compared with `is not ""` (identity), which
        # only worked through CPython string interning; use equality.
        if command != "":
            if self.command_exist(command):
                self.show_help_info(command, prefix, indent, depth)
            else:
                print('Unknown command \"{0}\"'.format(command))
        else:
            for command in self.commands:
                self.show_help_info(command, prefix, indent, depth)

    def command_alias(self, args):
        """`alias` lists all aliases; `alias a=b` adds one; `alias a=` removes
        it; `alias name` explains a single name."""
        alias_parse = args.split('=')
        if len(alias_parse) == 2:
            alias = utils.remove_blank_in_endpoint(alias_parse[0])
            command = utils.remove_blank_in_endpoint(alias_parse[1])
            # BUG FIX: equality instead of the identity test `is not ''`.
            if command != '':
                self.alias[alias] = command
            else:
                del self.alias[alias]
        elif args == '':
            for alias in self.alias:
                print('{0}={1}'.format(alias, self.alias[alias]))
        elif len(alias_parse) == 1:
            if args in self.alias:
                print('{0}={1}'.format(args, self.alias[args]))
                equal_alias = self.find_equal_command(args)
                if equal_alias != '':
                    print('Hint: {0} are all equivalent.'.format(equal_alias))
            elif args in self.commands:
                als = self.get_alias(args, ret_type=str)
                if als == '':
                    print('command {0} has no alias.'.format(args))
                else:
                    print('command {0} is aliased as {1}'.format(args, als))
                    equal_alias = self.find_equal_command(args)
                    if equal_alias != '' and equal_alias != args:
                        print('Hint: {0} are all equivalent.'.format(equal_alias))
            else:
                print('No alias \"{0}\" found.'.format(args))
        else:
            print('Syntax error, command not understood.')

    def command_os(self, args):
        if args == '':
            print('please specify os command')
        else:
            os.system(args)

    def execute(self, string):
        """Parse and dispatch one input line."""
        command, args = self.parse(string)
        # BUG FIX: `!=` instead of the identity comparison `is not ""`.
        if command != "":
            self.handle_command(command, args)

    def __call__(self, args):
        # With arguments: run them as one command; without: drop into the loop.
        if args != '':
            self.execute(args)
        else:
            self.exit_flag = False
            self.interactive()

    def interactive(self):
        """Prompt loop; Ctrl-C cancels the current line instead of exiting."""
        while not self.exit_flag:
            try:
                input_str = input(self.get_hint())
                self.execute(input_str)
            except(KeyboardInterrupt):
                print('')
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test: build a three-level console hierarchy and drop into
    # the interactive loop (blocks reading stdin until 'exit').
    con = console()
    con_sub = console()
    con_sub_sub = console()
    con_sub.regist('test_subsubcommand', con_sub_sub, alias='tss', help_info='A sub command.')
    con.regist('test_subcommand', con_sub, alias='ts', help_info='A sub command.')
    con.interactive()
|
||||
127
lib/parallel.py
Normal file
127
lib/parallel.py
Normal file
@ -0,0 +1,127 @@
|
||||
import threading
|
||||
import queue
|
||||
import time
|
||||
|
||||
class Job():
    """A unit of work: a callable plus its arguments and, after `run`, its result."""

    def __init__(self, func, args=None, kwargs=None, name=None):
        # None defaults instead of `[]`/`{}`: mutable default arguments are
        # shared between calls and would leak state across jobs.
        if name is None:
            name = 'job'
        self.id = None          # assigned by the host via set_id()
        self.name = name
        self.func = func
        self.args = [] if args is None else args
        self.kwargs = {} if kwargs is None else kwargs
        self.results = None     # populated by run()

    def run(self):
        """Execute the callable and store its return value in `self.results`."""
        self.results = self.func(*self.args, **self.kwargs)

    def set_name(self, name):
        self.name = name

    def set_id(self, jid):
        self.id = jid

    def __call__(self):
        self.run()
|
||||
|
||||
class Worker(threading.Thread):
    """Daemon worker thread: pulls jobs off `work_queue`, runs them, and
    moves the completed job objects onto `finished_queue`."""

    def __init__(self, work_queue, finished_queue):
        super(Worker, self).__init__()
        self.queue = work_queue          # pending job objects (must expose .run())
        self.finished = finished_queue   # completed job objects (results inside)
        self.terminate = False           # cooperative stop flag, polled every second
        self.daemon=True                 # do not keep the interpreter alive at exit

    def stop(self):
        # Request exit; takes effect after the current job or the 1s poll timeout.
        self.terminate = True

    def run(self):
        while not self.terminate:
            try:
                task = self.queue.get(timeout=1)
                task.run()
                self.queue.task_done()
                self.finished.put(task)
            except queue.Empty:
                # Timed out with no work: loop again so `terminate` is re-checked.
                pass
            except KeyboardInterrupt:
                print("you stop the threading")
|
||||
|
||||
class ParallelHost():
    """A fixed-size thread pool with integer job ids.

    Jobs enter through `commit`/`add_job`; finished Job objects accumulate on
    an internal results queue and are fetched by id via `get_result`.
    """

    def __init__(self, num_threads=8):
        self.num_threads = num_threads
        self.workers = []
        self.tasks = queue.Queue()      # pending jobs
        self.results = queue.Queue()    # finished Job objects
        self.rets = {}                  # job id -> result, for out-of-order pickup
        self.id = 0                     # last assigned job id
        for i in range(self.num_threads):
            worker = Worker(self.tasks, self.results)
            self.workers.append(worker)
        for worker in self.workers:
            worker.start()

    def __del__(self):
        self.stop('kill')

    # soft stop: wait until all job done
    # hard stop: stop even with unfinished job
    # kill stop: whatever the thread is doing, exit.
    def stop(self, mode='soft'):
        print('Trying to stop.')
        if mode == 'soft':
            self.tasks.join()
            print('All job finished.')
        for worker in self.workers:
            worker.stop()
            if mode == 'kill':
                worker.join(0.01)

    def commit(self, job):
        """Assign the next id to `job`, enqueue it, and return the id."""
        self.id += 1
        job.set_id(self.id)
        self.tasks.put(job)
        return self.id

    def add_job(self, func, args=None, kwargs=None, name=None):
        """Wrap `func` in a Job and commit it; returns the job id.

        None defaults avoid the shared-mutable-default pitfall of the
        original `args=[], kwargs={}` signature.
        """
        job = Job(func,
                  [] if args is None else args,
                  {} if kwargs is None else kwargs,
                  name)
        return self.commit(job)

    def collect_all(self):
        """Drain the finished queue into the `rets` id->result map."""
        while not self.results.empty():
            task = self.results.get()
            self.rets[task.id] = task.results

    def get_result(self, jid, block=False):
        """Return (and forget) the result of job `jid`.

        Non-matching finished jobs encountered while searching are parked in
        `rets`.  With block=False, returns None when the result is not ready.
        """
        if jid in self.rets:
            ret = self.rets[jid]
            del self.rets[jid]
            return ret
        while True:
            if self.results.empty() and not block:
                break
            task = self.results.get()
            # BUG FIX: jobs carry their id in `task.id` (set via set_id); the
            # original read the non-existent attribute `task.jid` and raised
            # AttributeError on every lookup that reached this loop.
            if task.id == jid:
                return task.results
            else:
                self.rets[task.id] = task.results

    def clear_results(self):
        """Discard every finished result, both queued and already collected."""
        while not self.results.empty():
            self.results.get()
        self.rets = {}
|
||||
|
||||
if __name__ == '__main__':
    # Smoke test: run ten 5-second printing jobs on the pool, then tear down.
    host = ParallelHost()

    def loop_print(info, num):
        for i in range(num):
            print(info + ':' + str(i))
            time.sleep(1)

    for i in range(10):
        host.add_job(loop_print, ["loop_print_{0}".format(i), 5])

    # BUG FIX: ParallelHost has no `terminate` method; `stop` is the
    # shutdown entry point ('kill' also joins each worker briefly).
    host.stop('kill')
|
||||
151
lib/parser.py
Normal file
151
lib/parser.py
Normal file
@ -0,0 +1,151 @@
|
||||
from html.parser import HTMLParser
|
||||
from . import utils
|
||||
|
||||
def dict_to_arrtibute_string(attributes):
    """Serialise an attribute dict as `key="value";` pairs.

    (The function name keeps its historical typo: callers depend on it.)
    """
    return ''.join('{0}="{1}";'.format(key, attributes[key]) for key in attributes)
|
||||
|
||||
def attribute_string_to_dict(attrs):
    """Turn html.parser-style (name, value) pairs into a dict; later duplicates win."""
    return {pair[0]: pair[1] for pair in attrs}
|
||||
|
||||
|
||||
class dom_node():
    """A minimal DOM tree node: tag name, attributes, child nodes, and text data."""

    def __init__(self, name=None, attributes=None):
        if name is not None:
            self.name = name
        else:
            self.name = 'Node'

        # BUG FIX: the original default `attributes={}` was one shared dict;
        # mutating one node's attributes silently changed every other node
        # built with the default.  Each node now gets its own dict.
        self.attributes = {} if attributes is None else attributes
        self.childs = []
        self.data = None      # text content, if any
        self.father = None    # parent node, set by add_child

    def add_child(self, child):
        if child is not None:
            child.father = self
            self.childs.append(child)

    def to_string(self, prefix='', indent=' '):
        """Render the subtree as indented XML-ish markup."""
        string = prefix + '<' + self.name
        if self.attributes:
            string += ' ' + dict_to_arrtibute_string(self.attributes)
        string += '>\n'

        for child in self.childs:
            string += child.to_string(prefix=prefix+indent, indent=indent)

        if self.data is not None:
            string += prefix + indent + self.data + '\n'

        string += prefix + '</{0}>\n'.format(self.name)
        return string

    def has_child(self, name):
        """True when a *direct* child has tag `name`."""
        for child in self.childs:
            if child.name == name:
                return True
        return False

    def search(self, name):
        """Depth-first search; `name` may be a single tag or a list of tags."""
        founded_node = []
        if type(name) is list:
            if self.name in name:
                founded_node.append(self)
        else:
            if self.name == name:
                founded_node.append(self)
        for child in self.childs:
            founded_node += child.search(name)
        return founded_node
|
||||
|
||||
def dict2dom(d, root_name='root'):
    """Recursively convert a (possibly nested) dict into a dom_node tree.

    Dict values become child subtrees; list values expand into repeated 'li'
    children; anything else is stringified into the child's text data.
    """
    node = dom_node(root_name)
    for key, elem in d.items():
        if type(elem) is dict:
            child_node = dict2dom(elem, root_name=str(key))
        elif type(elem) is list:
            child_node = dom_node(name=str(key))
            for subelem in elem:
                if type(subelem) is dict:
                    child_node.add_child(dict2dom(subelem, root_name='li'))
                else:
                    sub_node = dom_node('li')
                    sub_node.data = str(subelem)
                    child_node.add_child(sub_node)
        else:
            child_node = dom_node(name=str(key))
            child_node.data = str(elem)
        node.add_child(child_node)
    return node
|
||||
|
||||
# Conversion rules (node -> dict entry):
#   - a node with only text data becomes {'name': 'data'}
#   - a node with children becomes {'name': {...}}
#   - a node with both data and children has its data ignored
#   - repeated sibling names are collected into a list
#   - with replace_li=True, a child dict whose single key is 'li' is replaced
#     by that key's value, undoing dict2dom's list encoding
def dom2dict(dom, replace_li = True):
    """Convert a dom_node subtree back into nested dicts/lists (inverse-ish of dict2dom)."""
    dictionary = {}
    for child in dom.childs:
        name = child.name
        content = None
        if len(child.childs) != 0:
            content = dom2dict(child, replace_li)
        else:
            content = child.data
            if content is None:
                content = ''
            # Normalise whitespace in leaf text.
            content = utils.clean_text(content)
        if name in dictionary:
            # Second occurrence of a tag name: promote the entry to a list.
            if type(dictionary[name]) is not list:
                previous = dictionary[name]
                dictionary[name] = [previous, content]
            else:
                dictionary[name].append(content)
        else:
            dictionary[name] = content

    if replace_li:
        for key in dictionary:
            item = dictionary[key]
            if type(item) is dict:
                li = None
                if len(item.keys()) == 1:
                    for subkey in item:
                        if subkey == 'li':
                            li = item[subkey]
                if li is not None:
                    dictionary[key] = li
    return dictionary
|
||||
|
||||
class simple_parser(HTMLParser):
    """HTMLParser subclass that builds a dom_node tree while parsing.

    `self.root` is a synthetic 'root' node; `self.current_node` tracks the
    innermost open tag.  Void/self-closing tags are not special-cased.
    """

    def __init__(self):
        super(simple_parser, self).__init__()
        self.root = dom_node('root')
        self.current_node = self.root

    def handle_starttag(self, tag, attrs):
        node = dom_node(tag, attribute_string_to_dict(attrs))
        self.current_node.add_child(node)
        self.current_node = node   # descend into the newly opened tag

    def handle_endtag(self, tag):
        # Ascend one level; assumes tags are properly nested.
        self.current_node = self.current_node.father

    def handle_data(self, data):
        node = self.current_node
        node.data = data if node.data is None else node.data + data
|
||||
19
lib/screen.py
Normal file
19
lib/screen.py
Normal file
@ -0,0 +1,19 @@
|
||||
import sys
|
||||
|
||||
class VirtualScreen():
    """An in-memory log sink: `write` appends entries, `last` replays the tail.

    Duck-types a stream's `write` method so it can stand in for stdout.
    """

    def __init__(self, max_history=1000):
        self.max_history = max_history
        self.contents = []

    def write(self, message):
        self.contents.append(message)
        # BUG FIX: `max_history` was accepted but never enforced, so the
        # buffer grew without bound; keep only the most recent entries.
        if len(self.contents) > self.max_history:
            del self.contents[:len(self.contents) - self.max_history]

    def last(self, line=10, output=sys.stdout):
        """Write the last `line` entries (each followed by a newline) to `output`."""
        start_line = max(len(self.contents) - line, 0)
        for entry in self.contents[start_line:]:
            output.write(entry)
            output.write('\n')
|
||||
244
lib/service.py
Normal file
244
lib/service.py
Normal file
@ -0,0 +1,244 @@
|
||||
import time
|
||||
import sys
|
||||
import shlex
|
||||
import argparse
|
||||
|
||||
from croniter import croniter
|
||||
from . import utils
|
||||
from . import parallel
|
||||
from . import console
|
||||
from . import screen
|
||||
from . import utils
|
||||
|
||||
class service():
    """One scheduled action: a callable plus a cron expression driving when it runs."""

    def __init__(self, action, args=None, kwargs=None, cron='* * * * *', managed_output=False, name='service'):
        self.name = name
        self.action = action
        # When True, the manager's output stream is prepended to the args.
        self.managed_output = managed_output
        # BUG FIX: None defaults instead of `args=[], kwargs={}` — the
        # mutable defaults were shared between every service instance.
        self.args = [] if args is None else args
        self.kwargs = {} if kwargs is None else kwargs
        self.output = sys.stdout     # replaced by ServiceManager.add
        self.last_result = None
        self.cronexpr = cron
        self.croniter = croniter(self.cronexpr, time.time())
        self.next_time = self.croniter.get_next()   # epoch seconds of the next run

    def run(self, daemon=None, dry=False):
        """Run the action now.

        daemon: a ParallelHost to run on asynchronously; None runs inline.
        dry:    when True, do not advance the cron schedule (manual trigger).
        """
        if not dry:
            self.next_time = self.croniter.get_next()

        if self.managed_output:
            new_args = [self.output, *self.args]
        else:
            new_args = self.args
        if daemon is None:
            self.last_result = self.action(*new_args, **self.kwargs)
        else:
            daemon.add_job(self.action, new_args, self.kwargs, self.name)
|
||||
|
||||
class ServiceManager():
    """Owns the scheduled services and the loop that fires them on time.

    Services live in `services` keyed by an integer sid; deleting moves a
    service into `deleted_services` (a recycle bin) so it can be recovered.
    Actual work runs on a parallel.ParallelHost thread pool.
    """

    def __init__(self, debug=False, output=sys.stdout):
        self.debug = debug
        self.services = {}            # sid -> service (active)
        self.deleted_services = {}    # sid -> service (recycle bin)
        self.protected_service = []   # sids that may not be deleted
        self.daemon = parallel.ParallelHost()
        self.sid = 0                  # last assigned service id
        self.terminate = False        # set by stop(); ends loop()
        self.output = output          # stream shared with managed services

        # Install the internal no-op 'refresh' service so loop() always has
        # an upcoming event to wait for.
        self.set_refresh_time()

    def stop(self):
        # Stop the worker pool first, then let loop() exit on its next check.
        self.daemon.stop()
        self.terminate = True

    def __del__(self):
        self.stop()

    def log(self, *args, end='\n'):
        """Timestamped write of all args to the managed output stream."""
        self.output.write('[{0}]'.format(utils.str_time()))
        for arg in args:
            arg = str(arg)
            self.output.write(arg)
        self.output.write(end)

    def add(self, service, protected=False):
        """Register a service and return its sid; protected services cannot be deleted."""
        self.sid += 1
        service.output = self.output   # route the service's managed output here
        self.services[self.sid] = service
        if protected:
            self.protected_service.append(self.sid)
        return self.sid

    def delete(self, sid):
        """Move service `sid` to the recycle bin (unless protected)."""
        if sid in self.protected_service:
            self.log('Can not delete protected service.')
            return
        if sid in self.services:
            self.deleted_services[sid] = self.services[sid]
            del self.services[sid]
        else:
            self.log('The sid [{0}] do not exist!'.format(sid))

    def recover(self, sid):
        """Restore a previously deleted service from the recycle bin."""
        if sid in self.deleted_services:
            self.services[sid] = self.deleted_services[sid]
            del self.deleted_services[sid]
        else:
            self.log('The sid [{0}] is not found recycle bin.'.format(sid))

    def set_refresh_time(self, refresh_cron='* * * * *'):
        """Register a protected do-nothing service firing on `refresh_cron`,
        which bounds how long loop() can sleep between schedule checks."""
        def refresh():
            pass
        refresh_service = service(refresh, cron=refresh_cron, name='refresh')
        self.add(refresh_service, protected = True)

    def get_next(self):
        """Return (sid, epoch time) of the service scheduled soonest.

        NOTE(review): returns (-1, -1) when `services` is empty and loop()
        would then KeyError; the built-in refresh service normally prevents
        this, but stop-then-loop could still hit it — TODO confirm.
        """
        next_sid = -1
        next_time = -1
        for sid in self.services:
            # (local `service` shadows the module-level class; kept as-is)
            service = self.services[sid]
            if service.next_time < next_time or next_sid < 0:
                next_sid = sid
                next_time = service.next_time
        return next_sid, next_time

    def loop(self):
        """Scheduler loop: sleep until the next service is due, then run it on the pool."""
        while not self.terminate:
            next_sid, next_time = self.get_next()
            service = self.services[next_sid]
            sleep_time = next_time - time.time()
            if sleep_time > 0:
                time.sleep(sleep_time)
            self.log('Running service {0} (SID={1})'.format(service.name, next_sid))
            # Re-check membership: the service may have been deleted while we slept.
            if next_sid in self.services:
                service.run(self.daemon)
            else:
                self.log('the sheduled service wiil not run since it is canceled.')

    # mode: background: return immidietly
    # foreground: stuck here.
    def start(self, mode='background'):
        """Run the scheduler loop on the pool ('background') or inline (anything else)."""
        if mode == 'background':
            self.daemon.add_job(self.loop, name='service main loop')
        else:
            self.loop()
|
||||
|
||||
def get_service_console(manager, name='service'):
    """Build an interactive console exposing show/run/info/next/add/delete/
    recover commands over the given ServiceManager."""

    con = console.console(name)

    def command_show(args):
        # List active services, then the recycle bin.
        print('Active services:')
        for sid in manager.services:
            print('SID: {0} | Name: {1}'.format(sid, manager.services[sid].name))
        print('Deleted services:')
        for sid in manager.deleted_services:
            print('SID: {0} | Name: {1}'.format(sid, manager.deleted_services[sid].name))

    def command_add(args):
        # `add <cron> <task> [--name NAME]`: schedule a console command string.
        parser = argparse.ArgumentParser()
        parser.add_argument('cron', type=str, help='A cron expr')
        parser.add_argument('task', type=str, help='task to run, should be a valid command')
        parser.add_argument('--name', '-n', type=str, default='command service', help='name of the task')
        args = shlex.split(args)
        args = parser.parse_args(args)
        cron = args.cron
        if not croniter.is_valid(cron):
            print('Invalid cron expression.')
            # NOTE(review): execution falls through after this message, and
            # constructing the service below will raise on the bad cron —
            # an early return here looks intended. TODO confirm.
        task = args.task
        name = args.name
        # The scheduled action replays the command string through the console.
        service_to_add = service(con.execute, args=[task], cron=cron, name=name)
        manager.add(service_to_add)

    def command_delete(args):
        # `delete <sid>`: move an active service to the recycle bin.
        sid = None
        if args.isdigit():
            if int(args) in manager.services:
                sid = int(args)
        if sid is not None:
            manager.delete(sid)
        else:
            print('command arugment \"{0}\" is not understood.'.format(args))

    def command_recover(args):
        # `recover <sid>`: restore a service from the recycle bin.
        sid = None
        if args.isdigit():
            if int(args) in manager.deleted_services:
                sid = int(args)
        if sid is not None:
            manager.recover(sid)
        else:
            print('command arugment \"{0}\" is not understood.'.format(args))

    def command_run(args):
        # `run <sid>`: trigger a service now without advancing its schedule.
        sid = None
        if args.isdigit():
            if int(args) in manager.services:
                sid = int(args)
        if sid is not None:
            manager.services[sid].run(dry=True)
        else:
            print('command arugment \"{0}\" is not understood.'.format(args))

    def command_info(args):
        # `info [n]`: show the last n (default 10) entries of the manager's log screen.
        line = None
        if args != '':
            if args.isdigit():
                line = int(args)
        if line is None:
            line = 10
        manager.output.last(line)

    def command_next(args):
        # `next`: describe the service scheduled to fire soonest.
        next_sid, next_time = manager.get_next()
        info = ''
        indent = ' '
        info += 'Next Job: {0}'.format(manager.services[next_sid].name)
        info += '\n{0}SID: {1}'.format(indent, next_sid)
        info += '\n{0}Scheduled Running Time: {1}'.format(indent, utils.time2str(next_time))
        info += '\n{0}Remeaning Time: {1}s'.format(indent, utils.float2str(next_time-time.time()))
        print(info)

    con.regist('show', command_show, help_info='Show all services.', alias=['ls'])
    con.regist('run', command_run, help_info='Run a service.')
    con.regist('info', command_info, help_info='Display service output log.')
    con.regist('next', command_next, help_info='Next job to run.')
    con.regist('add', command_add, help_info='Register a command as service.')
    con.regist('delete', command_delete, help_info='Delete a service', alias=['del'])
    con.regist('recover', command_recover, help_info='Recover a service.')
    return con
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual demo: two cron-driven services writing into a virtual screen,
    # managed from a nested interactive console.

    def func1(output):
        output.write('func1')

    def func2(output):
        output.write('func2')

    def add(a, b):
        print('{0} + {1} = {2}'.format(a, b, a+b))

    def command_add(args):
        # Console adapter: parse "a b" from the raw argument string.
        numbers = args.split(' ')
        a = float(numbers[0])
        b = float(numbers[1])
        add(a, b)

    log_screen = screen.VirtualScreen()
    manager = ServiceManager(output=log_screen)
    # managed_output=True makes the manager pass its log screen as first arg.
    test1 = service(func1, cron='* * * * *', name='test1', managed_output=True)
    test2 = service(func2, cron='* * * * *', name='test2', managed_output=True)
    manager.add(test1)
    manager.add(test2)
    manager.start('background')

    con = get_service_console(manager)
    master = console.console()
    master.regist('service', con, help_info='service console')
    master.regist('add', command_add, help_info='Add two numbers.')
    master.interactive()
|
||||
15
lib/try.py
Normal file
15
lib/try.py
Normal file
@ -0,0 +1,15 @@
|
||||
# Scratch demo: forwarding positional and keyword arguments through a wrapper.

def func(a, b, c, time=0, work=1):
    print('a:{0} b:{1} c:{2}'.format(a, b, c))
    print('time:{0} work:{1}'.format(time, work))


def funcwrap(func, kargs, kkargs):
    # Unpack the captured argument list/dict into the wrapped callable.
    func(*kargs, **kkargs)


kargs = [1, 2, 3]
kkargs = {
    "time":1234,
    "work":1232
}

funcwrap(func, kargs, kkargs)
|
||||
139
lib/utils.py
Normal file
139
lib/utils.py
Normal file
@ -0,0 +1,139 @@
|
||||
import pickle
|
||||
import time
|
||||
import os
|
||||
import re
|
||||
import platform
|
||||
|
||||
def detect_platform():
    """Classify the host OS as 'Windows', 'Linux', or 'Unknown' from platform.platform()."""
    descriptor = platform.platform()
    if 'Windows' in descriptor:
        return 'Windows'
    if 'Linux' in descriptor:
        return 'Linux'
    return 'Unknown'
|
||||
|
||||
def ensure_dir_exist(directory, show_info=True):
    """Create `directory` (including missing parents) when it does not exist.

    show_info: print a notice when the directory has to be created — the
    original accepted this flag but ignored it.
    """
    if not os.path.isdir(directory):
        if show_info:
            print('directory', directory, ' not found, creating...')
        # makedirs(exist_ok=True) also creates intermediate directories and
        # closes the check-then-create race, unlike the original os.mkdir.
        os.makedirs(directory, exist_ok=True)
|
||||
|
||||
def validateTitle(title):
    """Make `title` safe for use as a filename by replacing /\\:*?"<>| with spaces."""
    return re.sub(r"[\/\\\:\*\?\"\<\>\|]", " ", title)
|
||||
|
||||
def list2csv(l):
    """Join the items of `l` into a comma-separated string ('' for an empty list)."""
    # str.join is linear; the original `csv += item + ','` loop was quadratic
    # and relied on slicing off a trailing comma afterwards.
    return ','.join(str(item) for item in l)
|
||||
|
||||
def clean_text(string):
    """Flatten newlines into spaces and collapse runs of spaces; None becomes ''."""
    if string is None:
        return ''
    flattened = string.replace('\n', ' ')
    # Split on single spaces and drop the empties, then re-join: collapses
    # any run of spaces to one (tabs are left untouched, as before).
    words = [w for w in flattened.split(' ') if w != '']
    return ' '.join(words)
|
||||
|
||||
def clean_split(string, delimiter=' '):
    """Split on `delimiter` and drop the empty fragments."""
    # `!= ''` replaces the original identity test `is not ''`, which only
    # worked by the accident of CPython string interning (and warns on 3.8+).
    return [part for part in string.split(delimiter) if part != '']
|
||||
|
||||
def remove_blank_in_endpoint(string):
    """Strip spaces, tabs and newlines from both ends of `string`.

    Equivalent to the original two index-walking loops over is_blank().
    """
    return string.strip(' \t\n')
|
||||
|
||||
def is_blank(ch):
    """True for the characters this module treats as blank: space, tab, newline."""
    return ch in (' ', '\t', '\n')
|
||||
|
||||
def dict_to_arrtibute_string(attributes):
    """Serialise a dict as 'key="value";' pairs.

    (Duplicate of lib/parser.py's helper; the misspelt name is the public API.)
    """
    parts = ['{0}="{1}";'.format(key, attributes[key]) for key in attributes]
    return ''.join(parts)
|
||||
|
||||
def attribute_string_to_dict(attrs):
    """Convert (name, value) pairs into a dict; later duplicates win."""
    return {entry[0]: entry[1] for entry in attrs}
|
||||
|
||||
def save_python_object(obj, save_path):
    """Pickle `obj` to `save_path` (binary file)."""
    with open(save_path, 'wb') as sink:
        pickle.dump(obj, sink)
|
||||
|
||||
def load_python_object(path):
    """Unpickle and return the object stored at `path`.

    NOTE: pickle is unsafe on untrusted files; only load files this program wrote.
    """
    with open(path, 'rb') as source:
        return pickle.load(source)
|
||||
|
||||
def delete_n(string):
    """Replace every newline in `string` with a single space."""
    # str.replace already substitutes all occurrences; the original's
    # while-loop repetition was redundant but harmless.
    return string.replace('\n', ' ')
|
||||
|
||||
def remove_additional_blank(string):
    """Collapse runs of spaces to single spaces and drop leading/trailing ones."""
    # `!= ''` replaces the interning-dependent identity test `is not ''`;
    # ' '.join replaces the append-then-chop-trailing-space concatenation.
    words = [word for word in string.split(' ') if word != '']
    return ' '.join(words)
|
||||
|
||||
def formal_text(text):
    """Normalise `text`: newlines become spaces, then repeated spaces collapse."""
    return remove_additional_blank(delete_n(text))
|
||||
|
||||
def float2str(f, precision=2):
    """Truncate the textual form of `f` to `precision - 1` digits after the dot.

    (The off-by-one-looking arithmetic matches the historical behaviour:
    precision=2 yields one digit after the decimal point.)
    """
    text = str(f)
    dot = text.find('.')
    # BUG FIX: with no '.', find() returned -1 and the slice chopped the
    # number itself (e.g. '123' became '1'); integers now pass through.
    if dot == -1:
        return text
    return text[:dot + precision]
|
||||
|
||||
# ========== time realted operation ========== #
|
||||
|
||||
def str_day():
    """Return today's local date formatted as 'YYYY-MM-DD'."""
    return time.strftime("%Y-%m-%d", time.localtime())
|
||||
|
||||
def time2str(t):
    """Format a Unix timestamp *t* (anything int() accepts) as a
    local-time string via str_time."""
    return str_time(time.localtime(int(t)))
|
||||
|
||||
def str_time(local_time = None):
    """Format *local_time* (a struct_time; defaults to now) as
    'YYYY-MM-DD-HHh-MMm-SSs'.

    Fix: the original format string ended in an unmatched ')'
    ("...-%Ss)"), which leaked a stray parenthesis into every
    generated timestamp.
    """
    if local_time is None:
        local_time = time.localtime()
    return time.strftime("%Y-%m-%d-%Hh-%Mm-%Ss", local_time)
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test: print today's date string.
    print(str_day())
|
||||
40
main.py
Normal file
40
main.py
Normal file
@ -0,0 +1,40 @@
|
||||
import arxiv_bot
import email_sender
import subscriber_utils
import feeds
from lib import utils
import os
from lib import service
from lib.console import console
from lib import screen

# The manager's constructor already loads config/subscriber.xml when a
# path is configured, so the explicit load() call below is redundant.
subscribe_manager = subscriber_utils.subscribe_manager()
# subscribe_manager.load()

# Wire the bot (fetches papers for all subscribed topics), the feed
# generator and the mailer together.
bot = arxiv_bot.arxiv_bot(subscribe_manager.get_subscribed_topics())
feeds_generator = feeds.feed_manager(subscribe_manager, bot)
emailer = email_sender.arxiv_emailer(bot, feeds_generator, debug=False)

# Service output goes to a virtual screen instead of stdout so it does
# not interfere with the interactive shell below.
log_screen = screen.VirtualScreen()
manager = service.ServiceManager(output=log_screen)

# Send the daily digest at 04:00 on weekdays (cron fields:
# minute hour day month weekday).
daily_mail_service = service.service(
    emailer.send_daily_email,
    cron='0 4 * * 1-5',
    name = 'send daily email'
)
manager.add(daily_mail_service)

shell = console('ArxivBot')
def command_load(args):
    """Shell command handler: reload a config section by name."""
    if args == 'subscriber':
        subscribe_manager.load()

shell.regist('load', command_load, help_info='load config. (only subscriber supported till now)')
service_shell = service.get_service_console(manager, 'ServiceManager')
shell.regist('service', service_shell, help_info='service mamager')

# cron time:
# min hour day month week
# Start the background scheduler, then block on the interactive shell.
manager.start()
shell.interactive()
|
||||
92
subscriber_utils.py
Normal file
92
subscriber_utils.py
Normal file
@ -0,0 +1,92 @@
|
||||
from lib.parser import dom_node, simple_parser
|
||||
|
||||
|
||||
class subscribe_manager():
    """Load and expose subscriber information from an XML config file.

    self.subscribers maps a subscriber name to a dict with keys
    'email' (str), 'topics' (list of str) and 'keywords' (list of str).
    """

    def __init__(self, subscriber_config = './config/subscriber.xml'):
        # Path of the XML config; None disables automatic loading.
        self.subscriber_config = None
        self.subscribers = {}
        if subscriber_config is not None:
            self.subscriber_config = subscriber_config
            self.load()

    def show(self):
        """Print every subscriber's name and email to stdout."""
        if self.subscribers is None:
            print('No subscriber found!')
        else:
            for name in self.subscribers:
                print('Name:', name, 'Email:', self.subscribers[name]['email'])

    def load(self, path=None):
        """Parse subscribers from the XML file at *path*.

        Falls back to self.subscriber_config when *path* is None, and
        does nothing when no path is configured at all. On success
        replaces self.subscribers wholesale and prints the new list.
        """
        if path is None:
            path = self.subscriber_config
        if path is None:
            return None
        with open(path, 'r') as f:
            xml = f.read()
        parser = simple_parser()
        parser.feed(xml)
        tree = parser.root
        subscribers = {}
        if tree is not None:
            for person in tree.childs:
                person_name = None
                person_email = None
                person_topics = []
                person_keywords = []
                for item in person.childs:
                    if item.name == 'name':
                        person_name = item.data
                    elif item.name == 'email':
                        person_email = item.data
                    elif item.name == 'topics':
                        person_topics += [t.data for t in item.childs
                                          if t.name == 'topic']
                    elif item.name == 'keywords':
                        person_keywords += [k.data for k in item.childs
                                            if k.name == 'keyword']
                # person_topics is always a list here, so the original
                # `person_topics is not None` clause was always true;
                # only name and email are actually required.
                if person_name is not None and person_email is not None:
                    subscribers[person_name] = {
                        'keywords': person_keywords,
                        'email': person_email,
                        'topics': person_topics,
                    }
        self.subscribers = subscribers
        print('Subscriber load success! All subscribers are shown below:')
        self.show()

    def get_subscribed_topics(self):
        """Return the set of all topics any subscriber follows."""
        topics = []
        for subscriber in self.subscribers.values():
            topics += subscriber['topics']
        return set(topics)

    def get_subscribed_keywords(self):
        """Return the set of all keywords any subscriber uses."""
        keywords = []
        for subscriber in self.subscribers.values():
            keywords += subscriber['keywords']
        return set(keywords)

    def get_keywords_of_topics(self):
        """Map each subscribed topic to the concatenated keyword lists
        of every subscriber following that topic (duplicates kept)."""
        keywords_of_topics = {}
        for subscriber in self.subscribers.values():
            for topic in subscriber['topics']:
                keywords_of_topics.setdefault(topic, [])
                keywords_of_topics[topic] += subscriber['keywords']
        return keywords_of_topics
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Smoke test: load the default config and dump everything we parsed.
    mgr = subscribe_manager()
    print(mgr.subscribers)
    print(mgr.get_subscribed_topics())
    print(mgr.get_subscribed_keywords())
    print(mgr.get_keywords_of_topics())
|
||||
21
try.py
Normal file
21
try.py
Normal file
@ -0,0 +1,21 @@
|
||||
import arxiv_service
import time

# Start from the current wall-clock time.
current = arxiv_service.cron_time(time.localtime(time.time()))
# current.show()
# while True:
#     current.next_day()
#     current.show()

# cron expression fields:
# minute hour day month week year
# * means always.
# a-b means from a to b (a and b included)
# a means run at this time.
# must match all to execute a command.

# Feb 29 at midnight — exercises the scheduler's leap-year handling.
leap_schedule = arxiv_service.cron_expr('0 0 29 2 * *')
for _ in range(10):
    current = leap_schedule.next_run(current)
    current.show()
    print(current.to_struct_time())
||||
Loading…
Reference in New Issue
Block a user