104 lines
3.2 KiB
Python
104 lines
3.2 KiB
Python
from lib.parser import dom_node
|
|
from lib import utils
|
|
|
|
|
|
class feed_manager():
    """Build per-subscriber daily e-mail digests from today's paper feed.

    The manager pulls the day's feed from ``arxivbot``, selects for each
    subscriber the papers whose title or abstract mentions one of the
    subscriber's keywords, and renders the matches as an HTML fragment
    prefixed with the contents of a style sheet file.
    """

    def __init__(self, submgr, arxivbot, style='./config/style.css'):
        """Store collaborators and load the style sheet.

        Args:
            submgr: Object exposing a ``subscribers`` mapping of
                name -> subscriber dict (keys read here: ``'keywords'``,
                ``'topics'``, ``'email'``).
            arxivbot: Object exposing ``get_today_feed()``.
            style: Path of the style file prepended to every e-mail body.

        Raises:
            OSError: If the style file cannot be opened.
        """
        self.style_path = style
        self.style = ''
        self.bot = arxivbot
        self.submgr = submgr
        # Start with an empty feed so that filtering before the first
        # fetch reports "not downloaded" instead of raising AttributeError.
        self.today_feed = {}
        self.update_style()

    def update_style(self, path=None):
        """(Re)load the style text from ``path`` into ``self.style``.

        Args:
            path: File to read; defaults to the path given at construction.
                Passing a path does not change the stored default.
        """
        if path is None:
            path = self.style_path
        print('loading style from:', path)
        with open(path, 'r') as f:
            # A trailing newline separates the style from the feed markup.
            self.style = f.read() + '\n'

    def fetch_today_feed(self):
        """Refresh ``self.today_feed`` from the bot."""
        self.today_feed = self.bot.get_today_feed()

    def _unique_papers_for_topics(self, topics):
        """Collect today's papers for ``topics``, dropping repeated ids.

        The first occurrence of each ``arxiv_id`` is kept and the original
        order is preserved. Topics missing from the feed only trigger a
        warning.
        """
        papers = []
        for topic in topics:
            if topic in self.today_feed:
                papers += self.today_feed[topic]
            else:
                print('Warning: topic {0} is subscribed but not downloaded!'.format(topic))

        # Set membership keeps de-duplication linear; assumes arxiv_id is
        # hashable (e.g. a string).
        known_ids = set()
        unique_papers = []
        for paper in papers:
            paper_id = paper.arxiv_id
            if paper_id not in known_ids:
                known_ids.add(paper_id)
                unique_papers.append(paper)
        print('removing {0} repeated papers.'.format(len(papers) - len(unique_papers)))
        return unique_papers

    def filter_papers_for_user(self, subscriber):
        """Split today's papers into strong and weak keyword matches.

        A paper is a *strong* match when any keyword occurs in its title and
        a *weak* match when no keyword occurs in the title but one occurs in
        the abstract. Matching is case-insensitive substring search.

        Args:
            subscriber: Dict with ``'keywords'`` (iterable of str) and
                ``'topics'`` (iterable of feed keys).

        Returns:
            A ``(strong_papers, weak_papers)`` tuple of lists.
        """
        # Titles and abstracts are lower-cased before comparison, so the
        # keywords must be lower-cased too or mixed-case ones never match.
        keywords = [keyword.lower() for keyword in subscriber['keywords']]
        papers = self._unique_papers_for_topics(subscriber['topics'])

        strong_papers = []
        weak_papers = []
        if not keywords:
            # Nothing can match; skip touching the paper metadata at all.
            return strong_papers, weak_papers

        for paper in papers:
            title = paper.info['title'].lower()
            if any(keyword in title for keyword in keywords):
                strong_papers.append(paper)
                continue
            abstract = paper.info['abstract'].lower()
            if any(keyword in abstract for keyword in keywords):
                weak_papers.append(paper)
        return strong_papers, weak_papers

    def generate_group_feed(self, paper_groups):
        """Render groups of papers as markup.

        Args:
            paper_groups: Mapping of group title -> iterable of papers, each
                providing ``to_html()``.

        Returns:
            For every group, a ``paper-group`` node carrying the title
            followed by each paper's HTML, one item per line. Empty string
            when there are no groups.
        """
        parts = []
        for key, papers in paper_groups.items():
            header = dom_node('paper-group')
            header.data = key
            parts.append(header.to_string() + '\n')
            parts.extend(paper.to_html() + '\n' for paper in papers)
        return ''.join(parts)

    def generate_daily_feed_by_matched_paper(self, strong_interested, weak_interested):
        """Render the strong and weak matches, omitting empty groups.

        Returns:
            The rendered markup, or ``''`` when both lists are empty.
        """
        feeds = {}
        if strong_interested:
            feeds['Strong Interested Paper'] = strong_interested
        if weak_interested:
            feeds['Weak Interested Paper'] = weak_interested
        return self.generate_group_feed(feeds)

    def generate_daily_email_by_matched_paper(self, strong_interested, weak_interested):
        """Return the e-mail body (style + feed), or ``''`` if nothing matched."""
        xml_feed = self.generate_daily_feed_by_matched_paper(strong_interested, weak_interested)
        if not xml_feed:
            # No style-only e-mails: an empty feed yields an empty body.
            return ''
        return self.style + xml_feed

    def generate_daily_emails(self):
        """Fetch today's feed and build one e-mail per subscriber with matches.

        Returns:
            Dict of subscriber name -> e-mail dict with the keys
            ``'reciver'`` (address; the spelling is part of the existing
            interface), ``'title'`` and ``'content'``. Subscribers without
            any matching paper are skipped.
        """
        self.fetch_today_feed()
        emails = {}
        today = utils.str_day()
        for name, subscriber in self.submgr.subscribers.items():
            strong, weak = self.filter_papers_for_user(subscriber)
            content = self.generate_daily_email_by_matched_paper(strong, weak)
            reciver = subscriber['email']
            if content == '':
                print('Skipping user {0} [{1}] since no paper matched.'.format(name, reciver))
                continue
            emails[name] = {
                'reciver': reciver,
                'title': "Your Interested Paper On Arxiv Today ({0})".format(today),
                'content': content,
            }
        return emails
|