ArxivRobot/feeds.py
2019-12-23 12:42:31 +08:00

104 lines
3.2 KiB
Python

from lib.parser import dom_node
from lib import utils
class feed_manager():
    """Build per-subscriber daily e-mail digests from the day's arXiv feed.

    The manager pulls the feed from ``arxivbot.get_today_feed()``, selects
    papers for each subscriber by topic and keyword, and renders the matches
    as markup prefixed with the contents of a style file.
    """

    def __init__(self, submgr, arxivbot, style='./config/style.css'):
        """Store collaborators and load the style file.

        Args:
            submgr: object exposing ``subscribers``, indexed by subscriber
                name; each entry is indexed with ``'keywords'``,
                ``'topics'`` and ``'email'``.
            arxivbot: object exposing ``get_today_feed()``.
            style: path of the style file read into ``self.style``.
        """
        self.style_path = style
        self.style = ''
        self.bot = arxivbot
        self.submgr = submgr
        # Start with an empty feed so that filtering before the first fetch
        # reports "not downloaded" instead of raising AttributeError.
        self.today_feed = {}
        self.update_style()

    def update_style(self, path=None):
        """(Re)load the style text from ``path``.

        When ``path`` is None the path given at construction time is used.
        An explicit ``path`` is used for this call only; ``self.style_path``
        is not modified. A trailing newline is appended to the loaded text.
        """
        if path is None:
            path = self.style_path
        print('loading style from:', path)
        with open(path, 'r') as f:
            self.style = f.read() + '\n'

    def fetch_today_feed(self):
        """Refresh ``self.today_feed`` from the bot."""
        # presumably a mapping of topic -> list of papers; it is indexed by
        # topic and concatenated onto a list below -- verify against the bot.
        self.today_feed = self.bot.get_today_feed()

    def _collect_unique_papers(self, topics):
        """Gather papers for ``topics`` and drop repeats by ``arxiv_id``.

        The first occurrence of each id is kept, preserving feed order.
        """
        papers = []
        for topic in topics:
            if topic in self.today_feed:
                papers += self.today_feed[topic]
            else:
                print('Warning: topic {0} is subscribed but not downloaded!'.format(topic))

        # A set gives O(1) membership tests; assumes arxiv_id is hashable
        # (e.g. a string) -- TODO confirm against the paper class.
        known_ids = set()
        unique_papers = []
        for paper in papers:
            paper_id = paper.arxiv_id
            if paper_id not in known_ids:
                known_ids.add(paper_id)
                unique_papers.append(paper)
        print('removing {0} repeated papers.'.format(len(papers) - len(unique_papers)))
        return unique_papers

    def filter_papers_for_user(self, subscriber):
        """Split today's papers into strong and weak keyword matches.

        A paper is a *strong* match when any keyword occurs in its title and
        a *weak* match when no keyword occurs in the title but at least one
        occurs in the abstract. Matching is case-insensitive.

        Args:
            subscriber: mapping with ``'keywords'`` (iterable of strings)
                and ``'topics'`` (iterable of topic names).

        Returns:
            A ``(strong_papers, weak_papers)`` tuple of lists.
        """
        strong_papers = []
        weak_papers = []
        # Titles and abstracts are lower-cased before comparison, so the
        # keywords must be too; otherwise a keyword such as 'GAN' can never
        # match anything.
        keywords = [keyword.lower() for keyword in subscriber['keywords']]
        papers = self._collect_unique_papers(subscriber['topics'])
        if not keywords:
            return strong_papers, weak_papers

        for paper in papers:
            title = paper.info['title'].lower()
            if any(keyword in title for keyword in keywords):
                strong_papers.append(paper)
                continue
            # The abstract is only consulted when the title did not match.
            abstract = paper.info['abstract'].lower()
            if any(keyword in abstract for keyword in keywords):
                weak_papers.append(paper)
        return strong_papers, weak_papers

    def generate_group_feed(self, paper_groups):
        """Render groups of papers as newline-terminated markup.

        For each key of ``paper_groups`` a ``paper-group`` node carrying the
        key as its data is emitted, followed by ``to_html()`` of every paper
        in that group. Returns ``''`` when ``paper_groups`` is empty.
        """
        pieces = []
        for key in paper_groups:
            header = dom_node('paper-group')
            header.data = key
            pieces.append(header.to_string())
            pieces.extend(paper.to_html() for paper in paper_groups[key])
        return ''.join(piece + '\n' for piece in pieces)

    def generate_daily_feed_by_matched_paper(self, strong_interested, weak_interested):
        """Render the strong and weak matches, skipping empty groups.

        Strong matches are rendered before weak ones. Returns ``''`` when
        both lists are empty.
        """
        feeds = {}
        if strong_interested:
            feeds['Strong Interested Paper'] = strong_interested
        if weak_interested:
            feeds['Weak Interested Paper'] = weak_interested
        return self.generate_group_feed(feeds)

    def generate_daily_email_by_matched_paper(self, strong_interested, weak_interested):
        """Return the e-mail body (style + feed), or ``''`` if nothing matched."""
        xml_feed = self.generate_daily_feed_by_matched_paper(strong_interested, weak_interested)
        if xml_feed == '':
            return ''
        return self.style + xml_feed

    def generate_daily_emails(self):
        """Fetch today's feed and build one e-mail per subscriber with matches.

        Returns:
            A dict keyed by subscriber name. Each value is a dict with the
            keys ``'reciver'`` (sic: the subscriber's address), ``'title'``
            and ``'content'``. Subscribers without any matching paper are
            omitted.
        """
        self.fetch_today_feed()
        emails = {}
        today = utils.str_day()
        title = "Your Interested Paper On Arxiv Today ({0})".format(today)
        for name in self.submgr.subscribers:
            subscriber = self.submgr.subscribers[name]
            strong, weak = self.filter_papers_for_user(subscriber)
            content = self.generate_daily_email_by_matched_paper(strong, weak)
            reciver = subscriber['email']
            if content == '':
                print('Skipping user {0} [{1}] since no paper matched.'.format(name, reciver))
                continue
            emails[name] = {
                'reciver': reciver,
                'title': title,
                'content': content,
            }
        return emails