From 1ac6d0bb9c58aef5b62f45337eb8e3b8487fa77f Mon Sep 17 00:00:00 2001 From: zmy Date: Mon, 23 Dec 2019 12:42:31 +0800 Subject: [PATCH] initial commit --- .gitignore | 7 + README.md | 22 +- analysis_paper.py | 65 ++++++ arxiv_bot.py | 85 ++++++++ arxiv_service.py | 31 +++ arxiv_spider.py | 369 ++++++++++++++++++++++++++++++++++ config-examples.py | 9 + config/style.css | 109 ++++++++++ config/subscriber_example.xml | 27 +++ download_html.py | 10 + email_sender.py | 138 +++++++++++++ feeds.py | 103 ++++++++++ lib/console.py | 304 ++++++++++++++++++++++++++++ lib/parallel.py | 127 ++++++++++++ lib/parser.py | 151 ++++++++++++++ lib/screen.py | 19 ++ lib/service.py | 244 ++++++++++++++++++++++ lib/try.py | 15 ++ lib/utils.py | 139 +++++++++++++ main.py | 40 ++++ subscriber_utils.py | 92 +++++++++ try.py | 21 ++ 22 files changed, 2126 insertions(+), 1 deletion(-) create mode 100644 .gitignore create mode 100644 analysis_paper.py create mode 100644 arxiv_bot.py create mode 100644 arxiv_service.py create mode 100644 arxiv_spider.py create mode 100644 config-examples.py create mode 100644 config/style.css create mode 100644 config/subscriber_example.xml create mode 100644 download_html.py create mode 100644 email_sender.py create mode 100644 feeds.py create mode 100644 lib/console.py create mode 100644 lib/parallel.py create mode 100644 lib/parser.py create mode 100644 lib/screen.py create mode 100644 lib/service.py create mode 100644 lib/try.py create mode 100644 lib/utils.py create mode 100644 main.py create mode 100644 subscriber_utils.py create mode 100644 try.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..fbd985f --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +*.pyc +__pycache__/ +cache/ +feeds/ +config/email_session.xml +config/subscriber.xml +config.py \ No newline at end of file diff --git a/README.md b/README.md index eec0658..52836d4 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,22 @@ -# ArxivRobot +# What is This? 
"""Offline analysis of cached yearly arXiv feeds.

Counts how many papers each author published across the cached years and
prints authors sorted by paper count (descending).  Expects pickled lists
of ``arxiv_paper`` objects under ``./feeds/<year>`` (as produced by
``arxiv_spider.get_yearly_papers``).
"""
from collections import Counter

from arxiv_spider import arxiv_paper  # kept so pickle can resolve the class
# BUG FIX: there is no top-level utils module; utils lives in lib/ and every
# sibling file imports it as `from lib import utils`.
from lib import utils

YEARS = ['2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019']
FEED_DIR = './feeds/'


def count_authors(papers):
    """Return a Counter mapping normalized author name -> number of papers."""
    counts = Counter()
    for paper in papers:
        for author in paper.info['authors']:
            counts[utils.delete_n(author)] += 1
    return counts


def main():
    authors = Counter()
    for year in YEARS:
        print('Analysising year:', year)
        papers = utils.load_python_object(FEED_DIR + year)
        authors += count_authors(papers)
    # most_common() yields (name, count) already sorted by descending count,
    # replacing the previous numpy argsort round-trip.
    for name, freq in authors.most_common():
        # encode() keeps the print robust on consoles without unicode support.
        print('Name: {0} | papers: {1}'.format(name, freq).encode('utf-8'))


if __name__ == '__main__':
    main()
import arxiv_spider
import os
import time
from lib import utils

# cache tree:
# cache_root
#   - topic-caches
#     - feed_$(time).arxiv_feed
#     - feed_year_$(year).arxiv_feed

class arxiv_bot():
    """Fetches today's arXiv papers for a set of topics, caching each day's
    feed on disk so repeated runs do not hit the network again."""

    def __init__(self, topics, cache_dir='./cache', arxiv_site='https://arxiv.org', log=False):
        self.log = log
        self.site = arxiv_site
        self.topics = []
        self.spiders = {}
        self.cache_dir = cache_dir
        self.topic_caches = {}
        # BUG FIX: these attributes were read by get_interested_paper() but
        # never initialized, raising AttributeError on its first call.
        self.today = None
        self.today_feed = None
        if not os.path.isdir(self.cache_dir):
            os.makedirs(self.cache_dir)
        self.update_topics(topics)

    def update_topics(self, topics):
        """Register new topics: create their cache dirs and one spider each.

        Already-known topics are skipped, so this is safe to call repeatedly.
        """
        for topic in topics:
            if topic in self.topics:
                continue
            self.topics.append(topic)
            if self.log:
                print('Adding topic {0}.'.format(topic))
            topic_cache = os.path.join(self.cache_dir, topic)
            self.topic_caches[topic] = topic_cache
            if not os.path.isdir(topic_cache):
                if self.log:
                    print('creating topic dir:', topic_cache)
                os.makedirs(topic_cache)
            self.spiders[topic] = arxiv_spider.arxiv_spider(topic, self.site)

    def get_today_feed(self):
        """Return {topic: [arxiv_paper, ...]} for today.

        Loads the feed from the per-topic cache when present; otherwise
        downloads it (including each paper's abstract) and caches it.
        """
        today_feed = {}
        today = utils.str_day()
        for topic in self.topics:
            today_feed_name = 'feed_' + today + '.arxiv_daily_feed'
            today_feed_path = os.path.join(self.cache_dir, topic, today_feed_name)
            if os.path.exists(today_feed_path):
                topic_feed = utils.load_python_object(today_feed_path)
            else:
                topic_feed = self.spiders[topic].get_today_paper()
                print('Fetching topic {0} papers...'.format(topic))
                for paper in topic_feed:
                    if self.log:
                        print('download abstract for paper', paper.info['title'])
                    paper.download_abstract()
                utils.save_python_object(topic_feed, today_feed_path)
            today_feed[topic] = topic_feed
        return today_feed

    def get_interested_paper(self, topic, keywords):
        """Split today's papers for `topic` into (strong, weak) matches.

        A strong match has a keyword in the title, a weak match only in the
        abstract.  Keywords are expected to be lowercase.
        """
        # BUG FIX: day comparison used `is not` (identity) on strings.
        if self.today_feed is None or utils.str_day() != self.today:
            self.today_feed = self.get_today_feed()
            self.today = utils.str_day()
            print('Updating daily feed.')

        # BUG FIX: get_today_feed() stores a flat list per topic, but this
        # code previously iterated it as a {day: [papers]} dict and crashed.
        topic_papers = list(self.today_feed[topic])
        strong = []
        weak = []
        for paper in topic_papers:
            strong_match = False
            weak_match = False
            for keyword in keywords:
                if paper.info['title'].lower().find(keyword) != -1:
                    strong_match = True
                    break
                elif paper.info['abstract'].lower().find(keyword) != -1:
                    weak_match = True
            if strong_match:
                strong.append(paper)
            elif weak_match:
                weak.append(paper)
        return strong, weak
import requests
import pickle
import time
from lib import utils
from lib.parser import dom_node, simple_parser

import socket
import socks

# Optional SOCKS5 proxy support; disabled by default.
use_proxy = False
if use_proxy:
    SOCKS5_PROXY_HOST = '127.0.0.1'
    SOCKS5_PROXY_PORT = 1080
    default_socket = socket.socket
    socks.set_default_proxy(socks.SOCKS5, SOCKS5_PROXY_HOST, SOCKS5_PROXY_PORT)
    socket.socket = socks.socksocket


class arxiv_paper():
    """One arXiv paper: an id plus an info dict with keys
    title / authors / comments / subjects / abstract."""

    def __init__(self, arxiv_id=None, paper_info=None):
        self.arxiv_id = arxiv_id
        self.info = paper_info

    def add_author(self, author):
        # BUG FIX: previously appended the undefined name `authors`,
        # raising NameError whenever this method was called.
        self.info['authors'].append(author)

    def title(self):
        return self.info['title']

    def describe(self):
        """Return a human-readable multi-line description of the paper."""
        information = ''
        information += 'ID: {0} (https://arxiv.org/abs/{0})\n'.format(self.arxiv_id)
        for key in self.info:
            if self.info[key] is not None:
                info = utils.formal_text(self.info[key])
                information += ('\t' + key + ':' + str(info) + '\n')
        return information

    def show(self):
        print(self.describe())

    def to_html(self):
        """Render the paper as an HTML fragment built from custom tags
        (paper-title, paper-authors, ...) styled by config/style.css."""
        dom_tree = dom_node(name='paper-section')
        paper_title = None
        paper_link = None
        paper_authors = None
        paper_comments = None
        paper_subjects = None
        paper_abstract = None
        for key in self.info:
            if self.info[key] is None:
                continue
            if key == 'title':
                paper_title = dom_node('paper-title')
                link_node = dom_node('a', {'href': 'https://arxiv.org/abs/{0}'.format(self.arxiv_id)})
                link_node.data = self.info[key]
                paper_title.add_child(link_node)
                paper_link = dom_node('paper-pdf-link')
                pdf_link = dom_node('a', {'href': 'https://arxiv.org/pdf/{0}'.format(self.arxiv_id)})
                pdf_link.data = '{0} | [pdf]'.format(self.arxiv_id)
                paper_link.add_child(pdf_link)
            elif key == 'authors':
                paper_authors = dom_node('paper-authors')
                paper_authors.data = ', '.join(self.info[key])
            elif key == 'comments':
                paper_comments = dom_node('paper-comments')
                paper_comments.data = self.info[key]
            elif key == 'subjects':
                paper_subjects = dom_node('paper-subjects')
                paper_subjects.data = self.info[key]
            elif key == 'abstract':
                paper_abstract = dom_node('paper-abstract')
                paper_abstract.data = self.info[key]
        dom_tree.add_child(paper_title)
        dom_tree.add_child(paper_link)
        dom_tree.add_child(paper_authors)
        dom_tree.add_child(paper_abstract)
        dom_tree.add_child(paper_comments)
        dom_tree.add_child(paper_subjects)
        return dom_tree.to_string()

    def download_abstract(self, forcemode=False):
        """Fetch the abstract from the paper's abs page (og:description meta
        tag).  Skips the download when already present unless `forcemode`."""
        if not forcemode and self.info['abstract'] is not None:
            return
        r = requests.get('https://arxiv.org/abs/' + self.arxiv_id)
        parser = simple_parser()
        parser.feed(r.text)
        for meta_node in parser.root.search('meta'):
            meta_attr = meta_node.attributes
            if meta_attr.get('property') == 'og:description':
                self.info['abstract'] = utils.formal_text(meta_attr['content'])
                return


class arxiv_list_parser():
    """Parses an arxiv.org /list/ HTML page (dt/dd pairs) into papers."""

    def __init__(self, html_page):
        self.html_page = html_page
        self.parser = simple_parser()
        self.parser.feed(html_page)
        self.tree = self.parser.root

    def get_arxiv_id(self, dt_node):
        """Extract the arXiv id from a <dt> node, or None when empty."""
        if len(dt_node.childs) == 0:
            return None
        arxiv_id = dt_node.childs[1].childs[0].attributes['href']
        return arxiv_id.split('/')[-1]

    def get_paper_info(self, dd_node):
        """Build the paper-info dict from a <dd> node, or None when empty."""
        if len(dd_node.childs) == 0:
            return None
        title = None
        authors = []
        comments = None
        subjects = None
        for element in dd_node.childs[0].childs:
            element_class = element.attributes.get('class')
            if element_class == 'list-title mathjax':
                title = utils.formal_text(element.data)
            elif element_class == 'list-authors':
                for child in element.childs:
                    if child.name == 'a':
                        authors.append(utils.formal_text(child.data))
            elif element_class == 'list-comments mathjax':
                comments = utils.formal_text(element.data)
            elif element_class == 'list-subjects':
                subjects = utils.formal_text(element.data)
        return {
            'title': title,
            'authors': authors,
            'comments': comments,
            'subjects': subjects,
            'abstract': None,  # filled in later by download_abstract()
        }

    def get_papers(self):
        """Pair up <dt>/<dd> nodes and return the resulting arxiv_paper list."""
        papers = []
        for dt, dd in zip(self.tree.search('dt'), self.tree.search('dd')):
            arxiv_id = self.get_arxiv_id(dt)
            if arxiv_id is None:
                continue
            paper_info = self.get_paper_info(dd)
            if paper_info is None:
                continue
            papers.append(arxiv_paper(arxiv_id, paper_info))
        return papers

    def get_paper_num(self):
        """Read the total entry count from the page's <small> note."""
        totally_paper_node = self.tree.search('small')[0].data
        for split in totally_paper_node.split(' '):
            if split.isdigit():
                return int(split)
        return 0

    def get_recent_info(self):
        """Return {day_name: {'start': skip_index, 'num': paper_count}} for
        each day listed on a /recent page."""
        day_name = []
        day_start = []
        li_nodes = self.tree.search('ul')[0].childs
        for li in li_nodes:
            link = li.childs[0].attributes['href']
            if link.find('#item') != -1:
                start = link.split('#')[-1][4:]
            else:
                start = link.split('=')[-2].split('&')[0]
            day_name.append(li.childs[0].data)
            day_start.append(int(start))
        num_total = self.get_paper_num()
        # Per-day counts: difference between consecutive start offsets;
        # the last day runs to the end of the listing.
        num_days = len(day_start)
        day_num = []
        for i in range(num_days):
            if i < num_days - 1:
                day_num.append(day_start[i + 1] - day_start[i])
            else:
                day_num.append(num_total - day_start[i])
        recent_papers_info = {}
        for day, start, num in zip(day_name, day_start, day_num):
            recent_papers_info[day] = {'start': start, 'num': num}
        return recent_papers_info


class arxiv_spider():
    """Downloads paper listings for one topic (e.g. cs.CV) from arXiv."""

    def __init__(self, topic, arxiv_url='https://arxiv.org'):
        self.link = arxiv_url
        self.topic = topic
        self.base_url = self.link + '/list/' + self.topic

    def get_yearly_papers(self, year, log=False):
        """Return all papers listed for `year` (a 'YYYY' string)."""
        yearly_url = self.base_url + '/' + year
        if log:
            print('visiting url [{0}] for basic information'.format(yearly_url))
        r = requests.get(yearly_url)
        list_parser = arxiv_list_parser(r.text)
        total_num = list_parser.get_paper_num()
        print('Total Number for this year:', total_num)
        yearly_url_all = yearly_url + '?skip={0}&show={1}'.format(0, total_num)
        if log:
            print('visiting url [{0}] for all papers'.format(yearly_url_all))
        r = requests.get(yearly_url_all)
        list_parser = arxiv_list_parser(r.text)
        return list_parser.get_papers()

    def get_papers_on_search_list(self, search_url, log=True):
        """Parse an export.arxiv.org Atom search result into papers."""
        if log:
            print('visiting url [{0}] for today papers.'.format(search_url))
        search_content = requests.get(search_url).text
        parser = simple_parser()
        parser.feed(search_content)
        paper_nodes = parser.root.search('entry')
        print('num_searched_nodes:', len(paper_nodes))
        papers = []
        for node in paper_nodes:
            arxiv_id = node.search('id')[0].data.split('/')[-1]
            title = node.search('title')[0].data
            authors = [item.data for item in node.search('name')]
            categories = [item.attributes['term'] for item in node.search('category')]
            subjects = ','.join(categories)
            comments_node = node.search('arxiv:comment')
            comments = comments_node[0].data if comments_node else ''
            abstract = node.search('summary')[0].data

            paper_info = {
                'title': utils.formal_text(title),
                'authors': authors,
                'comments': utils.formal_text(comments),
                'subjects': utils.formal_text(subjects),
                'abstract': utils.formal_text(abstract),
            }
            papers.append(arxiv_paper(arxiv_id, paper_info))
        return papers

    def get_papers_by_ids(self, ids, log=True):
        """Fetch papers via the export API in batches of 10 ids."""
        # BUG-PRONE FLOAT HACK replaced: ceil(len/10) via integer arithmetic.
        num_groups = (len(ids) + 9) // 10
        if log:
            print('spliting into {0} groups.'.format(num_groups))
        papers = []
        for i in range(num_groups):
            this_batch = ids[i * 10:(i + 1) * 10]
            id_list = ','.join(this_batch)
            search_url = 'http://export.arxiv.org/api/query?id_list=' + id_list
            papers += self.get_papers_on_search_list(search_url, log)
        return papers

    def get_today_ids(self, log=True):
        """Return today's paper ids from the topic's RSS feed."""
        rss_url = 'http://export.arxiv.org/rss/{0}'.format(self.topic)
        if log:
            print('visiting url [{0}] for today papers id.'.format(rss_url))
        rss_content = requests.get(rss_url).text
        parser = simple_parser()
        parser.feed(rss_content)
        paper_ids = []
        for node in parser.root.search('rdf:li'):
            paper_link = node.attributes['rdf:resource']
            paper_ids.append(paper_link.split('/')[-1])
        print('num_paper_ids:', len(paper_ids))
        return paper_ids

    def get_today_paper(self, return_day_name=False, log=True):
        """Return today's papers (RSS ids + export API details).

        `return_day_name` is kept for interface compatibility with
        get_today_paper_backup(); it is unused here.
        """
        today_ids = self.get_today_ids(log)
        papers = self.get_papers_by_ids(today_ids)
        print('num of papers:', len(papers))
        return papers

    def get_today_paper_backup(self, return_day_name=False):
        """Older HTML-scraping fallback for get_today_paper()."""
        papers = self.get_recent_papers(recent_days=[1])
        today = None
        paper = None
        for day in papers:
            today = day
            paper = papers[day]
        if return_day_name:
            return paper, today
        return paper

    def get_recent_papers(self, recent_days=(1, 2, 3, 4, 5), log=False):
        """Return {day_name: [papers]} for the selected recent days
        (1 = most recent).  Default changed from a mutable list to a tuple;
        callers may still pass any container of ints."""
        recent_url = self.base_url + '/recent'
        if log:
            print('visiting url [{0}] for basic information'.format(recent_url))
        r = requests.get(recent_url)
        list_parser = arxiv_list_parser(r.text)
        recent_papers_info = list_parser.get_recent_info()
        print('paper info:', recent_papers_info)

        day_id = 1
        papers = {}
        for day in recent_papers_info:
            if day_id in recent_days:
                today_start = recent_papers_info[day]['start']
                today_num = recent_papers_info[day]['num']
                day_url = self.base_url + '/pastweek?skip={0}&show={1}'.format(today_start, today_num)
                if log:
                    print('visiting url [{0}] for paper on day {1}'.format(day_url, day))
                r = requests.get(day_url)
                list_parser = arxiv_list_parser(r.text)
                papers[day] = list_parser.get_papers()
            day_id += 1
        return papers
from lib.parser import dom_node, simple_parser
from lib import parser
from lib import utils
import os
import config

import smtplib
import email
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.image import MIMEImage
from email.mime.base import MIMEBase
from email.mime.application import MIMEApplication
from email.header import Header
from email import generator


def send_mail(reciver, title, content):
    """Send an HTML email via the SMTP-over-SSL server from config.

    Returns True on success and False on any failure, so callers can decide
    whether to record the message as delivered.
    """
    username = config.username
    password = config.password
    replyto = config.replyto
    msg = MIMEMultipart('alternative')
    msg['Subject'] = Header(title)
    msg['From'] = '%s <%s>' % (Header(config.sender_name), username)
    msg['To'] = reciver
    msg['Reply-to'] = replyto
    msg['Message-id'] = email.utils.make_msgid()
    msg['Date'] = email.utils.formatdate()
    msg.attach(MIMEText(content, _subtype='html', _charset='UTF-8'))

    try:
        client = smtplib.SMTP_SSL(config.smtp_ssl_addr)
        client.set_debuglevel(0)
        client.login(username, password)
        client.sendmail(username, reciver, msg.as_string())
        client.quit()
        print('Email send to {0} success!'.format(reciver))
        return True
    except smtplib.SMTPConnectError as e:
        print('Connection Error:', e.smtp_code, e.smtp_error)
    except smtplib.SMTPAuthenticationError as e:
        print('Authentication Error:', e.smtp_code, e.smtp_error)
    except smtplib.SMTPSenderRefused as e:
        print('Sender Refused:', e.smtp_code, e.smtp_error)
    except smtplib.SMTPRecipientsRefused as e:
        # BUG FIX: SMTPRecipientsRefused carries a .recipients dict,
        # not smtp_code/smtp_error.
        print('SMTPRecipients Refused:', e.recipients)
    except smtplib.SMTPDataError as e:
        print('Data Error:', e.smtp_code, e.smtp_error)
    except smtplib.SMTPException as e:
        # BUG FIX: exceptions have no .message attribute on Python 3.
        print('SMTPException:', e)
    except Exception as e:
        print('Unknown error:', str(e))
    # BUG FIX: previously fell through to `return True` after the except
    # blocks, so failed sends were recorded as delivered and never retried.
    return False


class arxiv_emailer():
    """Sends each subscriber at most one digest email per day, tracking the
    last send date per user in an XML session file."""

    def __init__(self, arxiv_bot, feeds_generator, session_file='./config/email_session.xml', debug=False):
        self.debug = debug
        self.email_info = dom_node()
        self.session_file = session_file
        # BUG FIX: default to an empty dict so send_daily_email() works even
        # when no session file is configured (sessions used to stay None).
        self.sessions = {}
        if self.session_file is not None:
            self.load_session()

        self.bot = arxiv_bot
        self.feeds = feeds_generator

    def send_daily_email(self):
        """Generate today's digests and send to users not yet served today."""
        emails = self.feeds.generate_daily_emails()
        today = utils.str_day()
        for name in emails:
            # renamed from `email` to avoid shadowing the email module used above
            email_item = emails[name]
            send = False
            if name not in self.sessions:
                print('New user found!')
                self.sessions[name] = {'last-send': today}
                send = True

            if self.sessions[name]['last-send'] != today:
                send = True

            if not send:
                print('skipping user {0} since already sent!'.format(name))
                continue
            print('Sending email to user {0} [{1}]'.format(name, email_item['reciver']))
            print('reciver:', email_item['reciver'])
            print('title:', email_item['title'])
            print('content:', len(email_item['content']))
            success = False
            if not self.debug:
                success = send_mail(email_item['reciver'], email_item['title'], email_item['content'])
            if success:
                # Only mark as sent when delivery actually succeeded.
                self.sessions[name]['last-send'] = today
                self.save_session()

    def load_session(self, session_file=None):
        """Load {user: {'last-send': day}} from the session XML file."""
        if session_file is None:
            session_file = self.session_file
        with open(session_file, 'r') as f:
            xml = f.read()
        xmlparser = simple_parser()
        xmlparser.feed(xml)
        sessions = parser.dom2dict(xmlparser.root)
        # dom2dict wraps the payload under a 'root' key when present.
        sessions = sessions['root'] if 'root' in sessions else {}
        self.sessions = sessions
        print(self.sessions)
        return sessions

    def save_session(self, session_file=None):
        """Write the session dict back as XML; no-op when no file is set."""
        if session_file is None:
            session_file = self.session_file
        if session_file is None:
            return None
        xml = parser.dict2dom(self.sessions).to_string()
        with open(session_file, 'w') as f:
            f.write(xml)
        return xml


if __name__ == '__main__':
    # Manual smoke test for the session round-trip (uses the default file).
    # BUG FIX: the old debug code passed session_file=None and then called
    # load_session(), which crashed on open(None).
    emailer = arxiv_emailer(None, None, debug=True)
    print(emailer.load_session())
    print(emailer.save_session())
class feed_manager():
    """Turns today's downloaded papers into per-subscriber HTML digests."""

    def __init__(self, submgr, arxivbot, style='./config/style.css'):
        self.style_path = style
        self.style = ''
        self.bot = arxivbot
        self.submgr = submgr
        self.update_style()

    def update_style(self, path=None):
        """(Re)load the CSS block prepended to every generated email."""
        if path is None:
            path = self.style_path
        print('loading style from:', path)
        with open(path, 'r') as f:
            self.style = f.read()
        self.style += '\n'

    def fetch_today_feed(self):
        """Refresh the {topic: [papers]} cache from the bot."""
        self.today_feed = self.bot.get_today_feed()

    def filter_papers_for_user(self, subscriber):
        """Return (strong, weak) keyword matches among the subscriber's topics.

        Strong = keyword in the title, weak = keyword only in the abstract.
        Papers appearing under several subscribed topics are de-duplicated
        by arXiv id.  Keywords are expected to be lowercase.
        """
        keywords = subscriber['keywords']
        papers = []
        for topic in subscriber['topics']:
            if topic in self.today_feed:
                papers += self.today_feed[topic]
            else:
                print('Warning: topic {0} is subscribed but not downloaded!'.format(topic))
        # De-duplicate by id, keeping first occurrence.  A set replaces the
        # previous O(n^2) membership scan over a list.
        seen_ids = set()
        unique_papers = []
        for paper in papers:
            if paper.arxiv_id not in seen_ids:
                unique_papers.append(paper)
                seen_ids.add(paper.arxiv_id)
        print('removing {0} repeated papers.'.format(len(papers) - len(unique_papers)))
        strong_papers = []
        weak_papers = []
        for paper in unique_papers:
            strong = False
            weak = False
            for keyword in keywords:
                if paper.info['title'].lower().find(keyword) != -1:
                    strong = True
                    break
                elif paper.info['abstract'].lower().find(keyword) != -1:
                    weak = True
            if strong:
                strong_papers.append(paper)
            elif weak:
                weak_papers.append(paper)
        return strong_papers, weak_papers

    def generate_group_feed(self, paper_groups):
        """Render {'group name': [papers]} as HTML, one header per group."""
        group_html = ''
        for key in paper_groups:
            header = dom_node('paper-group')
            header.data = key
            group_html += header.to_string() + '\n'
            for paper in paper_groups[key]:
                group_html += paper.to_html() + '\n'
        return group_html

    def generate_daily_feed_by_matched_paper(self, strong_interested, weak_interested):
        """Group matched papers into labelled sections and render them."""
        feeds = {}
        if len(strong_interested) > 0:
            feeds['Strong Interested Paper'] = strong_interested
        if len(weak_interested) > 0:
            feeds['Weak Interested Paper'] = weak_interested
        return self.generate_group_feed(feeds)

    def generate_daily_email_by_matched_paper(self, strong_interested, weak_interested):
        """Return a styled HTML email body, or '' when nothing matched."""
        xml_feed = self.generate_daily_feed_by_matched_paper(strong_interested, weak_interested)
        if xml_feed == '':
            return ''
        return self.style + xml_feed

    def generate_daily_emails(self):
        """Return {subscriber_name: {'reciver', 'title', 'content'}} covering
        every subscriber with at least one matching paper today."""
        self.fetch_today_feed()
        emails = {}
        today = utils.str_day()
        for name in self.submgr.subscribers:
            subscriber = self.submgr.subscribers[name]
            strong, weak = self.filter_papers_for_user(subscriber)
            content = self.generate_daily_email_by_matched_paper(strong, weak)
            reciver = subscriber['email']
            if content == '':
                print('Skipping user {0} [{1}] since no paper matched.'.format(name, reciver))
                continue
            emails[name] = {
                'reciver': reciver,
                'title': "Your Interested Paper On Arxiv Today ({0})".format(today),
                'content': content,
            }
        return emails
new alias.', + kind='sys' + ) + self.regist( + 'os', + action=self.command_os, + help_info='run a system command.', + kind='sys' + ) + + + def translate_command(self, command): + while command in self.alias and command not in self.commands: + command = self.alias[command] + return command + + def find_equal_command(self, command, ret_type = str, ignored = []): + finished = [] + new = [] + + cmds = [command] + while len(finished) != len(cmds): + # find child + if command in self.alias: + if self.alias[command] not in cmds: + cmds.append(self.alias[command]) + # find fathers + for al in self.alias: + if self.alias[al] == command: + if al not in cmds: + cmds.append(al) + # found finished. + finished.append(command) + for cmd in cmds: + if cmd not in finished: + command = cmd + + + if ret_type is str: + finished = utils.list2csv(finished) + return finished + + + + def get_alias(self, command, ret_type=str): + alias = [] + for al in self.alias: + if self.alias[al] == command: + alias.append(al) + + if ret_type is str: + alias = utils.list2csv(alias) + + return alias + + def command_exist(self, command): + if command in self.commands or command in self.alias: + return True + else: + return False + + def add_alias(self, command, alias): + if self.command_exist(alias): + if warn_level >= 3: + print('Alias {0} will not be added since already used'.format(al)) + else: + self.alias[alias] = command + + # kind: standard or shared + # standard: help info will be displayed + # shared: help info will not be displayed in sub command. 
+ def regist(self, command, action, alias=None, help_info='no help provided.', kind='standard'): + if type(action) == console: + action.is_child = True + action.father = self + exist = self.command_exist(command) + if exist: + if self.warn_level >=3: + print('Command {0} will not be added sinece already exist.'.format(command)) + return + + if type(alias) is list: + for al in alias: + self.add_alias(command, al) + elif type(alias) is str: + self.add_alias(command, alias) + elif alias is None: + pass + else: + if self.warn_level > 3: + print('Unknown alias type, no alias will be added.') + self.commands[command] = {} + self.commands[command]['action'] = action + self.commands[command]['help'] = help_info + self.commands[command]['kind'] = kind + + def handle_command(self, command, args): + if command in self.commands: + act = self.commands[command]['action'] + try: + act(args) + except KeyboardInterrupt: + pass + except: + print('Exception occured while processing command \"{0} {1}\".'.format(command, args)) + print('More information are shown below.\n', traceback.format_exc()) + else: + print('Unknown command \"{0}\"'.format(command)) + + # seperate command and its args. 
+ def parse_command(self, string): + string += ' ' + length = len(string) + command_end = 0 + parse_start = False + for i in range(length): + blank = utils.is_blank(string[i]) + if not blank: + parse_start=True + if parse_start and blank: + command_end = i + break + + command = string[:command_end] + command = utils.remove_blank_in_endpoint(command) + args = utils.remove_blank_in_endpoint(string[command_end:]) + return command, args + + def parse(self, string): + command, args = self.parse_command(string) + exitsted_commands = [] + while command in self.alias: + if command not in exitsted_commands: + exitsted_commands.append(command) + command = self.alias[command] + string = command + ' ' + args + command, args = self.parse_command(string) + else: + break + + return command, args + + + def show_help_info(self, command, prefix, indent, depth=0): + command = self.translate_command(command) + action = self.commands[command]['action'] + kind = self.commands[command]['kind'] + if kind == 'sys' and depth > 0: + return + alias = self.get_alias(command, ret_type=str) + if alias != '': + print('{0}{1}({2}):'.format(prefix, command, alias)) + else: + print('{0}{1}:'.format(prefix, command)) + print('{0}{1}{2}'.format(prefix, indent, self.commands[command]['help'])) + if type(action) == console: + action.command_help('', prefix=prefix+indent, indent=indent, depth=depth+1) + + def debug_log(self, command, args): + if self.debug: + print('command:[{0}] args:[{1}]'.format(command, args)) + + def command_exit_console(self, args): + if not self.is_child: + print(self.exit_info) + self.exit_flag = True + + def command_clear_screen(self, args): + if self.platform == 'Windows': + os.system('cls') + elif self.platform == 'Linux': + os.system('clear') + return False + + def command_help(self, args, prefix = '', indent=' ', depth=0): + command, args = self.parse_command(args) + if command is not "": + if self.command_exist(command): + self.show_help_info(command, prefix, indent, depth) 
+ else: + print('Unknown command \"{0}\"'.format(command)) + else: + for command in self.commands: + self.show_help_info(command, prefix, indent, depth) + + def command_alias(self, args): + alias_parse = args.split('=') + if len(alias_parse) == 2: + alias = utils.remove_blank_in_endpoint(alias_parse[0]) + command = utils.remove_blank_in_endpoint(alias_parse[1]) + if command is not '': + self.alias[alias]=command + else: + del self.alias[alias] + elif args == '': + for alias in self.alias: + print('{0}={1}'.format(alias, self.alias[alias])) + elif len(alias_parse) == 1: + if args in self.alias: + print('{0}={1}'.format(args, self.alias[args])) + equal_alias = self.find_equal_command(args) + if equal_alias != '': + print('Hint: {0} are all equivalent.'.format(equal_alias)) + elif args in self.commands: + als = self.get_alias(args, ret_type=str) + if als == '': + print('command {0} has no alias.'.format(args)) + else: + print('command {0} is aliased as {1}'.format(args, als)) + equal_alias = self.find_equal_command(args) + if equal_alias != '' and equal_alias != args: + print('Hint: {0} are all equivalent.'.format(equal_alias)) + else: + print('No alias \"{0}\" found.'.format(args)) + else: + print('Syntax error, command not understood.') + + def command_os(self, args): + if args == '': + print('please specify os command') + else: + os.system(args) + + def execute(self, string): + command, args = self.parse(string) + if command is not "": + self.handle_command(command, args) + + def __call__(self, args): + if args != '': + self.execute(args) + else: + self.exit_flag=False + self.interactive() + + def interactive(self): + while not self.exit_flag: + try: + input_str = input(self.get_hint()) + self.execute(input_str) + except(KeyboardInterrupt): + print('') + + +if __name__ == '__main__': + con = console() + con_sub = console() + con_sub_sub = console() + con_sub.regist('test_subsubcommand', con_sub_sub, alias='tss', help_info='A sub command.') + 
con.regist('test_subcommand', con_sub, alias='ts', help_info='A sub command.') + con.interactive() \ No newline at end of file diff --git a/lib/parallel.py b/lib/parallel.py new file mode 100644 index 0000000..1b1fe74 --- /dev/null +++ b/lib/parallel.py @@ -0,0 +1,127 @@ +import threading +import queue +import time + +class Job(): + def __init__(self, func, args=[], kwargs={}, name=None): + if name == None: + name = 'job' + self.id = None + self.name = name + self.func = func + self.args = args + self.kwargs = kwargs + self.results = None + + def run(self): + self.results = self.func(*self.args, **self.kwargs) + + def set_name(self, name): + self.name = name + + def set_id(self, jid): + self.id = jid + + def __call__(self): + self.run() + +class Worker(threading.Thread): + def __init__(self, work_queue, finished_queue): + super(Worker, self).__init__() + self.queue = work_queue + self.finished = finished_queue + self.terminate = False + self.daemon=True + + def stop(self): + self.terminate = True + + def run(self): + while not self.terminate: + try: + task = self.queue.get(timeout=1) + task.run() + self.queue.task_done() + self.finished.put(task) + except queue.Empty: + pass + except KeyboardInterrupt: + print("you stop the threading") + +class ParallelHost(): + def __init__(self, num_threads=8): + self.num_threads = num_threads + self.workers = [] + self.tasks = queue.Queue() + self.results = queue.Queue() + self.rets = {} + self.id = 0 + for i in range(self.num_threads): + worker = Worker(self.tasks, self.results) + self.workers.append(worker) + for worker in self.workers: + worker.start() + + def __del__(self): + self.stop('kill') + + # soft stop: wait until all job done + # hard stop: stop even with unfinished job + # kill stop: whatever the thread is doing, exit. 
+ def stop(self, mode='soft'): + print('Trying to stop.') + if mode == 'soft': + self.tasks.join() + print('All job finished.') + for worker in self.workers: + worker.stop() + if mode == 'kill': + worker.join(0.01) + + def commit(self, job): + self.id += 1 + job.set_id(self.id) + self.tasks.put(job) + return self.id + + def add_job(self, func, args=[], kwargs={}, name=None): + job = Job(func, args, kwargs, name) + return self.commit(job) + + def collect_all(self): + while not self.results.empty(): + task = self.results.get() + jid = task.id + self.rets[jid] = task.results + + def get_result(self, jid, block=False): + if jid in self.rets: + ret = self.rets[jid] + del self.rets[jid] + return ret + while True: + if self.results.empty() and not block: + break + task = self.results.get() + if task.jid == jid: + return task.results + else: + self.rets[task.jid] = task.results + + def clear_results(self): + while not self.results.empty(): + self.results.get() + self.rets = {} + +if __name__ == '__main__': + host = ParallelHost() + + def loop_print(info, num): + for i in range(num): + print(info + ':' + str(i)) + time.sleep(1) + + for i in range(10): + host.add_job(loop_print, ["loop_print_{0}".format(i), 5]) + + host.terminate('kill') diff --git a/lib/parser.py b/lib/parser.py new file mode 100644 index 0000000..d6b1504 --- /dev/null +++ b/lib/parser.py @@ -0,0 +1,151 @@ +from html.parser import HTMLParser +from . 
import utils + +def dict_to_arrtibute_string(attributes): + string = '' + for key in attributes: + string += key + '=\"{0}\";'.format(str(attributes[key])) + return string + +def attribute_string_to_dict(attrs): + attr_dict = {} + for attr in attrs: + attr_dict[attr[0]] = attr[1] + return attr_dict + + +class dom_node(): + def __init__(self, name = None, attributes = {}): + if name is not None: + self.name = name + else: + self.name = 'Node' + + self.attributes = attributes + self.childs = [] + self.data = None + self.father = None + + def add_child(self, child): + if child is not None: + child.father = self + self.childs.append(child) + + def to_string(self, prefix='', indent=' '): + + string = prefix + '<' + self.name + if self.attributes: + string += ' ' + dict_to_arrtibute_string(self.attributes) + string += '>\n' + + for child in self.childs: + string += child.to_string(prefix=prefix+indent, indent=indent) + + if self.data is not None: + string += prefix + indent + self.data + '\n' + + string += prefix + '\n'.format(self.name) + + return string + + + def has_child(self, name): + has = False + for child in self.childs: + if child.name == name: + has = True + break; + return has + + def search(self, name): + founded_node = [] + if type(name) is list: + if self.name in name: + founded_node.append(self) + else: + if self.name == name: + founded_node.append(self) + for child in self.childs: + search_result = child.search(name) + founded_node += search_result + return founded_node + +def dict2dom(d, root_name='root'): + node = dom_node(root_name) + for key in d: + elem = d[key] + child_node = dom_node(name=str(key)) + if type(elem) is dict: + child_node = dict2dom(elem, root_name=str(key)) + elif type(elem) is list: + for subelem in elem: + if type(subelem) is dict: + sub_node = dict2dom(subelem, root_name='li') + child_node.add_child(sub_node) + else: + sub_node = dom_node('li') + sub_node.data = str(subelem) + child_node.add_child(sub_node) + else: + 
child_node.data = str(elem) + node.add_child(child_node) + return node + +# if a dom node has data only, then it's {'name':'data'} +# if a dom node has childs, then it's {'name':{}} +# if a dom node has data as well as childs, data will be ignored. +# if a dom has multi child with same name, it will be stored as list. +def dom2dict(dom, replace_li = True): + dictionary = {} + for child in dom.childs: + name = child.name + content = None + if len(child.childs) != 0: + content = dom2dict(child, replace_li) + else: + content = child.data + if content is None: + content = '' + content = utils.clean_text(content) + if name in dictionary: + if type(dictionary[name]) is not list: + previous = dictionary[name] + dictionary[name] = [previous, content] + else: + dictionary[name].append(content) + else: + dictionary[name] = content + + if replace_li: + for key in dictionary: + item = dictionary[key] + if type(item) is dict: + li = None + if len(item.keys()) == 1: + for subkey in item: + if subkey == 'li': + li = item[subkey] + if li is not None: + dictionary[key] = li + return dictionary + +class simple_parser(HTMLParser): + def __init__(self): + super(simple_parser, self).__init__() + self.root = dom_node('root') + self.current_node = self.root + + def handle_starttag(self, tag, attrs): + attrs_dict = attribute_string_to_dict(attrs) + this_node = dom_node(tag, attrs_dict) + self.current_node.add_child(this_node) + self.current_node = this_node + + def handle_endtag(self, tag): + self.current_node = self.current_node.father + + def handle_data(self, data): + if self.current_node.data is None: + self.current_node.data = data + else: + self.current_node.data += data \ No newline at end of file diff --git a/lib/screen.py b/lib/screen.py new file mode 100644 index 0000000..63e6191 --- /dev/null +++ b/lib/screen.py @@ -0,0 +1,19 @@ +import sys + +class VirtualScreen(): + def __init__(self, max_history=1000): + self.max_history = max_history + self.contents = [] + + def write(self, 
message): + self.contents.append(message) + + def last(self, line=10, output=sys.stdout): + num_lines = len(self.contents) + start_line = num_lines - line + if start_line < 0: + start_line = 0 + display = self.contents[start_line:] + for line in display: + output.write(line) + output.write('\n') \ No newline at end of file diff --git a/lib/service.py b/lib/service.py new file mode 100644 index 0000000..3b5f4f8 --- /dev/null +++ b/lib/service.py @@ -0,0 +1,244 @@ +import time +import sys +import shlex +import argparse + +from croniter import croniter +from . import utils +from . import parallel +from . import console +from . import screen +from . import utils + +class service(): + def __init__(self, action, args=[], kwargs={}, cron='* * * * *', managed_output=False, name='service'): + self.name = name + self.action = action + self.managed_output = managed_output + self.args = args + self.kwargs = kwargs + self.output = sys.stdout + self.last_result = None + self.cronexpr = cron + self.croniter = croniter(self.cronexpr, time.time()) + self.next_time = self.croniter.get_next() + + def run(self, daemon=None, dry=False): + if not dry: + self.next_time = self.croniter.get_next() + + new_args = [] + if self.managed_output: + new_args = [self.output, *self.args] + else: + new_args = self.args + if daemon is None: + self.last_result = self.action(*new_args, **self.kwargs) + else: + daemon.add_job(self.action, new_args, self.kwargs, self.name) + +class ServiceManager(): + def __init__(self, debug=False, output=sys.stdout): + self.debug = debug + self.services = {} + self.deleted_services = {} + self.protected_service = [] + self.daemon = parallel.ParallelHost() + self.sid = 0 + self.terminate = False + self.output = output + + self.set_refresh_time() + + def stop(self): + self.daemon.stop() + self.terminate = True + + def __del__(self): + self.stop() + + def log(self, *args, end='\n'): + self.output.write('[{0}]'.format(utils.str_time())) + for arg in args: + arg = str(arg) 
+ self.output.write(arg) + self.output.write(end) + + def add(self, service, protected=False): + self.sid += 1 + service.output = self.output + self.services[self.sid] = service + if protected: + self.protected_service.append(self.sid) + return self.sid + + def delete(self, sid): + if sid in self.protected_service: + self.log('Can not delete protected service.') + return + if sid in self.services: + self.deleted_services[sid] = self.services[sid] + del self.services[sid] + else: + self.log('The sid [{0}] do not exist!'.format(sid)) + + def recover(self, sid): + if sid in self.deleted_services: + self.services[sid] = self.deleted_services[sid] + del self.deleted_services[sid] + else: + self.log('The sid [{0}] is not found recycle bin.'.format(sid)) + + def set_refresh_time(self, refresh_cron='* * * * *'): + def refresh(): + pass + refresh_service = service(refresh, cron=refresh_cron, name='refresh') + self.add(refresh_service, protected = True) + + def get_next(self): + next_sid = -1 + next_time = -1 + for sid in self.services: + service = self.services[sid] + if service.next_time < next_time or next_sid < 0: + next_sid = sid + next_time = service.next_time + return next_sid, next_time + + def loop(self): + while not self.terminate: + next_sid, next_time = self.get_next() + service = self.services[next_sid] + sleep_time = next_time - time.time() + if sleep_time > 0: + time.sleep(sleep_time) + self.log('Running service {0} (SID={1})'.format(service.name, next_sid)) + if next_sid in self.services: + service.run(self.daemon) + else: + self.log('the sheduled service wiil not run since it is canceled.') + + + # mode: background: return immidietly + # foreground: stuck here. 
+ def start(self, mode='background'): + if mode == 'background': + self.daemon.add_job(self.loop, name='service main loop') + else: + self.loop() + +def get_service_console(manager, name='service'): + + con = console.console(name) + + def command_show(args): + print('Active services:') + for sid in manager.services: + print('SID: {0} | Name: {1}'.format(sid, manager.services[sid].name)) + print('Deleted services:') + for sid in manager.deleted_services: + print('SID: {0} | Name: {1}'.format(sid, manager.deleted_services[sid].name)) + + def command_add(args): + parser = argparse.ArgumentParser() + parser.add_argument('cron', type=str, help='A cron expr') + parser.add_argument('task', type=str, help='task to run, should be a valid command') + parser.add_argument('--name', '-n', type=str, default='command service', help='name of the task') + args = shlex.split(args) + args = parser.parse_args(args) + cron = args.cron + if not croniter.is_valid(cron): + print('Invalid cron expression.') + task = args.task + name = args.name + service_to_add = service(con.execute, args=[task], cron=cron, name=name) + manager.add(service_to_add) + + def command_delete(args): + sid = None + if args.isdigit(): + if int(args) in manager.services: + sid = int(args) + if sid is not None: + manager.delete(sid) + else: + print('command arugment \"{0}\" is not understood.'.format(args)) + + def command_recover(args): + sid = None + if args.isdigit(): + if int(args) in manager.deleted_services: + sid = int(args) + if sid is not None: + manager.recover(sid) + else: + print('command arugment \"{0}\" is not understood.'.format(args)) + + def command_run(args): + sid = None + if args.isdigit(): + if int(args) in manager.services: + sid = int(args) + if sid is not None: + manager.services[sid].run(dry=True) + else: + print('command arugment \"{0}\" is not understood.'.format(args)) + + def command_info(args): + line = None + if args != '': + if args.isdigit(): + line = int(args) + if line is None: + 
line = 10 + manager.output.last(line) + + def command_next(args): + next_sid, next_time = manager.get_next() + info = '' + indent = ' ' + info += 'Next Job: {0}'.format(manager.services[next_sid].name) + info += '\n{0}SID: {1}'.format(indent, next_sid) + info += '\n{0}Scheduled Running Time: {1}'.format(indent, utils.time2str(next_time)) + info += '\n{0}Remeaning Time: {1}s'.format(indent, utils.float2str(next_time-time.time())) + print(info) + + con.regist('show', command_show, help_info='Show all services.', alias=['ls']) + con.regist('run', command_run, help_info='Run a service.') + con.regist('info', command_info, help_info='Display service output log.') + con.regist('next', command_next, help_info='Next job to run.') + con.regist('add', command_add, help_info='Register a command as service.') + con.regist('delete', command_delete, help_info='Delete a service', alias=['del']) + con.regist('recover', command_recover, help_info='Recover a service.') + return con + + +if __name__ == '__main__': + def func1(output): + output.write('func1') + + def func2(output): + output.write('func2') + + def add(a, b): + print('{0} + {1} = {2}'.format(a, b, a+b)) + + def command_add(args): + numbers = args.split(' ') + a = float(numbers[0]) + b = float(numbers[1]) + add(a, b) + + log_screen = screen.VirtualScreen() + manager = ServiceManager(output=log_screen) + test1 = service(func1, cron='* * * * *', name='test1', managed_output=True) + test2 = service(func2, cron='* * * * *', name='test2', managed_output=True) + manager.add(test1) + manager.add(test2) + manager.start('background') + + con = get_service_console(manager) + master = console.console() + master.regist('service', con, help_info='service console') + master.regist('add', command_add, help_info='Add two numbers.') + master.interactive() diff --git a/lib/try.py b/lib/try.py new file mode 100644 index 0000000..64c44b3 --- /dev/null +++ b/lib/try.py @@ -0,0 +1,15 @@ +def func(a, b, c, time=0, work=1): + print('a:{0} b:{1} 
c:{2}'.format(a, b, c)) + print('time:{0} work:{1}'.format(time, work)) + +def funcwrap(func, kargs, kkargs): + func(*kargs, **kkargs) + + +kargs = [1, 2, 3] +kkargs = { + "time":1234, + "work":1232 +} + +funcwrap(func, kargs, kkargs) \ No newline at end of file diff --git a/lib/utils.py b/lib/utils.py new file mode 100644 index 0000000..733075b --- /dev/null +++ b/lib/utils.py @@ -0,0 +1,139 @@ +import pickle +import time +import os +import re +import platform + +def detect_platform(): + p = 'Unknown' + if platform.platform().find('Windows') != -1: + p = 'Windows' + elif platform.platform().find('Linux') != -1: + p = 'Linux' + return p + +def ensure_dir_exist(directory, show_info = True): + exist = os.path.isdir(directory) + if not exist: + print('directory', directory, ' not found, creating...') + os.mkdir(directory) + +def validateTitle(title): + rstr = r"[\/\\\:\*\?\"\<\>\|]" # '/ \ : * ? " < > |' + new_title = re.sub(rstr, " ", title) # 替换为空格 + return new_title + +def list2csv(l): + csv = '' + for item in l: + csv += str(item) + ',' + csv = csv[:-1] + return csv + +def clean_text(string): + if string is None: + return '' + while '\n' in string: + string = string.replace('\n', ' ') + splits = clean_split(string) + string = '' + for split in splits: + string += split + ' ' + string = string[:-1] + return string + +def clean_split(string, delimiter=' '): + sub_strs = string.split(delimiter) + splits = [] + for sub_str in sub_strs: + if sub_str is not '': + splits.append(sub_str) + return splits + +def remove_blank_in_endpoint(string): + length = len(string) + + first_index = 0 + for i in range(length): + if is_blank(string[first_index]): + first_index += 1 + else: + break + + last_index = length - 1 + for i in range(length): + if is_blank(string[last_index]): + last_index -= 1 + else: + break + last_index += 1 + return string[first_index:last_index] + +def is_blank(ch): + blank_ch = [' ', '\t', '\n'] + if ch in blank_ch: + return True + else: + return False + 
+def dict_to_arrtibute_string(attributes): + string = '' + for key in attributes: + string += key + '=\"{0}\";'.format(str(attributes[key])) + return string + +def attribute_string_to_dict(attrs): + attr_dict = {} + for attr in attrs: + attr_dict[attr[0]] = attr[1] + return attr_dict + +def save_python_object(obj, save_path): + with open(save_path, 'wb') as file: + pickle.dump(obj, file) + +def load_python_object(path): + with open(path, 'rb') as file: + return pickle.load(file) + +def delete_n(string): + while '\n' in string: + string = string.replace('\n', ' ') + return string + +def remove_additional_blank(string): + words = string.split(' ') + string = '' + for word in words: + if word is not '': + string += word + ' ' + return string[:-1] + +def formal_text(text): + text = delete_n(text) + text = remove_additional_blank(text) + return text + +def float2str(f, precision=2): + f = str(f) + f_base = f[:f.find('.') + precision] + return f_base + +# ========== time realted operation ========== # + +def str_day(): + day = time.strftime("%Y-%m-%d", time.localtime()) + return day + +def time2str(t): + localtime = time.localtime(int(t)) + return str_time(localtime) + +def str_time(local_time = None): + if local_time is None: + local_time = time.localtime() + day = time.strftime("%Y-%m-%d-%Hh-%Mm-%Ss)", local_time) + return day + +if __name__ == '__main__': + print(str_day()) diff --git a/main.py b/main.py new file mode 100644 index 0000000..f057d0c --- /dev/null +++ b/main.py @@ -0,0 +1,40 @@ +import arxiv_bot +import email_sender +import subscriber_utils +import feeds +from lib import utils +import os +from lib import service +from lib.console import console +from lib import screen + +subscribe_manager = subscriber_utils.subscribe_manager() +# subscribe_manager.load() + +bot = arxiv_bot.arxiv_bot(subscribe_manager.get_subscribed_topics()) +feeds_generator = feeds.feed_manager(subscribe_manager, bot) +emailer = email_sender.arxiv_emailer(bot, feeds_generator, 
debug=False) + +log_screen = screen.VirtualScreen() +manager = service.ServiceManager(output=log_screen) + +daily_mail_service = service.service( + emailer.send_daily_email, + cron='0 4 * * 1-5', + name = 'send daily email' +) +manager.add(daily_mail_service) + +shell = console('ArxivBot') +def command_load(args): + if args == 'subscriber': + subscribe_manager.load() + +shell.regist('load', command_load, help_info='load config. (only subscriber supported till now)') +service_shell = service.get_service_console(manager, 'ServiceManager') +shell.regist('service', service_shell, help_info='service mamager') + +# cron time: +# min hour day month week +manager.start() +shell.interactive() diff --git a/subscriber_utils.py b/subscriber_utils.py new file mode 100644 index 0000000..3e12fb7 --- /dev/null +++ b/subscriber_utils.py @@ -0,0 +1,92 @@ +from lib.parser import dom_node, simple_parser + + +class subscribe_manager(): + def __init__(self, subscriber_config = './config/subscriber.xml'): + self.subscriber_config = None + self.subscribers = {} + if subscriber_config is not None: + self.subscriber_config = subscriber_config + self.load() + + def show(self): + if self.subscribers is None: + print('No subscriber found!') + else: + for name in self.subscribers: + print('Name:', name, 'Email:', self.subscribers[name]['email']) + + def load(self, path=None): + if path is None: + path = self.subscriber_config + if path is None: + return None + tree = None + with open(path, 'r') as f: + xml = f.read() + parser = simple_parser() + parser.feed(xml) + tree = parser.root + subscribers = {} + if tree is not None: + for person in tree.childs: + person_name = None + person_email = None + person_topics = [] + person_keywords = [] + for item in person.childs: + if item.name == 'name': + person_name = item.data + elif item.name == 'email': + person_email = item.data + elif item.name == 'topics': + for topic in item.childs: + if topic.name == 'topic': + person_topics.append(topic.data) + 
elif item.name == 'keywords': + for keyword in item.childs: + if keyword.name == 'keyword': + person_keywords.append(keyword.data) + if person_name is not None and person_email is not None and person_topics is not None: + subscriber = {} + subscriber['keywords'] = person_keywords + subscriber['email'] = person_email + subscriber['topics'] = person_topics + subscribers[person_name] = subscriber + self.subscribers = subscribers + print('Subscriber load success! All subscribers are shown below:') + self.show(); + + def get_subscribed_topics(self): + topics = [] + for name in self.subscribers: + subscriber = self.subscribers[name] + topics += subscriber['topics'] + topics = set(topics) + return topics + + def get_subscribed_keywords(self): + keywords = [] + for name in self.subscribers: + keywords += self.subscribers[name]['keywords'] + keywords = set(keywords) + return keywords + + def get_keywords_of_topics(self): + keywords_of_topics = {} + for name in self.subscribers: + subscriber = self.subscribers[name] + topic_group = subscriber['topics'] + for topic in topic_group: + if topic not in keywords_of_topics: + keywords_of_topics[topic] = [] + keywords_of_topics[topic] += subscriber['keywords'] + return keywords_of_topics + + +if __name__ == '__main__': + manager = subscribe_manager() + print(manager.subscribers) + print(manager.get_subscribed_topics()) + print(manager.get_subscribed_keywords()) + print(manager.get_keywords_of_topics()) \ No newline at end of file diff --git a/try.py b/try.py new file mode 100644 index 0000000..809cc6e --- /dev/null +++ b/try.py @@ -0,0 +1,21 @@ +import arxiv_service +import time + +now = arxiv_service.cron_time(time.localtime(time.time())) +# now.show() +# while True: +# now.next_day() +# now.show() + +# running time +# minute hour day month week year +# * means always. +# a-b means from a to b (a and b included) +# a means run at this time. +# must match all to execute a command. 
+ +schedule = arxiv_service.cron_expr('0 0 29 2 * *') +for i in range(10): + now = schedule.next_run(now) + now.show() + print(now.to_struct_time()) \ No newline at end of file