преди 4 години · 99be2de6a3
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,3 @@
 
				+__pycache__/
			
 
				+output/
			
 
				+temp/
			
--- a/README.md
+++ b/README.md
@@ -0,0 +1,8 @@
 
				+# 自考题库转epub电子书
			
 
				+## 使用方法
			
 
				+- 修改zktools.py数据库连接信息
			
 
				+- pip install -r requirements.txt
			
 
				+- 启动 python index.py
			
 
				+## TODO
			
 
				+- 完善错误捕获
			
 
				+- 优化部分硬编码
			
--- a/epub.py
+++ b/epub.py
@@ -0,0 +1,68 @@
 
				+import base64
			
 
				+import hashlib
			
 
				+import time
			
 
				+import re
			
 
				+import requests
			
 
				+import zipfile
			
 
				+import os
			
 
				+from filetools import FileTools
			
 
				+from template import Template
			
 
				+
			
 
				+file_tools= FileTools()
			
 
				+template=Template(file_tools.get_ab_path('templates'))
			
 
				+
			
 
				+class Epub:
			
 
				+    id = ""
			
 
				+    name = ""
			
 
				+    temp_dir = "" # 相对路径
			
 
				+
			
 
				+    def __init__(self, name):
			
 
				+        self.name=name
			
 
				+        self.id = str(base64.b64encode(bytes(hashlib.md5(base64.b64encode(
			
 
				+            bytes(name, encoding='utf-8'))).hexdigest(), encoding='utf-8'))[-10:-1], encoding='utf-8')
			
 
				+        self.temp_dir='temp/%s%s' % (int(time.time()), self.id)
			
 
				+        file_tools.mkdir(self.temp_dir)
			
 
				+        self.write_base_file()
			
 
				+
			
 
				+    def get_path(self, file):
			
 
				+        return "%s/%s" % (self.temp_dir, file)
			
 
				+    
			
 
				+    def write_base_file(self):
			
 
				+        file_tools.mkdir(self.get_path('META-INF'))
			
 
				+        file_tools.mkdir(self.get_path('OEBPF'))
			
 
				+        file_tools.mkdir(self.get_path('OEBPF/content'))
			
 
				+        file_tools.mkdir(self.get_path('OEBPF/css'))
			
 
				+        file_tools.mkdir(self.get_path('OEBPF/images'))
			
 
				+        file='mimetype'; file_tools.write(self.get_path(file), template.get(file))
			
 
				+        file='META-INF/container.xml'; file_tools.write(self.get_path(file), template.get(file))
			
 
				+        file='OEBPF/css/ebook.css'; file_tools.write(self.get_path(file), template.get(file))
			
 
				+        file='OEBPF/css/ebook.css'; file_tools.write(self.get_path(file), template.get(file))
			
 
				+        file='OEBPF/ebook.opf'; file_tools.write(self.get_path(file), template.get(file, name=self.name, id=self.id))
			
 
				+        file='OEBPF/navigation.ncx'; file_tools.write(self.get_path(file), template.get(file, name=self.name, id=self.id))
			
 
				+
			
 
				+    def image_process(self, text):
			
 
				+        pattern = re.compile(r'https://sdjrzk-1251357229.cos.ap-guangzhou.myqcloud.com/exam/paper/\d+/images/\d+.[a-z]{3}')
			
 
				+        res=pattern.findall(text)
			
 
				+        if res:
			
 
				+            header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
			
 
				+                        'Chrome/63.0.3239.132 Safari/537.36'}
			
 
				+            for image in res:
			
 
				+                temp_file=image.replace('/images/','_').replace('https://sdjrzk-1251357229.cos.ap-guangzhou.myqcloud.com/exam/paper/','%s/OEBPF/images/' % self.temp_dir)
			
 
				+                pic=requests.get(image, headers=header)
			
 
				+                print(file_tools.write_b(temp_file, pic.content))
			
 
				+        return text.replace('/images/','_').replace('https://sdjrzk-1251357229.cos.ap-guangzhou.myqcloud.com/exam/paper/','../images/')
			
 
				+
			
 
				+    def process(self, text):
			
 
				+        cwd=os.getcwd()
			
 
				+        text = self.image_process(text)
			
 
				+        file='OEBPF/content/s1.xhtml'; file_tools.write(self.get_path(file), template.get(file, title=self.name, data=text))
			
 
				+        z=zipfile.ZipFile(file_tools.get_ab_path("output/%s.epub"%self.name), 'w', zipfile.ZIP_STORED)
			
 
				+        os.chdir(file_tools.get_ab_path(self.temp_dir))
			
 
				+        for dirpath, dirnames, filenames in os.walk('./'):
			
 
				+            for filename in filenames:
			
 
				+                z.write(os.path.join(dirpath, filename))
			
 
				+        os.chdir(cwd)
			
 
				+        
			
 
				+    def clean(self):
			
 
				+        file_tools.rm(self.temp_dir)
			
 
				+
			
--- a/filetools.py
+++ b/filetools.py
@@ -0,0 +1,44 @@
 
				+import os
			
 
				+import shutil
			
 
				+script_path = os.path.dirname(os.path.realpath(__file__))
			
 
				+
			
 
				+class FileTools:
			
 
				+    def mkdir(self, dir):
			
 
				+        real = self.get_ab_path(dir)
			
 
				+        if not os.path.exists(real):
			
 
				+            try:
			
 
				+                os.mkdir(real)
			
 
				+                return True
			
 
				+            except:
			
 
				+                return False
			
 
				+
			
 
				+    def write(self, file, content):
			
 
				+        try:
			
 
				+            with open(self.get_ab_path(file), 'w') as f:
			
 
				+                f.write(content)
			
 
				+            return True
			
 
				+        except:
			
 
				+            return False
			
 
				+
			
 
				+    def write_b(self, file, content):
			
 
				+        try:
			
 
				+            print(file)
			
 
				+            with open(self.get_ab_path(file), 'wb') as f:
			
 
				+                f.write(content)
			
 
				+            return True
			
 
				+        except Exception as e:
			
 
				+            print(e)
			
 
				+            return False
			
 
				+
			
 
				+
			
 
				+    def rm(self, dir):
			
 
				+        full=self.get_ab_path(dir)
			
 
				+        try:
			
 
				+            shutil.rmtree(path=full)
			
 
				+            return True
			
 
				+        except:
			
 
				+            return False
			
 
				+
			
 
				+    def get_ab_path(self,path):
			
 
				+        return "%s/%s" % (script_path, path)
			
 
				+
			
--- a/index.py
+++ b/index.py
@@ -0,0 +1,26 @@
 
				+from epub import Epub
			
 
				+from filetools import FileTools
			
 
				+from zktools import ZkTools
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    file = FileTools()
			
 
				+    file.mkdir("temp")
			
 
				+    file.mkdir("output")
			
 
				+    zk=ZkTools()
			
 
				+    course = zk.get_courses()
			
 
				+    data={}
			
 
				+    for i in course:
			
 
				+        course_name = i['name']
			
 
				+        data[i['name']]=[]
			
 
				+        questions = zk.get_questions(i['id'])
			
 
				+        for j in questions:
			
 
				+            data[i['name']].append({"name":j['name'], "data":zk.get_content(j['id'])})
			
 
				+
			
 
				+    for i in data:
			
 
				+        questions=data[i]
			
 
				+        for question_index in range(0, len(questions)):
			
 
				+            question=questions[question_index]
			
 
				+            epub = Epub("百日题库-%s-%02d-%s" % (i.split(' ')[1], question_index+1, question['name'].replace('/', '_')))
			
 
				+            epub.process(zk.format(question['data']))
			
 
				+            epub.clean()
			
 
				+
			
--- a/jsonencoder.py
+++ b/jsonencoder.py
@@ -0,0 +1,12 @@
 
				+import datetime
			
 
				+import json
			
 
				+
			
 
				+class JsonEncoder(json.JSONEncoder):
			
 
				+    def default(self, obj):
			
 
				+        if isinstance(obj, datetime.datetime):
			
 
				+            return obj.strftime("%Y-%m-%d %H:%M:%S")
			
 
				+        elif isinstance(obj, bytes):
			
 
				+            return str(obj, 'utf-8')
			
 
				+        else:
			
 
				+            return json.JSONEncoder.default(self, obj)
			
 
				+
			
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,2 @@
 
				+pymysql
			
 
				+jinja2
			
 
				+requests
			
--- a/template.py
+++ b/template.py
@@ -0,0 +1,16 @@
 
				+from jinja2 import FileSystemLoader, Environment
			
 
				+
			
 
				+class Template:
			
 
				+    dir = ""
			
 
				+    env = None
			
 
				+
			
 
				+    def __init__(self, template_dir):
			
 
				+        self.dir = template_dir
			
 
				+        self.env = Environment(loader=FileSystemLoader(template_dir))
			
 
				+
			
 
				+    def get(self, file,  **kwargs):
			
 
				+        try:
			
 
				+            t = self.env.get_template(file)
			
 
				+            return t.render(**kwargs)
			
 
				+        except:
			
 
				+            return False
			
--- a/templates/META-INF/container.xml
+++ b/templates/META-INF/container.xml
@@ -0,0 +1,6 @@
 
				+<?xml version='1.0' encoding='UTF-8' ?>
			
 
				+<container version='1.0' xmlns='urn:oasis:names:tc:opendocument:xmlns:container'>
			
 
				+  <rootfiles>
			
 
				+    <rootfile full-path='OEBPF/ebook.opf' media-type='application/oebps-package+xml'/>
			
 
				+  </rootfiles>
			
 
				+</container>
			
--- a/templates/OEBPF/content/s1.xhtml
+++ b/templates/OEBPF/content/s1.xhtml
@@ -0,0 +1,25 @@
 
				+<?xml version='1.0' encoding='utf-8'?>
			
 
				+<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.1//EN' 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'>
			
 
				+<html xmlns='http://www.w3.org/1999/xhtml'>
			
 
				+  <head profile='http://dublincore.org/documents/dcmi-terms/'>
			
 
				+    <meta http-equiv='Content-Type' content='text/html;' />
			
 
				+    <title>{{title}}</title>
			
 
				+    <meta name='DCTERMS.title' content='{{title}}' />
			
 
				+    <meta name='DCTERMS.language' content='zh' scheme='DCTERMS.RFC4646' />
			
 
				+    <meta name='DCTERMS.source' content='MFW' />
			
 
				+    <meta name='DCTERMS.issued' content='{$issued}' scheme='DCTERMS.W3CDTF'/>
			
 
				+    <meta name='DCTERMS.creator' content='iaun'/>
			
 
				+    <meta name='DCTERMS.contributor' content='' />
			
 
				+    <meta name='DCTERMS.modified' content='{$issued}' scheme='DCTERMS.W3CDTF'/>
			
 
				+    <meta name='DCTERMS.provenance' content='' />
			
 
				+    <meta name='DCTERMS.subject' content='Unknown' />
			
 
				+    <link rel='schema.DC' href='http://purl.org/dc/elements/1.1/' hreflang='zh' />
			
 
				+    <link rel='schema.DCTERMS' href='http://purl.org/dc/terms/' hreflang='zh' />
			
 
				+    <link rel='schema.DCTYPE' href='http://purl.org/dc/dcmitype/' hreflang='zh' />
			
 
				+    <link rel='schema.DCAM' href='http://purl.org/dc/dcam/' hreflang='zh' />
			
 
				+    <link rel='stylesheet' type='text/css' href='../css/ebook.css' />
			
 
				+  </head>
			
 
				+  <body>
			
 
				+    {{data}}
			
 
				+  </body>
			
 
				+</html>
			
--- a/templates/OEBPF/css/ebook.css
+++ b/templates/OEBPF/css/ebook.css
@@ -0,0 +1,33 @@
 
				+body {
			
 
				+    font-size: medium;
			
 
				+}
			
 
				+
			
 
				+blockquote {
			
 
				+    font-style: italic;
			
 
				+    border-left: 3px solid black;
			
 
				+    margin-left: 0px;
			
 
				+    padding-left: 10px;
			
 
				+}
			
 
				+
			
 
				+code {
			
 
				+    font-family: monospace;
			
 
				+    word-wrap: break-word;
			
 
				+}
			
 
				+
			
 
				+p {
			
 
				+    text-indent: 1em;
			
 
				+    white-space: pre-line;
			
 
				+}
			
 
				+
			
 
				+pre > code {
			
 
				+    line-height: 1.5;
			
 
				+}
			
 
				+
			
 
				+pre {
			
 
				+    border-left: 3px solid black;
			
 
				+    background-color: rgb(240, 240, 240);
			
 
				+    padding-left: 10px;
			
 
				+    text-align: left;
			
 
				+    white-space: pre-wrap;
			
 
				+    font-size: 75%;
			
 
				+}
			
--- a/templates/OEBPF/ebook.opf
+++ b/templates/OEBPF/ebook.opf
@@ -0,0 +1,25 @@
 
				+<?xml version='1.0' encoding='utf-8'?>
			
 
				+<package xmlns='http://www.idpf.org/2007/opf' version='2.0' unique-identifier='BookId'>
			
 
				+	<metadata xmlns:dc='http://purl.org/dc/elements/1.1/' xmlns:opf='http://www.idpf.org/2007/opf'>
			
 
				+		<dc:title>{{name}}</dc:title>
			
 
				+		<dc:identifier id='BookId' opf:scheme='URI'>{{id}}</dc:identifier>
			
 
				+		<dc:language>zh</dc:language>
			
 
				+		<dc:creator opf:role='aut' opf:file-as=''>iaun</dc:creator>
			
 
				+		<dc:publisher>iaun</dc:publisher>
			
 
				+		<dc:description>iaun</dc:description>
			
 
				+		<dc:coverage></dc:coverage>
			
 
				+		<dc:source></dc:source>
			
 
				+		<dc:date opf:event='publication'>2022</dc:date>
			
 
				+		<dc:date opf:event='modification'>2022-01-01</dc:date>
			
 
				+		<dc:rights></dc:rights>
			
 
				+		<dc:subject>Unknown</dc:subject>
			
 
				+	</metadata>
			
 
				+	<manifest>
			
 
				+		<item id='ncx' media-type='application/x-dtbncx+xml' href='navigation.ncx'/>
			
 
				+		<item id='s1' media-type='application/xhtml+xml' href='content/s1.xhtml'/>
			
 
				+		<item id='css' media-type='text/css' href='css/ebook.css'/>
			
 
				+	</manifest>
			
 
				+	<spine toc='ncx'>
			
 
				+		<itemref idref='s1' />
			
 
				+	</spine>
			
 
				+</package>
			
--- a/templates/OEBPF/navigation.ncx
+++ b/templates/OEBPF/navigation.ncx
@@ -0,0 +1,18 @@
 
				+<?xml version='1.0' encoding='UTF-8'?>
			
 
				+<!DOCTYPE ncx PUBLIC '-//NISO//DTD ncx 2005-1//EN' 'http://www.daisy.org/z3986/2005/ncx-2005-1.dtd'>
			
 
				+<ncx xmlns='http://www.daisy.org/z3986/2005/ncx/'>
			
 
				+<head>
			
 
				+  <meta name='dtb:uid' content='{{id}}'/>
			
 
				+  <meta name='dtb:depth' content='1'/>
			
 
				+  <meta name='dtb:totalPageCount' content='0'/>
			
 
				+  <meta name='dtb:maxPageNumber' content='0'/>
			
 
				+</head>
			
 
				+<docTitle><text>{{name}}</text></docTitle>
			
 
				+<docAuthor><text>iaun</text></docAuthor>
			
 
				+<navMap>
			
 
				+  <navPoint class='section' id='s1' playOrder='1'>
			
 
				+    <navLabel><text>{{name}}</text></navLabel>
			
 
				+    <content src='content/s1.xhtml'/>
			
 
				+  </navPoint>
			
 
				+</navMap>
			
 
				+</ncx>
			
--- a/templates/mimetype
+++ b/templates/mimetype
@@ -0,0 +1 @@
 
				+application/epub+zip
			
--- a/zktools.py
+++ b/zktools.py
@@ -0,0 +1,81 @@
 
				+import pymysql
			
 
				+import json
			
 
				+# from jsonencoder import JsonEncoder
			
 
				+
			
 
				+
			
 
				+class ZkTools():
			
 
				+    db=None
			
 
				+    def __init__(self):
			
 
				+        self.db = pymysql.connect(host="localhost", user="user",
			
 
				+                     password="password", db="zk")
			
 
				+    
			
 
				+    def get_courses(self):
			
 
				+        cursor=self.db.cursor()
			
 
				+        sql="select `data` from `course`"
			
 
				+        cursor.execute(sql)
			
 
				+        data=cursor.fetchone()[0]
			
 
				+        data=json.loads(data)
			
 
				+        self.db.commit()
			
 
				+        res=[]
			
 
				+        for i in data['results']:
			
 
				+            res.append({'id':i['id'], 'name':i['name']})
			
 
				+        return res
			
 
				+
			
 
				+    def get_questions(self, course_id):
			
 
				+        cursor=self.db.cursor()
			
 
				+        sql="select `data` from `question_list` where `course_id` = %s"
			
 
				+        cursor.execute(sql, course_id)
			
 
				+        data=cursor.fetchone()[0]
			
 
				+        data=json.loads(data)
			
 
				+        self.db.commit()
			
 
				+        res=[]
			
 
				+        for i in data['results']:
			
 
				+            res.append({'id':i['id'], 'name':i['title']})
			
 
				+        return res
			
 
				+
			
 
				+    def get_content(self, question_id):
			
 
				+        cursor=self.db.cursor()
			
 
				+        sql="select `content_object` from `question_content_object` where `question_id` = %s"
			
 
				+        cursor.execute(sql, question_id)
			
 
				+        data=cursor.fetchone()[0]
			
 
				+        data=json.loads(data)
			
 
				+        self.db.commit()
			
 
				+        return data
			
 
				+
			
 
				+
			
 
				+    def format(self, data):
			
 
				+        # data=data["data"]
			
 
				+        temp="<h1>%s</h1>\n" % data['title']
			
 
				+        # memo1
			
 
				+        if data["memo"] != "":
			
 
				+            temp+="<p>%s</p>\n" % data["memo"]
			
 
				+        # groups
			
 
				+        temp+="<div>\n"
			
 
				+        
			
 
				+        for group_index in range(0, len(data['groups'])):
			
 
				+            group=data['groups'][group_index]
			
 
				+            temp+="<h2>%s</h2>\n" % group['title']
			
 
				+            if group['memo'] != "":
			
 
				+                temp+="<p>%s</p>\n" % group['memo']
			
 
				+            # questions
			
 
				+            temp+="<div>\n"
			
 
				+            for question_index in range(0, len(group['questions'])):
			
 
				+                question=group['questions'][question_index]
			
 
				+                temp+="<p style=\"font-weight:bold;\">%s. %s</p>\n" %(question_index+1, question['title'])
			
 
				+                temp+="<ul>\n"
			
 
				+                for option in question['options']:
			
 
				+                    temp+="<li>%s. %s</li>\n" % (option['value'], option['text'])
			
 
				+                temp+="</ul>\n" 
			
 
				+                for answer_index in range(0,len(question['answer'])):
			
 
				+                    answer=question['answer'][answer_index]
			
 
				+                    if type(answer) == list:
			
 
				+                        question['answer'][answer_index]="；".join(question['answer'][answer_index])
			
 
				+                temp+="<p><b>答案：</b>%s</p>\n" % ("；".join(question['answer']))
			
 
				+                if 'explanation' in question:
			
 
				+                    temp+="<p><b>题解：</b>%s</p>\n" % (question['explanation'])
			
 
				+                temp+= "<hr></hr>\n" if question_index < len(group['questions'])-1 else ""
			
 
				+            temp+="</div>\n"
			
 
				+        temp+="</div>\n"
			
 
				+        return temp
			
 
				+
			
 
				+