iaun преди 4 години
ревизия
99be2de6a3
променени са 15 файла, в които са добавени 368 реда и са изтрити 0 реда
  1. 3 0
      .gitignore
  2. 8 0
      README.md
  3. 68 0
      epub.py
  4. 44 0
      filetools.py
  5. 26 0
      index.py
  6. 12 0
      jsonencoder.py
  7. 2 0
      requirements.txt
  8. 16 0
      template.py
  9. 6 0
      templates/META-INF/container.xml
  10. 25 0
      templates/OEBPF/content/s1.xhtml
  11. 33 0
      templates/OEBPF/css/ebook.css
  12. 25 0
      templates/OEBPF/ebook.opf
  13. 18 0
      templates/OEBPF/navigation.ncx
  14. 1 0
      templates/mimetype
  15. 81 0
      zktools.py

+ 3 - 0
.gitignore

@@ -0,0 +1,3 @@
+__pycache__/
+output/
+temp/

+ 8 - 0
README.md

@@ -0,0 +1,8 @@
+# 自考题库转epub电子书
+## 使用方法
+- 修改zktools.py数据库连接信息
+- pip install -r requirements.txt
+- 启动 python index.py
+## TODO
+- 完善错误捕获
+- 优化部分硬编码

+ 68 - 0
epub.py

@@ -0,0 +1,68 @@
+import base64
+import hashlib
+import time
+import re
+import requests
+import zipfile
+import os
+from filetools import FileTools
+from template import Template
+
+# Module-wide helpers shared by every Epub instance: one FileTools object and
+# a Template whose lookup root is the "templates" directory resolved by it.
+file_tools = FileTools()
+template = Template(file_tools.get_ab_path('templates'))
+
+class Epub:
+    """Build a single-chapter EPUB from an HTML fragment.
+
+    Creating an instance makes a scratch directory under ``temp/`` and writes
+    the static skeleton rendered from the templates into it. ``process``
+    mirrors the referenced images, writes the chapter and zips the scratch
+    directory into ``output/<name>.epub``. ``clean`` removes the scratch
+    directory again.
+    """
+
+    # Common prefix of the remote image URLs that are mirrored into the book.
+    _IMAGE_URL_PREFIX = 'https://sdjrzk-1251357229.cos.ap-guangzhou.myqcloud.com/exam/paper/'
+    # <prefix><digits>/images/<digits>.<three lowercase letters>. The prefix
+    # is escaped so that "." only matches a literal dot.
+    _IMAGE_PATTERN = re.compile(re.escape(_IMAGE_URL_PREFIX) + r'\d+/images/\d+\.[a-z]{3}')
+    # Seconds to wait for a single image download before giving up on it.
+    _DOWNLOAD_TIMEOUT = 30
+
+    id = ""
+    name = ""
+    temp_dir = ""  # relative path
+
+    def __init__(self, name):
+        """Derive the book id from ``name`` and prepare the scratch directory.
+
+        :param name: book title; also used as the output file name.
+        """
+        self.name = name
+        # id = 9 characters taken from base64(md5_hex(base64(name))).
+        name_b64 = base64.b64encode(name.encode('utf-8'))
+        digest_hex = hashlib.md5(name_b64).hexdigest()
+        digest_b64 = base64.b64encode(digest_hex.encode('utf-8'))
+        self.id = digest_b64[-10:-1].decode('utf-8')
+        self.temp_dir = 'temp/%s%s' % (int(time.time()), self.id)
+        file_tools.mkdir(self.temp_dir)
+        self.write_base_file()
+
+    def get_path(self, file):
+        """Return ``file`` as a path inside the scratch directory."""
+        return "%s/%s" % (self.temp_dir, file)
+
+    def write_base_file(self):
+        """Create the directory layout and write every file except the chapter."""
+        for folder in ('META-INF', 'OEBPF', 'OEBPF/content', 'OEBPF/css', 'OEBPF/images'):
+            file_tools.mkdir(self.get_path(folder))
+        # Templates rendered without variables (each exactly once).
+        for file in ('mimetype', 'META-INF/container.xml', 'OEBPF/css/ebook.css'):
+            file_tools.write(self.get_path(file), template.get(file))
+        # Templates that embed the book title and identifier.
+        for file in ('OEBPF/ebook.opf', 'OEBPF/navigation.ncx'):
+            file_tools.write(self.get_path(file), template.get(file, name=self.name, id=self.id))
+
+    def image_process(self, text):
+        """Download the images referenced in ``text`` and rewrite their URLs.
+
+        Each distinct matching URL is fetched once and stored under
+        ``OEBPF/images`` as ``<paper>_<file>``. A download that fails or
+        answers with an error status is reported and skipped instead of being
+        saved as a broken image.
+
+        :param text: HTML fragment that may contain remote image URLs.
+        :return: ``text`` with ``/images/`` replaced by ``_`` and the remote
+            prefix replaced by ``../images/``.
+        """
+        # dict.fromkeys drops duplicates while keeping first-seen order.
+        urls = list(dict.fromkeys(self._IMAGE_PATTERN.findall(text)))
+        if urls:
+            header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
+                        'Chrome/63.0.3239.132 Safari/537.36'}
+            for image in urls:
+                temp_file = image.replace('/images/', '_').replace(
+                    self._IMAGE_URL_PREFIX, '%s/OEBPF/images/' % self.temp_dir)
+                try:
+                    pic = requests.get(image, headers=header, timeout=self._DOWNLOAD_TIMEOUT)
+                except requests.RequestException as e:
+                    print(e)
+                    continue
+                if not pic.ok:
+                    print("%s: HTTP %s" % (image, pic.status_code))
+                    continue
+                print(file_tools.write_b(temp_file, pic.content))
+        return text.replace('/images/', '_').replace(self._IMAGE_URL_PREFIX, '../images/')
+
+    def process(self, text):
+        """Write the chapter and pack the scratch directory into the .epub.
+
+        :param text: HTML fragment used as the chapter body.
+        """
+        text = self.image_process(text)
+        file = 'OEBPF/content/s1.xhtml'
+        file_tools.write(self.get_path(file), template.get(file, title=self.name, data=text))
+
+        root = file_tools.get_ab_path(self.temp_dir)
+        target = file_tools.get_ab_path("output/%s.epub" % self.name)
+        # The context manager guarantees the central directory is written and
+        # the file handle released even if adding a member fails. Archive
+        # names are computed relative to ``root`` so the process working
+        # directory never has to change.
+        with zipfile.ZipFile(target, 'w', zipfile.ZIP_STORED) as z:
+            # The EPUB container format requires "mimetype" to be the first
+            # entry of the archive, stored uncompressed.
+            mimetype_path = os.path.join(root, 'mimetype')
+            if os.path.isfile(mimetype_path):
+                z.write(mimetype_path, 'mimetype')
+            for dirpath, dirnames, filenames in os.walk(root):
+                dirnames.sort()  # deterministic traversal order
+                for filename in sorted(filenames):
+                    full_path = os.path.join(dirpath, filename)
+                    arcname = os.path.relpath(full_path, root)
+                    if arcname == 'mimetype':
+                        continue  # already added as the first entry
+                    z.write(full_path, arcname)
+
+    def clean(self):
+        """Delete the scratch directory."""
+        file_tools.rm(self.temp_dir)
+

+ 44 - 0
filetools.py

@@ -0,0 +1,44 @@
+import os
+import shutil
+script_path = os.path.dirname(os.path.realpath(__file__))
+
+class FileTools:
+    """Best-effort filesystem helpers.
+
+    Every path argument is resolved relative to the directory of this script
+    (see ``get_ab_path``). Operations report failure by printing the error and
+    returning ``False`` instead of raising.
+    """
+
+    def mkdir(self, dir):
+        """Create a single directory level.
+
+        :return: ``True`` if the directory exists afterwards, ``False`` if it
+            could not be created.
+        """
+        real = self.get_ab_path(dir)
+        if os.path.isdir(real):
+            return True
+        try:
+            os.mkdir(real)
+        except OSError as e:
+            print(e)
+            return False
+        return True
+
+    def write(self, file, content):
+        """Write text ``content`` to ``file``, replacing any existing file.
+
+        The file is always encoded as UTF-8 rather than the platform default
+        encoding.
+
+        :return: ``True`` on success, ``False`` if the file could not be
+            written or ``content`` is not a string.
+        """
+        try:
+            with open(self.get_ab_path(file), 'w', encoding='utf-8') as f:
+                f.write(content)
+        except (OSError, TypeError, ValueError) as e:
+            # TypeError covers non-string content; ValueError covers text
+            # that cannot be encoded.
+            print(e)
+            return False
+        return True
+
+    def write_b(self, file, content):
+        """Write binary ``content`` to ``file``; the target path is printed first.
+
+        :return: ``True`` on success, ``False`` otherwise.
+        """
+        print(file)
+        try:
+            with open(self.get_ab_path(file), 'wb') as f:
+                f.write(content)
+        except (OSError, TypeError) as e:
+            print(e)
+            return False
+        return True
+
+    def rm(self, dir):
+        """Recursively delete a directory.
+
+        :return: ``True`` on success, ``False`` otherwise.
+        """
+        full = self.get_ab_path(dir)
+        try:
+            shutil.rmtree(path=full)
+        except OSError as e:
+            print(e)
+            return False
+        return True
+
+    def get_ab_path(self, path):
+        """Return ``path`` prefixed with the directory containing this script."""
+        return "%s/%s" % (script_path, path)
+

+ 26 - 0
index.py

@@ -0,0 +1,26 @@
+from epub import Epub
+from filetools import FileTools
+from zktools import ZkTools
+
+if __name__ == '__main__':
+    # Make sure the working directories used by Epub exist.
+    file = FileTools()
+    file.mkdir("temp")
+    file.mkdir("output")
+
+    zk = ZkTools()
+    # One EPUB per question set, generated as soon as its content is loaded
+    # instead of first collecting every course in memory.
+    for course in zk.get_courses():
+        # The title uses the second space-separated token of the course name;
+        # fall back to the whole name when it contains no space.
+        name_parts = course['name'].split(' ')
+        label = name_parts[1] if len(name_parts) > 1 else course['name']
+        for number, question in enumerate(zk.get_questions(course['id']), start=1):
+            content = zk.get_content(question['id'])
+            title = "百日题库-%s-%02d-%s" % (label, number, question['name'])
+            # The title becomes a file name, so it must not contain "/".
+            epub = Epub(title.replace('/', '_'))
+            try:
+                epub.process(zk.format(content))
+            finally:
+                # Remove the scratch directory even when generation fails.
+                epub.clean()
+

+ 12 - 0
jsonencoder.py

@@ -0,0 +1,12 @@
+import datetime
+import json
+
+class JsonEncoder(json.JSONEncoder):
+    """JSON encoder that can also serialise ``datetime`` and ``bytes`` values."""
+
+    def default(self, obj):
+        """Convert values the stock encoder rejects.
+
+        ``bytes`` are decoded as UTF-8 and ``datetime.datetime`` objects are
+        formatted as ``YYYY-MM-DD HH:MM:SS``. Everything else is delegated to
+        the base class.
+        """
+        if isinstance(obj, bytes):
+            return obj.decode('utf-8')
+        if isinstance(obj, datetime.datetime):
+            return obj.strftime("%Y-%m-%d %H:%M:%S")
+        return super().default(obj)
+

+ 2 - 0
requirements.txt

@@ -0,0 +1,2 @@
+pymysql
+jinja2
+requests

+ 16 - 0
template.py

@@ -0,0 +1,16 @@
+from jinja2 import FileSystemLoader, Environment
+
+class Template:
+    """Thin wrapper around a Jinja2 environment rooted at one directory."""
+
+    dir = ""    # directory the templates are loaded from
+    env = None  # jinja2 Environment bound to that directory
+
+    def __init__(self, template_dir):
+        """Create an environment that loads templates from ``template_dir``."""
+        self.dir = template_dir
+        self.env = Environment(loader=FileSystemLoader(template_dir))
+
+    def get(self, file, **kwargs):
+        """Render template ``file`` with ``kwargs`` as template variables.
+
+        :return: the rendered text, or ``False`` when the template cannot be
+            loaded or rendered. The reason is printed so that a failure is
+            not silent.
+        """
+        try:
+            return self.env.get_template(file).render(**kwargs)
+        except Exception as e:
+            # Stay best-effort for callers that test the result against
+            # False, but let KeyboardInterrupt/SystemExit propagate.
+            print(e)
+            return False

+ 6 - 0
templates/META-INF/container.xml

@@ -0,0 +1,6 @@
+<?xml version='1.0' encoding='UTF-8' ?>
+<container version='1.0' xmlns='urn:oasis:names:tc:opendocument:xmlns:container'>
+  <rootfiles>
+    <rootfile full-path='OEBPF/ebook.opf' media-type='application/oebps-package+xml'/>
+  </rootfiles>
+</container>

+ 25 - 0
templates/OEBPF/content/s1.xhtml

@@ -0,0 +1,25 @@
+<?xml version='1.0' encoding='utf-8'?>
+<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.1//EN' 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'>
+<html xmlns='http://www.w3.org/1999/xhtml'>
+  <head profile='http://dublincore.org/documents/dcmi-terms/'>
+    <meta http-equiv='Content-Type' content='text/html;' />
+    <title>{{title}}</title>
+    <meta name='DCTERMS.title' content='{{title}}' />
+    <meta name='DCTERMS.language' content='zh' scheme='DCTERMS.RFC4646' />
+    <meta name='DCTERMS.source' content='MFW' />
+    <meta name='DCTERMS.issued' content='2022' scheme='DCTERMS.W3CDTF'/>
+    <meta name='DCTERMS.creator' content='iaun'/>
+    <meta name='DCTERMS.contributor' content='' />
+    <meta name='DCTERMS.modified' content='2022-01-01' scheme='DCTERMS.W3CDTF'/>
+    <meta name='DCTERMS.provenance' content='' />
+    <meta name='DCTERMS.subject' content='Unknown' />
+    <link rel='schema.DC' href='http://purl.org/dc/elements/1.1/' hreflang='zh' />
+    <link rel='schema.DCTERMS' href='http://purl.org/dc/terms/' hreflang='zh' />
+    <link rel='schema.DCTYPE' href='http://purl.org/dc/dcmitype/' hreflang='zh' />
+    <link rel='schema.DCAM' href='http://purl.org/dc/dcam/' hreflang='zh' />
+    <link rel='stylesheet' type='text/css' href='../css/ebook.css' />
+  </head>
+  <body>
+    {{data}}
+  </body>
+</html>

+ 33 - 0
templates/OEBPF/css/ebook.css

@@ -0,0 +1,33 @@
+body {
+    font-size: medium;
+}
+
+blockquote {
+    font-style: italic;
+    border-left: 3px solid black;
+    margin-left: 0px;
+    padding-left: 10px;
+}
+
+code {
+    font-family: monospace;
+    word-wrap: break-word;
+}
+
+p {
+    text-indent: 1em;
+    white-space: pre-line;
+}
+
+pre > code {
+    line-height: 1.5;
+}
+
+pre {
+    border-left: 3px solid black;
+    background-color: rgb(240, 240, 240);
+    padding-left: 10px;
+    text-align: left;
+    white-space: pre-wrap;
+    font-size: 75%;
+}

+ 25 - 0
templates/OEBPF/ebook.opf

@@ -0,0 +1,25 @@
+<?xml version='1.0' encoding='utf-8'?>
+<package xmlns='http://www.idpf.org/2007/opf' version='2.0' unique-identifier='BookId'>
+	<metadata xmlns:dc='http://purl.org/dc/elements/1.1/' xmlns:opf='http://www.idpf.org/2007/opf'>
+		<dc:title>{{name}}</dc:title>
+		<dc:identifier id='BookId' opf:scheme='URI'>{{id}}</dc:identifier>
+		<dc:language>zh</dc:language>
+		<dc:creator opf:role='aut' opf:file-as=''>iaun</dc:creator>
+		<dc:publisher>iaun</dc:publisher>
+		<dc:description>iaun</dc:description>
+		<dc:coverage></dc:coverage>
+		<dc:source></dc:source>
+		<dc:date opf:event='publication'>2022</dc:date>
+		<dc:date opf:event='modification'>2022-01-01</dc:date>
+		<dc:rights></dc:rights>
+		<dc:subject>Unknown</dc:subject>
+	</metadata>
+	<manifest>
+		<item id='ncx' media-type='application/x-dtbncx+xml' href='navigation.ncx'/>
+		<item id='s1' media-type='application/xhtml+xml' href='content/s1.xhtml'/>
+		<item id='css' media-type='text/css' href='css/ebook.css'/>
+	</manifest>
+	<spine toc='ncx'>
+		<itemref idref='s1' />
+	</spine>
+</package>

+ 18 - 0
templates/OEBPF/navigation.ncx

@@ -0,0 +1,18 @@
+<?xml version='1.0' encoding='UTF-8'?>
+<!DOCTYPE ncx PUBLIC '-//NISO//DTD ncx 2005-1//EN' 'http://www.daisy.org/z3986/2005/ncx-2005-1.dtd'>
+<ncx xmlns='http://www.daisy.org/z3986/2005/ncx/'>
+<head>
+  <meta name='dtb:uid' content='{{id}}'/>
+  <meta name='dtb:depth' content='1'/>
+  <meta name='dtb:totalPageCount' content='0'/>
+  <meta name='dtb:maxPageNumber' content='0'/>
+</head>
+<docTitle><text>{{name}}</text></docTitle>
+<docAuthor><text>iaun</text></docAuthor>
+<navMap>
+  <navPoint class='section' id='s1' playOrder='1'>
+    <navLabel><text>{{name}}</text></navLabel>
+    <content src='content/s1.xhtml'/>
+  </navPoint>
+</navMap>
+</ncx>

+ 1 - 0
templates/mimetype

@@ -0,0 +1 @@
+application/epub+zip

+ 81 - 0
zktools.py

@@ -0,0 +1,81 @@
+import pymysql
+import json
+# from jsonencoder import JsonEncoder
+
+
+class ZkTools():
+    """Read course and question data from MySQL and render it as HTML."""
+
+    db = None
+
+    def __init__(self, host="localhost", user="user", password="password", db="zk"):
+        """Open the database connection.
+
+        The defaults are the previously hard-coded settings, so ``ZkTools()``
+        keeps connecting exactly as before.
+        """
+        self.db = pymysql.connect(host=host, user=user,
+                     password=password, db=db)
+
+    def _fetch_json(self, sql, args=None):
+        """Run ``sql`` and return the first column of the first row parsed as JSON.
+
+        :return: the decoded value, or ``None`` when the query yields no row.
+        """
+        # The context manager closes the cursor even if the query fails.
+        with self.db.cursor() as cursor:
+            cursor.execute(sql, args)
+            row = cursor.fetchone()
+        self.db.commit()
+        if row is None:
+            return None
+        return json.loads(row[0])
+
+    def get_courses(self):
+        """Return ``[{'id': ..., 'name': ...}, ...]`` for every stored course.
+
+        An empty list is returned when the ``course`` table has no row.
+        """
+        data = self._fetch_json("select `data` from `course`")
+        if data is None:
+            return []
+        return [{'id': i['id'], 'name': i['name']} for i in data['results']]
+
+    def get_questions(self, course_id):
+        """Return ``[{'id': ..., 'name': ...}, ...]`` for one course.
+
+        ``name`` is taken from each entry's ``title``. An empty list is
+        returned when nothing is stored for ``course_id``.
+        """
+        data = self._fetch_json(
+            "select `data` from `question_list` where `course_id` = %s", (course_id,))
+        if data is None:
+            return []
+        return [{'id': i['id'], 'name': i['title']} for i in data['results']]
+
+    def get_content(self, question_id):
+        """Return the decoded content object of one question set.
+
+        :raises LookupError: when no content is stored for ``question_id``.
+        """
+        data = self._fetch_json(
+            "select `content_object` from `question_content_object` where `question_id` = %s",
+            (question_id,))
+        if data is None:
+            raise LookupError("no content stored for question_id %r" % (question_id,))
+        return data
+
+    def format(self, data):
+        """Render a content object as an HTML fragment.
+
+        ``data`` is read through the keys ``title``, ``memo`` and ``groups``.
+        Each group is read through ``title``, ``memo`` and ``questions``.
+        Each question is read through ``title``, ``options`` (entries with
+        ``value`` and ``text``), ``answer`` and, optionally, ``explanation``.
+        ``data`` is not modified.
+
+        :return: the HTML fragment as a string.
+        """
+        parts = ["<h1>%s</h1>\n" % data['title']]
+        if data["memo"] != "":
+            parts.append("<p>%s</p>\n" % data["memo"])
+        parts.append("<div>\n")
+
+        for group in data['groups']:
+            parts.append("<h2>%s</h2>\n" % group['title'])
+            if group['memo'] != "":
+                parts.append("<p>%s</p>\n" % group['memo'])
+            parts.append("<div>\n")
+            total = len(group['questions'])
+            for number, question in enumerate(group['questions'], start=1):
+                parts.append("<p style=\"font-weight:bold;\">%s. %s</p>\n" % (number, question['title']))
+                parts.append("<ul>\n")
+                for option in question['options']:
+                    parts.append("<li>%s. %s</li>\n" % (option['value'], option['text']))
+                parts.append("</ul>\n")
+                # An answer entry may itself be a list of values; flatten it
+                # into a new list so the caller's data stays untouched.
+                answers = [
+                    ";".join(map(str, answer)) if isinstance(answer, list) else str(answer)
+                    for answer in question['answer']
+                ]
+                parts.append("<p><b>答案:</b>%s</p>\n" % (";".join(answers)))
+                if 'explanation' in question:
+                    parts.append("<p><b>题解:</b>%s</p>\n" % (question['explanation']))
+                # Separator between questions, but not after the last one.
+                if number < total:
+                    parts.append("<hr></hr>\n")
+            parts.append("</div>\n")
+        parts.append("</div>\n")
+        return "".join(parts)
+
+