xscanserver.py 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255
  1. #!-*-coding:utf-8 -*-
  2. import os
  3. import sys
  4. import time
  5. import socket
  6. import json
  7. import logging
  8. import inspect
  9. import winerror
  10. import win32event
  11. import win32service
  12. import servicemanager
  13. import win32serviceutil
  14. from flask import Flask,request
  15. from tornado.ioloop import IOLoop
  16. from tornado.wsgi import WSGIContainer
  17. from tornado.httpserver import HTTPServer
  18. #from parsedocx import DocxConverter,QuestionsParser
  19. import re,os
  20. import json,uuid
  21. from bs4 import BeautifulSoup
  22. from win32com.client import Dispatch,DispatchEx
  23. import pythoncom
  24. class DocxConverter(object):
  25. """
  26. """
  27. def __init__(self,docpath="test4.docx"):
  28. """
  29. """
  30. self.docpath = docpath
  31. def docx2html(self):
  32. """
  33. """
  34. self.word = DispatchEx("Word.Application")
  35. self.word.Visible = 0
  36. xlog.error(self.docpath)
  37. xlog.info(self.word)
  38. self.doc = self.word.Documents.Open(self.docpath)
  39. xlog.error(self.doc)
  40. html = os.path.join(os.path.dirname(self.docpath),str(uuid.uuid4())+".html")
  41. self.doc.SaveAs(html,10)
  42. self.doc.Close()
  43. self.word.Quit()
  44. return html
  45. class QuestionsParser(object):
  46. """试题解析
  47. """
  48. def __init__(self,name="test4.html"):
  49. self.html = open(name,"r").read()
  50. self.soup = BeautifulSoup(self.html,"html.parser")
  51. def get_paragraphs(self):
  52. """
  53. """
  54. wordsection = self.soup.find("div",class_="WordSection1")
  55. #print wordsection
  56. pars = wordsection.find_all("p")
  57. return pars
  58. def parse_questions(self):
  59. """提取试题
  60. """
  61. que_type_dct = {}
  62. paragraphs = self.get_paragraphs()
  63. for i,p in enumerate(paragraphs):
  64. print p.text
  65. if u"【题型】" in p.text:
  66. que_type_dct["type"] = p.text.split("、")[-1]
  67. def parse_questions(self):
  68. """提取试题
  69. """
  70. data = []
  71. tmp_val = {}
  72. tx_name = ""
  73. key = ""
  74. paragraphs = self.get_paragraphs()
  75. for i,p in enumerate(paragraphs):
  76. if u"【题型】" in p.text:
  77. tx_name = p.text
  78. if u"【题干】" in p.text:
  79. key = "tg"
  80. tmp_val["tx"] = tx_name
  81. if tmp_val.get("tg"):
  82. data.append(tmp_val)
  83. tmp_val = {"tg":"","tx":"","zsd":"","nd":"","da":"","jx":""}
  84. if u"【知识点】" in p.text:
  85. key = "zsd"
  86. if u"【难度】" in p.text:
  87. key = "nd"
  88. if u"【答案】" in p.text:
  89. key = "da"
  90. if u"【解析】" in p.text:
  91. key = "jx"
  92. if key != "":
  93. tmp_val[key] += p.__str__()
  94. data.append(tmp_val)
  95. return data
  96. def get_questions(self):
  97. """
  98. """
  99. questions = self.parse_questions()
  100. for que in questions:
  101. que["tx"] = que["tx"].split(u"、")[-1]
  102. #que["tg"] = que["tg"].replace(u"【题干】","")
  103. #que["zsd"] = que["zsd"].replace(u"【知识点】","")
  104. #que["da"] = que["da"].replace(u"【答案】","")
  105. #que["jx"] = que["jx"].replace(u"【解析】","")
  106. que["qno"] = self.get_qno(que["tg"])
  107. return questions
  108. def get_qno(self,tg):
  109. """提取题号
  110. """
  111. tgsoup = BeautifulSoup(tg,"html.parser")
  112. tgtext = tgsoup.text
  113. qno = re.search(r"\d+",tgtext.split(u"、")[0]).group()
  114. return qno
  115. app = Flask(__name__)
  116. root = "c:\\AppData\\say365"
  117. xlog = logging.getLogger('[PythonService]')
  118. handler = logging.FileHandler(os.path.join(root, "service.log"))
  119. formatter = logging.Formatter('%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
  120. handler.setFormatter(formatter)
  121. xlog.addHandler(handler)
  122. xlog.setLevel(logging.INFO)
  123. @app.route('/parsedocx',methods=["POST"])
  124. def parsedocx():
  125. """
  126. """
  127. try:
  128. fobj = request.files['file']
  129. if not os.path.exists(root):
  130. os.makedirs(root)
  131. docxname = os.path.join(root,str(int(time.time()*1000))+os.path.splitext(fobj.filename)[-1])
  132. with open(docxname,"wb+") as doc:
  133. doc.write(fobj.read())
  134. docxconv = DocxConverter(docxname)
  135. html = docxconv.docx2html()
  136. parser = QuestionsParser(html)
  137. questions = parser.get_questions()
  138. res = json.dumps(questions)
  139. xlog.info("test")
  140. return res
  141. except Exception as e:
  142. xlog.error(e)
  143. return str(e)
  144. def main():
  145. #app.run(host='0.0.0.0', port=8002, debug=True)
  146. s = HTTPServer(WSGIContainer(app))
  147. s.listen(8002)
  148. IOLoop.current().start()
  149. class XsacnService(win32serviceutil.ServiceFramework):
  150. #服务名
  151. _svc_name_ = "XsacnService"
  152. #服务在windows系统中显示的名称
  153. _svc_display_name_ = "XsacnService"
  154. #服务的描述
  155. _svc_description_ = "XsacnService"
  156. def __init__(self, args):
  157. win32serviceutil.ServiceFramework.__init__(self, args)
  158. self.stop_event = win32event.CreateEvent(None, 0, 0, None)
  159. socket.setdefaulttimeout(60) # 套接字设置默认超时时间
  160. self.logger = self._getLogger() # 获取日志对象
  161. self.isAlive = True
  162. def _getLogger(self):
  163. # 设置日志功能
  164. logger = logging.getLogger('[PythonService]')
  165. this_file = inspect.getfile(inspect.currentframe())
  166. dirpath = os.path.abspath(os.path.dirname(this_file))
  167. handler = logging.FileHandler(os.path.join(dirpath, "service.log"))
  168. formatter = logging.Formatter('%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
  169. handler.setFormatter(formatter)
  170. logger.addHandler(handler)
  171. logger.setLevel(logging.INFO)
  172. return logger
  173. def SvcDoRun(self):
  174. # 把自己的代码放到这里,就OK
  175. # 等待服务被停止
  176. #self.main()
  177. #win32event.WaitForSingleObject(self.hWaitStop, win32event.INFINITE)
  178. pythoncom.CoInitialize()
  179. while self.isAlive:
  180. self.logger.info("服务正在运行...")
  181. sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
  182. result = sock.connect_ex(('127.0.0.1', 8002)) # 嗅探网址是否可以访问,成功返回0,出错返回错误码
  183. if result != 0:
  184. # Python3.8的asyncio改变了循环方式,因为这种方式在windows上不支持相应的add_reader APIs,就会抛出NotImplementedError错误。
  185. # 因此加入下面两行代码
  186. #if sys.platform == 'win32':
  187. # asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
  188. self.main()
  189. sock.close()
  190. time.sleep(20)
  191. def main(self):
  192. #app.run(host='0.0.0.0', port=8002, debug=True)
  193. s = HTTPServer(WSGIContainer(app))
  194. s.listen(8002)
  195. IOLoop.current().start()
  196. def SvcStop(self):
  197. pythoncom.CoUninitialize()
  198. self.ReportServiceStatus(win32service.SERVICE_STOP_PENDING) # 先告诉SCM停止这个过程
  199. win32event.SetEvent(self.stop_event) # 设置事件
  200. self.ReportServiceStatus(win32service.SERVICE_STOPPED) # 确保停止,也可不加
  201. self.isAlive = False
  202. if __name__=='__main__':
  203. if len(sys.argv) == 1:
  204. try:
  205. evtsrc_dll = os.path.abspath(servicemanager.__file__)
  206. servicemanager.PrepareToHostSingle(XsacnService)
  207. servicemanager.Initialize('XsacnService', evtsrc_dll)
  208. servicemanager.StartServiceCtrlDispatcher()
  209. except win32service.error, details:
  210. if details[0] == winerror.ERROR_FAILED_SERVICE_CONTROLLER_CONNECT:
  211. win32serviceutil.usage()
  212. else:
  213. win32serviceutil.HandleCommandLine(XsacnService)
  214. #main()