xscanserver.py 8.5 KB


  1. #!-*-coding:utf-8 -*-
  2. import os
  3. import sys
  4. import time
  5. import socket
  6. import json
  7. import logging
  8. import inspect
  9. import winerror
  10. import win32event
  11. import win32service
  12. import servicemanager
  13. import win32serviceutil
  14. from flask import Flask,request,make_response,jsonify
  15. from flask_cors import *
  16. from tornado.ioloop import IOLoop
  17. from tornado.wsgi import WSGIContainer
  18. from tornado.httpserver import HTTPServer
  19. from threading import Thread
  20. #from parsedocx import DocxConverter,QuestionsParser
  21. import re,os
  22. import json,uuid
  23. from bs4 import BeautifulSoup
  24. from win32com.client import Dispatch,DispatchEx
  25. import pythoncom
  26. from twainscan import get_source_names,start_scan
  # Flask application object; the route handlers in this module register on it.
  app = Flask(__name__)
  # Enable CORS for every route and allow credentialed cross-origin requests.
  CORS(app, supports_credentials=True)

  # Working directory: holds uploaded documents, converted HTML and service.log.
  root = "c:\\AppData\\say365"
  # logging.FileHandler opens its file as soon as it is constructed, so the
  # directory has to exist before the handler is created; otherwise importing
  # this module fails on a machine where the directory was never created.
  if not os.path.isdir(root):
      os.makedirs(root)

  # Module-wide logger writing to <root>/service.log at INFO level and above.
  xlog = logging.getLogger('[PythonService]')
  handler = logging.FileHandler(os.path.join(root, "service.log"))
  formatter = logging.Formatter('%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
  handler.setFormatter(formatter)
  xlog.addHandler(handler)
  xlog.setLevel(logging.INFO)
  class DocxConverter(object):
      """Convert a Word document to HTML by automating Microsoft Word over COM.

      Attributes:
          docpath: Path of the document to convert.
          word: The Word application COM object (set by ``docx2html``).
          doc: The opened document COM object (set by ``docx2html``).
      """

      def __init__(self, docpath="test4.docx"):
          """Remember the path of the document to convert.

          Args:
              docpath: Path of the source document.
          """
          self.docpath = docpath

      def docx2html(self):
          """Save the document as HTML next to the source file.

          A new Word instance is started for every call and is always shut
          down again, even when opening or saving fails, so a failed
          conversion does not leave a hidden Word process behind.

          Returns:
              Path of the generated ``<uuid4>.html`` file, located in the
              same directory as ``self.docpath``.

          Raises:
              Whatever the COM layer raises when Word cannot be started or
              the document cannot be opened or saved.
          """
          self.word = DispatchEx("Word.Application")
          try:
              self.word.Visible = 0
              # Diagnostics only, so they are logged at INFO rather than ERROR.
              xlog.info(self.docpath)
              xlog.info(self.word)
              self.doc = self.word.Documents.Open(self.docpath)
              try:
                  xlog.info(self.doc)
                  html = os.path.join(
                      os.path.dirname(self.docpath),
                      str(uuid.uuid4()) + ".html",
                  )
                  # 10 is wdFormatFilteredHTML in Word's WdSaveFormat enumeration.
                  self.doc.SaveAs(html, 10)
              finally:
                  # 0 is wdDoNotSaveChanges: never raise a "save changes?"
                  # prompt inside an invisible Word instance.
                  self.doc.Close(0)
          finally:
              self.word.Quit()
          return html
  class QuestionsParser(object):
      """Extract exam questions from an HTML file.

      The document is expected to tag its paragraphs with bracketed markers:
      a section header carrying the question type, followed by questions
      that each start with a stem marker and may continue with knowledge
      point, difficulty, answer and explanation markers. Paragraphs without
      a marker belong to the most recently started field.

      Attributes:
          html: Raw content of the HTML file.
          soup: ``BeautifulSoup`` tree built from ``html``.
      """

      # Marker that introduces a question-type (section) header.
      _TYPE_MARKER = u"【题型】"
      # Marker that starts a new question (the question stem).
      _STEM_MARKER = u"【题干】"
      # Field markers and the result key each one switches to. They are
      # checked in this order; when a paragraph holds several markers the
      # last matching one wins.
      _FIELD_MARKERS = (
          (u"【题干】", "tg"),    # question stem
          (u"【知识点】", "zsd"),  # knowledge point
          (u"【难度】", "nd"),    # difficulty
          (u"【答案】", "da"),    # answer
          (u"【解析】", "jx"),    # explanation
      )

      def __init__(self, name="test4.html"):
          """Load and parse the HTML file.

          Args:
              name: Path of the HTML file to parse.
          """
          # Read bytes and let BeautifulSoup detect the encoding instead of
          # decoding with the platform default; the context manager also
          # closes the file handle.
          with open(name, "rb") as source:
              self.html = source.read()
          self.soup = BeautifulSoup(self.html, "html.parser")

      def get_paragraphs(self):
          """Return every ``<p>`` element inside ``div.WordSection1``.

          Returns:
              A list of paragraph tags, or an empty list when the document
              has no ``WordSection1`` div.
          """
          wordsection = self.soup.find("div", class_="WordSection1")
          if wordsection is None:
              return []
          return wordsection.find_all("p")

      def parse_questions(self):
          """Group the document's paragraphs into question records.

          Returns:
              A list of dicts with the keys ``tg`` (stem), ``tx`` (text of
              the type header in effect when the question started), ``zsd``
              (knowledge point), ``nd`` (difficulty), ``da`` (answer) and
              ``jx`` (explanation). Apart from ``tx`` every value is the
              concatenated markup of the paragraphs belonging to the field.
          """
          questions = []
          current = None      # record being filled; None before the first stem
          section_type = u""  # text of the latest type header
          field = ""          # key that marker-less paragraphs are appended to

          for paragraph in self.get_paragraphs():
              text = paragraph.text

              if self._TYPE_MARKER in text:
                  section_type = text
                  # A type header closes the running field so the header
                  # itself is not appended to the previous question.
                  field = ""

              if self._STEM_MARKER in text:
                  if current is not None and current.get("tg"):
                      questions.append(current)
                  # The type is recorded on the question being started, not
                  # on the one being finished.
                  current = {
                      "tg": "",
                      "tx": section_type,
                      "zsd": "",
                      "nd": "",
                      "da": "",
                      "jx": "",
                  }

              for marker, key in self._FIELD_MARKERS:
                  if marker in text:
                      field = key

              # Paragraphs seen before the first stem have no record to go to.
              if field and current is not None:
                  current[field] += str(paragraph)

          if current is not None and current.get("tg"):
              questions.append(current)
          return questions

      def get_questions(self):
          """Return the parsed questions with normalised type and number.

          ``tx`` is reduced to the text after its last ``、`` separator and
          ``qno`` (the question number) is added to every record.

          Returns:
              The list produced by ``parse_questions``, updated in place.
          """
          questions = self.parse_questions()
          for question in questions:
              question["tx"] = question["tx"].split(u"、")[-1]
              question["qno"] = self.get_qno(question["tg"])
          return questions

      def get_qno(self, tg):
          """Extract the question number from a stem.

          Args:
              tg: HTML markup of the question stem.

          Returns:
              The first run of digits found before the first ``、`` of the
              stem's text, or an empty string when there is none.
          """
          text = BeautifulSoup(tg, "html.parser").text
          match = re.search(r"\d+", text.split(u"、")[0])
          return match.group() if match else ""
  @app.route('/parsedocx.aspx', methods=["POST"])
  def parsedocx():
      """Convert an uploaded Word document into a JSON list of questions.

      The upload is read from the ``file`` form field, stored under ``root``
      with a millisecond-timestamp name that keeps the original extension,
      converted to HTML with ``DocxConverter`` and parsed with
      ``QuestionsParser``.

      Returns:
          The questions serialised with ``json.dumps`` on success, otherwise
          the text of the exception that was raised.
      """
      try:
          fobj = request.files['file']
          if not os.path.exists(root):
              os.makedirs(root)
          extension = os.path.splitext(fobj.filename)[-1]
          docxname = os.path.join(root, str(int(time.time() * 1000)) + extension)
          with open(docxname, "wb") as doc:
              doc.write(fobj.read())
          # NOTE(review): the stored upload and the generated HTML are never
          # removed, so ``root`` grows with every request - confirm whether
          # that is intended before adding cleanup.
          html = DocxConverter(docxname).docx2html()
          questions = QuestionsParser(html).get_questions()
          xlog.info("parsed %d question(s) from %s", len(questions), docxname)
          return json.dumps(questions)
      except Exception as e:
          # Log with the traceback; the message alone rarely shows where a
          # COM or parsing failure came from.
          xlog.exception(e)
          return str(e)
  @app.route('/xscanSourceList.aspx', methods=["GET"])
  def xscanSourceList():
      """Return the value of ``get_source_names()`` as a JSON response."""
      return make_response(jsonify(get_source_names()))
  @app.route('/xscanAction.aspx', methods=["GET"])
  def xscanAction():
      """Start ``start_scan`` on a background thread.

      Returns:
          ``"success"`` once the thread has been started, otherwise the text
          of the exception raised while creating or starting it. Failures
          inside ``start_scan`` itself happen on the worker thread and are
          not reported here.
      """
      try:
          Thread(target=start_scan).start()
      except Exception as err:
          xlog.error(err)
          return str(err)
      return "success"
  def main():
      """Serve the Flask app through Tornado on port 19882.

      Blocks in the Tornado IOLoop until the loop is stopped.
      """
      # Flask's own development server (app.run on port 8002) is not used.
      http_server = HTTPServer(WSGIContainer(app))
      http_server.listen(19882)
      IOLoop.current().start()
  class XsacnService(win32serviceutil.ServiceFramework):
      """Windows service that hosts the Flask app on a Tornado HTTP server.

      ``SvcDoRun`` acts as a watchdog: every 20 seconds it probes
      127.0.0.1:19882 and starts the server (which then blocks in the
      IOLoop) when nothing answers. ``SvcStop`` asks the IOLoop to stop so
      that ``SvcDoRun`` can return.
      """

      # Service name.
      _svc_name_ = "XsacnService"
      # Name shown for the service in Windows.
      _svc_display_name_ = "XsacnService"
      # Service description.
      _svc_description_ = "XsacnService"

      def __init__(self, args):
          """Create the stop event and logger and mark the service alive."""
          win32serviceutil.ServiceFramework.__init__(self, args)
          self.stop_event = win32event.CreateEvent(None, 0, 0, None)
          socket.setdefaulttimeout(60)  # default timeout for new sockets
          self.logger = self._getLogger()
          self.isAlive = True
          # IOLoop currently serving requests; None while no server runs.
          self.ioloop = None

      def _getLogger(self):
          """Return the '[PythonService]' logger with a file handler added.

          The handler writes to ``service.log`` in the directory of this
          source file. The logger name is the same one the module-level
          ``xlog`` uses, so both refer to one logger object.
          """
          logger = logging.getLogger('[PythonService]')
          this_file = inspect.getfile(inspect.currentframe())
          dirpath = os.path.abspath(os.path.dirname(this_file))
          handler = logging.FileHandler(os.path.join(dirpath, "service.log"))
          formatter = logging.Formatter('%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
          handler.setFormatter(formatter)
          logger.addHandler(handler)
          logger.setLevel(logging.INFO)
          return logger

      def _server_reachable(self):
          """Return True when something accepts connections on 127.0.0.1:19882."""
          sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
          try:
              # connect_ex returns 0 on success and an error code otherwise.
              return sock.connect_ex(('127.0.0.1', 19882)) == 0
          finally:
              # Closed before the server starts: main() blocks, so a close
              # placed after it would never run.
              sock.close()

      def SvcDoRun(self):
          """Service body: keep the HTTP server running until stop is requested."""
          # COM is initialised and released on this same thread.
          pythoncom.CoInitialize()
          try:
              while self.isAlive:
                  self.logger.info("服务正在运行...")
                  if not self._server_reachable():
                      # NOTE: on Python 3.8+ Tornado may need
                      # asyncio.WindowsSelectorEventLoopPolicy on Windows,
                      # because the default loop lacks the add_reader APIs
                      # (NotImplementedError). Left disabled as before.
                      self.main()
                  # Wait up to 20 s for the next probe, but wake up at once
                  # when SvcStop signals the stop event.
                  win32event.WaitForSingleObject(self.stop_event, 20 * 1000)
          finally:
              pythoncom.CoUninitialize()

      def main(self):
          """Serve the Flask app on port 19882 until the IOLoop is stopped."""
          server = HTTPServer(WSGIContainer(app))
          server.listen(19882)
          # Publish the loop before checking isAlive so a concurrent SvcStop
          # either sees the loop (and schedules stop) or has already cleared
          # isAlive (and the loop is never started).
          self.ioloop = IOLoop.current()
          try:
              if self.isAlive:
                  self.ioloop.start()
          finally:
              server.stop()
              self.ioloop = None

      def SvcStop(self):
          """Handle the stop request from the service control manager."""
          # Tell the SCM that stopping is in progress.
          self.ReportServiceStatus(win32service.SERVICE_STOP_PENDING)
          self.isAlive = False
          win32event.SetEvent(self.stop_event)
          loop = self.ioloop
          if loop is not None:
              # This method runs on a different thread than the IOLoop;
              # add_callback is the thread-safe way to reach it.
              loop.add_callback(loop.stop)
          # Kept from the original implementation: report STOPPED right away
          # so the SCM does not depend on how fast SvcDoRun unwinds.
          self.ReportServiceStatus(win32service.SERVICE_STOPPED)
  if __name__ == '__main__':
      # Running the file directly starts the HTTP server in the foreground.
      # The Windows service bootstrap below is disabled:
      #
      # if len(sys.argv) == 1:
      #     try:
      #         evtsrc_dll = os.path.abspath(servicemanager.__file__)
      #         servicemanager.PrepareToHostSingle(XsacnService)
      #         servicemanager.Initialize('XsacnService', evtsrc_dll)
      #         servicemanager.StartServiceCtrlDispatcher()
      #     except win32service.error, details:
      #         if details[0] == winerror.ERROR_FAILED_SERVICE_CONTROLLER_CONNECT:
      #             win32serviceutil.usage()
      # else:
      #     win32serviceutil.HandleCommandLine(XsacnService)
      main()