ソースを参照

操作模式分组、ocr识别

xjc 2 年 前
コミット
357e9ffce7

+ 22 - 0
src/common/models.py

@@ -62,6 +62,8 @@ class Player(models.Model):
     role = models.SmallIntegerField(u"角色",default=0)
     player_type = models.SmallIntegerField(u"选手类型,0/游客,1/普通选手,2/种子选手,3/开户选手",default=1)
     phone = models.CharField(u"手机号", max_length=50,blank=True,null=True)
+    opmode_group = models.CharField(u"操作模式分组", max_length=255,blank=True,null=True)
+    opmode_group_name = models.CharField(u"操作模式分组名称", max_length=255,blank=True,null=True)
 
     ctime = models.DateTimeField(u"创建时间", auto_now_add=True)
 
@@ -109,6 +111,8 @@ class PlayerRecord(models.Model):
     badge = models.CharField(u"选手标识", max_length=255,blank=True,null=True,default=u"选手")
     zans = models.IntegerField(u"点赞数", blank=True,null=True,default=0)
     comments_count = models.IntegerField(u"点赞数", blank=True,null=True,default=0)
+    opmode_group = models.IntegerField(u"操作模式分组", blank=True,null=True)
+    opmode_group_name = models.CharField(u"操作模式分组名称", max_length=255,blank=True,null=True)
 
     ctime = models.DateTimeField(u"创建时间", auto_now_add=True)
 
@@ -329,6 +333,8 @@ class UserStock(models.Model):
     stock_date = models.CharField(u"持股日期", max_length=255,blank=True,null=True)
     stock_name = models.CharField(u"股票名称", max_length=255,blank=True,null=True)
     fund = models.FloatField(u"持股金额",blank=True,null=True)
+    match_group = models.IntegerField(u"选手分组id", blank=True,null=True)
+    opmode_group = models.IntegerField(u"选手操作模式分组id", blank=True,null=True)
 
     ctime = models.DateTimeField(u"创建时间", auto_now_add=True)
 
@@ -408,6 +414,7 @@ class SignupOrder(models.Model):
     phone = models.CharField(u"手机号",max_length=50,blank=True,null=True)
     match_group = models.IntegerField(u"赛事分组",blank=True,null=True)
     signup_name = models.CharField(u"参赛名",max_length=50,blank=True,null=True)
+    opmode_group = models.CharField(u"操作模式分组", max_length=255,blank=True,null=True)
 
     ctime = models.DateTimeField(u"创建时间", auto_now_add=True)
 
@@ -674,3 +681,18 @@ class UserBlacks(models.Model):
         db_table = "user_blacks"
         verbose_name = u"用户黑名单"
         app_label = "common"
+
+
+class OpModelGroup(models.Model):
+    """Operation-mode group.
+
+    NOTE(review): presumably the target of the ``opmode_group`` columns added
+    to Player / PlayerRecord / UserStock / SignupOrder in this commit; no
+    ForeignKey enforces that link - confirm against the callers.
+    """
+    # Display name of the group.
+    name = models.CharField(u"名称", max_length=255, blank=True,null=True)
+    # Visibility flag (verbose name: "whether displayed"); defaults to 1.
+    is_active = models.SmallIntegerField(u"是否显示",default=1)
+    # Sort key (verbose name: "sort field"); defaults to 1.
+    order = models.IntegerField(u"排序字段",default=1)
+
+    # Creation timestamp, filled in automatically on insert.
+    ctime = models.DateTimeField(u"创建时间", auto_now_add=True)
+
+    class Meta:
+        db_table = "opmodel_group"
+        verbose_name = u"操作模式分组"
+        app_label = "common"

+ 6 - 0
src/manage/controls.py

@@ -28,6 +28,7 @@ from threading import Thread
 from django.db import connection
 from utils.wxSubscribeMessage import send_consult_reply_message
 from utils.aliyunpush import aliyunpush
+from tools.article_spider import startSpider
 
 import xlrd
 import xlwt
@@ -1124,6 +1125,11 @@ def push_article(**kwargs):
     aliyunpush.push_notice_by_userid(user_id,title,body,params,"ios")
     aliyunpush.push_notice_by_userid(user_id,title,body,params,"android")
 
+def spider_article(**kwargs):
+    """Scrape the article found at ``kwargs["url"]``.
+
+    The URL (``None`` when the key is absent) is handed to ``startSpider``
+    and whatever that returns is passed straight back to the caller.
+    """
+    target_url = kwargs.get("url")
+    return startSpider(target_url)
 
 def import_article(**kwargs):
     """

+ 1 - 0
src/manage/urls_backstage.py

@@ -68,6 +68,7 @@ urlpatterns = [
     url(r'^user/famous$', views.UserArticleRelationView.as_view()),
     url(r'^user/famous/list$', views.UserArticleRelationListView.as_view()),
     url(r'^baike/detail/top$', views.BaikeDetailTopView.as_view()),
+    url(r'^article/spider$', views.ArticleSpiderView.as_view()),
 
 ]
 

+ 13 - 0
src/manage/views.py

@@ -755,6 +755,19 @@ class ArticleListView(cv.AdminView):
             cv.tracefail()
             return cv.to_fail(e)
 
+class ArticleSpiderView(cv.AdminView):
+    def get(self, request):
+        """#Article scraping (platform admin back office)
+
+        Forwards the request's JSON payload to ``ctl.spider_article`` as
+        keyword arguments and returns the result through ``cv.to_suc``.
+        Any exception is traced with ``cv.tracefail`` and reported through
+        ``cv.to_fail``.
+        """
+        # NOTE(review): the payload is unpacked unvalidated, so the server
+        # fetches whatever "url" the client supplies - confirm that is intended.
+        qdata = request.json
+        try:
+            data = ctl.spider_article(**qdata)
+            return cv.to_suc(data)
+        except Exception as e:
+            cv.tracefail()
+            return cv.to_fail(e)
+        
+
 class FlushRankView(cv.AdminView):
     def get(self, request):
         """#更新排名(平台管理后台)

+ 118 - 0
src/tools/article_spider.py

@@ -0,0 +1,118 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+import os
+import requests
+
+from bs4 import BeautifulSoup
+import urllib
+import oss2
+
+
+def startSpider(url):
+    """Download an article page and return its content block as HTML.
+
+    Every ``<img class="rich_pages">`` is passed to ``save_img`` (which may
+    replace the tag in place), every ``<span>`` containing the "like / wow"
+    reminder text is replaced by an empty ``<div>``, and the
+    ``<div class="rich_media_content">`` element is serialised.
+
+    :param url: address of the article page.
+    :returns: ``str()`` of the content ``<div>``.
+    :raises requests.RequestException: on network failure, timeout or a
+        non-2xx response.
+    :raises ValueError: when the page has no ``rich_media_content`` div
+        (previously the literal string ``"None"`` was returned).
+    """
+    headers = {
+        'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/537.36 LBBROWSER'
+    }
+
+    # NOTE(review): verify=False disables TLS certificate checking; it is kept
+    # to preserve current behaviour - confirm whether it is still required.
+    # The timeout stops a stalled remote host from blocking the caller forever.
+    resp = requests.get(url, headers=headers, verify=False, timeout=30)
+    resp.raise_for_status()
+    resp.encoding = 'utf-8'
+
+    soup = BeautifulSoup(resp.text, features='html.parser')
+
+    for img_tag in soup.find_all('img', class_='rich_pages'):
+        save_img(img_tag)
+
+    hint = '提示:读完点个“赞”、点个“在看”'
+    for span in soup.find_all(lambda tag: tag.name == "span" and hint in tag.text):
+        # An empty <div> built from the same soup; no second parser needed.
+        span.replaceWith(soup.new_tag('div'))
+
+    nbody = soup.find('div', class_='rich_media_content')
+    if nbody is None:
+        raise ValueError('no rich_media_content block found at %s' % url)
+
+    return str(nbody)
+    
+
+def save_img(s):
+    """Mirror one ``<img>`` tag's picture and rewrite the tag in place.
+
+    The picture referenced by the tag's ``data-src`` attribute is downloaded,
+    written under ``article/img`` and uploaded through ``uploadAliyun``. When
+    the upload yields a URL, the tag is replaced by a ``<div><img .../></div>``
+    that points at it and carries the original inline style.
+
+    Failures are reported with ``print`` and otherwise ignored (best effort),
+    so one broken picture does not abort the whole article.
+
+    :param s: the ``<img>`` tag; only ``.get()`` and ``.replaceWith()`` are used.
+    """
+    img_url = s.get('data-src')
+    if not img_url:
+        # Nothing to mirror; leave the tag untouched.
+        return
+
+    file_path = 'article/img'
+    try:
+        if not os.path.exists(file_path):
+            print('文件夹' + file_path + '不存在,重新建立')
+            os.makedirs(file_path)
+
+        # URLs look like ".../<image id>/640?wx_fmt=png[&more=params]":
+        # the image id is the second-to-last path segment and the suffix is
+        # the first query value, without any following "&..." parameters.
+        splist = img_url.split('/')
+        file_suffix = splist[-1].split('=')[1].split('&')[0]
+
+        filename = '{}{}{}.{}'.format(file_path, os.sep, splist[-2], file_suffix)
+
+        req = requests.get(img_url, timeout=30)
+        if req.status_code == 200:
+            with open(filename, mode='wb') as f:
+                f.write(req.content)
+
+            nurl = uploadAliyun(filename, req.content)
+            if nurl:
+                # Not every <img> carries an inline style.
+                style = s.get('style') or ''
+                img_html = '<div><img  src=\'' + nurl + '\' style=\'' + style + ';width: 100%;height: auto !important;'+ '\'/></div>'
+                isoup = BeautifulSoup(img_html,'lxml')
+                s.replaceWith(isoup.div)
+
+    except IOError as e:
+        # %-formatting: "str + exception" raised TypeError inside the handler.
+        print('文件操作失败%s' % e)
+    except Exception as e:
+        print('错误 :%s' % e)
+
+
+
+def uploadAliyun(name,data):
+    """Upload ``data`` to the OSS bucket under key ``name``.
+
+    :param name: object key inside the bucket.
+    :param data: object content.
+    :returns: the object's public https URL when the upload answers with
+        status 200, otherwise ``None``.
+    """
+    # SECURITY: these credentials are committed to the repository. They can
+    # now be overridden through the environment; the literals remain only as
+    # a backward-compatible fallback and should be rotated and removed.
+    accessKeyId     = os.environ.get('ALIYUN_OSS_ACCESS_KEY_ID', 'LTAI5t8bioQxGXB1jtVugJcU')
+    accessKeySecret = os.environ.get('ALIYUN_OSS_ACCESS_KEY_SECRET', 'OdGWSBRjkJxaPjgmE38eQ8nzkI6nRk')
+    bucketName      = os.environ.get('ALIYUN_OSS_BUCKET', 'hnwzarticle')
+    endpointHost    = 'oss-cn-shenzhen.aliyuncs.com'
+
+    auth   = oss2.Auth(accessKeyId, accessKeySecret)
+    # Use the https endpoint so signed requests do not travel in clear text.
+    bucket = oss2.Bucket(auth, 'https://' + endpointHost, bucketName)
+    res    = bucket.put_object(name,data)
+
+    if res.status == 200:
+        return 'https://{}.{}/'.format(bucketName, endpointHost) + name
+    return None

ファイルの差分が大きいため隠しています
+ 5295 - 4684
src/tools/code.csv


+ 31 - 14
src/tools/loadcode.py

@@ -1,24 +1,41 @@
-#import tushare as ts
+# -*- encoding: utf-8 -*-
 import csv
 import os
 
-def get_stock_list():
+
+_workdir = os.path.dirname(os.path.abspath(__file__))
+_filename = os.path.join(_workdir, 'code.csv')
+
+
+def _download_data():
     """
+    Download the stock table via tushare ``stock_basic`` and write it to
+    ``code.csv`` next to this module.
+
+    tushare doesn't support py2;
+    run this with py3 (/root/.pyvenv/wzb_ts/bin/python3).
     """
-    #ts.set_token('56cda41c39cfd949e9e11211a6b46ac700f2df2684fa74bd23e3ce4c')
-    #pro = ts.pro_api()
-    #
-    #df = pro.stock_basic(exchange='', list_status='L', fields='ts_code,symbol,name,fullname,enname,cnspell,market,list_date')
-    #data = df.to_records()
-    #df.to_json("test.json")
-    data = []
-    with open("/mnt/wzbapi/src/tools/code.csv","r") as f:
-        rows = csv.reader(f)
-        for row in rows:
-            data.append(row)
-    return data
+    # Imported here, not at module level, so importing this module does not
+    # require tushare.
+    import tushare as ts
+    # NOTE(review): the API token is hard-coded in source - consider moving it
+    # to configuration and rotating it.
+    ts.set_token('99800760dfbbdf7d0b4124f6d4be39ebac6a093628f0bd19a7432486')
+    pro = ts.pro_api()
+    
+    df = pro.stock_basic(exchange='', list_status='L', fields='ts_code,symbol,name,area,industry,list_date')
+    # to_csv is called with its defaults, so the file layout (header row,
+    # index column) is whatever pandas emits by default.
+    df.to_csv(_filename)
 
 
+def get_stock_list():
+    """Return the ``(code, name)`` pairs stored in ``code.csv``.
+
+    Columns 2 and 3 of each row are taken as code and name. Only the first
+    row seen for a given name is kept, so the result has no duplicate names.
+    The header row written by ``DataFrame.to_csv`` and rows too short to
+    hold both columns are skipped.
+
+    :returns: list of ``(code, name)`` tuples in file order.
+    """
+    names = set()
+    ret = []
+    with open(_filename, 'r') as f:
+        for row in csv.reader(f):
+            if len(row) < 4:
+                # Blank or truncated line: unpacking would raise ValueError.
+                continue
+            if row[1:4] == ['ts_code', 'symbol', 'name']:
+                # Header line, not a stock.
+                continue
+            code, name = row[2:4]
+            if name not in names:
+                ret.append((code, name))
+                names.add(name)
+    return ret
 
 
+if __name__ == '__main__':
+    _download_data()
 

+ 39 - 14
src/tools/sync_stock_data.py

@@ -14,22 +14,51 @@ django.setup()
 
 import common.models as cm
 import common.common_functions as ccf
-from .loadcode import get_stock_list
+from loadcode import get_stock_list
+
 
 def sync_stock_data():
-    """同步股票数据
+    """
+    Synchronise the Stock table with the (code, name) pairs returned by
+    get_stock_list().
+
+    Pass 1 matches by code: the lowest-pk row gets the new name and any
+    further rows with the same code are deleted. Codes with no row are
+    collected for pass 2.
+    Pass 2 matches the collected pairs by name: the first row whose code is
+    None receives the code; if there is none, a new row is created.
     """
     stock_list = get_stock_list()
+    new_codes = []
+    
+    # update name by code
     for item in stock_list:
-        code = item[2]
-        name = item[3]
-        print(code,name)
-        obj,flag = cm.Stock.objects.get_or_create(
-                    name = name, 
-                )
-        obj.code = code
-        obj.save()
+        code, name = item
+        objs = cm.Stock.objects.filter(code=code).order_by('pk')
+        if objs:
+            objs[0].name = name
+            objs[0].save()
+            # delete duplicate codes
+            for obj in objs[1:]:
+                obj.delete()
+        else:
+            new_codes.append((code, name))
 
+    # update code by new name
+    for item in new_codes:
+        code, name = item
+        objs = cm.Stock.objects.filter(name=name).order_by('pk')
+        if objs:
+            # idx ends up as the number of rows visited by the loop below.
+            idx = 0
+            for obj in objs:
+                idx += 1
+                # find empty code, update, break
+                # NOTE(review): only None counts as empty; an empty-string
+                # code is treated as a real code - confirm.
+                if obj.code is None:
+                    obj.code = code
+                    obj.save()
+                    break
+            # no empty code, create one
+            else:
+                cm.Stock.objects.create(code=code, name=name)
+            # delete duplicate names
+            # NOTE(review): this deletes every same-name row after the one
+            # that was updated, whatever its code; when the loop did not
+            # break, idx == len(objs) and nothing is deleted.
+            for obj in objs[idx:]:
+                obj.delete()
+        # no name, create one
+        else:
+            cm.Stock.objects.create(code=code, name=name)
 
 
 if __name__ == "__main__":
@@ -39,7 +68,3 @@ if __name__ == "__main__":
     #cm.Stock.objects.filter(code__isnull=False).delete()
     print("time cost:",time.time()-st)
 
-
-
-
-

+ 310 - 0
src/utils/ocr/OcrManage.py

@@ -0,0 +1,310 @@
+
+import re
+from aip import AipOcr
+
+class OcrManage:
+    def __init__(self):
+        pass
+    
+    @classmethod
+    def loadOcr(self,url):
+        """Run Baidu OCR (``basicGeneralUrl``) on the image at ``url`` and
+        return the structure built by ``parse`` from the response.
+        """
+        import os
+
+        # SECURITY: these credentials are committed to the repository. They
+        # can now be overridden through the environment; the literals remain
+        # only as a backward-compatible fallback and should be rotated.
+        app_id      = os.environ.get('BAIDU_OCR_APP_ID', '35386340')
+        api_key     = os.environ.get('BAIDU_OCR_API_KEY', 'EeFUPsy7L10UaaClXl3uy2ie')
+        secret_key  = os.environ.get('BAIDU_OCR_SECRET_KEY', 'QSWGGtGfGLkmZwc8AaRXgkloCAlSdNGB')
+        aip_ocr = AipOcr(
+            appId=app_id,
+            apiKey=api_key,
+            secretKey=secret_key)
+
+        res_url = aip_ocr.basicGeneralUrl(url)
+
+        return self.parse(res_url)
+
+    @classmethod
+    def parse(self,result):
+        """Turn an OCR response into the summary built by ``model``.
+
+        Each entry's ``words`` text is cleaned with ``deleteWords``, empty
+        results are dropped, the rest is tokenised by ``wordsSeperate`` and
+        handed to ``model``.
+        """
+        cleaned = [self.deleteWords(entry['words']) for entry in result['words_result']]
+        words = [text for text in cleaned if len(text) > 0]
+        return self.model(self.wordsSeperate(words))
+    
+    @classmethod
+    def deleteWords(self,words):
+        """Return ``words`` with the listed noise characters removed."""
+        for junk in ('⊙', '¥', '¥', ' '):
+            words = words.replace(junk, '')
+        return words
+
+    @classmethod
+    def wordsSeperate(self,words):
+        """Split every string into runs of Chinese and non-Chinese text.
+
+        Characters accepted by ``isChinese`` (plus full-width parentheses)
+        form one kind of run, everything else the other. ``-`` and ``:`` end
+        the current non-Chinese run and are dropped. A non-Chinese run still
+        open at the end of a string is additionally split on ``+``.
+
+        Fixes: the flush conditions tested ``len(x) < 0``, which is never
+        true, so runs were never cut where the text switches between Chinese
+        and non-Chinese. The trailing ``find('-')`` truthiness test is gone;
+        ``-`` can never be part of a run, so a single split on ``+`` yields
+        the same tokens.
+
+        :param words: iterable of strings.
+        :returns: flat list of tokens in reading order.
+        """
+        tokens = []
+        for strWords in words:
+            cword = ''
+            nword = ''
+            for strw in strWords:
+                if self.isChinese(strw) or strw == '(' or strw == ')':
+                    # A Chinese run starts: close the pending other run.
+                    if len(cword) == 0 and len(nword) > 0:
+                        tokens.append(nword)
+                        nword = ''
+                    cword += strw
+                else:
+                    # A non-Chinese run starts: close the pending Chinese run.
+                    if len(nword) == 0 and len(cword) > 0:
+                        tokens.append(cword)
+                        cword = ''
+                    if strw == '-' or strw == ':':
+                        if len(nword) > 0:
+                            tokens.append(nword)
+                            nword = ''
+                    else:
+                        nword += strw
+
+            # At most one of the two runs is still open here.
+            if len(cword) > 0:
+                tokens.append(cword)
+            if len(nword) > 0:
+                tokens.extend(piece for piece in nword.split('+') if len(piece) > 0)
+
+        return tokens
+
+    @classmethod
+    def model(self,list):
+        """Build ``{'today': ..., 'list': [...]}`` from the token list.
+
+        ``today`` is the value from ``assetWords`` divided by 10000 and
+        rounded to 2 places. ``list`` holds one ``{'name', 'fund'}`` dict per
+        token whose prefix matches an entry of ``nameList`` and whose value
+        four tokens later is a positive number.
+        """
+        dic   = {}
+        # NOTE(review): hard-coded set of recognised stock names - looks like
+        # sample data; confirm where the real list should come from.
+        nameList = ['万润科技','华力创通','翰宇药业','光弘科技','菲菱科思']
+        flist = []
+
+        asset = self.assetWords(list)
+
+        # floatValue() as written in this class returns 0 for anything it
+        # does not accept, so this branch is taken for every input it
+        # handles without raising.
+        if self.floatValue(asset) >= 0:
+            today = self.floatValue(asset)/10000.0
+            dic['today'] = round(today,2)
+
+        index = self.indexWords('总资产',list)
+        
+        # NOTE(review): `i = index` is overwritten by the loop right away, so
+        # the scan always starts at 0 and `index` is unused - confirm intent.
+        i = index
+        for i in range(len(list)):
+            words = list[i]
+            words = self.replaceWords(words)
+
+            strST = ''
+
+            # A preceding 'ST' / 'C' token is prepended to the 2- and
+            # 3-character prefixes tried below.
+            if i-1 >= 0:
+                st = list[i-1]
+                if st == 'ST' or st == 'C':
+                    strST = st
+
+            # Try prefixes of the token against nameList; a later match
+            # overrides an earlier one.
+            codeName = ''
+            if len(words) >= 2:
+                nstr = strST + words[:2]
+                if nstr in nameList:
+                    codeName = nstr
+
+            if len(words) >= 3:
+                nstr = strST + words[:3]
+                if nstr in nameList:
+                    codeName = nstr
+            
+            if len(words) >= 4:
+                nstr = words[:4]
+                print(nstr)
+                if nstr in nameList:
+                    codeName = nstr
+            
+            # NOTE(review): this tests length >= 5 but slices [:2] - possibly
+            # meant [:5]; confirm.
+            if len(words) >= 5:
+                nstr = words[:2]
+                if nstr in nameList:
+                    codeName = nstr
+            
+            if len(codeName) > 0:
+                fund = 0
+                # The value is read from a fixed offset: 4 tokens after the name.
+                if i+4 < len(list):
+                    market = list[i+4]
+                    market = self.replaceWords(market)
+                    market = self.replaceDot(market)
+                    if self.floatValue(market) > 0:
+                        fund = self.floatValue(market)/10000.0
+                        # A following '万' token means the value is already
+                        # in units of 10,000, so it is not divided.
+                        if i+5 < len(list):
+                            wstr = list[i+5]
+                            if wstr == '万':
+                                fund = self.floatValue(market)
+
+                if fund > 0:
+                    ndic = {}
+                    ndic['name'] = codeName
+                    ndic['fund'] = str(round(fund,2))
+                    flist.append(ndic)
+        
+        dic['list'] = flist
+
+        print(dic)
+        return dic
+
+    @classmethod
+    def isChinese(self,word):
+        """Return True when ``word`` has no character outside U+4E00..U+9FA5.
+
+        The empty string therefore also yields True.
+        """
+        outside_range = re.compile(r'[^\u4e00-\u9fa5]')
+        return outside_range.search(word) is None
+
+    @classmethod
+    def assetWords(self,words):
+        """Pick the asset figure out of the token list and return it as a string.
+
+        Starts from the token 3 (or 4) places after an exact '净资产' token,
+        lets ``maxWords`` replace it with a larger candidate for several
+        labels, then trims the result to two decimals.
+        """
+        asset = ''
+        index = self.indexWords('净资产',words)
+
+        if index >= 0:
+            # NOTE(review): fixed offsets with no bounds check - raises
+            # IndexError when the label is among the last tokens.
+            asset = words[index+3]
+            if self.floatValue(asset) == 0:
+                asset = words[index+4]
+            
+            asset = self.replaceWords(asset)
+            asset = self.replaceDot(asset)
+
+        # Each call keeps the larger of the current value and its candidate.
+        asset = self.maxWords(words,asset,'总资产')
+        asset = self.maxWords(words,asset,'人民币')
+        asset = self.maxWords(words,asset,'CNY')
+        asset = self.maxWords(words,asset,'当日参考盈亏')
+        asset = self.maxWords(words,asset,'资产')
+        
+        print('aa' + asset)
+
+        if self.floatValue(asset) > 0:
+            location = asset.find('.')
+            # NOTE(review): this isChinese branch repeats the truncation done
+            # unconditionally just below - looks redundant; confirm.
+            if self.isChinese(asset):
+                if location >= 0 and len(asset) >= location + 3:
+                    asset = asset[:location+3]
+            # Keep at most two digits after the decimal point.
+            if location >= 0 and len(asset) >= location + 3:
+                asset = asset[:location+3]
+            elif location < 0:
+                # No decimal point: the value is divided by 100.
+                # NOTE(review): isNumber() in this class only accepts text
+                # with a decimal point, so this branch looks unreachable.
+                asset = str(self.floatValue(asset)/100.0)
+
+
+        return asset
+
+    @classmethod
+    def maxWords(self,words,strWord,contain):
+        """Return the larger of ``strWord`` and the number following a label.
+
+        Tokens are scanned until one contains '证券持仓' or '证券代码'. For
+        the first token that contains ``contain`` and is followed by a
+        positive number, that number (commas removed) is the candidate; if
+        the token right after it is '万' the candidate is multiplied by
+        10000.
+
+        Fixes: ``word.find(contain)`` was used as a boolean, which is true
+        when the label is absent and false when it starts the token; and the
+        inner scan restarted at index 0 instead of after the label.
+
+        :param words: token list.
+        :param strWord: current value, as a string.
+        :param contain: label text to look for.
+        :returns: the candidate when it is greater than ``strWord``,
+            otherwise ``strWord``.
+        """
+        strMax = '0'
+
+        for i, word in enumerate(words):
+            if '证券持仓' in word or '证券代码' in word:
+                break
+
+            if contain not in word:
+                continue
+
+            # First positive number after the label.
+            for j in range(i + 1, len(words)):
+                nword = words[j].replace(',','')
+                if self.floatValue(nword) > self.floatValue(strMax):
+                    strMax = nword
+                    if j+1 < len(words) and words[j+1].replace(',','') == '万':
+                        strMax = str(self.floatValue(strMax)*10000)
+                    break
+
+            if self.floatValue(strMax) > 0:
+                break
+
+        if self.floatValue(strMax) > self.floatValue(strWord):
+            return strMax
+
+        return strWord
+
+
+    @classmethod
+    def replaceWords(self,words):
+        """Remove spaces, newlines, commas and '%' from ``words`` and
+        upper-case the result.
+        """
+        for junk in (' ', '\n', ',', '%', ','):
+            words = words.replace(junk, '')
+        return words.upper()
+
+    @classmethod
+    def replaceDot(self,words):
+        """Keep only the last '.' of ``words``.
+
+        Text with at most one dot is returned unchanged; otherwise every dot
+        except the final one is removed.
+        """
+        parts = words.split('.')
+        if len(parts) <= 2:
+            return words
+        return ''.join(parts[:-1]) + '.' + parts[-1]
+
+    @classmethod
+    def indexWords(self,words,list):
+        """Return the position of the first item of ``list`` equal to
+        ``words``, or -1 when there is none.
+        """
+        for position, candidate in enumerate(list):
+            if words == candidate:
+                return position
+        return -1
+
+    @classmethod
+    def isNumber(self,number):
+        """Return True when ``number``, with commas removed, is digits, a
+        dot, then digits (for example '12.5'); plain integers do not match.
+        """
+        stripped = number.replace(',','')
+        if self.isChinese(stripped):
+            return False
+
+        decimal_pattern = r'^[0-9]+\.[0-9]+$'
+        return re.match(decimal_pattern, str(stripped)) is not None
+
+    @classmethod
+    def floatValue(self,word):
+        """Convert ``word`` to a float, or return 0 when ``isNumber`` rejects it.
+
+        Thousands separators are removed before the conversion: ``isNumber``
+        already ignores them, so text like '1,234.5' used to pass the check
+        and then raise ValueError in ``float()``.
+        """
+        if word == '':
+            return 0
+
+        cleaned = word.replace(',','')
+        if not self.isNumber(cleaned):
+            return 0
+
+        return float(cleaned)
+    
+
+if __name__ == '__main__':
+    print('ocr')
+    

+ 3 - 0
src/utils/ocr/__init__.py

@@ -0,0 +1,3 @@
+# encoding: utf-8
+
+from .ocr import BaiduOcrForm

+ 110 - 0
src/utils/ocr/ocr.py

@@ -0,0 +1,110 @@
+# encoding: utf-8
+import os
+import json
+
+import cv2
+import numpy as np
+import requests
+from aip import AipOcr
+
+
+_workdir = os.path.dirname(os.path.abspath(__file__))
+
+
+class BaiduOcrForm:
+    """Table-style OCR of brokerage screenshots through Baidu's ``form`` API.
+
+    The client, HTTP session and template images are created once, when the
+    class body runs.
+    """
+    # SECURITY: these credentials are committed to the repository. They can
+    # now be overridden through the environment; the literals remain only as
+    # a backward-compatible fallback and should be rotated.
+    app_id = os.environ.get('BAIDU_OCR_APP_ID', '35386340')
+    api_key = os.environ.get('BAIDU_OCR_API_KEY', 'EeFUPsy7L10UaaClXl3uy2ie')
+    secret_key = os.environ.get('BAIDU_OCR_SECRET_KEY', 'QSWGGtGfGLkmZwc8AaRXgkloCAlSdNGB')
+    aip_ocr = AipOcr(
+        appId=app_id,
+        apiKey=api_key,
+        secretKey=secret_key)
+    req_session = requests.Session()
+
+    # Grayscale templates used to locate the "total" and "table" regions.
+    ths_template_total = cv2.imread(os.path.join(_workdir, 'tmp_total.jpg'), cv2.IMREAD_GRAYSCALE)
+    ths_template_table = cv2.imread(os.path.join(_workdir, 'tmp_table.jpg'), cv2.IMREAD_GRAYSCALE)
+
+    def __init__(self):
+        pass
+
+    @classmethod
+    def do_ocr(cls, source, img_url):
+        """
+        Extract structured text from a screenshot using Baidu OCR.
+
+        :param source: ``'tonghuashun'`` selects the template-based
+            extraction; anything else sends the whole resized image to the
+            form API.
+        :param img_url: address the screenshot is downloaded from.
+        :returns: ``{'total': str, 'stocks': [{'name', 'money'}, ...]}`` for
+            ``'tonghuashun'``, otherwise the raw API response.
+        :raises requests.RequestException: when the download fails or times out.
+        :raises ValueError: when the downloaded bytes are not a decodable image.
+        """
+        # download from internet, and load in grayscale
+        resp = cls.req_session.get(img_url, timeout=30)
+        resp.raise_for_status()
+        nd_array = np.asarray(bytearray(resp.content), dtype='uint8')
+        im = cv2.imdecode(nd_array, cv2.IMREAD_GRAYSCALE)
+        if im is None:
+            raise ValueError('cannot decode image downloaded from %s' % img_url)
+
+        # resize to width 1000, height scaled to keep the aspect ratio;
+        # floor division keeps the size an integer on Python 2 and 3
+        new_w_h = 1000, im.shape[0] * 1000 // im.shape[1]
+        im = cv2.resize(im, new_w_h)
+
+        if source == 'tonghuashun':
+            # find edge
+            eg = cv2.Canny(im, 100, 255)
+
+            # find total
+            ret = cls.do_ocr_ths(im, eg, 'total')
+            # data processing
+            total = ''.join(i['words'] for i in ret['forms_result'][0]['header'])
+
+            # find table
+            ret = cls.do_ocr_ths(im, eg, 'table')
+            # data processing: cells are consumed in (name, money) pairs
+            table = []
+            ret = ret['forms_result'][0]['header']
+            for i in range(0, len(ret), 2):
+                if ret[i]['words'].find(u'持仓') != -1 or ret[i]['words'].find(u'首页') != -1:
+                    break
+                if i + 1 >= len(ret):
+                    break
+                table.append({
+                    'name': ret[i]['words'],
+                    'money': ret[i + 1]['words']
+                })
+
+            # union data
+            ret = {
+                'total': total,
+                'stocks': table
+            }
+
+        else:
+            _, nd_array = cv2.imencode('.jpg', im)
+            ret = cls.aip_ocr.form(nd_array.tobytes())
+
+        return ret
+
+    @classmethod
+    def do_ocr_ths(cls, im, eg, tmp):
+        """
+        Tonghuashun: OCR the region below the matched template.
+
+        :param im: grayscale image.
+        :param eg: edge image the template is matched against.
+        :param tmp: ``'total'`` or ``'table'``; any other value returns None.
+        """
+        if tmp == 'total':
+            x, y = cls.match_template(eg, cls.ths_template_total)
+            sub = im[y: y + 100, x: x + 240]
+        elif tmp == 'table':
+            x, y = cls.match_template(eg, cls.ths_template_table)
+            sub = im[y: im.shape[0], x: x + 240]
+        else:
+            return
+        _, nd_array = cv2.imencode('.jpg', sub)
+        ret = cls.aip_ocr.form(nd_array.tobytes())
+        return ret
+
+    @classmethod
+    def match_template(cls, im, template):
+        """
+        Match ``template`` in ``im`` and return ``(x, y)``: x is the left
+        column of the best match and y is the row just below it (top row
+        plus the template height).
+        """
+        h, w = template.shape
+        rt = cv2.matchTemplate(im, template, cv2.TM_CCOEFF_NORMED)
+        min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(rt)
+        return max_loc[0], max_loc[1] + h
+
+
+if __name__ == '__main__':
+    d = BaiduOcrForm.do_ocr('tonghuashun', 'http://127.0.0.1:8000/a.jpg')
+    print d

BIN
src/utils/ocr/tmp_table.jpg


BIN
src/utils/ocr/tmp_total.jpg


+ 531 - 0
src/utils/ocrmanage.py

@@ -0,0 +1,531 @@
+# encoding: utf-8
+import re
+
+from django.core.cache import cache
+from aip import AipOcr
+
+import common.models as cm
+
+
+class StockCache(object):
+    """
+    Name -> code lookup for stocks, backed by the Django cache (ttl=3600s).
+
+    Names with no Stock row are cached as the marker ``'#'`` so repeated
+    misses do not hit the database again.
+    """
+
+    def __init__(self):
+        self.key_prefix = 'djwzb:stock_cache_name_code'
+        self.ttl = 3600
+        self.empty_str = '#'
+
+    def make_key(self, name):
+        """Return the cache key for ``name``."""
+        return '%s:%s' % (self.key_prefix, name)
+
+    def get_code_by_name(self, name):
+        """Return the code of the stock called ``name``, or None if unknown."""
+        key = self.make_key(name)
+        cached = cache.get(key)
+
+        if cached is not None:
+            # Cache hit: either the "no such stock" marker or a real code.
+            if cached == self.empty_str:
+                return
+            return cached
+
+        # Cache miss: ask the database and remember the answer.
+        stock = cm.Stock.objects.filter(name=name).first()
+        if stock is None:
+            cache.set(key, self.empty_str, self.ttl)
+            return
+        cache.set(key, stock.code, self.ttl)
+        return stock.code
+
+
+stock_cache = StockCache()
+
+
+class OcrManage:
+
+    def __init__(self):
+        pass
+    
+    @classmethod
+    def loadOcr(self,url,source):
+        """Run Baidu OCR (``generalUrl``) on the image at ``url`` and return
+        what ``parse`` builds from the response for the given ``source``.
+        """
+        import os
+
+        # SECURITY: these credentials are committed to the repository. They
+        # can now be overridden through the environment; the literals remain
+        # only as a backward-compatible fallback and should be rotated.
+        app_id      = os.environ.get('BAIDU_OCR_APP_ID', '35386340')
+        api_key     = os.environ.get('BAIDU_OCR_API_KEY', 'EeFUPsy7L10UaaClXl3uy2ie')
+        secret_key  = os.environ.get('BAIDU_OCR_SECRET_KEY', 'QSWGGtGfGLkmZwc8AaRXgkloCAlSdNGB')
+        aip_ocr = AipOcr(
+            appId=app_id,
+            apiKey=api_key,
+            secretKey=secret_key)
+
+        res_url = aip_ocr.generalUrl(url)
+
+        return self.parse(res_url,source)
+
+    @classmethod
+    def parse(self,result,source):
+        """Dispatch an OCR response by screenshot type.
+
+        For ``source == 'pc'`` the cleaned, tokenised texts and the raw
+        entries go to ``lookCPAsset``; any other source sends the raw entries
+        to ``lookAsset``.
+        """
+        entries = result['words_result']
+        if source != 'pc':
+            return self.lookAsset(entries)
+
+        # desktop screenshot
+        words = []
+        for entry in entries:
+            text = self.deleteWords(entry['words'])
+            if len(text) > 0:
+                words.append(text)
+        return self.lookCPAsset(self.wordsSeperate(words), entries)
+    
+    @classmethod
+    def lookAsset(self,list):
+        """Build ``{'today': ..., 'list': [...]}`` from located OCR entries.
+
+        ``today`` comes from the value found by ``caculateAsset`` for the
+        '净资产' label when ``indexObject`` reports it, else for '总资产'.
+        ``list`` is the result of ``lookMarket``.
+        """
+        dic   = {}
+        asset = ''
+        # NOTE(review): `> 0` treats index 0 like "not found" - depends on
+        # what indexObject returns for a miss; confirm.
+        if self.indexObject('净资产',list) > 0:
+            asset = self.caculateAsset(list,'净资产')
+        else:
+            asset = self.caculateAsset(list,'总资产')
+
+
+        if self.floatValue(asset) >= 0:
+            today = 0
+            # A value containing '万' is taken as is; otherwise it is
+            # divided by 10000.
+            # NOTE(review): assumes floatValue can parse text containing
+            # '万' - confirm.
+            if '万' in asset:
+                today = self.floatValue(asset)
+            else:
+                today = self.floatValue(asset)/10000.0
+
+            dic['today'] = round(today,2)
+        else:
+            dic['today'] = 0
+        
+        dic['list'] = self.lookMarket(list)
+
+        print(dic)
+        return dic
+    @classmethod
+    def lookMarket(self,list):
+        """Collect ``{'name', 'code', 'fund'}`` for each recognised stock.
+
+        For each entry that ``codeName`` recognises, the value is the text of
+        the first later entry whose left edge lies left of the name's right
+        edge. Entries that are 6 characters long with neither ',' nor '.' are
+        passed over. Values without '万' are divided by 10000. Only positive
+        funds are reported.
+
+        Fixes: the skip test used ``or``, which passed over every 6-character
+        entry unless it held both ',' and '.', so values such as '123.45'
+        were skipped. The unused '市值' index lookup was removed.
+
+        :param list: OCR entries, each with ``'words'`` and ``'location'``.
+        """
+        flist = []
+        for i, dic in enumerate(list):
+            codeName = self.codeName(dic['words'])
+            if len(codeName['name']) == 0:
+                continue
+
+            loc = dic['location']
+            right_edge = float(loc['left']) + float(loc['width'])
+
+            market = '0'
+            for mdic in list[i+1:]:
+                nstr = mdic['words']
+                if len(nstr) == 6 and ',' not in nstr and '.' not in nstr:
+                    continue
+                if float(mdic['location']['left']) < right_edge:
+                    market = nstr
+                    break
+
+            if '万' in market:
+                fund = self.floatValue(market)
+            else:
+                fund = self.floatValue(market)/10000.0
+
+            if fund > 0:
+                flist.append({
+                    'name': codeName['name'],
+                    'code': codeName['code'],
+                    'fund': str(round(fund,2)),
+                })
+
+        return flist
+    
+    @classmethod
+    def lookCPAsset(self,wlist,list):
+        """Build the result dict for a desktop screenshot.
+
+        ``today`` is the ``assetWords`` value of ``wlist`` divided by 10000
+        and rounded to 2 places (set only when that value is >= 0). ``list``
+        is the result of ``lookCPMarket`` on the raw entries.
+        """
+        summary = {}
+
+        asset = self.assetWords(wlist)
+        if self.floatValue(asset) >= 0:
+            summary['today'] = round(self.floatValue(asset)/10000.0, 2)
+
+        summary['list'] = self.lookCPMarket(list)
+
+        print(str(summary))
+
+        return summary
+
+    @classmethod
+    def lookCPMarket(self,list):
+        """Collect ``{'name', 'code', 'fund'}`` per stock from a desktop screenshot.
+
+        The market-value column header is located first. Every later entry is
+        split into pieces, and the first piece that ``codeName`` recognises
+        names the stock. Its value is the text of the first following entry
+        whose left edge lies right of the column preceding the header.
+        Values without '万' are divided by 10000. Only positive funds are
+        reported.
+
+        Fixes: entries that clean up to an empty string, or split into no
+        pieces, are skipped. Before, they raised IndexError in
+        ``seperateChinense`` or TypeError from indexing the ``''`` placeholder.
+
+        :param list: OCR entries, each with ``'words'`` and ``'location'``.
+        """
+        flist = []
+        # NOTE(review): `index == 0` is used as the "not found" test - depends
+        # on what indexEquel returns for a miss; confirm. '最新沛值' covers a
+        # presumed OCR misreading of '最新市值'.
+        index = self.indexEquel('市值',list)
+        if index == 0:
+            index = self.indexEquel('最新市值',list)
+
+        if index == 0:
+            index = self.indexEquel('最新沛值',list)
+
+        # Right edge of the entry just before the header.
+        before = 0.0
+        if index - 1 > 0:
+            sloc = list[index-1]['location']
+            before = float(sloc['left']) + float(sloc['width'])
+
+        for i in range(index,len(list)):
+            words = self.replaceWords(list[i]['words'])
+            if not words:
+                continue
+
+            codeName = None
+            for sword in self.seperateChinense(words):
+                candidate = self.codeName(sword)
+                if len(candidate['name']) > 0:
+                    codeName = candidate
+                    break
+            if codeName is None:
+                continue
+
+            market = '0'
+            for m in range(i+1,len(list)):
+                mdic = list[m]
+                if float(mdic['location']['left']) > before:
+                    market = mdic['words']
+                    break
+
+            if '万' in market:
+                fund = self.floatValue(market)
+            else:
+                fund = self.floatValue(market)/10000.0
+
+            if fund > 0:
+                flist.append({
+                    'name': codeName['name'],
+                    'code': codeName['code'],
+                    'fund': str(round(fund,2)),
+                })
+
+        return flist
+
+
+    @classmethod
+    def caculateAsset(self,list,word):
+        """Return the text of the entry positioned under a label, else '0'.
+
+        For each entry whose text contains ``word``, the first later entry
+        whose horizontal span (shifted 30 to the left) contains the label's
+        left edge supplies the value. A later label match overrides an
+        earlier one.
+        """
+        strAsset = '0'
+        total = len(list)
+        for i, dic in enumerate(list):
+            if word not in dic['words']:
+                continue
+            for j in range(i+1, total):
+                candidate = list[j]
+                label_loc = dic['location']
+                value_loc = candidate['location']
+                ix = float(label_loc['left'])
+                jx = float(value_loc['left']) - 30
+                width = float(value_loc['width'])
+                if jx < ix and ix < (jx + width):
+                    strAsset = candidate['words']
+                    break
+
+        return strAsset
+    
+    @classmethod
+    def deleteWords(self,words):
+        """Return ``words`` with the listed noise characters removed."""
+        for junk in ('⊙', '¥', '¥', ' '):
+            words = words.replace(junk, '')
+        return words
+
+    @classmethod
+    def wordsSeperate(self,words):
+        """Concatenate the pieces ``seperateChinense`` yields for each string."""
+        pieces = []
+        for word in words:
+            pieces.extend(self.seperateChinense(word))
+        return pieces
+
+    @classmethod
+    def seperateChinense(self,word):
+        """Split ``word`` into runs of Chinese and non-Chinese text.
+
+        Characters accepted by ``isChinese`` (plus full-width parentheses)
+        form one kind of run, everything else the other. ``-`` and ``:`` end
+        the current non-Chinese run and are dropped. A non-Chinese run still
+        open at the end is additionally split on ``+``; runs closed earlier
+        are kept whole.
+
+        Fixes: an empty ``word`` returned nothing useful - it raised
+        IndexError on ``word[0]``; it now yields ``[]``. The two duplicated
+        tail branches are merged: at most one run is open at the end, so
+        their order never mattered. The ``find('-')`` truthiness test is
+        gone, since ``-`` can never be part of a run.
+
+        :param word: text to split.
+        :returns: list of pieces in reading order.
+        """
+        pieces = []
+        if not word:
+            return pieces
+
+        cword = ''
+        nword = ''
+        for strw in word:
+            if self.isChinese(strw) or strw == '(' or strw == ')':
+                # A Chinese run starts: close the pending other run.
+                if len(cword) == 0 and len(nword) > 0:
+                    pieces.append(nword)
+                    nword = ''
+                cword += strw
+            else:
+                # A non-Chinese run starts: close the pending Chinese run.
+                if len(nword) == 0 and len(cword) > 0:
+                    pieces.append(cword)
+                    cword = ''
+                if strw == '-' or strw == ':':
+                    if len(nword) > 0:
+                        pieces.append(nword)
+                        nword = ''
+                else:
+                    nword += strw
+
+        if len(cword) > 0:
+            pieces.append(cword)
+        if len(nword) > 0:
+            pieces.extend(part for part in nword.split('+') if len(part) > 0)
+
+        return pieces
+
+    @classmethod
+    def codeName(self,words):
+        """Look up a stock by a leading prefix of ``words``.
+
+        Tries, in this order, the first 3 characters (when ``words`` has at
+        least 3), the first 4 (at least 4) and the first 2 (at least 5),
+        calling ``stock_cache.get_code_by_name`` for each. The last prefix
+        that resolves wins.
+
+        Returns ``{'name': <matched prefix>, 'code': <its code>}``; both are
+        ``''`` when no prefix resolves. Name and code always belong to the
+        same lookup: previously a later failed lookup could blank the code of
+        an earlier successful match.
+        """
+        name = ''
+        code = None
+        # (minimum length of `words`, prefix length to look up)
+        # NOTE(review): the 2-character prefix is only tried for inputs of 5+
+        # characters; kept as found, but confirm this threshold is intended.
+        for min_len, prefix_len in ((3, 3), (4, 4), (5, 2)):
+            if len(words) < min_len:
+                continue
+            prefix = words[:prefix_len]
+            found = stock_cache.get_code_by_name(prefix)
+            if found is not None:
+                name, code = prefix, found
+
+        dic = {}
+        dic['name'] = name
+        dic['code'] = code or ''
+
+        return dic
+
+    
+    @classmethod
+    def isChinese(self,word):
+        """Return True when ``word`` has at least one character in U+4E00..U+9FFF."""
+        return any(u'\u4e00' <= ch <= u'\u9fff' for ch in word)
+
+    @classmethod
+    def assetWords(self,words):
+        """Pick the asset amount out of a list of OCR strings.
+
+        When ``words`` contains an entry equal to '净资产', only that label is
+        searched; otherwise the labels '总资产', '人民币', 'CNY', '当日参考盈亏'
+        and '资产' are tried in turn and the largest amount found via
+        ``maxWords`` is kept.
+
+        A positive amount is then normalised to two decimals: a value with a
+        decimal point is truncated (not rounded) after the second decimal,
+        and a value without any decimal point is divided by 100. Returns the
+        amount as a string, or ``''`` when nothing was found.
+        """
+        asset = ''
+        if self.indexWords('净资产',words) >= 0:
+            asset = self.maxWords(words,asset,'净资产')
+        else:
+            for label in ('总资产','人民币','CNY','当日参考盈亏','资产'):
+                asset = self.maxWords(words,asset,label)
+
+        # A positive floatValue already implies the text holds no Chinese
+        # characters, so no separate isChinese branch is needed here.
+        if self.floatValue(asset) > 0:
+            location = asset.find('.')
+            if location < 0:
+                # No decimal point at all: the last two digits are taken as
+                # the fractional part (presumably the OCR dropped the dot).
+                asset = str(self.floatValue(asset)/100.0)
+            elif len(asset) >= location + 3:
+                asset = asset[:location+3]
+
+        return asset
+
+    @classmethod
+    def maxWords(self,words,strWord,contain):
+        """Return the larger of ``strWord`` and the amount following a label.
+
+        ``words`` is scanned from the start and the scan stops at the first
+        entry containing '证券持仓' or '证券代码'. For an entry containing
+        ``contain``, the following entries are cleaned with ``replaceWords``
+        and ``replaceDot`` and the first one with a positive ``floatValue``
+        becomes the candidate; if the entry right after it is '万' (commas
+        ignored) the candidate is multiplied by 10000. The first candidate
+        found ends the scan.
+
+        Returns the candidate string when its value exceeds that of
+        ``strWord``, otherwise ``strWord`` unchanged.
+        """
+        best = '0'
+        count = len(words)
+        for pos, label in enumerate(words):
+            if label.find('证券持仓') >= 0 or label.find('证券代码') >= 0:
+                break
+
+            if label.find(contain) >= 0:
+                for nxt in range(pos + 1, count):
+                    candidate = self.replaceDot(self.replaceWords(words[nxt]))
+                    if not self.floatValue(candidate) > self.floatValue(best):
+                        continue
+
+                    best = candidate
+                    if nxt + 1 < count and words[nxt + 1].replace(',','') == '万':
+                        best = str(self.floatValue(best)*10000)
+                    break
+
+            if self.floatValue(best) > 0:
+                break
+
+        if self.floatValue(best) > self.floatValue(strWord):
+            return best
+        return strWord
+
+
+    @classmethod
+    def replaceWords(self,words):
+        """Strip separators from an OCR string and upper-case what is left.
+
+        Removes spaces, newlines, commas, percent signs, colons and the
+        capital letter 'A', then upper-cases the result. Commas and colons are
+        removed in both their ASCII and full-width (U+FF0C, U+FF1A) forms.
+        """
+        # Full-width forms are written as escapes so they cannot be folded
+        # into their ASCII look-alikes by an editor or a normalising tool.
+        # NOTE(review): 'A' is removed before upper-casing, so a lower-case
+        # 'a' survives as 'A'; kept as found - confirm whether that is wanted.
+        for junk in (' ', '\n', ',', '\uff0c', '%', ':', '\uff1a', 'A'):
+            words = words.replace(junk, '')
+
+        return words.upper()
+
+    @classmethod
+    def replaceDot(self,words):
+        """Keep only the last '.' of ``words`` when it contains several.
+
+        Strings with zero or one dot are returned unchanged.
+        """
+        parts = words.split('.')
+        if len(parts) <= 2:
+            return words
+
+        # Glue everything before the last dot together, then re-attach the tail.
+        return ''.join(parts[:-1]) + '.' + parts[-1]
+
+    @classmethod
+    def indexWords(self,words,list):
+        """Return the index of the first entry of ``list`` equal to ``words``, or -1."""
+        for pos, entry in enumerate(list):
+            if words == entry:
+                return pos
+
+        return -1
+    
+    @classmethod
+    def indexObject(self,words,list):
+        """Return the index of the first dict in ``list`` whose ``'words'`` contains ``words``.
+
+        Returns -1 when no entry matches.
+        """
+        for pos, entry in enumerate(list):
+            if words in entry['words']:
+                return pos
+
+        return -1
+    
+    @classmethod
+    def indexEquel(self,words,list):
+        """Return the index of the first dict in ``list`` whose ``'words'`` equals ``words``.
+
+        Returns -1 when no entry matches.
+        """
+        for pos, entry in enumerate(list):
+            if words == entry['words']:
+                return pos
+
+        return -1
+
+    @classmethod
+    def isNumber(self,number):
+        """Tell whether ``number`` is a plain non-negative decimal literal.
+
+        Commas are ignored. Accepted are '0' or an integer without leading
+        zeros, optionally followed by '.' and at least one digit. Anything
+        containing Chinese characters is rejected up front.
+        """
+        candidate = number.replace(',','')
+        if self.isChinese(candidate):
+            return False
+
+        zhengshu = r'^([1-9][\d]*|0)(\.[\d]+)?$'
+        return bool(re.match(zhengshu, str(candidate)))
+
+    @classmethod
+    def floatValue(self,word):
+        """Convert an OCR string to a float, or return 0 when it is not a number.
+
+        The text is first cleaned with ``replaceWords`` and ``replaceDot``;
+        the result must then satisfy ``isNumber``.
+        """
+        cleaned = self.replaceDot(self.replaceWords(word))
+        if cleaned == '' or not self.isNumber(cleaned):
+            return 0
+
+        return float(cleaned)
+    
+
+# Running this module directly only prints a marker string; no OCR is run.
+if __name__ == '__main__':
+    print('ocr')
+    

+ 131 - 3
src/weixin/controls.py

@@ -679,6 +679,7 @@ def add_model(cls,**kwargs):
         username = user.username
         usercode = user.usercode
         match_group = player.match_group
+        opmode_group = player.opmode_group
         zq = user.zq
         cw = user.cw
         df = user.df
@@ -736,7 +737,9 @@ def add_model(cls,**kwargs):
                             stock_id = stock_id,
                             stock_name = ts["name"],
                             player_id = player.id,
-                            stock_date = stock_date
+                            stock_date = stock_date,
+                            match_group = match_group,
+                            opmode_group = opmode_group
                         )
                         if ts.get("fund"):
                             usobj.fund = ts["fund"]
@@ -762,6 +765,7 @@ def add_model(cls,**kwargs):
             obj.username = username
             obj.usercode = usercode
             obj.match_group = match_group
+            obj.opmode_group = opmode_group
             obj.is_markt = is_markt
             obj.yesterday_is_markt = yesterday_is_markt
             obj.zq = zq
@@ -2045,6 +2049,8 @@ def do_wx_pay(request):
     match_group = qdata.get("match_group")
     signup_name = qdata.get("signup_name")
     pay_type = qdata.get("pay_type")
+    opmode_group = qdata.get("opmode_group")
+
     if not signup_name:
         signup_name = user_name
 
@@ -2079,7 +2085,8 @@ def do_wx_pay(request):
             out_trade_no = out_trade_no,
             phone = phone,
             match_group = match_group,
-            signup_name = signup_name
+            signup_name = signup_name,
+            opmode_group = opmode_group
         )
         #支付
         if pay_type == "wxapppay":
@@ -2110,13 +2117,16 @@ def do_wx_pay(request):
             out_trade_no = out_trade_no,
             phone = phone,
             match_group = match_group,
-            signup_name = signup_name
+            signup_name = signup_name,
+            opmode_group = opmode_group
         )
 
         user = cm.UserInfo.objects.filter(id=user_id).first()
         #生成选手信息
         cur_match = cm.Match.objects.filter(id=match_id).first()
         visit_group = cm.MatchGroup.objects.filter(id=match_group).first()
+        opmode_group_obj = cm.OpModelGroup.objects.filter(id=opmode_group).first()
+
         player,flag = cm.Player.objects.get_or_create(
             user_id = user.id, 
             match_id = cur_match.id
@@ -2124,6 +2134,9 @@ def do_wx_pay(request):
         player.match_name = cur_match.name
         player.match_group = visit_group.id
         player.match_group_name = visit_group.name
+        player.opmode_group = opmode_group_obj.id
+        player.opmode_group_name = opmode_group_obj.name
+
         player.username = signup_name 
         player.usercode = request.user.get("usercode")
         player.role = 2
@@ -2395,6 +2408,9 @@ def add_wanzhu_consult(request):
     aliyunpush.push_notice_by_userid(0,title,body,params,"ios")
     aliyunpush.push_notice_by_userid(0,title,body,params,"android")
 
+    #退款短信通知
+    if '退款' in content:
+        send_signup_success('13883187629','退款')
 
 
 def add_comments(request):
@@ -3241,5 +3257,117 @@ def get_stock_nb_comments(request):
     return total,data
 
 
+def get_opmode_group_rank_list(**kwargs):
+    """Return the top players and win/loss counts of every operation-mode group.
+
+    Loads the day's player records ordered by ``total_income`` descending and
+    groups them by ``opmode_group`` / ``opmode_group_name``. ``kwargs`` is
+    accepted for call-site uniformity but not read.
+
+    Returns a list with one dict per group:
+        opmode_id / opmode_name: the group's id and name, as strings;
+        list: the group's first 5 records (dicts from ``values()``);
+        win_5: records of the group with ``today_income >= 0.05``;
+        loss_5: records of the group with ``today_income <= -0.05``.
+    """
+    # TODO(review): date and match are pinned to fixed values; the original
+    # dynamic lookups are kept below for reference.
+    #today = get_today_date()
+    #match_id = ccc.get_cur_match().id
+    today = "2023-08-25"
+    match_id = 16
+
+    qset = cm.PlayerRecord.get_db_model(match_id).objects.filter(stock_date=today).order_by("-total_income")
+
+    qdata = list(qset.values())
+
+    top_n = 5
+    groups = {}
+    for item in qdata:
+        key = "%s_%s" % (item["opmode_group"],item["opmode_group_name"])
+        group = groups.setdefault(key,{"list":[],"win_5":0,"loss_5":0})
+
+        # Cap each group on its own; every row must still be visited so that
+        # later groups are filled and the counters below stay complete.
+        if len(group["list"]) < top_n:
+            group["list"].append(item)
+
+        # Counted per group from the rows already in memory, instead of two
+        # whole-queryset COUNT queries per group.
+        # NOTE(review): the loss threshold was `<= 0.05`, which overlaps the
+        # win condition; `-0.05` assumes today_income is a signed ratio.
+        income = item.get("today_income")
+        if income is not None:
+            if income >= 0.05:
+                group["win_5"] += 1
+            elif income <= -0.05:
+                group["loss_5"] += 1
+
+    data = []
+    for key,group in groups.items():
+        # Split once only: the group name itself may contain underscores.
+        opmode_id,opmode_name = key.split("_",1)
+        item = {
+            "opmode_name":opmode_name,
+            "opmode_id":opmode_id,
+            "list":group["list"],
+            "win_5":group["win_5"],
+            "loss_5":group["loss_5"]
+        }
+        data.append(item)
+
+    return data
+
+
+def get_opmode_group_statistic(**kwargs):
+    """Return summary statistics for one operation-mode group.
+
+    Reads ``opmode_id`` from ``kwargs`` and aggregates that group's player
+    records and stock holdings for the day.
+
+    Returns a dict with:
+        total_man: number of player records in the group;
+        today_income_avg: sum of ``today_income`` divided by ``total_man``
+            (0 when the group has no records or no income values);
+        today_fund_total: sum of ``today_fund`` (None when there are no rows);
+        today_init_total: sum of ``init_fund`` (None when there are no rows);
+        user_stock_top10: up to 10 ``{stock_id, stock_name, cnt}`` dicts,
+            ordered by holding count descending.
+    """
+    opmode_id = kwargs.get("opmode_id")
+
+    # TODO(review): date and match are pinned to fixed values; the original
+    # dynamic lookups are kept below for reference.
+    #today = get_today_date()
+    #match_id = ccc.get_cur_match().id
+    today = "2023-08-25"
+    match_id = 16
+
+    qset = cm.PlayerRecord.get_db_model(match_id).objects.filter(stock_date=today,opmode_group=opmode_id)
+    qset_user_stock = cm.UserStock.objects.filter(stock_date=today,opmode_group=opmode_id)
+
+    total = qset.count()
+    # One round trip for all three sums.
+    sums = qset.aggregate(
+        income_sum=Sum("today_income"),
+        fund_sum=Sum("today_fund"),
+        init_sum=Sum("init_fund")
+    )
+
+    # Sum() yields None on an empty queryset and total is 0 then; guard both
+    # so an empty group reports 0 instead of raising.
+    income_sum = sums["income_sum"]
+    if total and income_sum is not None:
+        today_income_avg = income_sum/total
+    else:
+        today_income_avg = 0
+
+    # Slice the queryset, not the list, so only 10 rows are fetched.
+    user_stock_top10 = list(
+        qset_user_stock.values("stock_id","stock_name")
+        .annotate(cnt=Count("stock_id"))
+        .order_by("-cnt")
+        .values("stock_id","stock_name","cnt")[:10]
+    )
+
+    statistic = {
+        "total_man":total,
+        "today_income_avg":today_income_avg,
+        "today_fund_total":sums["fund_sum"],
+        "today_init_total":sums["init_sum"],
+        "user_stock_top10":user_stock_top10
+    }
+    return statistic
+
+
+
+
+def get_opmode_group_rank_info_list(**kwargs):
+    """Return one operation-mode group's player records, sorted and paged.
+
+    Reads from ``kwargs``: ``opmode_id`` (group filter), ``order_by``
+    ("today_income" or "total_income", descending; anything else leaves the
+    order untouched), ``page`` (default 0) and ``page_size`` (default 10).
+    Paging goes through ``ccc.get_page_qset`` only when both ``page`` and
+    ``page_size`` are non-zero.
+
+    Returns ``(total, data)`` where ``data`` is a list of record dicts.
+    """
+    opmode_id = kwargs.get("opmode_id")
+    order_by = kwargs.get("order_by")
+
+    # NOTE(review): date and match are pinned to fixed values; the original
+    # dynamic lookups are kept below for reference.
+    #today = get_today_date()
+    #match_id = ccc.get_cur_match().id
+    today = "2023-08-25"
+    match_id = 16
+
+    record_model = cm.PlayerRecord.get_db_model(match_id)
+    qset = record_model.objects.filter(stock_date=today,opmode_group=opmode_id)
 
+    # Today's income or total income, highest first.
+    if order_by in ("today_income","total_income"):
+        qset = qset.order_by("-" + order_by)
+
+    page = int(kwargs.get("page",0))
+    page_size = int(kwargs.get("page_size",10))
+
+    if not (page and page_size):
+        total = qset.count()
+    else:
+        total,qset = ccc.get_page_qset(qset,page,page_size)
 
+    return total,list(qset.values())

+ 7 - 0
src/weixin/urls_backstage.py

@@ -98,8 +98,15 @@ urlpatterns = [
     url(r'^v3/match/winlost/top5$', views.MatchWinlostTop5ListView.as_view()),
     url(r'^v3/ai/detect/image$', views.AiDetectImageView.as_view()),
     url(r'^v3/ai/detect/txt$', views.AiDetectTxtView.as_view()),
+    url(r'^v3/ai/ocr$', views.AiOcr.as_view()),
     url(r'^v3/user/black$', views.UserBlacksView.as_view()),
     url(r'^v3/user/black/list$', views.UserBlacksListView.as_view()),
     url(r'^v3/stock/nbcomments/list$', views.StockNbCommentsListView.as_view()),
+
+    url(r'^v3/match/opmodel/list$', views.OpModelGroupListView.as_view()),
+    url(r'^v3/match/opmodel/rank/list$', views.OpModelGroupRankListView.as_view()),
+    url(r'^v3/match/opmodel/group/rank/info$', views.OpModelGroupRankInfoView.as_view()),
+    url(r'^v3/match/opmodel/group/rank/list$', views.OpModelGroupRankInfoListView.as_view()),
+    
 ]
 

+ 80 - 1
src/weixin/views.py

@@ -17,7 +17,8 @@ import weixin.controls as ctl
 import weixin.wzhifuSDK as wzf
 from utils.upload_to_oss import hnoss
 from utils.baiduai import baidu_ai_detect_image,baidu_ai_detect_txt
-
+from utils.ocr import BaiduOcrForm
+from utils.ocrmanage import OcrManage
 
 class OpenidView(cv.BaseView):
     def get(self,request):
@@ -1490,6 +1491,31 @@ class AiDetectTxtView(cv.AuthView):
             return cv.to_fail(e)
 
 
+class AiOcr(cv.AuthView):
+    def post(self, request):
+        """
+        Run OCR on a screenshot and return what ``OcrManage.loadOcr`` extracts
+        (per the original note: balance, stocks and money, via Baidu OCR).
+
+        @source: str | None, source of image. optional['tonghuashun']
+        @img_url: str, the url of image
+
+        Answers with a failure response when ``img_url`` is missing or when
+        any exception is raised while processing.
+        """
+        payload = request.json
+        try:
+            img_url = payload.get('img_url')
+            if not img_url:
+                return cv.to_fail('img_url can not be empty')
+
+            result = OcrManage.loadOcr(img_url,payload.get('source'))
+            return cv.to_suc(result)
+
+        except Exception as e:
+            cv.tracefail()
+            return cv.to_fail(e)
+
+
 class UserBlacksView(cv.AuthView):
     def post(self, request):
         """#拉黑用户(3.0小程序)
@@ -1535,3 +1561,56 @@ class StockNbCommentsListView(cv.BaseView):
         except Exception as e: 
             cv.tracefail()
             return cv.to_fail(e)
+
+
+####
+class OpModelGroupListView(cv.BaseView):
+    def get(self, request):
+        """List all operation-mode groups (WeChat mini program).
+
+        Passes the request's JSON payload to ``ctl.get_list_info`` and answers
+        with ``{"total": ..., "list": ...}``; any exception is traced and
+        turned into a failure response.
+        """
+        params = request.json
+        try:
+            count,rows = ctl.get_list_info(self,**params)
+            payload = {"total":count,"list":rows}
+            return cv.to_suc(payload)
+        except Exception as e:
+            cv.tracefail()
+            return cv.to_fail(e)
+
+
+class OpModelGroupRankListView(cv.BaseView):
+    def get(self, request):
+        """Get the ranking of every operation-mode group (WeChat mini program).
+
+        Passes the request's JSON payload to ``ctl.get_opmode_group_rank_list``
+        and returns its result; any exception is traced and turned into a
+        failure response.
+        """
+        params = request.json
+        try:
+            ranking = ctl.get_opmode_group_rank_list(**params)
+            return cv.to_suc(ranking)
+        except Exception as e:
+            cv.tracefail()
+            return cv.to_fail(e)
+
+
+class OpModelGroupRankInfoView(cv.BaseView):
+    def get(self, request):
+        """Get the statistics of one operation-mode group (WeChat mini program).
+
+        Passes the request's JSON payload to ``ctl.get_opmode_group_statistic``
+        and returns its result; any exception is traced and turned into a
+        failure response.
+        """
+        params = request.json
+        try:
+            summary = ctl.get_opmode_group_statistic(**params)
+            return cv.to_suc(summary)
+        except Exception as e:
+            cv.tracefail()
+            return cv.to_fail(e)
+
+
+class OpModelGroupRankInfoListView(cv.BaseView):
+    def get(self, request):
+        """Get the ranked records of one operation-mode group (WeChat mini program).
+
+        Passes the request's JSON payload to
+        ``ctl.get_opmode_group_rank_info_list`` and answers with
+        ``{"total": ..., "list": ...}``; any exception is traced and turned
+        into a failure response.
+        """
+        params = request.json
+        try:
+            count,rows = ctl.get_opmode_group_rank_info_list(**params)
+            payload = {"total":count,"list":rows}
+            return cv.to_suc(payload)
+        except Exception as e:
+            cv.tracefail()
+            return cv.to_fail(e)