We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
我在开发团子OCR的支持时，输出文字的方向不正确。具体效果如下：
代码如下:
import base64
import json
import os
from typing import List, Tuple

import cv2
import numpy as np
import requests
from dotenv import load_dotenv

from .common import CommonDetector
from ..utils import Quadrilateral

# Load variables from a local .env file before the token is read below.
load_dotenv()

STARRIVER_OCR_TOKEN = os.getenv('STARRIVER_OCR_TOKEN', '')


class StariverDetector(CommonDetector):
    """Text detector backed by the remote Stariver (团子) manga OCR HTTP service.

    The input image is JPEG-encoded, sent base64-encoded to the service, and
    the returned text blocks and mask are converted into ``Quadrilateral``
    objects and a ``uint8`` mask array.
    """

    # (connect, read) timeout in seconds, so a stalled connection cannot hang
    # the caller forever.
    _REQUEST_TIMEOUT = (10, 120)

    def __init__(self, *args, **kwargs):
        """Initialise the detector and validate that an API token is set.

        Raises:
            ValueError: if the ``STARRIVER_OCR_TOKEN`` environment variable is
                missing or empty.
        """
        super().__init__(*args, **kwargs)
        self.token = STARRIVER_OCR_TOKEN
        # os.getenv(..., '') never yields None, so test for emptiness instead
        # of ``is None``; otherwise a missing token is never reported.
        if not self.token:
            raise ValueError('团子OCR需要设置 STARRIVER_OCR_TOKEN 的环境变量。')
        self.url = 'https://dl.ap-sh.starivercs.cn/v2/manga_trans/advanced/manga_ocr'

    async def _detect(self, image: np.ndarray, detect_size: int, text_threshold: float,
                      box_threshold: float, unclip_ratio: float,
                      verbose: bool = False) -> Tuple[List[Quadrilateral], np.ndarray, np.ndarray]:
        """Send ``image`` to the OCR service and convert its answer.

        Args:
            image: image to analyse; it is JPEG-encoded before upload.
            detect_size: unused by this detector.
            text_threshold: unused by this detector.
            box_threshold: unused by this detector.
            unclip_ratio: unused by this detector.
            verbose: when true, dump the raw response to ``det_result.json``
                and print every recognised text block.

        Returns:
            A tuple ``(textlines, mask, None)``: one ``Quadrilateral`` per
            text block in the response, and the response mask upscaled by a
            factor of two as a ``uint8`` array.

        Raises:
            ValueError: if the image cannot be JPEG-encoded or the returned
                mask cannot be decoded.
            requests.HTTPError: if the service answers with an error status.
        """
        encoded_ok, img_encoded = cv2.imencode('.jpg', image)
        if not encoded_ok:
            raise ValueError('Failed to encode the input image as JPEG.')
        img_base64 = base64.b64encode(img_encoded).decode('utf-8')

        data = {
            "token": self.token,
            "mask": True,
            "refine": True,
            "filtrate": True,
            "disable_skip_area": True,
            "detect_scale": 3,
            "merge_threshold": 0.5,
            "low_accuracy_mode": False,
            "image": img_base64
        }

        # NOTE(review): requests.post is a blocking call inside a coroutine;
        # it stalls the event loop for the duration of the HTTP round trip.
        response = requests.post(self.url, data=json.dumps(data), timeout=self._REQUEST_TIMEOUT)
        # Fail loudly on HTTP errors instead of with a KeyError/JSON error below.
        response.raise_for_status()
        response_data = response.json()['Data']

        if verbose:
            with open('det_result.json', 'w', encoding='utf-8') as f:
                json.dump(response_data, f, ensure_ascii=False, indent=4)

        textlines: List[Quadrilateral] = []
        for block in response_data['text_block']:
            corners = block['block_coordinate']
            # Corner order: upper-left, upper-right, lower-right, lower-left.
            pts_from_ocr = np.array([
                corners['upper_left'],
                corners['upper_right'],
                corners['lower_right'],
                corners['lower_left'],
            ])
            text = ''.join(block['texts'])
            if verbose:
                print(f"识别出来的文本块: {text}")

            # NOTE(review): the response also carries ``block['is_vertical']``,
            # which is currently not forwarded. Presumably Quadrilateral derives
            # the reading direction from ``pts`` alone, which can disagree with
            # the service for multi-line blocks — confirm against Quadrilateral.
            fg_color = block['foreground_color']
            bg_color = block['background_color']

            textlines.append(Quadrilateral(
                pts=pts_from_ocr, text=text, prob=1.0,
                fg_r=fg_color[0], fg_g=fg_color[1], fg_b=fg_color[2],
                bg_r=bg_color[0], bg_g=bg_color[1], bg_b=bg_color[2],
            ))

        raw_mask = self.base64_to_ndarray(response_data['mask'])

        # Process the mask: upscale by 2x in both dimensions.
        # NOTE(review): presumably the service returns a half-resolution mask —
        # confirm that the result matches ``image.shape``.
        mask_resized = cv2.resize(
            raw_mask,
            (raw_mask.shape[1] * 2, raw_mask.shape[0] * 2),
            interpolation=cv2.INTER_LINEAR,
        )
        # Widen before scaling: multiplying a uint8 array by 255 wraps around
        # modulo 256, which would make the clip a no-op and corrupt any mask
        # value greater than 1.
        raw_mask = np.clip(mask_resized.astype(np.uint16) * 255, 0, 255).astype(np.uint8)

        return textlines, raw_mask, None

    @staticmethod
    def base64_to_ndarray(base64_str: str) -> np.ndarray:
        """Decode a base64-encoded image into a single-channel grayscale array.

        Raises:
            ValueError: if the decoded bytes are not a readable image.
        """
        img_data = base64.b64decode(base64_str)
        img_array = np.frombuffer(img_data, np.uint8)
        # Read as grayscale.
        img = cv2.imdecode(img_array, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imdecode signals failure by returning None, not by raising.
            raise ValueError('Failed to decode the base64 string into an image.')
        return img
请问这可能是什么原因导致的?应该检查代码的哪些部分?
The text was updated successfully, but these errors were encountered:
No branches or pull requests
我在开发团子OCR的支持时，输出文字的方向不正确。具体效果如下：
代码如下:
请问这可能是什么原因导致的?应该检查代码的哪些部分?
The text was updated successfully, but these errors were encountered: