Document Edge Detection Implementation

As shown in the attached photo: how can the edges of the paper be detected in a photo of a document, and how could this be implemented in Python or on the front end?


First detect the contours and then compare them by size; or, taking a slightly different angle, compare them by area. The script below sorts the candidate contours by area, keeps the largest ones, and looks for a four-point approximation:

import cv2
import numpy as np
import tkinter as tk
from tkinter import filedialog
from skimage.filters import threshold_local  # requires scikit-image to be installed

def find_document(image_path):
    image = cv2.imread(image_path)
    if image is None:
        print("Image not loaded.")
        return None

    # Convert to grayscale and blur slightly to suppress noise
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)

    # Canny edge detection; the thresholds can be tuned here
    edged = cv2.Canny(gray, 75, 200)

    # Find contours and keep the five largest by area
    contours, _ = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    contours = sorted(contours, key=cv2.contourArea, reverse=True)[:5]

    screenCnt = None
    # Loop over the candidate contours
    for c in contours:
        peri = cv2.arcLength(c, True)
        approx = cv2.approxPolyDP(c, 0.02 * peri, True)

        # A contour that approximates to four points is assumed to be the document
        if len(approx) == 4:
            screenCnt = approx
            break

    if screenCnt is None:
        print("No document contour found.")
        # At this point you could let the user pick the corners manually
        # (see the sketch after this function) or simply return
        return None

    # A contour was found, so apply the perspective transform
    warped = four_point_transform(image, screenCnt.reshape(4, 2))
    return warped
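
# --- Optional fallback (a sketch, not part of the original answer): if no
# four-point contour is found, let the user click the four corners by hand.
# The window name and the Esc-to-cancel behaviour are arbitrary choices.
def pick_corners_manually(image):
    points = []
    preview = image.copy()  # draw markers on a copy, keep the original clean
    window = "Click the 4 document corners"

    def on_mouse(event, x, y, flags, param):
        # Record up to four left clicks and mark each one on the preview
        if event == cv2.EVENT_LBUTTONDOWN and len(points) < 4:
            points.append((x, y))
            cv2.circle(preview, (x, y), 5, (0, 0, 255), -1)
            cv2.imshow(window, preview)

    cv2.imshow(window, preview)
    cv2.setMouseCallback(window, on_mouse)
    while len(points) < 4:
        # Esc aborts the manual selection
        if cv2.waitKey(20) & 0xFF == 27:
            break
    cv2.destroyAllWindows()
    if len(points) != 4:
        return None
    # four_point_transform() below accepts these points; order_points() sorts them
    return np.array(points, dtype="float32")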

def order_points(pts):
    # Initialize the four points in top-left, top-right, bottom-right, bottom-left order
    rect = np.zeros((4, 2), dtype="float32")

    # The top-left point has the smallest x + y sum, the bottom-right the largest
    s = pts.sum(axis=1)
    rect[0] = pts[np.argmin(s)]
    rect[2] = pts[np.argmax(s)]

    # The top-right point has the smallest y - x difference, the bottom-left the largest
    diff = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(diff)]
    rect[3] = pts[np.argmax(diff)]

    # Return the ordered points
    return rect

def four_point_transform(image, pts):
    # Sort the points into top-left, top-right, bottom-right, bottom-left order
    rect = order_points(pts)
    (tl, tr, br, bl) = rect

    # Compute the width and height of the output image
    widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
    widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
    maxWidth = max(int(widthA), int(widthB))

    heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
    heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
    maxHeight = max(int(heightA), int(heightB))

    # Destination points for the top-down view
    dst = np.array([
        [0, 0],
        [maxWidth - 1, 0],
        [maxWidth - 1, maxHeight - 1],
        [0, maxHeight - 1]], dtype="float32")

    # Compute the perspective transform matrix and apply it
    M = cv2.getPerspectiveTransform(rect, dst)
    warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))

    # Return the warped image
    return warped

def process_image(file_path):
    if file_path:
        processed_image = find_document(file_path)
        if processed_image is not None and processed_image.size > 0:
            cv2.imshow("Processed Document", processed_image)
            cv2.waitKey(0)
            cv2.destroyAllWindows()

            save_path = file_path.rsplit('.', 1)[0] + '_processed.jpg'
            cv2.imwrite(save_path, processed_image)
            print(f"Processed image saved to {save_path}")
        else:
            print("图像处理失败或图像为空。")

def select_image():
    root = tk.Tk()
    root.withdraw()
    file_path = filedialog.askopenfilename()
    root.destroy()  # close the Tkinter window
    process_image(file_path)

def main():
    # Create the Tkinter window
    root = tk.Tk()
    root.title("Select a file")

    # Create a button for choosing an image file
    select_button = tk.Button(root, text="Select image", command=select_image)
    select_button.pack()

    # Run the Tkinter event loop
    root.mainloop()

if __name__ == "__main__":
    main()
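
The script imports threshold_local from scikit-image but never uses it. What it is typically used for in this kind of scanner is binarizing the warped output so the result looks like a black-and-white scan. A minimal sketch (the helper name and the block_size/offset values below are assumptions to be tuned per image, not part of the answer above):

def to_scan_style(warped):
    # Convert the top-down view to grayscale
    gray = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
    # Adaptive local threshold; block_size must be odd, offset shifts the threshold
    T = threshold_local(gray, block_size=11, offset=10, method="gaussian")
    # Pixels brighter than their local threshold become white, the rest black
    return (gray > T).astype("uint8") * 255

For example, calling to_scan_style(processed_image) in process_image() before cv2.imwrite() would save the binarized scan instead of the colour warp.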

Another option: since every photo is different, train an image model — annotate the photos with labelme and train a YOLO model on them.
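
If you go the model-training route, here is a minimal sketch assuming the ultralytics package and a dataset whose labelme annotations have already been converted to YOLO format; the checkpoint name, dataset.yaml path, file names and hyperparameters are all placeholders:

from ultralytics import YOLO

# Placeholder checkpoint: a segmentation model gives a mask of the page,
# a plain detection model only gives a bounding box
model = YOLO("yolov8n-seg.pt")

# dataset.yaml is a placeholder path to the YOLO-format dataset built from
# the labelme annotations; epochs and imgsz are example values
model.train(data="dataset.yaml", epochs=100, imgsz=640)

# Run inference on a new photo (placeholder file name)
results = model.predict("document_photo.jpg")
for r in results:
    print(r.boxes.xyxy)  # detected page bounding box(es)
    print(r.masks)       # segmentation masks when using a -seg model

Compared with the contour approach above, a trained model tends to cope better with cluttered backgrounds, at the cost of annotating and training on your own photos.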
