aiohttphandler

本模块是自己封装的。客户端使用的是 aiohttp 来封装 handler,服务端使用的是 fastapi 和 aiofiles。

但是在日志量过大的情况下,这套方案还是撑不住(客户端每输出一条日志都会发起一次 HTTP 请求)。

客户端

log.py

# -*- coding:utf8 -*-
"""日志工具"""

import asyncio
import logging
from logging.handlers import HTTPHandler
from os.path import join

import aiohttp

from config import config

# Module-level event loop; AioHttpHandler.emit runs each submit() coroutine on it.
# NOTE(review): calling asyncio.get_event_loop() at import time with no running
# loop is deprecated in recent Python versions — confirm the targeted version.
loop = asyncio.get_event_loop()


def get_logger(name, level=config.LOG_LEVEL, log_format=config.LOG_FORMAT, date_fmt=None):
    """Return the logger registered under ``name``, configuring it on first use.

    Args:
        name: Name of the logger instance (required).
        level: Level applied to the logger when it is first configured.
        log_format: Format string used by every handler attached here.
        date_fmt: Date format for the formatter; ``config.LOG_DATEFORMAT`` is
            used when this is falsy.

    Returns:
        The ``logging.Logger`` for ``name``. A logger that already has
        handlers is returned as-is, so repeated calls never stack handlers.
    """
    logger = logging.getLogger(name)
    logger.propagate = 0

    if not logger.handlers:
        logger.setLevel(level=level)
        formatter = logging.Formatter(log_format, datefmt=date_fmt or config.LOG_DATEFORMAT)

        def _attach(handler):
            # Every handler shares the same formatter.
            handler.setFormatter(formatter)
            logger.addHandler(handler)

        # Console output is always enabled.
        _attach(logging.StreamHandler())

        # Remote shipping is enabled only when host, path and file are all configured.
        http_configured = config.HTTP_LOG_HOST and config.HTTP_LOG_PATH and config.HTTP_LOG_FILE
        if http_configured:
            _attach(AioHttpHandler(host=config.HTTP_LOG_HOST, uri=""))

    return logger


async def handler_status(status):
    """Accept the HTTP status of a log submission and deliberately ignore it."""
    del status  # placeholder hook: nothing is done with the status
    return None


class AioHttpHandler(HTTPHandler):
    """Logging handler that sends each formatted record to an HTTP endpoint with aiohttp.

    Every request carries two fields: ``msg`` (the formatted record) and
    ``file`` (``config.HTTP_LOG_PATH`` joined with ``config.HTTP_LOG_FILE``).
    """

    def __init__(self, host, uri, method="GET"):
        """Store the target URL and the HTTP method.

        Args:
            host: Base URL of the log server.
            uri: Path appended to ``host`` after a ``/``.
            method: ``"GET"`` or ``"POST"`` (case-insensitive).

        Raises:
            ValueError: If ``method`` is neither GET nor POST.
        """
        # Only the base Handler is initialised; HTTPHandler.__init__ is not
        # called, so this class relies solely on the attributes set below.
        logging.Handler.__init__(self)
        self.url = "%s/%s" % (host, uri)
        method = method.upper()
        if method not in ("GET", "POST"):
            raise ValueError("method must be GET or POST")
        self.method = method
        # Strong references to tasks scheduled on an already-running loop, so
        # they are not garbage-collected before they finish.
        self._pending = set()

    async def submit(self, data):
        """Send one formatted message to the log server.

        Args:
            data: The formatted log message.

        Returns:
            True once the response status has been handed to ``handler_status``.
        """
        timeout = aiohttp.ClientTimeout(total=10)
        # Strip leading slashes so os.path.join does not discard HTTP_LOG_PATH.
        file = config.HTTP_LOG_FILE.lstrip("/")
        body = {"msg": data, "file": join(config.HTTP_LOG_PATH, file)}
        if self.method == "GET":
            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.get(self.url, params=body) as resp:
                    await handler_status(resp.status)
        else:
            headers = {
                "Content-type": "application/x-www-form-urlencoded",
            }
            async with aiohttp.ClientSession(timeout=timeout, headers=headers) as session:
                async with session.post(self.url, data=body) as resp:
                    await handler_status(resp.status)
        # Same result for both methods (previously only the POST branch returned True).
        return True

    def _finish(self, record, task):
        """Done-callback for scheduled tasks: drop the reference and report failures."""
        self._pending.discard(task)
        if task.cancelled():
            return
        error = task.exception()
        if error is not None:
            try:
                # Re-raise so handleError can read the failure from sys.exc_info().
                raise error
            except Exception:
                self.handleError(record)

    def emit(self, record):
        """Format ``record`` and ship it to the log server.

        Outside a running event loop the call blocks on the module-level
        ``loop`` until the request completes. Inside a running loop (where
        ``run_until_complete`` would raise) the request is scheduled as a
        task instead. Any failure is routed to ``handleError`` rather than
        raised into the code that issued the log call.
        """
        try:
            msg = self.format(record)
            try:
                running = asyncio.get_running_loop()
            except RuntimeError:
                running = None

            if running is None:
                loop.run_until_complete(self.submit(msg))
            else:
                task = running.create_task(self.submit(msg))
                self._pending.add(task)
                task.add_done_callback(lambda done, rec=record: self._finish(rec, done))
        except Exception:
            self.handleError(record)

config.py

import logging
import os

# Logging configuration


def _env_int(name, default):
    """Return environment variable ``name`` as an int, or ``default`` when unset or blank."""
    raw = os.getenv(name)
    if raw is None or not raw.strip():
        return default
    return int(raw)


def _env_log_level(name, default):
    """Return a value accepted by ``Logger.setLevel`` from environment variable ``name``.

    Numeric strings become ints, level names are upper-cased, and ``default``
    is returned when the variable is unset or blank.
    """
    raw = os.getenv(name)
    if raw is None or not raw.strip():
        return default
    raw = raw.strip()
    return int(raw) if raw.isdigit() else raw.upper()


LOG_LEVEL = _env_log_level("LOG_LEVEL", logging.INFO)
LOG_FORMAT = '%(asctime)s [%(name)s %(lineno)s]  %(levelname)s: %(message)s'
LOG_DATEFORMAT = '%Y-%m-%d %H:%M:%S'

IS_HTTP_HANDLER = 1  # whether the HTTP handler is enabled
HTTP_LOG_PATH = os.getenv("LOG_PATH", "/Users/wu/Work/youmi/projects/ym-crawler-ccs")  # log output directory
# HTTP_LOG_FILE = os.getenv("LOG_FILE", "crawler/kuaishou/goods_detail.log")  # log output file
HTTP_LOG_FILE = os.getenv("LOG_FILE", "hhh/goods_detail.log")  # log output file
# HTTP_LOG_FILE = os.getenv("LOG_FILE", "")  # log output file
HTTP_LOG_HOST = os.getenv("LOG_HOST", "http://127.0.0.1:5000")  # HTTP handler host
# Number of writes the log server buffers per file before flushing. Kept as an
# int because the server compares it against an integer counter.
HTTP_LOG_FLUSH_NUM = _env_int("LOG_FLUSH_NUM", 10)

服务端

log_server.py

# -*- coding: utf-8 -*-
# @Time   : 2021/1/13 下午2:28
# @Author : wu
"""
pip install uvicorn
pip install fastapi
"""
import os

import aiofiles
from fastapi import FastAPI
import uvicorn

from config import config

app = FastAPI()
# Appended to any requested file name that does not already end with it.
suffix = ".log"
# File paths whose parent directory has already been checked/created by _mkdir.
file_cache = set()
# Maps file path -> {"fd": open aiofiles handle, "flush_num": writes since the last flush}.
fd_cache = {}


@app.get("/")
async def log(file: str, msg: str):
    """Append ``msg`` as one line to the log file named by ``file``.

    Args:
        file: Target file path; ``suffix`` is appended when it is missing.
        msg: Text to write; a newline is added by ``write2file``.
    """
    # NOTE(security): ``file`` comes straight from the query string and is used
    # as a filesystem path, so any client that can reach this endpoint can
    # create or append to any ``.log`` path the process may write. Restrict it
    # to a base directory if the server is reachable by untrusted clients.
    if not file.endswith(suffix):
        file += suffix

    # Make sure the parent directory exists.
    await _mkdir(file)

    entry = fd_cache.get(file)
    if entry is None:
        fd = await aiofiles.open(file, "a+")
        # Another request may have opened the same file while this one was
        # awaiting; keep whichever handle was cached first and close the
        # duplicate so it is not leaked.
        entry = fd_cache.setdefault(file, {"fd": fd, "flush_num": 0})
        if entry["fd"] is not fd:
            await fd.close()

    # Write the message.
    await write2file(entry, file, msg)


async def _mkdir(file: str):
    if file in file_cache:
        return
    path = os.path.dirname(file)
    if not os.path.exists(path):
        os.makedirs(path)
    file_cache.add(file)


async def write2file(f, file: str, msg: str):
    """Write ``msg`` plus a newline to a cached file handle, flushing periodically.

    Args:
        f: Cache entry with keys ``"fd"`` (open file handle) and
            ``"flush_num"`` (writes since the last flush).
        file: Path of the file behind ``f``; used only to recreate its directory.
        msg: Text to append.
    """
    try:
        await f["fd"].write(msg + "\n")
        f["flush_num"] += 1
        # The threshold may arrive as a string when it comes from an
        # environment variable; coerce it so the comparison cannot raise TypeError.
        if f["flush_num"] >= int(config.HTTP_LOG_FLUSH_NUM):
            f["flush_num"] = 0
            await f["fd"].flush()
    except FileNotFoundError:
        # Guards against the directory being deleted while the server runs.
        # NOTE: only the directory is recreated; the message that hit the
        # error is not written again.
        path = os.path.dirname(file)
        if path:
            os.makedirs(path, exist_ok=True)


if __name__ == "__main__":
    # Direct execution serves the app with uvicorn on localhost.
    # Multi-worker alternative:
    #   gunicorn -w=3 -k uvicorn.workers.UvicornH11Worker utils.log_server:app -b 127.0.0.1:5000
    uvicorn.run(app, host="127.0.0.1", port=5000, log_level="info")

supervisor 部署服务端

/etc/supervisor/conf.d/log_server.conf


; Four supervisor programs, one per port (5001-5004). Each starts gunicorn with
; 10 uvicorn workers serving utils.log_server:app; the sections differ only in
; the bound port and the log file names.
; NOTE(review): ding_at_mobiles is not a built-in supervisord option as far as
; this reviewer knows; presumably it is read by an external alerting plugin.
; Confirm before relying on it.
[program:server_5001]
directory = /home/ymserver/vhost/ym-crawler-ccs
command = /home/ymserver/.virtualenvs/ym-crawler-ccs/bin/gunicorn -w=10 -k uvicorn.workers.UvicornH11Worker utils.log_server:app -b 172.19.43.201:5001
process_name = %(program_name)s_%(process_num)02d
numprocs = 1
autostart = true
startsecs = 5
autorestart = true
killasgroup=true
stopasgroup=true
startretries = 3
user = ymserver
stdout_logfile_maxbytes = 0
stderr_logfile_maxbytes = 0
stderr_logfile = /data/log/ym-crawler-ccs/scripts/http_handler_server/server_5001.err.log
stdout_logfile = /data/log/ym-crawler-ccs/scripts/http_handler_server/server_5001.out.log
ding_at_mobiles=13763315112

; Same as server_5001, bound to port 5002.
[program:server_5002]
directory = /home/ymserver/vhost/ym-crawler-ccs
command = /home/ymserver/.virtualenvs/ym-crawler-ccs/bin/gunicorn -w=10 -k uvicorn.workers.UvicornH11Worker utils.log_server:app -b 172.19.43.201:5002
process_name = %(program_name)s_%(process_num)02d
numprocs = 1
autostart = true
startsecs = 5
autorestart = true
killasgroup=true
stopasgroup=true
startretries = 3
user = ymserver
stdout_logfile_maxbytes = 0
stderr_logfile_maxbytes = 0
stderr_logfile = /data/log/ym-crawler-ccs/scripts/http_handler_server/server_5002.err.log
stdout_logfile = /data/log/ym-crawler-ccs/scripts/http_handler_server/server_5002.out.log
ding_at_mobiles=13763315112

; Same as server_5001, bound to port 5003.
[program:server_5003]
directory = /home/ymserver/vhost/ym-crawler-ccs
command = /home/ymserver/.virtualenvs/ym-crawler-ccs/bin/gunicorn -w=10 -k uvicorn.workers.UvicornH11Worker utils.log_server:app -b 172.19.43.201:5003
process_name = %(program_name)s_%(process_num)02d
numprocs = 1
autostart = true
startsecs = 5
autorestart = true
killasgroup=true
stopasgroup=true
startretries = 3
user = ymserver
stdout_logfile_maxbytes = 0
stderr_logfile_maxbytes = 0
stderr_logfile = /data/log/ym-crawler-ccs/scripts/http_handler_server/server_5003.err.log
stdout_logfile = /data/log/ym-crawler-ccs/scripts/http_handler_server/server_5003.out.log
ding_at_mobiles=13763315112


; Same as server_5001, bound to port 5004.
[program:server_5004]
directory = /home/ymserver/vhost/ym-crawler-ccs
command = /home/ymserver/.virtualenvs/ym-crawler-ccs/bin/gunicorn -w=10 -k uvicorn.workers.UvicornH11Worker utils.log_server:app -b 172.19.43.201:5004
process_name = %(program_name)s_%(process_num)02d
numprocs = 1
autostart = true
startsecs = 5
autorestart = true
killasgroup=true
stopasgroup=true
startretries = 3
user = ymserver
stdout_logfile_maxbytes = 0
stderr_logfile_maxbytes = 0
stderr_logfile = /data/log/ym-crawler-ccs/scripts/http_handler_server/server_5004.err.log
stdout_logfile = /data/log/ym-crawler-ccs/scripts/http_handler_server/server_5004.out.log
ding_at_mobiles=13763315112

; Puts the four programs in one group so they can be managed together.
[group:log_server]
programs=server_5001,server_5002,server_5004,server_5003
priority=999

nginx 负载均衡

/etc/nginx/sites-enabled/log-server.conf

# Backends for the log server; one entry per gunicorn instance started by supervisor.
upstream backserver {
        server 172.19.43.201:5001;
        server 172.19.43.201:5002;
        server 172.19.43.201:5003;
        server 172.19.43.201:5004;
        # Port 5005 has no matching supervisor program (only server_5001 to
        # server_5004 are defined), so requests routed to it would fail.
        # Re-enable this line once a server_5005 program exists.
        # server 172.19.43.201:5005;
}

# Front door for log clients: listens on port 5000 and proxies every request
# to the backserver upstream.
server {
    listen 5000;
    # "_" never matches a real host name; it is the conventional catch-all.
    server_name _;
    location / {
        proxy_pass http://backserver;
        # Forward the host and port the client originally addressed.
        proxy_set_header Host $host:$server_port;
    }
}
本作品采用《CC 协议》,转载必须注明作者和本文链接
讨论数量: 0
(= ̄ω ̄=)··· 暂无内容!

讨论应以学习和精进为目的。请勿发布不友善或者负能量的内容,与人为善,比聪明更重要!