aiohttphandler
本模块是自己封装的。客户端使用的是 aiohttp 来封装 handler,服务端使用的是 fastapi 和 aiofiles。
但是在日志量过大的情况下,这套方案(尤其是服务端)仍然撑不住。
客户端
log.py
# -*- coding:utf8 -*-
"""日志工具"""
import asyncio
import logging
from logging.handlers import HTTPHandler
from os.path import join
import aiohttp
from config import config
loop = asyncio.get_event_loop()
def get_logger(name, level=config.LOG_LEVEL, log_format=config.LOG_FORMAT, date_fmt=None):
    """Return the logger called ``name``, configuring it on first use.

    Args:
        name: name of the logger instance (required).
        level: log level applied to the logger.
        log_format: format string used by every handler attached here.
        date_fmt: timestamp format; ``config.LOG_DATEFORMAT`` is used when
            this is falsy.

    Returns:
        The ``logging.Logger`` registered under ``name``.
    """
    log = logging.getLogger(name)
    log.propagate = 0

    # A logger that already owns handlers was configured by an earlier call;
    # attaching more would duplicate every record.
    if not log.handlers:
        log.setLevel(level=level)
        fmt = logging.Formatter(log_format, datefmt=date_fmt or config.LOG_DATEFORMAT)

        def _attach(handler):
            # Give the handler the shared formatter and register it.
            handler.setFormatter(fmt)
            log.addHandler(handler)

        _attach(logging.StreamHandler())

        # The HTTP handler is only added when host, path and file are all set.
        if config.HTTP_LOG_HOST and config.HTTP_LOG_PATH and config.HTTP_LOG_FILE:
            _attach(AioHttpHandler(host=config.HTTP_LOG_HOST, uri=""))

    return log
async def handler_status(status):
    """Receive the HTTP status of a log submission and ignore it.

    Placeholder hook: the status is accepted but nothing is done with it.
    """
    # Intentionally a no-op; the argument is not inspected.
    return None
class AioHttpHandler(HTTPHandler):
    """Logging handler that sends each formatted record to an HTTP endpoint with aiohttp.

    The request carries two fields: ``msg`` (the formatted record) and
    ``file`` (``config.HTTP_LOG_PATH`` joined with ``config.HTTP_LOG_FILE``).
    """

    def __init__(self, host, uri, method="GET"):
        """Store the target URL and the HTTP method.

        Args:
            host: base address of the log server, e.g. ``http://127.0.0.1:5000``.
            uri: path on the server; may be empty.
            method: ``"GET"`` or ``"POST"`` (case-insensitive).

        Raises:
            ValueError: if ``method`` is neither GET nor POST.
        """
        # Only the base Handler initializer runs; HTTPHandler.__init__ is not
        # called, so its attributes (secure, credentials, ...) are not set.
        logging.Handler.__init__(self)
        method = method.upper()
        if method not in ("GET", "POST"):
            raise ValueError("method must be GET or POST")
        # Join with exactly one slash so "http://h/" + "/log" does not
        # become "http://h///log".
        self.url = "%s/%s" % (host.rstrip("/"), uri.lstrip("/"))
        self.method = method

    async def submit(self, data):
        """Send one formatted log message to the server.

        GET sends the fields as query parameters, POST as a form-encoded
        body. A new ``ClientSession`` with a 10 second total timeout is
        created for every call.

        Args:
            data: the formatted log message.

        Returns:
            True once the response status has been handed to ``handler_status``.
        """
        timeout = aiohttp.ClientTimeout(total=10)
        # Strip leading slashes so join() does not discard HTTP_LOG_PATH.
        relative_file = config.HTTP_LOG_FILE.lstrip("/")
        body = {"msg": data, "file": join(config.HTTP_LOG_PATH, relative_file)}
        if self.method == "GET":
            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.get(self.url, params=body) as resp:
                    await handler_status(resp.status)
        else:
            headers = {
                "Content-type": "application/x-www-form-urlencoded",
            }
            async with aiohttp.ClientSession(timeout=timeout, headers=headers) as session:
                async with session.post(self.url, data=body) as resp:
                    await handler_status(resp.status)
        return True

    def _report_failure(self, future, record):
        """Done-callback for scheduled submissions: report a failure via handleError."""
        if future.cancelled():
            return
        try:
            future.result()
        except Exception:
            self.handleError(record)

    def emit(self, record):
        """Format ``record`` and submit it on the module-level event loop.

        When the loop is idle the call blocks until the request finishes.
        When the loop is already running (for example when logging from a
        coroutine), blocking on it would raise ``RuntimeError``, so the
        submission is scheduled on the loop instead and emit returns
        immediately. Any failure is routed to ``handleError`` and is not
        allowed to escape into the code that logged the message.
        """
        coro = None
        try:
            msg = self.format(record)
            coro = self.submit(msg)
            if loop.is_running():
                pending = asyncio.run_coroutine_threadsafe(coro, loop)
                pending.add_done_callback(
                    lambda future: self._report_failure(future, record)
                )
            else:
                loop.run_until_complete(coro)
        except Exception:
            if coro is not None:
                # Avoid a "coroutine was never awaited" warning when the loop
                # refused to run it; a no-op if it already finished.
                coro.close()
            self.handleError(record)
config.py
import logging
import os
# Logging configuration.
# os.getenv returns a str whenever the variable is set, so values that other
# code uses as numbers are converted here.


def _read_log_level():
    """Return LOG_LEVEL from the environment as an int or an upper-case level name."""
    raw = os.getenv("LOG_LEVEL", "")
    if raw.isdigit():
        return int(raw)
    # An unset or empty variable falls back to INFO.
    return raw.upper() or logging.INFO


LOG_LEVEL = _read_log_level()
LOG_FORMAT = '%(asctime)s [%(name)s %(lineno)s] %(levelname)s: %(message)s'
LOG_DATEFORMAT = '%Y-%m-%d %H:%M:%S'
IS_HTTP_HANDLER = 1  # whether the http handler is enabled
HTTP_LOG_PATH = os.getenv("LOG_PATH", "/Users/wu/Work/youmi/projects/ym-crawler-ccs")  # log output directory
# HTTP_LOG_FILE = os.getenv("LOG_FILE", "crawler/kuaishou/goods_detail.log")  # log output file
HTTP_LOG_FILE = os.getenv("LOG_FILE", "hhh/goods_detail.log")  # log output file
# HTTP_LOG_FILE = os.getenv("LOG_FILE", "")  # log output file
HTTP_LOG_HOST = os.getenv("LOG_HOST", "http://127.0.0.1:5000")  # http handler host
# Number of writes the server buffers per file before flushing; kept as an int
# because the server compares it with an integer counter.
HTTP_LOG_FLUSH_NUM = int(os.getenv("LOG_FLUSH_NUM", 10))
服务端
log_server.py
# -*- coding: utf-8 -*-
# @Time : 2021/1/13 下午2:28
# @Author : wu
"""
pip install uvicorn
pip install fastapi
"""
import os
import aiofiles
from fastapi import FastAPI
import uvicorn
from config import config
# ASGI application exposing the log-collecting endpoint.
app = FastAPI()
# Extension appended to requested file names that do not already end with it.
suffix = ".log"
# File paths for which _mkdir has already run.
file_cache = set()
# Per-file state: path -> {"fd": open aiofiles handle, "flush_num": writes since last flush}.
fd_cache = dict()
@app.get("/")
async def log(file: str, msg: str):
    """Append ``msg`` as one line to the log file named by ``file``.

    Args:
        file: target path; ``suffix`` is appended when it is missing.
        msg: the log line to write (a newline is added by ``write2file``).
    """
    # SECURITY NOTE(review): `file` comes straight from the query string, so
    # any client that can reach this endpoint can append to any path the
    # server process may write. Consider confining it to a fixed log root.
    if not file.endswith(suffix):
        file += suffix
    # Make sure the parent directory exists.
    await _mkdir(file)

    entry = fd_cache.get(file)
    if entry is None:
        fd = await aiofiles.open(file, "a+")
        # Another request for the same file may have opened it while this
        # one was suspended in open(); re-check so only one handle is kept
        # and the extra one is closed instead of leaked.
        entry = fd_cache.get(file)
        if entry is None:
            entry = fd_cache[file] = {"fd": fd, "flush_num": 0}
        else:
            await fd.close()

    # Write the line.
    await write2file(entry, file, msg)
async def _mkdir(file: str):
    """Ensure the parent directory of ``file`` exists, once per file path.

    Paths already recorded in ``file_cache`` are skipped.

    Args:
        file: path of the log file about to be opened.
    """
    if file in file_cache:
        return
    path = os.path.dirname(file)
    # A bare file name has no directory part; os.makedirs("") would raise.
    if path and not os.path.exists(path):
        # exist_ok covers another worker creating the directory between the
        # exists() check and this call.
        os.makedirs(path, exist_ok=True)
    file_cache.add(file)
async def write2file(f, file: str, msg: str):
    """Write ``msg`` plus a newline through a cached handle, flushing periodically.

    Args:
        f: cache entry with keys ``"fd"`` (object with async ``write`` and
            ``flush``) and ``"flush_num"`` (writes since the last flush).
        file: path of the log file; only used to recreate its directory.
        msg: the line to write.
    """
    try:
        await f["fd"].write(msg + "\n")
        f["flush_num"] += 1
        # The threshold may arrive as a str when it comes from an environment
        # variable; comparing int >= str would raise TypeError.
        if f["flush_num"] >= int(config.HTTP_LOG_FLUSH_NUM):
            f["flush_num"] = 0
            await f["fd"].flush()
    except FileNotFoundError:
        # Guard against the directory being deleted while the server runs.
        # The message that hit the error is not retried.
        path = os.path.dirname(file)
        if path:
            os.makedirs(path, exist_ok=True)
if __name__ == "__main__":
    # Run a single uvicorn server when the file is executed directly.
    # Multi-worker alternative:
    #   gunicorn -w=3 -k uvicorn.workers.UvicornH11Worker utils.log_server:app -b 127.0.0.1:5000
    server_options = {"host": "127.0.0.1", "port": 5000, "log_level": "info"}
    uvicorn.run(app=app, **server_options)
supervisor 部署服务端
/etc/supervisor/conf.d/log_server.conf
[program:server_5001]
directory = /home/ymserver/vhost/ym-crawler-ccs
command = /home/ymserver/.virtualenvs/ym-crawler-ccs/bin/gunicorn -w=10 -k uvicorn.workers.UvicornH11Worker utils.log_server:app -b 172.19.43.201:5001
process_name = %(program_name)s_%(process_num)02d
numprocs = 1
autostart = true
startsecs = 5
autorestart = true
killasgroup=true
stopasgroup=true
startretries = 3
user = ymserver
stdout_logfile_maxbytes = 0
stderr_logfile_maxbytes = 0
stderr_logfile = /data/log/ym-crawler-ccs/scripts/http_handler_server/server_5001.err.log
stdout_logfile = /data/log/ym-crawler-ccs/scripts/http_handler_server/server_5001.out.log
ding_at_mobiles=13763315112
[program:server_5002]
directory = /home/ymserver/vhost/ym-crawler-ccs
command = /home/ymserver/.virtualenvs/ym-crawler-ccs/bin/gunicorn -w=10 -k uvicorn.workers.UvicornH11Worker utils.log_server:app -b 172.19.43.201:5002
process_name = %(program_name)s_%(process_num)02d
numprocs = 1
autostart = true
startsecs = 5
autorestart = true
killasgroup=true
stopasgroup=true
startretries = 3
user = ymserver
stdout_logfile_maxbytes = 0
stderr_logfile_maxbytes = 0
stderr_logfile = /data/log/ym-crawler-ccs/scripts/http_handler_server/server_5002.err.log
stdout_logfile = /data/log/ym-crawler-ccs/scripts/http_handler_server/server_5002.out.log
ding_at_mobiles=13763315112
[program:server_5003]
directory = /home/ymserver/vhost/ym-crawler-ccs
command = /home/ymserver/.virtualenvs/ym-crawler-ccs/bin/gunicorn -w=10 -k uvicorn.workers.UvicornH11Worker utils.log_server:app -b 172.19.43.201:5003
process_name = %(program_name)s_%(process_num)02d
numprocs = 1
autostart = true
startsecs = 5
autorestart = true
killasgroup=true
stopasgroup=true
startretries = 3
user = ymserver
stdout_logfile_maxbytes = 0
stderr_logfile_maxbytes = 0
stderr_logfile = /data/log/ym-crawler-ccs/scripts/http_handler_server/server_5003.err.log
stdout_logfile = /data/log/ym-crawler-ccs/scripts/http_handler_server/server_5003.out.log
ding_at_mobiles=13763315112
[program:server_5004]
directory = /home/ymserver/vhost/ym-crawler-ccs
command = /home/ymserver/.virtualenvs/ym-crawler-ccs/bin/gunicorn -w=10 -k uvicorn.workers.UvicornH11Worker utils.log_server:app -b 172.19.43.201:5004
process_name = %(program_name)s_%(process_num)02d
numprocs = 1
autostart = true
startsecs = 5
autorestart = true
killasgroup=true
stopasgroup=true
startretries = 3
user = ymserver
stdout_logfile_maxbytes = 0
stderr_logfile_maxbytes = 0
stderr_logfile = /data/log/ym-crawler-ccs/scripts/http_handler_server/server_5004.err.log
stdout_logfile = /data/log/ym-crawler-ccs/scripts/http_handler_server/server_5004.out.log
ding_at_mobiles=13763315112
[group:log_server]
programs=server_5001,server_5002,server_5004,server_5003
priority=999
nginx 负载均衡
/etc/nginx/sites-enabled/log-server.conf
upstream backserver {
    server 172.19.43.201:5001;
    server 172.19.43.201:5002;
    server 172.19.43.201:5003;
    server 172.19.43.201:5004;
    # The supervisor configuration shown with this file defines programs for
    # ports 5001-5004 only; enable this backend once a process listens on 5005.
    # server 172.19.43.201:5005;
}
server {
listen 5000;
server_name _;
location / {
proxy_pass http://backserver;
proxy_set_header Host $host:$server_port;
}
}
本作品采用《CC 协议》,转载必须注明作者和本文链接