通过http Request Range功能实现大文件下载
利用http的范围请求,可以对大文件进行并行下载。
步骤:
1.head请求查看url是否支持范围请求Accept-Ranges,如果支持,获取文件的大小Content-Length
2.多进程构造请求,并且在请求头添加range项
3.合并数据
package main
import (
"crypto/sha256"
"encoding/hex"
"errors"
"fmt"
"io/ioutil"
"log"
"mime"
"net/http"
"os"
"path/filepath"
"strconv"
"sync"
"time"
)
func parseFileInfoFrom(resp *http.Response) string {
contentDisposition := resp.Header.Get("Content-Disposition")
if contentDisposition != "" {
_, params, err := mime.ParseMediaType(contentDisposition)
if err != nil {
panic(err)
}
return params["filename"]
}
filename := filepath.Base(resp.Request.URL.Path)
return filename
}
// FileDownloader 文件下载器
type FileDownloader struct {
fileSize int
url string
outputFileName string
totalPart int //下载线程
outputDir string
doneFilePart []filePart
}
// NewFileDownloader .
func NewFileDownloader(url, outputFileName, outputDir string, totalPart int) *FileDownloader {
if outputDir == "" {
wd, err := os.Getwd() //获取当前工作目录
if err != nil {
log.Println(err)
}
outputDir = wd
}
return &FileDownloader{
fileSize: 0,
url: url,
outputFileName: outputFileName,
outputDir: outputDir,
totalPart: totalPart,
doneFilePart: make([]filePart, totalPart),
}
}
// filePart 文件分片
type filePart struct {
Index int //文件分片的序号
From int //开始byte
To int //解决byte
Data []byte //http下载得到的文件内容
}
func main() {
startTime := time.Now()
var url string //下载文件的地址
url = "https://download.jetbrains.com/go/goland-2020.2.2.dmg"
downloader := NewFileDownloader(url, "", "", 10)
if err := downloader.Run(); err != nil {
// fmt.Printf("\n%s", err)
log.Fatal(err)
}
fmt.Printf("\n 文件下载完成耗时: %f second\n", time.Now().Sub(startTime).Seconds())
}
// head 获取要下载的文件的基本信息(header) 使用HTTP Method Head
func (d *FileDownloader) head() (int, error) {
r, err := d.getNewRequest("HEAD")
if err != nil {
return 0, err
}
resp, err := http.DefaultClient.Do(r)
if err != nil {
return 0, err
}
if resp.StatusCode > 299 {
return 0, errors.New(fmt.Sprintf("Can't process, response is %v", resp.StatusCode))
}
//检查是否支持 断点续传
//https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Ranges
if resp.Header.Get("Accept-Ranges") != "bytes" {
return 0, errors.New("服务器不支持文件断点续传")
}
d.outputFileName = parseFileInfoFrom(resp)
//https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Length
return strconv.Atoi(resp.Header.Get("Content-Length"))
}
// Run 开始下载任务
func (d *FileDownloader) Run() error {
fileTotalSize, err := d.head()
if err != nil {
return err
}
d.fileSize = fileTotalSize
jobs := make([]filePart, d.totalPart)
eachSize := fileTotalSize / d.totalPart
for i := range jobs {
jobs[i].Index = i
if i == 0 {
jobs[i].From = 0
} else {
jobs[i].From = jobs[i-1].To + 1
}
if i < d.totalPart-1 {
jobs[i].To = jobs[i].From + eachSize
} else {
//the last filePart
jobs[i].To = fileTotalSize - 1
}
}
var wg sync.WaitGroup
for _, j := range jobs {
wg.Add(1)
go func(job filePart) {
defer wg.Done()
err := d.downloadPart(job)
if err != nil {
log.Println("下载文件失败:", err, job)
}
}(j)
}
wg.Wait()
return d.mergeFileParts()
}
// 下载分片
func (d FileDownloader) downloadPart(c filePart) error {
r, err := d.getNewRequest("GET")
if err != nil {
return err
}
log.Printf("开始[%d]下载from:%d to:%d\n", c.Index, c.From, c.To)
r.Header.Set("Range", fmt.Sprintf("bytes=%v-%v", c.From, c.To))
resp, err := http.DefaultClient.Do(r)
if err != nil {
return err
}
if resp.StatusCode > 299 {
return errors.New(fmt.Sprintf("服务器错误状态码: %v", resp.StatusCode))
}
defer resp.Body.Close()
bs, err := ioutil.ReadAll(resp.Body)
if err != nil {
return err
}
if len(bs) != (c.To - c.From + 1) {
return errors.New("下载文件分片长度错误")
}
c.Data = bs
d.doneFilePart[c.Index] = c
return nil
}
// getNewRequest 创建一个request
func (d FileDownloader) getNewRequest(method string) (*http.Request, error) {
r, err := http.NewRequest(
method,
d.url,
nil,
)
if err != nil {
return nil, err
}
r.Header.Set("User-Agent", "mojocn")
return r, nil
}
// mergeFileParts 合并下载的文件
func (d FileDownloader) mergeFileParts() error {
log.Println("开始合并文件")
path := filepath.Join(d.outputDir, d.outputFileName)
mergedFile, err := os.Create(path)
if err != nil {
return err
}
defer mergedFile.Close()
hash := sha256.New()
totalSize := 0
for _, s := range d.doneFilePart {
mergedFile.Write(s.Data)
hash.Write(s.Data)
totalSize += len(s.Data)
}
if totalSize != d.fileSize {
return errors.New("文件不完整")
}
//https://download.jetbrains.com/go/goland-2020.2.2.dmg.sha256?_ga=2.223142619.1968990594.1597453229-1195436307.1493100134
if hex.EncodeToString(hash.Sum(nil)) != "3af4660ef22f805008e6773ac25f9edbc17c2014af18019b7374afbed63d4744" {
return errors.New("文件损坏")
} else {
log.Println("文件SHA-256校验成功")
}
return nil
}