Hero Image
怎么选择 Go 文件读取方案

怎么选择 Go 文件读取方案 Go 提供了可一次性读取文件内容的方法:os.ReadFile 与 ioutil.ReadFile。从 Go 1.16 开始,ioutil.ReadFile 就等价于 os.ReadFile。 一次性加载文件的优缺点非常明显,它能减少 IO 次数,但它会将文件内容都加载至内存中,对于大文件,存在内存撑爆的风险。 逐行读取 Go 中 bufio.Reader 对象提供了一个 ReadLine() 方法,但其实我们更多地是使用 ReadBytes('\n') 或者 ReadString('\n') 代替。 func ReadLines(filename string) { fi, err := os.Open(filename) if err != nil{ panic(err) } defer fi.Close() reader := bufio.NewReader(fi) for { _, err = reader.ReadString('\n') if err != nil { if err == io.EOF { break } panic(err) } } } 块读取 块读取也称为分片读取,这也很好理解,我们可以将内容分成一块块的,每次读取指定大小的块内容。这里,我们将块大小设置为 4KB。 func ReadChunk(filename string) { f, err := os.Open(filename) if err != nil { panic(err) } defer f.Close() buf := make([]byte, 4*1024) r := bufio.NewReader(f) for { _, err = r.Read(buf) if err != nil { if err == io.EOF { break } panic(err) } } } result BenchmarkOsReadFile4KB-8 92877 12491 ns/op BenchmarkOsReadFile4MB-8 1620 744460 ns/op BenchmarkOsReadFile4GB-8 1 7518057733 ns/op signal: killed BenchmarkReadLines4KB-8 90846 13184 ns/op BenchmarkReadLines4MB-8 493 2338170 ns/op BenchmarkReadLines4GB-8 1 3072629047 ns/op BenchmarkReadLines16GB-8 1 12472749187 ns/op BenchmarkReadChunk4KB-8 99848 12262 ns/op BenchmarkReadChunk4MB-8 913 1233216 ns/op BenchmarkReadChunk4GB-8 1 2095515009 ns/op BenchmarkReadChunk16GB-8 1 8547054349 ns/op 在本文的测试条件下(每行数据 1KB),对于小对象 4KB 的读取,三种方式差距并不大;在 MB 级别的读取中,直接加载最快,但块读取也慢不了多少;上了 GB 后,块读取方式会最快。

Hero Image
25秒读取16GB文件,Go怎么做到的?

25 秒读取 16GB 文件,Go 怎么做到的? Reading 16GB File in Seconds, Golang 打开文件后,我们有以下两个选项可以选择: 逐行读取文件,这有助于减少内存紧张,但需要更多的时间。 一次将整个文件读入内存并处理该文件,这将消耗更多内存,但会显著减少时间。 由于文件太大,即 16 GB,因此无法将整个文件加载到内存中。但是第一种选择对我们来说也是不可行的,因为我们希望在几秒钟内处理文件。 但你猜怎么着,还有第三种选择。瞧……相比于将整个文件加载到内存中,在 Go 语言中,我们还可以使用 bufio.NewReader()将文件分块加载。 func main() { s := time.Now() args := os.Args[1:] if len(args) != 6 { // for format LogExtractor.exe -f "From Time" -t "To Time" -i "Log file directory location" fmt.Println("Please give proper command line arguments") return } startTimeArg := args[1] finishTimeArg := args[3] fileName := args[5] file, err := os.Open(fileName) if err != nil { fmt.Println("cannot able to read the file", err) return } defer file.Close() // close after checking err queryStartTime, err := time.Parse("2006-01-02T15:04:05.0000Z", startTimeArg) if err != nil { fmt.Println("Could not able to parse the start time", startTimeArg) return } queryFinishTime, err := time.Parse("2006-01-02T15:04:05.0000Z", finishTimeArg) if err != nil { fmt.Println("Could not able to parse the finish time", finishTimeArg) return } filestat, err := file.Stat() if err != nil { fmt.Println("Could not able to get the file stat") return } fileSize := filestat.Size() offset := fileSize - 1 lastLineSize := 0 for { b := make([]byte, 1) n, err := file.ReadAt(b, offset) if err != nil { fmt.Println("Error reading file ", err) break } char := string(b[0]) if char == "\n" { break } offset-- lastLineSize += n } lastLine := make([]byte, lastLineSize) _, err = file.ReadAt(lastLine, offset+1) if err != nil { fmt.Println("Could not able to read last line with offset", offset, "and lastline size", lastLineSize) return } logSlice := strings.SplitN(string(lastLine), ",", 2) logCreationTimeString := logSlice[0] lastLogCreationTime, err := time.Parse("2006-01-02T15:04:05.0000Z", logCreationTimeString) if err != nil { fmt.Println("can not able to parse time : ", err) } if lastLogCreationTime.After(queryStartTime) && 
lastLogCreationTime.Before(queryFinishTime) { Process(file, queryStartTime, queryFinishTime) } fmt.Println("\nTime taken - ", time.Since(s)) } func Process(f *os.File, start time.Time, end time.Time) error { linesPool := sync.Pool{New: func() interface{} { lines := make([]byte, 250*1024) return lines }} stringPool := sync.Pool{New: func() interface{} { lines := "" return lines }} r := bufio.NewReader(f) var wg sync.WaitGroup for { buf := linesPool.Get().([]byte) n, err := r.Read(buf) buf = buf[:n] if n == 0 { if err != nil { fmt.Println(err) break } if err == io.EOF { break } return err } nextUntillNewline, err := r.ReadBytes('\n') if err != io.EOF { buf = append(buf, nextUntillNewline...) } wg.Add(1) go func() { ProcessChunk(buf, &linesPool, &stringPool, start, end) wg.Done() }() } wg.Wait() return nil } func ProcessChunk(chunk []byte, linesPool *sync.Pool, stringPool *sync.Pool, start time.Time, end time.Time) { var wg2 sync.WaitGroup logs := stringPool.Get().(string) logs = string(chunk) linesPool.Put(chunk) logsSlice := strings.Split(logs, "\n") stringPool.Put(logs) chunkSize := 300 n := len(logsSlice) noOfThread := n / chunkSize if n%chunkSize != 0 { noOfThread++ } for i := 0; i < (noOfThread); i++ { wg2.Add(1) go func(s int, e int) { defer wg2.Done() // to avaoid deadlocks for i := s; i < e; i++ { text := logsSlice[i] if len(text) == 0 { continue } logSlice := strings.SplitN(text, ",", 2) logCreationTimeString := logSlice[0] logCreationTime, err := time.Parse("2006-01-02T15:04:05.0000Z", logCreationTimeString) if err != nil { fmt.Printf("\n Could not able to parse the time :%s for log : %v", logCreationTimeString, text) return } if logCreationTime.After(start) && logCreationTime.Before(end) { // fmt.Println(text) } } }(i*chunkSize, int(math.Min(float64((i+1)*chunkSize), float64(len(logsSlice))))) } wg2.Wait() logsSlice = nil }

Hero Image
Gin 框架绑定 JSON 参数使用 jsoniter

Gin 框架绑定 JSON 参数使用 jsoniter simple go build -tags=jsoniter ./... custom implement BindingBody interface // github.com/gin-gonic/gin@v1.6.3/binding/binding.go:36 // Binding describes the interface which needs to be implemented for binding the // data present in the request such as JSON request body, query parameters or // the form POST. type Binding interface { Name() string Bind(*http.Request, interface{}) error } // BindingBody adds BindBody method to Binding. BindBody is similar with Bind, // but it reads the body from supplied bytes instead of req.Body. type BindingBody interface { Binding BindBody([]byte, interface{}) error } package custom import ( "bytes" "fmt" "io" "net/http" jsoniter "github.com/json-iterator/go" "github.com/gin-gonic/gin/binding" ) // BindingJSON 替换Gin默认的binding,支持更丰富JSON功能 var BindingJSON = jsonBinding{} // 可以自定义jsoniter配置或者添加插件 var json = jsoniter.ConfigCompatibleWithStandardLibrary type jsonBinding struct{} func (jsonBinding) Name() string { return "json" } func (jsonBinding) Bind(req *http.Request, obj interface{}) error { if req == nil || req.Body == nil { return fmt.Errorf("invalid request") } return decodeJSON(req.Body, obj) } func (jsonBinding) BindBody(body []byte, obj interface{}) error { return decodeJSON(bytes.NewReader(body), obj) } func decodeJSON(r io.Reader, obj interface{}) error { decoder := json.NewDecoder(r) if binding.EnableDecoderUseNumber { decoder.UseNumber() } if binding.EnableDecoderDisallowUnknownFields { decoder.DisallowUnknownFields() } if err := decoder.Decode(obj); err != nil { return err } return validate(obj) } func validate(obj interface{}) error { if binding.Validator == nil { return nil } return binding.Validator.ValidateStruct(obj) } // binding.JSON 替换成自定义的 ctx.ShouldBindWith(ms, binding.JSON) ctx.ShouldBindBodyWith(ms, binding.JSON)

Hero Image
Golang基准测试

Golang 基准测试 基本使用 基准测试常用于代码性能测试,函数需要导入 testing 包,并定义以 Benchmark 开头的函数, 参数为 testing.B 指针类型,在测试函数中循环调用函数多次 ➜ go test -bench=. -run=none goos: darwin goarch: amd64 pkg: pkg06 cpu: Intel(R) Core(TM) i7-8850H CPU @ 2.60GHz BenchmarkFib-12 250 4682682 ns/op PASS ok pkg06 1.875s ➜ go test -bench=. -benchmem -run=none goos: darwin goarch: amd64 pkg: pkg06 cpu: Intel(R) Core(TM) i7-8850H CPU @ 2.60GHz BenchmarkFib-12 249 4686452 ns/op 0 B/op 0 allocs/op PASS ok pkg06 1.854s bench 的工作原理 基准测试函数会被一直调用直到 b.N 无效,它是基准测试循环的次数 b.N 从 1 开始,如果基准测试函数在 1 秒内就完成 (默认值),则 b.N 增加,并再次运行基准测试函数 b.N 的值会按照序列 1,2,5,10,20,50,... 增加,同时再次运行基准测试函数 上述结果解读代表 1 秒内运行了 250 次,每次 4682682 ns -12 后缀和用于运行此测试的 GOMAXPROCS 值有关。与 GOMAXPROCS 一样,此数字默认为启动时 Go 进程可见的 CPU 数。可以使用 -cpu 标识更改此值,可以传入多个值以列表形式来运行基准测试 传入 cpu num 进行测试 ➜ go test -bench=. -cpu=1,2,4 -benchmem -run=none goos: darwin goarch: amd64 pkg: pkg06 cpu: Intel(R) Core(TM) i7-8850H CPU @ 2.60GHz BenchmarkFib 244 4694667 ns/op 0 B/op 0 allocs/op BenchmarkFib-2 255 4721201 ns/op 0 B/op 0 allocs/op BenchmarkFib-4 256 4756392 ns/op 0 B/op 0 allocs/op PASS ok pkg06 5.826s count 多次运行基准测试 因为热缩放、内存局部性、后台处理、gc 活动等等会导致单次的误差,所以一般会进行多次测试

Hero Image
Gin中文文档

自定义路由日志的格式 Gin 运行多个服务 XML、JSON、YAML 和 ProtoBuf 渲染(输出格式) 自定义路由日志的格式 default [GIN-debug] POST /foo --> main.main.func1 (3 handlers) [GIN-debug] GET /bar --> main.main.func2 (3 handlers) [GIN-debug] GET /status --> main.main.func3 (3 handlers) import ( "log" "net/http" "github.com/gin-gonic/gin" ) func main() { r := gin.Default() gin.DebugPrintRouteFunc = func(httpMethod, absolutePath, handlerName string, nuHandlers int) { log.Printf("endpoint %v %v %v %v\n", httpMethod, absolutePath, handlerName, nuHandlers) } r.POST("/foo", func(c *gin.Context) { c.JSON(http.StatusOK, "foo") }) r.GET("/bar", func(c *gin.Context) { c.JSON(http.StatusOK, "bar") }) r.GET("/status", func(c *gin.Context) { c.JSON(http.StatusOK, "ok") }) // Listen and Server in http://0.0.0.0:8080 r.Run() } Gin 运行多个服务 package main import ( "log" "net/http" "time" "github.com/gin-gonic/gin" "golang.org/x/sync/errgroup" ) var ( g errgroup.Group ) func router01() http.Handler { e := gin.New() e.Use(gin.Recovery()) e.GET("/", func(c *gin.Context) { c.JSON( http.StatusOK, gin.H{ "code": http.StatusOK, "error": "Welcome server 01", }, ) }) return e } func router02() http.Handler { e := gin.New() e.Use(gin.Recovery()) e.GET("/", func(c *gin.Context) { c.JSON( http.StatusOK, gin.H{ "code": http.StatusOK, "error": "Welcome server 02", }, ) }) return e } func main() { server01 := &http.Server{ Addr: ":8080", Handler: router01(), ReadTimeout: 5 * time.Second, WriteTimeout: 10 * time.Second, } server02 := &http.Server{ Addr: ":8081", Handler: router02(), ReadTimeout: 5 * time.Second, WriteTimeout: 10 * time.Second, } g.Go(func() error { return server01.ListenAndServe() }) g.Go(func() error { return server02.ListenAndServe() }) if err := g.Wait(); err != nil { log.Fatal(err) } } XML、JSON、YAML 和 ProtoBuf 渲染(输出格式) SecureJSON 使用 SecureJSON 可以防止 json 劫持,如果返回的数据是数组,则会默认在返回值前加上"while(1);"