From 56fdc9c12addcc3ad3af96a532c744c8f2989192 Mon Sep 17 00:00:00 2001 From: Augusto Dwenger J Date: Wed, 21 Oct 2020 22:05:13 +0200 Subject: [PATCH 1/6] Change to gather the results for each file aynchronously --- dll.go | 36 +++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/dll.go b/dll.go index 768c27e..ee38abd 100644 --- a/dll.go +++ b/dll.go @@ -6,6 +6,7 @@ import ( "go/parser" "go/token" "os" + "sync" ) type report struct { @@ -84,16 +85,37 @@ func gather(source string, asFile bool) ([]*report, error) { func main() { if len(os.Args) < 2 { fmt.Fprintln(os.Stderr, "no source files supplied") + os.Exit(1) } - for _, source := range os.Args[1:] { - r, err := gather(source, true) - if err != nil { - fmt.Fprintf(os.Stderr, "error parsing %s: %s\n", source, err) - continue + files := os.Args[1:] + fileCount := len(files) + rc := make(chan []*report, fileCount) + + go func() { + var wg sync.WaitGroup + wg.Add(fileCount) + + for _, source := range files { + go func(source string) { + defer wg.Done() + r, err := gather(source, true) + if err != nil { + fmt.Fprintf(os.Stderr, "error parsing %s: %s\n", source, err) + rc <- []*report{} + return + } + rc <- r + }(source) } - for _, rep := range r { - fmt.Println(rep) + + wg.Wait() + close(rc) + }() + + for reports := range rc { + for _, report := range reports { + fmt.Println(report) } } } From 56d9df6aaa976f8ef94145a1877a876a969c182e Mon Sep 17 00:00:00 2001 From: Augusto Dwenger J Date: Thu, 22 Oct 2020 13:06:02 +0200 Subject: [PATCH 2/6] Improve performance by reducing the total amount of goroutines With this change it will no longer start for each file a goroutine it will start fewer goroutines and never more than the system cpu cores count. This will increase the performance on single core systems because it won't create unnessesary goroutines and multi core systems will still benefit from the reduced amount of goroutines. --- dll.go | 57 ++++++++++++++++++++++++++++++++++++----------- dll_test.go | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+), 13 deletions(-) diff --git a/dll.go b/dll.go index ee38abd..50b9f82 100644 --- a/dll.go +++ b/dll.go @@ -6,6 +6,7 @@ import ( "go/parser" "go/token" "os" + "runtime" "sync" ) @@ -90,32 +91,62 @@ func main() { files := os.Args[1:] fileCount := len(files) - rc := make(chan []*report, fileCount) + + reportsChannel := make(chan []*report, fileCount) + cpuCount := runtime.NumCPU() + filesPerCore := getFilesForEachCPUCore(files, cpuCount) go func() { var wg sync.WaitGroup - wg.Add(fileCount) - for _, source := range files { - go func(source string) { + for _, files := range filesPerCore { + wg.Add(1) + + go func(files []string) { defer wg.Done() - r, err := gather(source, true) - if err != nil { - fmt.Fprintf(os.Stderr, "error parsing %s: %s\n", source, err) - rc <- []*report{} - return + for _, source := range files { + r, err := gather(source, true) + if err != nil { + fmt.Fprintf(os.Stderr, "error parsing %s: %s\n", source, err) + reportsChannel <- []*report{} + return + } + reportsChannel <- r } - rc <- r - }(source) + }(files) } wg.Wait() - close(rc) + close(reportsChannel) }() - for reports := range rc { + for reports := range reportsChannel { for _, report := range reports { fmt.Println(report) } } } + +func getFilesForEachCPUCore(files []string, cpuCount int) [][]string { + fileCount := len(files) + + // early exit if only one file or one core exists -> no reason to use multithreading. + if cpuCount == 1 || fileCount == 1 { + return [][]string{files} + } + + for (fileCount % cpuCount) != 0 { + cpuCount-- + } + + filesPerCPU := fileCount / cpuCount + + fileParts := make([][]string, 0, cpuCount) + x := 0 + for i := 0; i < cpuCount; i++ { + fileParts = append(fileParts, files[x:(x+filesPerCPU)]) + x = x + filesPerCPU + } + + return fileParts +} diff --git a/dll_test.go b/dll_test.go index 5cf5f69..4837c76 100644 --- a/dll_test.go +++ b/dll_test.go @@ -136,3 +136,67 @@ func TestErrorParsing(t *testing.T) { t.Error("expected error but got nil") } } + +func TestGetFilesForEachCPUCore(t *testing.T) { + tests := []struct { + name string + files []string + cpuCount int + expected [][]string + }{ + { + name: "One file one core", + files: []string{"foo"}, + cpuCount: 1, + expected: [][]string{{"foo"}}, + }, + { + name: "Tow files one core", + files: []string{"foo", "bar"}, + cpuCount: 1, + expected: [][]string{{"foo", "bar"}}, + }, + { + name: "Tow files four cores", + files: []string{"foo", "bar"}, + cpuCount: 4, + expected: [][]string{{"foo"}, {"bar"}}, + }, + { + name: "One file tow cores", + files: []string{"foo"}, + cpuCount: 2, + expected: [][]string{{"foo"}}, + }, + { + name: "Four files tow cores", + files: []string{"foo", "bar", "foobar", "barfoo"}, + cpuCount: 2, + expected: [][]string{{"foo", "bar"}, {"foobar", "barfoo"}}, + }, + { + name: "Tow file three cores", + files: []string{"foo", "bar"}, + cpuCount: 3, + expected: [][]string{{"foo"}, {"bar"}}, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + got := getFilesForEachCPUCore(test.files, test.cpuCount) + + if len(got) != len(test.expected) { + t.Fatalf("Expect parts to be equal. got: '%d' slices but expected '%d'", len(got), len(test.expected)) + } + + for oi, gotPart := range got { + for i, gotString := range gotPart { + if gotString != test.expected[oi][i] { + t.Fatalf("Want: %v\n\nGot: %v", test.expected, got) + } + } + } + }) + } +} From 82777ccafa92a673328f51b194199e9be44855d2 Mon Sep 17 00:00:00 2001 From: Augusto Dwenger J Date: Thu, 22 Oct 2020 14:50:00 +0200 Subject: [PATCH 3/6] Rename getFilesForEachCPUCore to splitFilesIntoParts Refactor test to be easier to read. --- dll.go | 17 ++++++------ dll_test.go | 80 ++++++++++++++++++++++++++--------------------------- 2 files changed, 48 insertions(+), 49 deletions(-) diff --git a/dll.go b/dll.go index 50b9f82..2a5c768 100644 --- a/dll.go +++ b/dll.go @@ -94,7 +94,7 @@ func main() { reportsChannel := make(chan []*report, fileCount) cpuCount := runtime.NumCPU() - filesPerCore := getFilesForEachCPUCore(files, cpuCount) + filesPerCore := splitFilesIntoParts(files, cpuCount) go func() { var wg sync.WaitGroup @@ -127,23 +127,22 @@ func main() { } } -func getFilesForEachCPUCore(files []string, cpuCount int) [][]string { +func splitFilesIntoParts(files []string, parts int) [][]string { fileCount := len(files) - // early exit if only one file or one core exists -> no reason to use multithreading. - if cpuCount == 1 || fileCount == 1 { + if parts == 1 || fileCount == 1 { return [][]string{files} } - for (fileCount % cpuCount) != 0 { - cpuCount-- + for (fileCount % parts) != 0 { + parts-- } - filesPerCPU := fileCount / cpuCount + filesPerCPU := fileCount / parts - fileParts := make([][]string, 0, cpuCount) + fileParts := make([][]string, 0, parts) x := 0 - for i := 0; i < cpuCount; i++ { + for i := 0; i < parts; i++ { fileParts = append(fileParts, files[x:(x+filesPerCPU)]) x = x + filesPerCPU } diff --git a/dll_test.go b/dll_test.go index 4837c76..5da948d 100644 --- a/dll_test.go +++ b/dll_test.go @@ -137,65 +137,65 @@ func TestErrorParsing(t *testing.T) { } } -func TestGetFilesForEachCPUCore(t *testing.T) { +func Test_splitFilesIntoParts(t *testing.T) { + getFileArray := func(amount int) []string { + files := make([]string, 0, amount) + for i := 0; i < amount; i++ { + files = append(files, "foo") + } + return files + } + tests := []struct { - name string - files []string - cpuCount int - expected [][]string + name string + files []string + parts int + expectedParts int }{ { - name: "One file one core", - files: []string{"foo"}, - cpuCount: 1, - expected: [][]string{{"foo"}}, + name: "should split one file into one part", + files: getFileArray(1), + parts: 1, + expectedParts: 1, }, { - name: "Tow files one core", - files: []string{"foo", "bar"}, - cpuCount: 1, - expected: [][]string{{"foo", "bar"}}, + name: "should split two files into one part", + files: getFileArray(2), + parts: 1, + expectedParts: 1, }, { - name: "Tow files four cores", - files: []string{"foo", "bar"}, - cpuCount: 4, - expected: [][]string{{"foo"}, {"bar"}}, + name: "should split two files into four part", + files: getFileArray(2), + parts: 4, + expectedParts: 2, }, { - name: "One file tow cores", - files: []string{"foo"}, - cpuCount: 2, - expected: [][]string{{"foo"}}, + name: "should split one file into two part", + files: getFileArray(1), + parts: 2, + expectedParts: 1, }, { - name: "Four files tow cores", - files: []string{"foo", "bar", "foobar", "barfoo"}, - cpuCount: 2, - expected: [][]string{{"foo", "bar"}, {"foobar", "barfoo"}}, + name: "should split four files into two part", + files: getFileArray(4), + parts: 2, + expectedParts: 2, }, { - name: "Tow file three cores", - files: []string{"foo", "bar"}, - cpuCount: 3, - expected: [][]string{{"foo"}, {"bar"}}, + name: "should split two files into three part", + files: getFileArray(2), + parts: 3, + expectedParts: 2, }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { - got := getFilesForEachCPUCore(test.files, test.cpuCount) + got := splitFilesIntoParts(test.files, test.parts) - if len(got) != len(test.expected) { - t.Fatalf("Expect parts to be equal. got: '%d' slices but expected '%d'", len(got), len(test.expected)) - } - - for oi, gotPart := range got { - for i, gotString := range gotPart { - if gotString != test.expected[oi][i] { - t.Fatalf("Want: %v\n\nGot: %v", test.expected, got) - } - } + if len(got) != test.expectedParts { + t.Fatalf("Expect to split into '%d' but got '%d'", test.expectedParts, len(got)) } }) } From 73f18d87a9a799ddd75f139b7b4450bef13242e3 Mon Sep 17 00:00:00 2001 From: Augusto Dwenger J Date: Thu, 22 Oct 2020 16:41:10 +0200 Subject: [PATCH 4/6] Change splitting to try to split into as many parts as possible Now if you give it 10 files and tell it to split it into 3 parts it will not longer split it into 2 parts. It will split it into 3 parts and add the remaining files to the first part -> the first part will have in this case one file more than the rest. This and some other cases are now covered by tests. --- dll.go | 23 ++++++++++++++++------- dll_test.go | 30 ++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 7 deletions(-) diff --git a/dll.go b/dll.go index 2a5c768..e65ddd2 100644 --- a/dll.go +++ b/dll.go @@ -130,21 +130,30 @@ func main() { func splitFilesIntoParts(files []string, parts int) [][]string { fileCount := len(files) - if parts == 1 || fileCount == 1 { + if parts <= 1 { return [][]string{files} } - for (fileCount % parts) != 0 { - parts-- + // if there are more parts than files, it tries to find the next smallest number to destribute them equally. + if fileCount < parts { + for (fileCount % parts) != 0 { + parts-- + } } filesPerCPU := fileCount / parts - fileParts := make([][]string, 0, parts) - x := 0 + lastIndex := 0 for i := 0; i < parts; i++ { - fileParts = append(fileParts, files[x:(x+filesPerCPU)]) - x = x + filesPerCPU + fileParts = append(fileParts, files[lastIndex:(lastIndex+filesPerCPU)]) + lastIndex = lastIndex + filesPerCPU + } + + // if not all files could be splitted equally it adds the missing ones to the first part. + if filesPerCPU*parts != fileCount { + firstpart := fileParts[0] + firstpart = append(firstpart, files[filesPerCPU*parts:]...) + fileParts[0] = firstpart } return fileParts diff --git a/dll_test.go b/dll_test.go index 5da948d..ec1f8d9 100644 --- a/dll_test.go +++ b/dll_test.go @@ -158,6 +158,12 @@ func Test_splitFilesIntoParts(t *testing.T) { parts: 1, expectedParts: 1, }, + { + name: "should split one files into zero part", + files: getFileArray(1), + parts: 0, + expectedParts: 1, + }, { name: "should split two files into one part", files: getFileArray(2), @@ -188,6 +194,12 @@ func Test_splitFilesIntoParts(t *testing.T) { parts: 3, expectedParts: 2, }, + { + name: "should split ten files into three part", + files: getFileArray(10), + parts: 3, + expectedParts: 3, + }, } for _, test := range tests { @@ -197,6 +209,24 @@ func Test_splitFilesIntoParts(t *testing.T) { if len(got) != test.expectedParts { t.Fatalf("Expect to split into '%d' but got '%d'", test.expectedParts, len(got)) } + + for _, files := range got { + if len(files) < 1 { + t.Fatalf("Expected to contain at least on file but got none") + } + } }) } + + t.Run("should split empty files into one part", func(t *testing.T) { + files := []string{} + parts := 1 + + got := len(splitFilesIntoParts(files, parts)) + want := 1 + + if got != want { + t.Fatalf("Expected a length of %d but got %d", want, got) + } + }) } From 9a3daec2beea1c19400ba46910b96ced5da53202 Mon Sep 17 00:00:00 2001 From: Augusto Dwenger J Date: Thu, 22 Oct 2020 16:55:11 +0200 Subject: [PATCH 5/6] Rename func splitFilesIntoParts to splitArrayIntoParts with inner vars The name is now more descriptive and easier to understand. --- dll.go | 34 ++++++++++++++++---------------- dll_test.go | 56 ++++++++++++++++++++++++++--------------------------- 2 files changed, 45 insertions(+), 45 deletions(-) diff --git a/dll.go b/dll.go index e65ddd2..63a4838 100644 --- a/dll.go +++ b/dll.go @@ -94,7 +94,7 @@ func main() { reportsChannel := make(chan []*report, fileCount) cpuCount := runtime.NumCPU() - filesPerCore := splitFilesIntoParts(files, cpuCount) + filesPerCore := splitArrayIntoParts(files, cpuCount) go func() { var wg sync.WaitGroup @@ -127,34 +127,34 @@ func main() { } } -func splitFilesIntoParts(files []string, parts int) [][]string { - fileCount := len(files) +func splitArrayIntoParts(array []string, parts int) [][]string { + arraySize := len(array) if parts <= 1 { - return [][]string{files} + return [][]string{array} } - // if there are more parts than files, it tries to find the next smallest number to destribute them equally. - if fileCount < parts { - for (fileCount % parts) != 0 { + // if there are more parts than strings, it tries to find the next smallest number to destribute them equally. + if arraySize < parts { + for (arraySize % parts) != 0 { parts-- } } - filesPerCPU := fileCount / parts - fileParts := make([][]string, 0, parts) + stringsPerPart := arraySize / parts + arrayParts := make([][]string, 0, parts) lastIndex := 0 for i := 0; i < parts; i++ { - fileParts = append(fileParts, files[lastIndex:(lastIndex+filesPerCPU)]) - lastIndex = lastIndex + filesPerCPU + arrayParts = append(arrayParts, array[lastIndex:(lastIndex+stringsPerPart)]) + lastIndex = lastIndex + stringsPerPart } - // if not all files could be splitted equally it adds the missing ones to the first part. - if filesPerCPU*parts != fileCount { - firstpart := fileParts[0] - firstpart = append(firstpart, files[filesPerCPU*parts:]...) - fileParts[0] = firstpart + // if not all strings could be splitted equally it will adds the missing ones to the first part. + if stringsPerPart*parts != arraySize { + firstpart := arrayParts[0] + firstpart = append(firstpart, array[stringsPerPart*parts:]...) + arrayParts[0] = firstpart } - return fileParts + return arrayParts } diff --git a/dll_test.go b/dll_test.go index ec1f8d9..f7036f8 100644 --- a/dll_test.go +++ b/dll_test.go @@ -137,66 +137,66 @@ func TestErrorParsing(t *testing.T) { } } -func Test_splitFilesIntoParts(t *testing.T) { - getFileArray := func(amount int) []string { - files := make([]string, 0, amount) +func Test_splitArrayIntoParts(t *testing.T) { + getStringArray := func(amount int) []string { + strings := make([]string, 0, amount) for i := 0; i < amount; i++ { - files = append(files, "foo") + strings = append(strings, "foo") } - return files + return strings } tests := []struct { name string - files []string + strings []string parts int expectedParts int }{ { - name: "should split one file into one part", - files: getFileArray(1), + name: "should split the array with one string into one part", + strings: getStringArray(1), parts: 1, expectedParts: 1, }, { - name: "should split one files into zero part", - files: getFileArray(1), + name: "should split the array with one string into zero part", + strings: getStringArray(1), parts: 0, expectedParts: 1, }, { - name: "should split two files into one part", - files: getFileArray(2), + name: "should split the array with two strings into one part", + strings: getStringArray(2), parts: 1, expectedParts: 1, }, { - name: "should split two files into four part", - files: getFileArray(2), + name: "should split the array with two strings into four part", + strings: getStringArray(2), parts: 4, expectedParts: 2, }, { - name: "should split one file into two part", - files: getFileArray(1), + name: "should split the array with one string into two part", + strings: getStringArray(1), parts: 2, expectedParts: 1, }, { - name: "should split four files into two part", - files: getFileArray(4), + name: "should split the array with four strings into two part", + strings: getStringArray(4), parts: 2, expectedParts: 2, }, { - name: "should split two files into three part", - files: getFileArray(2), + name: "should split the array with two strings into three part", + strings: getStringArray(2), parts: 3, expectedParts: 2, }, { - name: "should split ten files into three part", - files: getFileArray(10), + name: "should split the array with ten strings into three part", + strings: getStringArray(10), parts: 3, expectedParts: 3, }, @@ -204,25 +204,25 @@ func Test_splitFilesIntoParts(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - got := splitFilesIntoParts(test.files, test.parts) + got := splitArrayIntoParts(test.strings, test.parts) if len(got) != test.expectedParts { - t.Fatalf("Expect to split into '%d' but got '%d'", test.expectedParts, len(got)) + t.Fatalf("Expect to split the array into '%d' but got '%d'", test.expectedParts, len(got)) } for _, files := range got { if len(files) < 1 { - t.Fatalf("Expected to contain at least on file but got none") + t.Fatalf("Expected to contain at least on string but got none") } } }) } - t.Run("should split empty files into one part", func(t *testing.T) { - files := []string{} + t.Run("should split the empty array into one part", func(t *testing.T) { + strings := []string{} parts := 1 - got := len(splitFilesIntoParts(files, parts)) + got := len(splitArrayIntoParts(strings, parts)) want := 1 if got != want { From 279e72a694a706ea0ad03e86781a636ca173c480 Mon Sep 17 00:00:00 2001 From: Augusto Dwenger J Date: Thu, 22 Oct 2020 17:13:32 +0200 Subject: [PATCH 6/6] Extract nested loops and funtions into own functions --- dll.go | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/dll.go b/dll.go index 63a4838..3b32e50 100644 --- a/dll.go +++ b/dll.go @@ -101,19 +101,7 @@ func main() { for _, files := range filesPerCore { wg.Add(1) - - go func(files []string) { - defer wg.Done() - for _, source := range files { - r, err := gather(source, true) - if err != nil { - fmt.Fprintf(os.Stderr, "error parsing %s: %s\n", source, err) - reportsChannel <- []*report{} - return - } - reportsChannel <- r - } - }(files) + go analyseFiles(files, reportsChannel, &wg) } wg.Wait() @@ -121,9 +109,26 @@ func main() { }() for reports := range reportsChannel { - for _, report := range reports { - fmt.Println(report) + printReports(reports) + } +} + +func analyseFiles(files []string, c chan []*report, wg *sync.WaitGroup) { + defer wg.Done() + for _, source := range files { + r, err := gather(source, true) + if err != nil { + fmt.Fprintf(os.Stderr, "error parsing %s: %s\n", source, err) + c <- []*report{} + return } + c <- r + } +} + +func printReports(reports []*report) { + for _, report := range reports { + fmt.Println(report) } }