// This program walks two directory trees (a "left" and a "right" input) in
// parallel and sorts their entries into three output trees: entries that exist
// on only one side (or that differ between sides) are moved to that side's
// output, while regular files that are byte-for-byte identical on both sides
// are moved once into the combined output and the duplicate is deleted.
package main

import (
	"bytes"
	"flag"
	"io"
	"log"
	"os"
	"path"
	"sync"

	"github.com/cheggaaa/pb/v3"
)

// compareChunkSize is the upper bound, in bytes, of each of the two buffers
// used by AreFilesIdentical (4 MiB).
const compareChunkSize = 4 << 20

// TopLevel holds the settings and shared state used by every Step of a run.
type TopLevel struct {
	LeftInput      string           // root of the left input tree
	RightInput     string           // root of the right input tree
	LeftOutput     string           // root receiving left-only / differing entries
	CombinedOutput string           // root receiving entries identical on both sides
	RightOutput    string           // root receiving right-only / differing entries
	OpenHandles    chan int         // buffered channel used as a semaphore around listing and comparing
	DryRun         bool             // when true, no directory is created and nothing is moved or removed
	Bar            *pb.ProgressBar  // progress bar; one unit per visited directory entry
}

// Step is the unit of work for one directory level: the shared TopLevel
// settings plus the sub-path (relative to every root) being processed.
type Step struct {
	*TopLevel
	Subpath string
}

// Direction selects which tree a path refers to.
type Direction byte

const (
	LEFT Direction = iota
	RIGHT
	COMBINED
)

// FileState classifies what was found at a path.
type FileState byte

const (
	MISSING FileState = iota
	FILE
	DIRECTORY
	UNKNOWN
)

// CheckState stats filePath and classifies it. A non-existent path yields
// (MISSING, nil); any other stat failure yields (UNKNOWN, err). A path that
// exists but is neither a directory nor a regular file yields (UNKNOWN, nil).
func (s *Step) CheckState(filePath string) (FileState, error) {
	info, err := os.Stat(filePath)
	if err != nil {
		if os.IsNotExist(err) {
			return MISSING, nil
		}
		return UNKNOWN, err
	}
	switch {
	case info.IsDir():
		return DIRECTORY, nil
	case info.Mode().IsRegular():
		return FILE, nil
	default:
		return UNKNOWN, nil
	}
}

// InputPath returns the path of child below this step's sub-path in the input
// tree chosen by side. It panics for any side other than LEFT or RIGHT.
//
// NOTE(review): paths are joined with package path (always '/'), not
// path/filepath — confirm whether non-slash platforms need to be supported.
func (s *Step) InputPath(child string, side Direction) string {
	var basePath string
	switch side {
	case LEFT:
		basePath = s.LeftInput
	case RIGHT:
		basePath = s.RightInput
	default:
		panic("Unexpected side for input path")
	}
	return path.Join(basePath, s.Subpath, child)
}

// OutputPath returns the path of child below this step's sub-path in the
// output tree chosen by side. It panics for an unrecognised side.
func (s *Step) OutputPath(child string, side Direction) string {
	var basePath string
	switch side {
	case LEFT:
		basePath = s.LeftOutput
	case RIGHT:
		basePath = s.RightOutput
	case COMBINED:
		basePath = s.CombinedOutput
	default:
		panic("Unexpected side for output path")
	}
	return path.Join(basePath, s.Subpath, child)
}

// movePath creates the parent directory of to and renames from onto it.
// It reports whether the rename succeeded. Failures are logged, except a
// rename failing because a path does not exist, which is silent.
func movePath(from, to string) bool {
	parent := path.Dir(to)
	if err := os.MkdirAll(parent, 0755); err != nil {
		log.Printf("Failed creating %s: %s\n", parent, err)
		// The rename cannot succeed without the destination directory.
		return false
	}
	if err := os.Rename(from, to); err != nil {
		if !os.IsNotExist(err) {
			log.Printf("Failed moving %s to %s: %s\n", from, to, err)
		}
		return false
	}
	return true
}

// removePath removes target, logging any failure other than "does not exist".
func removePath(target string) {
	if err := os.Remove(target); err != nil && !os.IsNotExist(err) {
		log.Printf("Failed removing %s: %s\n", target, err)
	}
}

// Separate moves child from each input tree to the output tree of the same
// side.
func (s *Step) Separate(child string) {
	s.SeparateLeft(child)
	s.SeparateRight(child)
}

// SeparateLeft moves child from the left input to the left output.
// It does nothing in dry-run mode.
func (s *Step) SeparateLeft(child string) {
	leftInPath := s.InputPath(child, LEFT)
	leftOutPath := s.OutputPath(child, LEFT)
	if s.DryRun {
		return
	}
	movePath(leftInPath, leftOutPath)
}

// SeparateRight moves child from the right input to the right output.
// It does nothing in dry-run mode.
func (s *Step) SeparateRight(child string) {
	rightInPath := s.InputPath(child, RIGHT)
	rightOutPath := s.OutputPath(child, RIGHT)
	if s.DryRun {
		return
	}
	movePath(rightInPath, rightOutPath)
}

// Combine moves the left copy of child into the combined output and then
// deletes the right copy. It does nothing in dry-run mode.
func (s *Step) Combine(child string) {
	leftInPath := s.InputPath(child, LEFT)
	rightInPath := s.InputPath(child, RIGHT)
	combinedOutPath := s.OutputPath(child, COMBINED)
	if s.DryRun {
		return
	}
	// Only drop the duplicate once the surviving copy is safely in place;
	// otherwise a failed move could leave no copy in the combined output
	// while the right-hand one has already been deleted.
	if !movePath(leftInPath, combinedOutPath) {
		return
	}
	removePath(rightInPath)
}

// MakeCombinedDir creates the directory for child (and any missing parents)
// in the combined output. It does nothing in dry-run mode.
func (s *Step) MakeCombinedDir(child string) {
	combinedOutPath := s.OutputPath(child, COMBINED)
	if s.DryRun {
		return
	}
	if err := os.MkdirAll(combinedOutPath, 0755); err != nil {
		log.Printf("Failed creating %s: %s\n", combinedOutPath, err)
	}
}

// RemoveInputDirs removes child from both input trees using os.Remove.
// It does nothing in dry-run mode.
func (s *Step) RemoveInputDirs(child string) {
	leftInPath := s.InputPath(child, LEFT)
	rightInPath := s.InputPath(child, RIGHT)
	if s.DryRun {
		return
	}
	removePath(leftInPath)
	removePath(rightInPath)
}

// ListChildren returns the union of entry names found directly under this
// step's sub-path in the left and right input trees, in no particular order.
// A directory that cannot be listed is logged and contributes whatever
// entries os.ReadDir still returned.
func (s *Step) ListChildren() []string {
	s.OpenHandles <- 1
	defer func() { <-s.OpenHandles }()

	names := make(map[string]struct{})
	for _, side := range []Direction{LEFT, RIGHT} {
		dir := s.InputPath("", side)
		entries, err := os.ReadDir(dir)
		if err != nil {
			log.Printf("Failed listing %s: %s\n", dir, err)
		}
		for _, entry := range entries {
			names[entry.Name()] = struct{}{}
		}
	}

	out := make([]string, 0, len(names))
	for name := range names {
		out = append(out, name)
	}
	return out
}

// AreFilesIdentical reports whether child has byte-for-byte identical
// contents in the left and right input trees. Any stat, open or read error is
// logged and reported as "not identical".
func (s *Step) AreFilesIdentical(child string) bool {
	s.OpenHandles <- 1
	defer func() { <-s.OpenHandles }()

	leftInPath := s.InputPath(child, LEFT)
	rightInPath := s.InputPath(child, RIGHT)

	leftInfo, err := os.Stat(leftInPath)
	if err != nil {
		log.Printf("Error statting path %s: %s\n", leftInPath, err)
		return false
	}
	rightInfo, err := os.Stat(rightInPath)
	if err != nil {
		log.Printf("Error statting path %s: %s\n", rightInPath, err)
		return false
	}
	// Cheap rejection before touching file contents.
	if leftInfo.Size() != rightInfo.Size() {
		return false
	}

	leftFile, err := os.OpenFile(leftInPath, os.O_RDONLY, 0)
	if err != nil {
		log.Printf("Error opening path %s: %s\n", leftInPath, err)
		return false
	}
	defer func() {
		if err := leftFile.Close(); err != nil {
			log.Printf("Error closing %s: %s\n", leftInPath, err)
		}
	}()
	rightFile, err := os.OpenFile(rightInPath, os.O_RDONLY, 0)
	if err != nil {
		log.Printf("Error opening path %s: %s\n", rightInPath, err)
		return false
	}
	defer func() {
		if err := rightFile.Close(); err != nil {
			log.Printf("Error closing %s: %s\n", rightInPath, err)
		}
	}()

	// Size the buffers to the file (capped at compareChunkSize) so small
	// files do not pay for two 4 MiB buffers. The size is kept >= 1 because
	// io.ReadFull into an empty buffer returns (0, nil) and would never
	// observe EOF.
	chunk := leftInfo.Size()
	if chunk > compareChunkSize {
		chunk = compareChunkSize
	}
	if chunk < 1 {
		chunk = 1
	}
	leftData := make([]byte, chunk)
	rightData := make([]byte, chunk)

	for {
		// io.ReadFull keeps reading until the buffer is full or the file
		// ends, so a short read on one side cannot be mistaken for a
		// difference in content.
		leftRead, leftErr := io.ReadFull(leftFile, leftData)
		if leftErr != nil && leftErr != io.EOF && leftErr != io.ErrUnexpectedEOF {
			log.Printf("Error reading %s: %s\n", leftInPath, leftErr)
			return false
		}
		rightRead, rightErr := io.ReadFull(rightFile, rightData)
		if rightErr != nil && rightErr != io.EOF && rightErr != io.ErrUnexpectedEOF {
			log.Printf("Error reading %s: %s\n", rightInPath, rightErr)
			return false
		}
		// bytes.Equal also rejects chunks of different length.
		if !bytes.Equal(leftData[:leftRead], rightData[:rightRead]) {
			return false
		}
		// A partial (or empty) final chunk on either side means that side
		// hit end of file; the chunks matched, so both sides ended together.
		if leftErr != nil || rightErr != nil {
			return true
		}
	}
}

// Walk processes every entry under this step's sub-path and returns once all
// of them, including nested directories, have been handled.
//
// For each entry the right side is examined first, then the left:
//   - a stat error or an unsupported file type on either side is logged and
//     the entry is skipped;
//   - an entry missing on one side is moved to the other side's output;
//   - two regular files are combined when identical, separated otherwise;
//   - two directories are walked recursively, after which both input
//     directories are removed;
//   - a file on one side and a directory on the other are separated.
//
// The progress bar total grows by the number of entries and is incremented
// once per entry.
func (s *Step) Walk() {
	var wg sync.WaitGroup

	// spawn runs task on its own goroutine and ticks the progress bar once
	// the task has finished.
	spawn := func(task func()) {
		wg.Add(1)
		go func() {
			defer wg.Done()
			task()
			s.Bar.Increment()
		}()
	}

	// inspect stats child on the given input side. When the entry cannot be
	// classified it schedules the matching log line and reports ok == false.
	inspect := func(child string, side Direction) (state FileState, ok bool) {
		target := s.InputPath(child, side)
		state, err := s.CheckState(target)
		switch {
		case err != nil:
			spawn(func() { log.Printf("Error statting path %s: %s\n", target, err) })
			return state, false
		case state == UNKNOWN:
			spawn(func() { log.Printf("Unknown stat value for path %s\n", target) })
			return state, false
		}
		return state, true
	}

	children := s.ListChildren()
	s.Bar.AddTotal(int64(len(children)))

	for _, child := range children {
		child := child // per-iteration copy for the closures below (needed before Go 1.22)

		rightState, ok := inspect(child, RIGHT)
		if !ok {
			continue
		}
		if rightState == MISSING {
			spawn(func() { s.SeparateLeft(child) })
			continue
		}

		leftState, ok := inspect(child, LEFT)
		if !ok {
			continue
		}
		if leftState == MISSING {
			spawn(func() { s.SeparateRight(child) })
			continue
		}

		switch {
		case rightState == FILE:
			spawn(func() {
				if leftState == FILE && s.AreFilesIdentical(child) {
					s.Combine(child)
				} else {
					s.Separate(child)
				}
			})
		case rightState == DIRECTORY && leftState == DIRECTORY:
			substep := &Step{
				TopLevel: s.TopLevel,
				Subpath:  path.Join(s.Subpath, child),
			}
			spawn(func() {
				s.MakeCombinedDir(child)
				substep.Walk()
				s.RemoveInputDirs(child)
			})
		case rightState == DIRECTORY:
			spawn(func() { s.Separate(child) })
		default:
			panic("Unexpected state")
		}
	}

	wg.Wait()
}

// main parses the command-line flags, walks both input trees from their roots
// and finalises the progress bar.
func main() {
	settings := TopLevel{
		OpenHandles: make(chan int, 450),
	}
	flag.StringVar(&settings.LeftInput, "left-input", "./input/left", "The name of the left side of the input.")
	flag.StringVar(&settings.RightInput, "right-input", "./input/right", "The name of the right side of the input.")
	flag.StringVar(&settings.LeftOutput, "left-output", "./output/left", "The name of the left side of the output.")
	flag.StringVar(&settings.CombinedOutput, "combined-output", "./output/combined", "The name of the combined side of the output.")
	flag.StringVar(&settings.RightOutput, "right-output", "./output/right", "The name of the right side of the output.")
	flag.BoolVar(&settings.DryRun, "dry-run", true, "True if no actual operation should be performed.")
	flag.Parse()

	// Start the bar only after flag parsing so usage/flag errors do not
	// leave a half-drawn bar behind. The initial total of 1 stands for the
	// root walk itself, ticked below.
	settings.Bar = pb.StartNew(1)

	(&Step{
		TopLevel: &settings,
		Subpath:  "",
	}).Walk()

	settings.Bar.Increment()
	settings.Bar.Finish()
}