You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
437 lines
9.4 KiB
437 lines
9.4 KiB
package main
|
|
|
|
import (
|
|
"bytes"
|
|
"flag"
|
|
"io"
|
|
"log"
|
|
"os"
|
|
"path"
|
|
"sync"
|
|
|
|
"github.com/cheggaaa/pb/v3"
|
|
)
|
|
|
|
type TopLevel struct {
|
|
LeftInput string
|
|
RightInput string
|
|
LeftOutput string
|
|
CombinedOutput string
|
|
RightOutput string
|
|
OpenHandles chan int
|
|
DryRun bool
|
|
Bar *pb.ProgressBar
|
|
}
|
|
|
|
type Step struct {
|
|
*TopLevel
|
|
Subpath string
|
|
}
|
|
|
|
// Direction selects which of the input or output trees a path refers to.
type Direction byte

// Trees a path can be resolved against. COMBINED is only accepted for output
// paths; InputPath panics when given it.
const (
	LEFT     Direction = iota // the left tree
	RIGHT                     // the right tree
	COMBINED                  // the combined output tree
)
|
|
|
|
// FileState classifies what CheckState found at a path.
type FileState byte

// Possible results of CheckState.
const (
	MISSING   FileState = iota // nothing exists at the path
	FILE                       // a regular file
	DIRECTORY                  // a directory
	UNKNOWN                    // something else, or the path could not be inspected
)
|
|
|
|
func (s *Step) CheckState(filePath string) (out FileState, err error) {
|
|
info, err := os.Stat(filePath)
|
|
if err != nil {
|
|
if os.IsNotExist(err) {
|
|
return MISSING, nil
|
|
} else {
|
|
return UNKNOWN, err
|
|
}
|
|
}
|
|
if info.IsDir() {
|
|
return DIRECTORY, nil
|
|
} else if info.Mode().IsRegular() {
|
|
return FILE, nil
|
|
} else {
|
|
return UNKNOWN, nil
|
|
}
|
|
}
|
|
|
|
func (s *Step) InputPath(child string, side Direction) string {
|
|
var basePath string
|
|
switch side {
|
|
case LEFT:
|
|
basePath = s.LeftInput
|
|
case RIGHT:
|
|
basePath = s.RightInput
|
|
default:
|
|
panic("Unexpected side for input path")
|
|
}
|
|
return path.Join(basePath, s.Subpath, child)
|
|
}
|
|
|
|
func (s *Step) OutputPath(child string, side Direction) string {
|
|
var basePath string
|
|
switch side {
|
|
case LEFT:
|
|
basePath = s.LeftOutput
|
|
case RIGHT:
|
|
basePath = s.RightOutput
|
|
case COMBINED:
|
|
basePath = s.CombinedOutput
|
|
default:
|
|
panic("Unexpected side for output path")
|
|
}
|
|
return path.Join(basePath, s.Subpath, child)
|
|
}
|
|
|
|
func (s *Step) Separate(child string) {
|
|
s.SeparateLeft(child)
|
|
s.SeparateRight(child)
|
|
}
|
|
|
|
func (s *Step) SeparateLeft(child string) {
|
|
leftInPath := s.InputPath(child, LEFT)
|
|
leftOutPath := s.OutputPath(child, LEFT)
|
|
|
|
if s.DryRun {
|
|
return
|
|
}
|
|
|
|
leftBase := path.Dir(leftOutPath)
|
|
err := os.MkdirAll(leftBase, 0755)
|
|
if err != nil {
|
|
log.Printf("Failed creating %s: %s\n", leftBase, err)
|
|
}
|
|
err = os.Rename(leftInPath, leftOutPath)
|
|
if err != nil && !os.IsNotExist(err) {
|
|
log.Printf("Failed moving %s to %s: %s\n", leftInPath, leftOutPath, err)
|
|
}
|
|
}
|
|
|
|
func (s *Step) SeparateRight(child string) {
|
|
rightInPath := s.InputPath(child, RIGHT)
|
|
rightOutPath := s.OutputPath(child, RIGHT)
|
|
|
|
if s.DryRun {
|
|
return
|
|
}
|
|
|
|
rightBase := path.Dir(rightOutPath)
|
|
err := os.MkdirAll(rightBase, 0755)
|
|
if err != nil {
|
|
log.Printf("Failed creating %s: %s\n", rightBase, err)
|
|
}
|
|
err = os.Rename(rightInPath, rightOutPath)
|
|
if err != nil && !os.IsNotExist(err) {
|
|
log.Printf("Failed moving %s to %s: %s\n", rightInPath, rightOutPath, err)
|
|
}
|
|
}
|
|
|
|
func (s *Step) Combine(child string) {
|
|
leftInPath := s.InputPath(child, LEFT)
|
|
rightInPath := s.InputPath(child, RIGHT)
|
|
combinedOutPath := s.OutputPath(child, COMBINED)
|
|
|
|
if s.DryRun {
|
|
return
|
|
}
|
|
|
|
combinedBase := path.Dir(combinedOutPath)
|
|
err := os.MkdirAll(combinedBase, 0755)
|
|
if err != nil {
|
|
log.Printf("Failed creating %s: %s\n", combinedBase, err)
|
|
}
|
|
err = os.Rename(leftInPath, combinedOutPath)
|
|
if err != nil && !os.IsNotExist(err) {
|
|
log.Printf("Failed moving %s to %s: %s\n", leftInPath, combinedOutPath, err)
|
|
}
|
|
err = os.Remove(rightInPath)
|
|
if err != nil && !os.IsNotExist(err) {
|
|
log.Printf("Failed removing %s: %s\n", rightInPath, err)
|
|
}
|
|
}
|
|
|
|
func (s *Step) MakeCombinedDir(child string) {
|
|
combinedOutPath := s.OutputPath(child, COMBINED)
|
|
|
|
if s.DryRun {
|
|
return
|
|
}
|
|
|
|
err := os.MkdirAll(combinedOutPath, 0755)
|
|
if err != nil {
|
|
log.Printf("Failed creating %s: %s\n", combinedOutPath, err)
|
|
}
|
|
}
|
|
|
|
func (s *Step) RemoveInputDirs(child string) {
|
|
leftInPath := s.InputPath(child, LEFT)
|
|
rightInPath := s.InputPath(child, RIGHT)
|
|
|
|
if s.DryRun {
|
|
return
|
|
}
|
|
|
|
err := os.Remove(leftInPath)
|
|
if err != nil && !os.IsNotExist(err) {
|
|
log.Printf("Failed removing %s: %s\n", leftInPath, err)
|
|
}
|
|
err = os.Remove(rightInPath)
|
|
if err != nil && !os.IsNotExist(err) {
|
|
log.Printf("Failed removing %s: %s\n", rightInPath, err)
|
|
}
|
|
}
|
|
|
|
func (s *Step) ListChildren() []string {
|
|
s.OpenHandles <- 1
|
|
defer func() {
|
|
<-s.OpenHandles
|
|
}()
|
|
leftInPath := s.InputPath("", LEFT)
|
|
rightInPath := s.InputPath("", RIGHT)
|
|
results := make(map[string]bool)
|
|
leftFiles, err := os.ReadDir(leftInPath)
|
|
if err != nil {
|
|
log.Printf("Failed listing %s: %s\n", leftInPath, err)
|
|
}
|
|
rightFiles, err := os.ReadDir(rightInPath)
|
|
if err != nil {
|
|
log.Printf("Failed listing %s: %s\n", rightInPath, err)
|
|
}
|
|
for _, file := range leftFiles {
|
|
results[file.Name()] = true
|
|
}
|
|
for _, file := range rightFiles {
|
|
results[file.Name()] = true
|
|
}
|
|
out := make([]string, 0, len(results))
|
|
for file := range results {
|
|
out = append(out, file)
|
|
}
|
|
return out
|
|
}
|
|
|
|
func (s *Step) AreFilesIdentical(child string) bool {
|
|
s.OpenHandles <- 1
|
|
defer func() {
|
|
<-s.OpenHandles
|
|
}()
|
|
leftInPath := s.InputPath(child, LEFT)
|
|
rightInPath := s.InputPath(child, RIGHT)
|
|
leftInfo, err := os.Stat(leftInPath)
|
|
if err != nil {
|
|
log.Printf("Error statting path %s: %s\n", leftInPath, err)
|
|
return false
|
|
}
|
|
rightInfo, err := os.Stat(rightInPath)
|
|
if err != nil {
|
|
log.Printf("Error statting path %s: %s\n", rightInPath, err)
|
|
return false
|
|
}
|
|
if leftInfo.Size() != rightInfo.Size() {
|
|
return false
|
|
}
|
|
|
|
leftFile, err := os.OpenFile(leftInPath, os.O_RDONLY, 0)
|
|
if err != nil {
|
|
log.Printf("Error opening path %s: %s\n", leftInPath, err)
|
|
return false
|
|
}
|
|
defer func() {
|
|
err := leftFile.Close()
|
|
if err != nil {
|
|
log.Printf("Error closing %s: %s\n", leftInPath, err)
|
|
}
|
|
}()
|
|
rightFile, err := os.OpenFile(rightInPath, os.O_RDONLY, 0)
|
|
if err != nil {
|
|
log.Printf("Error opening path %s: %s\n", rightInPath, err)
|
|
return false
|
|
}
|
|
defer func() {
|
|
err := rightFile.Close()
|
|
if err != nil {
|
|
log.Printf("Error closing %s: %s\n", rightInPath, err)
|
|
}
|
|
}()
|
|
|
|
// 4 MB buffers
|
|
var leftData [4194304]byte
|
|
var rightData [4194304]byte
|
|
for {
|
|
leftRead, err := leftFile.Read(leftData[:])
|
|
if err != nil && err != io.EOF {
|
|
log.Printf("Error reading %s: %s\n", leftInPath, err)
|
|
return false
|
|
}
|
|
rightRead, err := rightFile.Read(rightData[:])
|
|
if err != nil && err != io.EOF {
|
|
log.Printf("Error reading %s: %s\n", rightInPath, err)
|
|
return false
|
|
}
|
|
if leftRead != rightRead {
|
|
return false
|
|
}
|
|
if !bytes.Equal(leftData[:leftRead], rightData[:rightRead]) {
|
|
return false
|
|
}
|
|
if err == io.EOF {
|
|
return true
|
|
}
|
|
}
|
|
}
|
|
|
|
func (s *Step) Walk() {
|
|
wg := sync.WaitGroup{}
|
|
|
|
children := s.ListChildren()
|
|
s.Bar.AddTotal(int64(len(children)))
|
|
for _, child := range children {
|
|
rightPath := s.InputPath(child, RIGHT)
|
|
rightState, err := s.CheckState(rightPath)
|
|
if err != nil {
|
|
wg.Add(1)
|
|
go func(rightPath string) {
|
|
defer func() {
|
|
wg.Done()
|
|
}()
|
|
log.Printf("Error statting path %s: %s\n", rightPath, err)
|
|
s.Bar.Increment()
|
|
}(rightPath)
|
|
continue
|
|
} else if rightState == UNKNOWN {
|
|
wg.Add(1)
|
|
go func(rightPath string) {
|
|
defer func() {
|
|
wg.Done()
|
|
}()
|
|
log.Printf("Unknown stat value for path %s\n", rightPath)
|
|
s.Bar.Increment()
|
|
}(rightPath)
|
|
continue
|
|
} else if rightState == MISSING {
|
|
wg.Add(1)
|
|
go func(child string) {
|
|
defer func() {
|
|
wg.Done()
|
|
}()
|
|
s.SeparateLeft(child)
|
|
s.Bar.Increment()
|
|
}(child)
|
|
continue
|
|
}
|
|
|
|
leftPath := s.InputPath(child, LEFT)
|
|
leftState, err := s.CheckState(leftPath)
|
|
if err != nil {
|
|
wg.Add(1)
|
|
go func(leftPath string) {
|
|
defer func() {
|
|
wg.Done()
|
|
}()
|
|
log.Printf("Error statting path %s: %s\n", leftPath, err)
|
|
s.Bar.Increment()
|
|
}(leftPath)
|
|
continue
|
|
} else if leftState == UNKNOWN {
|
|
wg.Add(1)
|
|
go func(leftPath string) {
|
|
defer func() {
|
|
wg.Done()
|
|
}()
|
|
log.Printf("Unknown stat value for path %s\n", leftPath)
|
|
s.Bar.Increment()
|
|
}(leftPath)
|
|
continue
|
|
} else if leftState == MISSING {
|
|
wg.Add(1)
|
|
go func(child string) {
|
|
defer func() {
|
|
wg.Done()
|
|
}()
|
|
s.SeparateRight(child)
|
|
s.Bar.Increment()
|
|
}(child)
|
|
continue
|
|
}
|
|
|
|
switch rightState {
|
|
case FILE:
|
|
wg.Add(1)
|
|
go func(child string) {
|
|
defer func() {
|
|
wg.Done()
|
|
}()
|
|
if leftState == FILE && s.AreFilesIdentical(child) {
|
|
s.Combine(child)
|
|
s.Bar.Increment()
|
|
} else {
|
|
s.Separate(child)
|
|
s.Bar.Increment()
|
|
}
|
|
}(child)
|
|
case DIRECTORY:
|
|
if leftState == DIRECTORY {
|
|
substep := Step{
|
|
TopLevel: s.TopLevel,
|
|
Subpath: path.Join(s.Subpath, child),
|
|
}
|
|
wg.Add(1)
|
|
go func(child string, substep *Step) {
|
|
defer func() {
|
|
wg.Done()
|
|
}()
|
|
s.MakeCombinedDir(child)
|
|
substep.Walk()
|
|
s.RemoveInputDirs(child)
|
|
s.Bar.Increment()
|
|
}(child, &substep)
|
|
} else {
|
|
wg.Add(1)
|
|
go func(child string) {
|
|
defer func() {
|
|
wg.Done()
|
|
}()
|
|
s.Separate(child)
|
|
s.Bar.Increment()
|
|
}(child)
|
|
}
|
|
default:
|
|
panic("Unexpected state")
|
|
}
|
|
}
|
|
|
|
wg.Wait()
|
|
}
|
|
|
|
func main() {
|
|
settings := TopLevel{
|
|
OpenHandles: make(chan int, 450),
|
|
Bar: pb.StartNew(1),
|
|
}
|
|
flag.StringVar(&settings.LeftInput,
|
|
"left-input", "./input/left", "The name of the left side of the input.")
|
|
flag.StringVar(&settings.RightInput,
|
|
"right-input", "./input/right", "The name of the right side of the input.")
|
|
flag.StringVar(&settings.LeftOutput,
|
|
"left-output", "./output/left", "The name of the left side of the output.")
|
|
flag.StringVar(&settings.CombinedOutput,
|
|
"combined-output", "./output/combined", "The name of the combined side of the output.")
|
|
flag.StringVar(&settings.RightOutput,
|
|
"right-output", "./output/right", "The name of the right side of the output.")
|
|
flag.BoolVar(&settings.DryRun,
|
|
"dry-run", true, "True if no actual operation should be performed.")
|
|
flag.Parse()
|
|
(&Step{
|
|
TopLevel: &settings,
|
|
Subpath: "",
|
|
}).Walk()
|
|
settings.Bar.Increment()
|
|
}
|
|
|