package cli

import (
	"bytes"
	"fmt"
	"os"
	"strings"
	"testing"

	"git.nakama.town/fmartingr/dharma/pkg/reporter"
	"git.nakama.town/fmartingr/dharma/pkg/scraper"
	"git.nakama.town/fmartingr/dharma/pkg/testutil"
	"github.com/spf13/cobra"
)

// TestIntegrationTestsite runs the CLI end to end against the server started
// by testutil.StartTestsiteServer, once per supported flag combination, and
// checks that the report is non-empty and mentions the known broken link
// (not_found.html).
func TestIntegrationTestsite(t *testing.T) {
	// The test starts a server and crawls it, so it is skipped in short mode.
	if testing.Short() {
		t.Skip("Skipping integration test in short mode")
	}

	// Start the testsite server.
	serverURL, cleanup, err := testutil.StartTestsiteServer()
	if err != nil {
		t.Fatalf("Failed to start test server: %v", err)
	}
	defer cleanup()

	// captured carries what the reader goroutine collected from the pipe.
	type captured struct {
		text string
		err  error
	}

	// runCLI executes the CLI with args while stdout is redirected to a pipe
	// and returns everything that was written plus the error from Execute.
	// os.Stdout, os.Args and rootCmd are restored before it returns, so no
	// state leaks between test cases.
	runCLI := func(t *testing.T, args []string) (string, error) {
		t.Helper()

		r, w, err := os.Pipe()
		if err != nil {
			t.Fatalf("Failed to create pipe: %v", err)
		}
		defer r.Close()

		origStdout, origArgs, origRootCmd := os.Stdout, os.Args, rootCmd
		defer func() {
			os.Stdout = origStdout
			os.Args = origArgs
			rootCmd = origRootCmd
		}()

		os.Stdout = w
		os.Args = append([]string{"dharma"}, args...)
		// A fresh command also re-registers the flags, which resets the
		// package-level flag variables to their defaults.
		rootCmd = createRootCmd()

		// Drain the pipe while the command runs. Reading only after Execute
		// returns would block the command forever as soon as the report
		// exceeds the OS pipe buffer.
		done := make(chan captured, 1)
		go func() {
			var buf bytes.Buffer
			_, readErr := buf.ReadFrom(r)
			done <- captured{text: buf.String(), err: readErr}
		}()

		execErr := Execute()

		// Closing the write end is what lets the reader see EOF; do it on
		// every path, including when Execute failed.
		if closeErr := w.Close(); closeErr != nil {
			t.Errorf("Failed to close pipe writer: %v", closeErr)
		}
		got := <-done

		if execErr != nil {
			return got.text, execErr
		}
		if got.err != nil {
			t.Fatalf("Failed to read output: %v", got.err)
		}
		return got.text, nil
	}

	// Test cases for CLI invocation with different flags. format names the
	// output format whose specific marker must appear in the report; it is
	// empty when only the generic checks apply.
	testCases := []struct {
		name       string
		args       []string
		format     string
		wantOutput bool
		wantErrors bool
	}{
		{
			name:       "Basic scan",
			args:       []string{serverURL},
			wantOutput: true,
			wantErrors: true,
		},
		{
			name:       "JSON output",
			args:       []string{"--format", "json", serverURL},
			format:     "json",
			wantOutput: true,
			wantErrors: true,
		},
		{
			name:       "Internal links only",
			args:       []string{"--internal-only", serverURL},
			wantOutput: true,
			wantErrors: true,
		},
		{
			name:       "Custom depth",
			args:       []string{"--depth", "1", serverURL},
			wantOutput: true,
			wantErrors: true,
		},
		{
			name:       "Custom concurrency",
			args:       []string{"--concurrency", "1", serverURL},
			wantOutput: true,
			wantErrors: true,
		},
		{
			name:       "Custom timeout",
			args:       []string{"--timeout", "5", serverURL},
			wantOutput: true,
			wantErrors: true,
		},
		{
			name:       "CSV output",
			args:       []string{"--format", "csv", serverURL},
			format:     "csv",
			wantOutput: true,
			wantErrors: true,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			output, err := runCLI(t, tc.args)
			if err != nil {
				t.Fatalf("Failed to execute command: %v", err)
			}

			// Check if we got any output.
			if tc.wantOutput && output == "" {
				t.Errorf("Expected output but got none")
			}

			// Check if errors were reported for known broken links.
			if tc.wantErrors && !strings.Contains(output, "not_found.html") {
				t.Errorf("Expected not_found.html to be reported as broken in output")
			}

			// Specific checks for different formats.
			switch tc.format {
			case "json":
				if !strings.Contains(output, `"url":`) {
					t.Errorf("Expected JSON output with 'url' field")
				}
			case "csv":
				if !strings.Contains(output, "Status,Type,URL,Source URL,Error") {
					t.Errorf("Expected CSV header in output")
				}
			}
		})
	}
}

// createRootCmd returns a fresh instance of the root command.
//
// The command takes exactly one argument (the URL to scan), binds its flags to
// the package-level variables format, concurrency, depth, timeout, verbose and
// internalOnly, scans the URL with a scraper configured from those variables,
// and writes the report in the selected format to os.Stdout.
func createRootCmd() *cobra.Command {
	cmd := &cobra.Command{
		Use:   "dharma [URL]",
		Short: "Scrape websites and check for broken links and references",
		Long: `Dharma is a website link checker tool that crawls a website to find broken links, images, CSS references, and more.
It generates a report of all issues found.`,
		Args: cobra.ExactArgs(1),
		RunE: func(_ *cobra.Command, args []string) error {
			target := args[0]

			switch {
			case format != "pretty":
				// Force verbose off for non-pretty formats so the report is
				// the only thing written for machine-readable output.
				verbose = false
			case strings.HasPrefix(target, "file://"):
				fmt.Printf("Scanning local directory: %s\n", strings.TrimPrefix(target, "file://"))
			default:
				fmt.Printf("Scanning website: %s\n", target)
			}

			// Create a new scraper from the current flag values.
			s := scraper.New(
				scraper.WithConcurrency(concurrency),
				scraper.WithDepth(depth),
				scraper.WithTimeout(timeout),
				scraper.WithVerbose(verbose),
				scraper.WithInternalOnly(internalOnly),
			)

			// Run the scraper.
			results, err := s.Scan(target)
			if err != nil {
				return err
			}

			// Generate the report. os.Stdout is read at call time, so a
			// caller that has redirected it captures the report.
			rep, err := reporter.New(format)
			if err != nil {
				return err
			}
			return rep.Generate(results, os.Stdout)
		},
	}

	flags := cmd.Flags()
	flags.StringVarP(&format, "format", "f", "pretty", "Output format (pretty, json, csv)")
	flags.IntVarP(&concurrency, "concurrency", "c", 10, "Number of concurrent requests")
	flags.IntVarP(&depth, "depth", "d", 3, "Maximum depth to crawl")
	flags.IntVarP(&timeout, "timeout", "t", 10, "Timeout in seconds for each request")
	flags.BoolVarP(&verbose, "verbose", "v", false, "Verbose output")
	flags.BoolVarP(&internalOnly, "internal-only", "i", false, "Only check internal links (same hostname)")

	return cmd
}