dharma/pkg/cli/testsite_test.go
Felipe M. 0ef15167d5
All checks were successful
ci/woodpecker/tag/release Pipeline was successful
initial release
2025-05-04 10:49:50 +02:00

196 lines
5.1 KiB
Go

package cli
import (
"bytes"
"fmt"
"os"
"strings"
"testing"
"git.nakama.town/fmartingr/dharma/pkg/reporter"
"git.nakama.town/fmartingr/dharma/pkg/scraper"
"git.nakama.town/fmartingr/dharma/pkg/testutil"
"github.com/spf13/cobra"
)
// TestIntegrationTestsite runs the CLI end-to-end against a local testsite
// server and verifies that each output format reports the known broken link.
func TestIntegrationTestsite(t *testing.T) {
	// Skip this test if it's a short test run (e.g. quick CI passes).
	if testing.Short() {
		t.Skip("Skipping integration test in short mode")
	}

	// Start the testsite server; cleanup shuts it down when the test ends.
	serverURL, cleanup, err := testutil.StartTestsiteServer()
	if err != nil {
		t.Fatalf("Failed to start test server: %v", err)
	}
	defer cleanup()

	// Test cases for CLI invocation with different flags.
	testCases := []struct {
		name       string
		args       []string
		wantOutput bool
		wantErrors bool
	}{
		{
			name:       "Basic scan",
			args:       []string{serverURL},
			wantOutput: true,
			wantErrors: true,
		},
		{
			name:       "JSON output",
			args:       []string{"--format", "json", serverURL},
			wantOutput: true,
			wantErrors: true,
		},
		{
			name:       "Internal links only",
			args:       []string{"--internal-only", serverURL},
			wantOutput: true,
			wantErrors: true,
		},
		{
			name:       "Custom depth",
			args:       []string{"--depth", "1", serverURL},
			wantOutput: true,
			wantErrors: true,
		},
		{
			name:       "Custom concurrency",
			args:       []string{"--concurrency", "1", serverURL},
			wantOutput: true,
			wantErrors: true,
		},
		{
			name:       "Custom timeout",
			args:       []string{"--timeout", "5", serverURL},
			wantOutput: true,
			wantErrors: true,
		},
		{
			name:       "CSV output",
			args:       []string{"--format", "csv", serverURL},
			wantOutput: true,
			wantErrors: true,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			// Temporarily replace stdout with a pipe to capture output.
			oldStdout := os.Stdout
			r, w, err := os.Pipe()
			if err != nil {
				t.Fatalf("Failed to create pipe: %v", err)
			}
			os.Stdout = w

			// Drain the read end concurrently: if we only read after
			// Execute() returns, a report larger than the pipe buffer
			// (typically 64 KiB) would block the writer and deadlock.
			outCh := make(chan string, 1)
			go func() {
				var buf bytes.Buffer
				// Error ignored: a broken pipe here just truncates the
				// captured output, which the assertions below will catch.
				_, _ = buf.ReadFrom(r)
				outCh <- buf.String()
			}()

			// Save and restore global state (rootCmd, os.Args, os.Stdout)
			// so nothing leaks between test cases.
			origRootCmd := rootCmd
			origArgs := os.Args
			defer func() {
				rootCmd = origRootCmd
				os.Args = origArgs
				os.Stdout = oldStdout
			}()

			// Reset rootCmd for this test and set the CLI arguments.
			rootCmd = createRootCmd()
			os.Args = append([]string{"dharma"}, tc.args...)

			// Execute the CLI.
			execErr := Execute()

			// Close the write end so the drain goroutine sees EOF, and
			// restore stdout before any t.Fatalf output.
			w.Close()
			os.Stdout = oldStdout
			output := <-outCh

			if execErr != nil {
				t.Fatalf("Failed to execute command: %v", execErr)
			}

			// Check that we got any output at all.
			if tc.wantOutput && output == "" {
				t.Errorf("Expected output but got none")
			}

			// Check that the known broken link was reported.
			if tc.wantErrors {
				if !strings.Contains(output, "not_found.html") {
					t.Errorf("Expected not_found.html to be reported as broken in output")
				}
			}

			// Format-specific sanity checks.
			if strings.Contains(tc.name, "JSON") {
				if !strings.Contains(output, `"url":`) {
					t.Errorf("Expected JSON output with 'url' field")
				}
			} else if strings.Contains(tc.name, "CSV") {
				if !strings.Contains(output, "Status,Type,URL,Source URL,Error") {
					t.Errorf("Expected CSV header in output")
				}
			}
		})
	}
}
// createRootCmd builds a fresh instance of the root command so tests can
// reset global command state between runs.
func createRootCmd() *cobra.Command {
	root := &cobra.Command{
		Use:   "dharma [URL]",
		Short: "Scrape websites and check for broken links and references",
		Long:  `Dharma is a website link checker tool that crawls a website to find broken links, images, CSS references, and more. It generates a report of all issues found.`,
		Args:  cobra.ExactArgs(1),
		RunE: func(cmd *cobra.Command, args []string) error {
			target := args[0]

			// A status message is printed only for the pretty format; every
			// other format keeps stdout machine-readable and forces verbose off.
			switch format {
			case "pretty":
				if strings.HasPrefix(target, "file://") {
					fmt.Printf("Scanning local directory: %s\n", strings.TrimPrefix(target, "file://"))
				} else {
					fmt.Printf("Scanning website: %s\n", target)
				}
			default:
				verbose = false
			}

			// Build the scraper from the flag-bound package globals.
			sc := scraper.New(
				scraper.WithConcurrency(concurrency),
				scraper.WithDepth(depth),
				scraper.WithTimeout(timeout),
				scraper.WithVerbose(verbose),
				scraper.WithInternalOnly(internalOnly),
			)

			// Crawl the target and collect results.
			results, err := sc.Scan(target)
			if err != nil {
				return err
			}

			// Render the results with the requested reporter.
			rep, err := reporter.New(format)
			if err != nil {
				return err
			}
			return rep.Generate(results, os.Stdout)
		},
	}

	// Bind flags to the package-level option variables.
	root.Flags().StringVarP(&format, "format", "f", "pretty", "Output format (pretty, json, csv)")
	root.Flags().IntVarP(&concurrency, "concurrency", "c", 10, "Number of concurrent requests")
	root.Flags().IntVarP(&depth, "depth", "d", 3, "Maximum depth to crawl")
	root.Flags().IntVarP(&timeout, "timeout", "t", 10, "Timeout in seconds for each request")
	root.Flags().BoolVarP(&verbose, "verbose", "v", false, "Verbose output")
	root.Flags().BoolVarP(&internalOnly, "internal-only", "i", false, "Only check internal links (same hostname)")
	return root
}