package cli
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
"os"
|
|
"strings"
|
|
"testing"
|
|
|
|
"git.nakama.town/fmartingr/dharma/pkg/reporter"
|
|
"git.nakama.town/fmartingr/dharma/pkg/scraper"
|
|
"git.nakama.town/fmartingr/dharma/pkg/testutil"
|
|
"github.com/spf13/cobra"
|
|
)
|
|
|
|
func TestIntegrationTestsite(t *testing.T) {
|
|
// Skip this test if running in CI environment or if it's a short test run
|
|
if testing.Short() {
|
|
t.Skip("Skipping integration test in short mode")
|
|
}
|
|
|
|
// Start the testsite server
|
|
serverURL, cleanup, err := testutil.StartTestsiteServer()
|
|
if err != nil {
|
|
t.Fatalf("Failed to start test server: %v", err)
|
|
}
|
|
defer cleanup()
|
|
|
|
// Test cases for CLI invocation with different flags
|
|
testCases := []struct {
|
|
name string
|
|
args []string
|
|
wantOutput bool
|
|
wantErrors bool
|
|
}{
|
|
{
|
|
name: "Basic scan",
|
|
args: []string{serverURL},
|
|
wantOutput: true,
|
|
wantErrors: true,
|
|
},
|
|
{
|
|
name: "JSON output",
|
|
args: []string{"--format", "json", serverURL},
|
|
wantOutput: true,
|
|
wantErrors: true,
|
|
},
|
|
{
|
|
name: "Internal links only",
|
|
args: []string{"--internal-only", serverURL},
|
|
wantOutput: true,
|
|
wantErrors: true,
|
|
},
|
|
{
|
|
name: "Custom depth",
|
|
args: []string{"--depth", "1", serverURL},
|
|
wantOutput: true,
|
|
wantErrors: true,
|
|
},
|
|
{
|
|
name: "Custom concurrency",
|
|
args: []string{"--concurrency", "1", serverURL},
|
|
wantOutput: true,
|
|
wantErrors: true,
|
|
},
|
|
{
|
|
name: "Custom timeout",
|
|
args: []string{"--timeout", "5", serverURL},
|
|
wantOutput: true,
|
|
wantErrors: true,
|
|
},
|
|
{
|
|
name: "CSV output",
|
|
args: []string{"--format", "csv", serverURL},
|
|
wantOutput: true,
|
|
wantErrors: true,
|
|
},
|
|
}
|
|
|
|
for _, tc := range testCases {
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
// Temporarily replace stdout to capture output
|
|
oldStdout := os.Stdout
|
|
r, w, _ := os.Pipe()
|
|
os.Stdout = w
|
|
|
|
// Save and restore rootCmd so state doesn't leak between test cases
|
|
origRootCmd := rootCmd
|
|
defer func() {
|
|
rootCmd = origRootCmd
|
|
os.Stdout = oldStdout
|
|
}()
|
|
|
|
// Reset rootCmd for this test
|
|
rootCmd = createRootCmd()
|
|
|
|
// Set the command line arguments
|
|
os.Args = append([]string{"dharma"}, tc.args...)
|
|
|
|
// Execute the CLI
|
|
err := Execute()
|
|
if err != nil {
|
|
t.Fatalf("Failed to execute command: %v", err)
|
|
}
|
|
|
|
// Restore stdout and read the output
|
|
w.Close()
|
|
var buf bytes.Buffer
|
|
_, err = buf.ReadFrom(r)
|
|
if err != nil {
|
|
t.Fatalf("Failed to read output: %v", err)
|
|
}
|
|
output := buf.String()
|
|
|
|
// Check if we got any output
|
|
if tc.wantOutput && output == "" {
|
|
t.Errorf("Expected output but got none")
|
|
}
|
|
|
|
// Check if errors were reported for known broken links
|
|
if tc.wantErrors {
|
|
if !strings.Contains(output, "not_found.html") {
|
|
t.Errorf("Expected not_found.html to be reported as broken in output")
|
|
}
|
|
}
|
|
|
|
// Specific checks for different formats
|
|
if strings.Contains(tc.name, "JSON") {
|
|
if !strings.Contains(output, `"url":`) {
|
|
t.Errorf("Expected JSON output with 'url' field")
|
|
}
|
|
} else if strings.Contains(tc.name, "CSV") {
|
|
if !strings.Contains(output, "Status,Type,URL,Source URL,Error") {
|
|
t.Errorf("Expected CSV header in output")
|
|
}
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
// createRootCmd returns a fresh instance of the root command
|
|
func createRootCmd() *cobra.Command {
|
|
cmd := &cobra.Command{
|
|
Use: "dharma [URL]",
|
|
Short: "Scrape websites and check for broken links and references",
|
|
Long: `Dharma is a website link checker tool that crawls a website to find broken links, images, CSS references, and more. It generates a report of all issues found.`,
|
|
Args: cobra.ExactArgs(1),
|
|
RunE: func(cmd *cobra.Command, args []string) error {
|
|
url := args[0]
|
|
|
|
// Only print status message when using pretty format
|
|
if format == "pretty" {
|
|
if !strings.HasPrefix(url, "file://") {
|
|
fmt.Printf("Scanning website: %s\n", url)
|
|
} else {
|
|
fmt.Printf("Scanning local directory: %s\n", strings.TrimPrefix(url, "file://"))
|
|
}
|
|
} else {
|
|
// Force verbose off for non-pretty formats
|
|
verbose = false
|
|
}
|
|
|
|
// Create a new scraper
|
|
s := scraper.New(
|
|
scraper.WithConcurrency(concurrency),
|
|
scraper.WithDepth(depth),
|
|
scraper.WithTimeout(timeout),
|
|
scraper.WithVerbose(verbose),
|
|
scraper.WithInternalOnly(internalOnly),
|
|
)
|
|
|
|
// Run the scraper
|
|
results, err := s.Scan(url)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Generate report
|
|
r, err := reporter.New(format)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
return r.Generate(results, os.Stdout)
|
|
},
|
|
}
|
|
|
|
cmd.Flags().StringVarP(&format, "format", "f", "pretty", "Output format (pretty, json, csv)")
|
|
cmd.Flags().IntVarP(&concurrency, "concurrency", "c", 10, "Number of concurrent requests")
|
|
cmd.Flags().IntVarP(&depth, "depth", "d", 3, "Maximum depth to crawl")
|
|
cmd.Flags().IntVarP(&timeout, "timeout", "t", 10, "Timeout in seconds for each request")
|
|
cmd.Flags().BoolVarP(&verbose, "verbose", "v", false, "Verbose output")
|
|
cmd.Flags().BoolVarP(&internalOnly, "internal-only", "i", false, "Only check internal links (same hostname)")
|
|
|
|
return cmd
|
|
}
|