dharma/pkg/scraper/testsite_test.go
Felipe M. 0ef15167d5
All checks were successful
ci/woodpecker/tag/release Pipeline was successful
initial release
2025-05-04 10:49:50 +02:00

206 lines
5 KiB
Go

package scraper
import (
"strings"
"testing"
"git.nakama.town/fmartingr/dharma/pkg/testutil"
)
// TestTestsiteIntegration crawls the bundled testsite with an internal-only
// scraper and verifies that every known URL is classified correctly as a
// success or an error, and that the reported totals are consistent.
func TestTestsiteIntegration(t *testing.T) {
	// Skip if running short tests
	if testing.Short() {
		t.Skip("Skipping integration test in short mode")
	}
	// Start the testsite server
	serverURL, cleanup, err := testutil.StartTestsiteServer()
	if err != nil {
		t.Fatalf("Failed to start test server: %v", err)
	}
	defer cleanup()
	// Internal-only scraper; modest concurrency/depth is enough for the
	// small fixture site.
	s := New(
		WithConcurrency(2),
		WithDepth(3),
		WithTimeout(5),
		WithVerbose(false),
		WithInternalOnly(true),
	)
	// Run the scraper
	results, err := s.Scan(serverURL)
	if err != nil {
		t.Fatalf("Scraper.Scan failed: %v", err)
	}
	// Verify we have results
	if results == nil {
		t.Fatal("Expected results but got nil")
	}
	// Check that we have the correct base URL
	if results.BaseURL != serverURL {
		t.Errorf("Expected BaseURL to be %s, got %s", serverURL, results.BaseURL)
	}
	// inSuccesses reports whether url appears in results.Successes.
	inSuccesses := func(url string) bool {
		for _, result := range results.Successes {
			if result.URL == url {
				return true
			}
		}
		return false
	}
	// inErrors reports whether url appears in results.Errors.
	inErrors := func(url string) bool {
		for _, result := range results.Errors {
			if result.URL == url {
				return true
			}
		}
		return false
	}
	// URLs that must (true) or must not (false) resolve successfully.
	expectedURLs := map[string]bool{
		"/found.html":        true,
		"/not_found.html":    false,
		"/rel/index.html":    true,
		"/rel/relfound.html": true,
		"/static/style.css":  true,
		"/static/script.js":  true,
	}
	// Check for expected URLs in the results
	for urlPath, shouldExist := range expectedURLs {
		fullURL := serverURL + urlPath
		found := inSuccesses(fullURL)
		if found && !shouldExist {
			t.Errorf("URL %s should not exist but was found in successes", urlPath)
		}
		if !found && shouldExist {
			// If not found in successes, check if it's in errors
			if inErrors(fullURL) {
				t.Errorf("URL %s should exist but was found in errors", urlPath)
			} else {
				t.Errorf("Expected URL %s was not found in results", urlPath)
			}
		}
	}
	// Broken links — a missing page, a missing relative page, and a missing
	// image — must all be reported as errors.
	for _, path := range []string{
		"/not_found.html",
		"/rel/rel_not_found.html",
		"/rel/image-404.jpg",
	} {
		brokenURL := serverURL + path
		if !inErrors(brokenURL) {
			t.Errorf("Expected %s to be in errors but it wasn't", brokenURL)
		}
	}
	// With internalOnly=true no external domain may appear in successes.
	externalLinkCount := 0
	for _, result := range results.Successes {
		if strings.Contains(result.URL, "fmartingr.com") {
			externalLinkCount++
		}
	}
	if externalLinkCount != 0 {
		t.Errorf("Found %d external links but should be 0 with internalOnly=true", externalLinkCount)
	}
	// Verify total count equals successes + errors.
	expectedTotal := len(results.Successes) + len(results.Errors)
	if results.Total != expectedTotal {
		t.Errorf("Expected Total to be %d, got %d", expectedTotal, results.Total)
	}
}
// TestTestsiteWithExternalLinks crawls the testsite with external links
// enabled and checks that external URLs are identified as such. Because
// external hosts may be unreachable in CI, their presence is logged rather
// than asserted.
func TestTestsiteWithExternalLinks(t *testing.T) {
	if testing.Short() {
		t.Skip("Skipping integration test in short mode")
	}
	// Spin up the local fixture site.
	serverURL, cleanup, err := testutil.StartTestsiteServer()
	if err != nil {
		t.Fatalf("Failed to start test server: %v", err)
	}
	defer cleanup()
	// External links allowed; shallow depth keeps the crawl small.
	s := New(
		WithConcurrency(2),
		WithDepth(1), // Lower depth for external links test
		WithTimeout(5),
		WithVerbose(false),
		WithInternalOnly(false), // Allow external links
	)
	results, err := s.Scan(serverURL)
	if err != nil {
		t.Fatalf("Scraper.Scan failed: %v", err)
	}
	var (
		sawExternalSuccess bool
		sawExternalError   bool
	)
	// Look for at least one reachable external link in the successes.
	for _, res := range results.Successes {
		if res.IsExternal && strings.Contains(res.URL, "fmartingr.com") {
			sawExternalSuccess = true
			break
		}
	}
	// Look for the deliberately-broken external link in the errors.
	for _, res := range results.Errors {
		if res.IsExternal && strings.Contains(res.URL, "e3H7iaV685rbH7R5lBNxgpietP7JTnMeknmi9SNAEUT4XSiH2sET6ixAcjhy4CAi") {
			sawExternalError = true
			break
		}
	}
	// We don't actually hit external URLs in tests, so we can't assert on
	// them, but we can check that they're properly identified.
	if !sawExternalSuccess {
		t.Log("No successful external links found in test - this is expected in CI/isolated test environments")
	}
	if !sawExternalError {
		t.Log("No broken external links found in test - this is expected in CI/isolated test environments")
	}
}