commit 0ef15167d5
28 changed files with 2789 additions and 0 deletions
669  pkg/scraper/scraper.go  (new normal file)

@@ -0,0 +1,669 @@
package scraper

import (
	"bufio"
	"fmt"
	"net/http"
	"net/url"
	"regexp"
	"strings"
	"sync"
	"time"

	"github.com/PuerkitoBio/goquery"
)

var (
	urlRegexp    = regexp.MustCompile(`url\(['"]?([^'")]+)['"]?\)`)
	importRegexp = regexp.MustCompile(`@import\s+['"]([^'"]+)['"]`)
)

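// The regexes above drive the CSS scanning in parseCSS. For example,
// `background: url("img/bg.png")` yields the capture "img/bg.png" from
// urlRegexp, and `@import "reset.css";` yields "reset.css" from importRegexp.
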
// Result represents a URL check result
type Result struct {
	URL        string `json:"url"`
	SourceURL  string `json:"source_url,omitempty"`
	Status     int    `json:"status"`
	Error      string `json:"error,omitempty"`
	Type       string `json:"type"` // link, image, script, stylesheet, css-import
	IsExternal bool   `json:"is_external"`
}

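// For illustration (the URLs are made up), a failed check marshals to JSON as:
//
//	{"url":"https://example.com/missing","source_url":"https://example.com/","status":404,"error":"HTTP Error: 404 Not Found","type":"link","is_external":false}
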
// Results is a collection of Result
type Results struct {
	BaseURL   string   `json:"base_url"`
	Errors    []Result `json:"errors"`
	Successes []Result `json:"successes"`
	Total     int      `json:"total"`
}

// QueueItem represents a URL to be processed along with its source
type QueueItem struct {
	URL       string
	SourceURL string
	Depth     int
}

// Scraper handles website crawling and link checking
type Scraper struct {
	client       *http.Client
	concurrency  int
	depth        int
	verbose      bool
	internalOnly bool
	visitedURLs  map[string]bool   // URLs visited for crawling
	checkedURLs  map[string]Result // URLs already checked to avoid duplicate requests
	mu           sync.Mutex
}

// Option is a functional option for the Scraper
type Option func(*Scraper)

// WithConcurrency sets the concurrency level
func WithConcurrency(concurrency int) Option {
	return func(s *Scraper) {
		s.concurrency = concurrency
	}
}

// WithDepth sets the maximum crawling depth
func WithDepth(depth int) Option {
	return func(s *Scraper) {
		s.depth = depth
	}
}

// WithTimeout sets the timeout for HTTP requests
func WithTimeout(timeoutSec int) Option {
	return func(s *Scraper) {
		s.client.Timeout = time.Duration(timeoutSec) * time.Second
	}
}

// WithVerbose enables verbose output
func WithVerbose(verbose bool) Option {
	return func(s *Scraper) {
		s.verbose = verbose
	}
}

// WithInternalOnly sets whether to only check internal links
func WithInternalOnly(internalOnly bool) Option {
	return func(s *Scraper) {
		s.internalOnly = internalOnly
	}
}

// New creates a new Scraper with the given options
func New(options ...Option) *Scraper {
	s := &Scraper{
		client: &http.Client{
			Timeout: 10 * time.Second,
			CheckRedirect: func(req *http.Request, via []*http.Request) error {
				if len(via) >= 10 {
					return fmt.Errorf("too many redirects")
				}
				return nil
			},
		},
		concurrency: 10,
		depth:       3,
		visitedURLs: make(map[string]bool),
		checkedURLs: make(map[string]Result),
	}

	for _, option := range options {
		option(s)
	}

	return s
}

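// Example usage, as a minimal sketch (the option values are illustrative and
// this assumes the package is imported under its package name, scraper):
//
//	s := scraper.New(
//		scraper.WithConcurrency(20),
//		scraper.WithDepth(2),
//		scraper.WithTimeout(15),
//	)
//	results, err := s.Scan("https://example.com")
//	if err != nil {
//		// handle the error (e.g. an unparseable base URL)
//	}
//	fmt.Printf("checked %d URLs, %d broken\n", results.Total, len(results.Errors))
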
// Scan starts the website crawling process
func (s *Scraper) Scan(baseURL string) (*Results, error) {
	parsedURL, err := url.Parse(baseURL)
	if err != nil {
		return nil, fmt.Errorf("invalid URL: %w", err)
	}

	// Ensure the base URL has a scheme
	if parsedURL.Scheme == "" {
		parsedURL.Scheme = "https"
		baseURL = parsedURL.String()
	}

	// Store the base hostname for distinguishing internal vs external links
	baseHostname := parsedURL.Hostname()

	results := &Results{
		BaseURL: baseURL,
	}

	// Create a waitgroup to track active workers
	var wg sync.WaitGroup

	// Create a channel to communicate URLs to process
	queue := make(chan QueueItem, 1000)

	// Create a channel to track active URL processing
	activeCount := make(chan int, 1)
	activeCount <- 1 // Start with 1 active URL (the base URL)

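	// Accounting note: activeCount carries the number of URLs that are queued
	// or in flight. Each processURL call decrements it by one when it returns
	// and, for an HTML page, first adds one per link it enqueues, so the count
	// drains to zero only once every reachable URL has been handled.
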
	// Start worker pool
	for range s.concurrency {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for item := range queue {
				s.processURL(item.URL, item.SourceURL, baseHostname, item.Depth, queue, results, activeCount)
			}
		}()
	}

	// Initial URL to process - the source is itself for the initial URL
	queue <- QueueItem{
		URL:       baseURL,
		SourceURL: baseURL,
		Depth:     0,
	}

	// Monitor active count - when it reaches 0, we're done
	go func() {
		for {
			count := <-activeCount
			if count <= 0 {
				close(queue)
				return
			}
			activeCount <- count
		}
	}()

	// Wait for workers to finish
	wg.Wait()

	results.Total = len(results.Errors) + len(results.Successes)
	return results, nil
}

// processURL processes a single URL
func (s *Scraper) processURL(currentURL, sourceURL string, baseHostname string, depth int, queue chan<- QueueItem, results *Results, activeCount chan int) {
	// Decrement active count when done
	defer func() {
		count := <-activeCount
		activeCount <- count - 1
	}()

	// Check if we've already visited this URL (for crawling) or exceeded max depth
	s.mu.Lock()
	if s.visitedURLs[currentURL] || depth > s.depth {
		s.mu.Unlock()
		return
	}
	s.visitedURLs[currentURL] = true

	// If we've already checked this URL's status, reuse the result
	if result, exists := s.checkedURLs[currentURL]; exists {
		// Always use the provided source URL
		result.SourceURL = sourceURL
		s.mu.Unlock()
		s.addResult(results, result)

		// We still need to parse HTML/CSS content even if we've checked the URL before
		// But only if it was successful
		if result.Error == "" && result.Status < 400 {
			// Continue with content parsing...
		} else {
			return
		}
	} else {
		s.mu.Unlock()
	}

	if s.verbose {
		fmt.Printf("Checking: %s (depth: %d) [source: %s]\n", currentURL, depth, sourceURL)
	}

	// Parse the current URL
	currentParsed, err := url.Parse(currentURL)
	if err != nil {
		result := Result{
			URL:        currentURL,
			SourceURL:  sourceURL,
			Error:      fmt.Sprintf("Invalid URL: %v", err),
			Type:       "link",
			IsExternal: false,
		}
		s.mu.Lock()
		s.checkedURLs[currentURL] = result
		s.mu.Unlock()
		s.addResult(results, result)
		return
	}

	// Determine if the URL is internal or external
	isExternal := currentParsed.Hostname() != baseHostname && currentParsed.Hostname() != ""

	// Skip external links processing if internalOnly is set
	if isExternal && s.internalOnly {
		return
	}

	// Process external links differently from internal links
	if isExternal {
		s.checkExternalURL(currentURL, sourceURL, results)
		return
	}

	// Internal URL, check and crawl
	resp, err := s.client.Get(currentURL)
	if err != nil {
		result := Result{
			URL:        currentURL,
			SourceURL:  sourceURL,
			Error:      err.Error(),
			Type:       "link",
			IsExternal: isExternal,
		}
		s.mu.Lock()
		s.checkedURLs[currentURL] = result
		s.mu.Unlock()
		s.addResult(results, result)
		return
	}
	defer resp.Body.Close()

	// Add the result
	var result Result
	if resp.StatusCode >= 400 {
		result = Result{
			URL:        currentURL,
			SourceURL:  sourceURL,
			Status:     resp.StatusCode,
			Error:      fmt.Sprintf("HTTP Error: %s", resp.Status),
			Type:       "link",
			IsExternal: isExternal,
		}
		s.mu.Lock()
		s.checkedURLs[currentURL] = result
		s.mu.Unlock()
		s.addResult(results, result)
		return
	} else {
		result = Result{
			URL:        currentURL,
			SourceURL:  sourceURL,
			Status:     resp.StatusCode,
			Type:       "link",
			IsExternal: isExternal,
		}
		s.mu.Lock()
		s.checkedURLs[currentURL] = result
		s.mu.Unlock()
		s.addResult(results, result)
	}

	// Only parse HTML and CSS from internal links
	contentType := resp.Header.Get("Content-Type")
	if strings.Contains(contentType, "text/html") {
		// Use the base hostname to create a base URL for this site
		baseURL := ""
		if currentParsed.Scheme != "" && currentParsed.Host != "" {
			baseURL = fmt.Sprintf("%s://%s", currentParsed.Scheme, currentParsed.Host)
		}

		foundURLs := s.parseHTML(currentURL, resp, baseURL, baseHostname, depth+1, results)
		// Add all found URLs to the queue and increment active count
		if len(foundURLs) > 0 {
			count := <-activeCount
			count += len(foundURLs)
			activeCount <- count

			for _, url := range foundURLs {
				queue <- QueueItem{
					URL:       url,
					SourceURL: currentURL, // The source URL is the current page we're processing
					Depth:     depth + 1,
				}
			}
		}
	} else if strings.Contains(contentType, "text/css") {
		// Use the base hostname to create a base URL for this site
		baseURL := ""
		if currentParsed.Scheme != "" && currentParsed.Host != "" {
			baseURL = fmt.Sprintf("%s://%s", currentParsed.Scheme, currentParsed.Host)
		}

		s.parseCSS(currentURL, resp, baseURL, baseHostname, results)
	}
}

// parseHTML extracts links and other resources from HTML
func (s *Scraper) parseHTML(sourceURL string, resp *http.Response, baseURL, baseHostname string, _ int, results *Results) []string {
	foundURLs := []string{}

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		s.addResult(results, Result{
			URL:        sourceURL,
			SourceURL:  sourceURL, // Use self as source for error
			Error:      fmt.Sprintf("Failed to parse HTML: %v", err),
			Status:     resp.StatusCode,
			Type:       "html",
			IsExternal: false,
		})
		return foundURLs
	}

	// Process links (a href)
	doc.Find("a").Each(func(i int, sel *goquery.Selection) {
		if href, exists := sel.Attr("href"); exists {
			targetURL := s.resolveURL(href, sourceURL)
			if targetURL != "" {
				linkParsed, err := url.Parse(targetURL)
				if err == nil {
					isExternal := linkParsed.Hostname() != baseHostname && linkParsed.Hostname() != ""

					// Only add internal links to the crawl queue
					if !isExternal {
						foundURLs = append(foundURLs, targetURL)
					} else if !s.internalOnly {
						// Check external links only if internalOnly is false
						s.checkExternalURL(targetURL, sourceURL, results)
					}
				}
			}
		}
	})

	// Process images
	doc.Find("img").Each(func(i int, sel *goquery.Selection) {
		if src, exists := sel.Attr("src"); exists {
			targetURL := s.resolveURL(src, sourceURL)
			if targetURL != "" {
				s.checkResource(targetURL, sourceURL, "image", baseHostname, results)
			}
		}
	})

	// Process stylesheets
	doc.Find("link[rel='stylesheet']").Each(func(i int, sel *goquery.Selection) {
		if href, exists := sel.Attr("href"); exists {
			targetURL := s.resolveURL(href, sourceURL)
			if targetURL != "" {
				s.checkResource(targetURL, sourceURL, "stylesheet", baseHostname, results)
			}
		}
	})

	// Process scripts
	doc.Find("script").Each(func(i int, sel *goquery.Selection) {
		if src, exists := sel.Attr("src"); exists {
			targetURL := s.resolveURL(src, sourceURL)
			if targetURL != "" {
				s.checkResource(targetURL, sourceURL, "script", baseHostname, results)
			}
		}
	})

	return foundURLs
}

// parseCSS extracts URLs from CSS content
func (s *Scraper) parseCSS(sourceURL string, resp *http.Response, baseURL, baseHostname string, results *Results) {
	// Simple regex-based parsing for CSS imports and url() references.
	// This is a simplified approach; a proper CSS parser would be better
	// for production use.
	scanner := bufio.NewScanner(resp.Body)
	for scanner.Scan() {
		line := scanner.Text()

		// Look for url() references
		urlMatches := urlRegexp.FindAllStringSubmatch(line, -1)
		for _, match := range urlMatches {
			if len(match) > 1 {
				// Remove quotes if present
				urlValue := strings.Trim(match[1], "'\"")
				targetURL := s.resolveURL(urlValue, sourceURL)
				if targetURL != "" {
					s.checkResource(targetURL, sourceURL, "css-url", baseHostname, results)
				}
			}
		}

		// Look for @import statements
		importMatches := importRegexp.FindAllStringSubmatch(line, -1)
		for _, match := range importMatches {
			if len(match) > 1 {
				// Remove quotes if present
				urlValue := strings.Trim(match[1], "'\"")
				targetURL := s.resolveURL(urlValue, sourceURL)
				if targetURL != "" {
					s.checkResource(targetURL, sourceURL, "css-import", baseHostname, results)
				}
			}
		}
	}
}

// resolveURL resolves a relative URL against a base URL
func (s *Scraper) resolveURL(href, sourceURL string) string {
	// Skip empty URLs, anchors, and javascript
	if href == "" || strings.HasPrefix(href, "#") || strings.HasPrefix(href, "javascript:") {
		return ""
	}

	// Skip non-HTTP protocols like mailto:, tel:, etc.
	if strings.HasPrefix(href, "mailto:") ||
		strings.HasPrefix(href, "tel:") ||
		strings.HasPrefix(href, "sms:") ||
		strings.HasPrefix(href, "ftp:") ||
		strings.HasPrefix(href, "file:") {
		return ""
	}

	sourceParsed, err := url.Parse(sourceURL)
	if err != nil {
		return ""
	}

	targetParsed, err := url.Parse(href)
	if err != nil {
		return ""
	}

	// If the scheme is not HTTP/HTTPS, skip it
	if targetParsed.Scheme != "" &&
		targetParsed.Scheme != "http" &&
		targetParsed.Scheme != "https" {
		return ""
	}

	resolvedURL := sourceParsed.ResolveReference(targetParsed).String()
	return resolvedURL
}

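// For illustration, resolveURL("/about", "https://example.com/blog/post")
// returns "https://example.com/about", and resolveURL("../img.png",
// "https://example.com/blog/post") returns "https://example.com/img.png",
// following net/url's ResolveReference semantics.
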
// checkExternalURL performs a HEAD request to check external URLs
func (s *Scraper) checkExternalURL(targetURL, sourceURL string, results *Results) {
	// Skip external links if internalOnly is set
	if s.internalOnly {
		return
	}

	// Check if URL was already checked
	s.mu.Lock()
	if result, exists := s.checkedURLs[targetURL]; exists {
		// Add the existing result with the current source URL
		result.SourceURL = sourceURL
		s.mu.Unlock()
		s.addResult(results, result)
		return
	}
	s.mu.Unlock()

	req, err := http.NewRequest("HEAD", targetURL, nil)
	if err != nil {
		result := Result{
			URL:        targetURL,
			SourceURL:  sourceURL,
			Error:      fmt.Sprintf("Invalid URL: %v", err),
			Type:       "external-link",
			IsExternal: true,
		}
		s.mu.Lock()
		s.checkedURLs[targetURL] = result
		s.mu.Unlock()
		s.addResult(results, result)
		return
	}

	resp, err := s.client.Do(req)
	if err != nil {
		result := Result{
			URL:        targetURL,
			SourceURL:  sourceURL,
			Error:      err.Error(),
			Type:       "external-link",
			IsExternal: true,
		}
		s.mu.Lock()
		s.checkedURLs[targetURL] = result
		s.mu.Unlock()
		s.addResult(results, result)
		return
	}
	defer resp.Body.Close()

	var result Result
	if resp.StatusCode >= 400 {
		result = Result{
			URL:        targetURL,
			SourceURL:  sourceURL,
			Status:     resp.StatusCode,
			Error:      fmt.Sprintf("HTTP Error: %s", resp.Status),
			Type:       "external-link",
			IsExternal: true,
		}
	} else {
		result = Result{
			URL:        targetURL,
			SourceURL:  sourceURL,
			Status:     resp.StatusCode,
			Type:       "external-link",
			IsExternal: true,
		}
	}

	s.mu.Lock()
	s.checkedURLs[targetURL] = result
	s.mu.Unlock()
	s.addResult(results, result)
}

// checkResource checks if a resource URL is accessible
func (s *Scraper) checkResource(targetURL, sourceURL, resourceType, baseHostname string, results *Results) {
	// Parse the target URL to determine if it's internal or external
	targetParsed, err := url.Parse(targetURL)
	if err != nil {
		result := Result{
			URL:        targetURL,
			SourceURL:  sourceURL,
			Error:      fmt.Sprintf("Invalid URL: %v", err),
			Type:       resourceType,
			IsExternal: false,
		}
		s.mu.Lock()
		s.checkedURLs[targetURL] = result
		s.mu.Unlock()
		s.addResult(results, result)
		return
	}

	isExternal := targetParsed.Hostname() != baseHostname && targetParsed.Hostname() != ""

	// Skip external resources if internalOnly is set
	if isExternal && s.internalOnly {
		return
	}

	// Check if URL was already checked
	s.mu.Lock()
	if result, exists := s.checkedURLs[targetURL]; exists {
		// Update with current source and type if needed
		result.SourceURL = sourceURL
		result.Type = resourceType
		s.mu.Unlock()
		s.addResult(results, result)
		return
	}
	s.mu.Unlock()

	req, err := http.NewRequest("HEAD", targetURL, nil)
	if err != nil {
		result := Result{
			URL:        targetURL,
			SourceURL:  sourceURL,
			Error:      fmt.Sprintf("Invalid URL: %v", err),
			Type:       resourceType,
			IsExternal: isExternal,
		}
		s.mu.Lock()
		s.checkedURLs[targetURL] = result
		s.mu.Unlock()
		s.addResult(results, result)
		return
	}

	resp, err := s.client.Do(req)
	if err != nil {
		result := Result{
			URL:        targetURL,
			SourceURL:  sourceURL,
			Error:      err.Error(),
			Type:       resourceType,
			IsExternal: isExternal,
		}
		s.mu.Lock()
		s.checkedURLs[targetURL] = result
		s.mu.Unlock()
		s.addResult(results, result)
		return
	}
	defer resp.Body.Close()

	var result Result
	if resp.StatusCode >= 400 {
		result = Result{
			URL:        targetURL,
			SourceURL:  sourceURL,
			Status:     resp.StatusCode,
			Error:      fmt.Sprintf("HTTP Error: %s", resp.Status),
			Type:       resourceType,
			IsExternal: isExternal,
		}
	} else {
		result = Result{
			URL:        targetURL,
			SourceURL:  sourceURL,
			Status:     resp.StatusCode,
			Type:       resourceType,
			IsExternal: isExternal,
		}
	}

	s.mu.Lock()
	s.checkedURLs[targetURL] = result
	s.mu.Unlock()
	s.addResult(results, result)
}

// addResult adds a result to the appropriate list
func (s *Scraper) addResult(results *Results, result Result) {
	s.mu.Lock()
	defer s.mu.Unlock()

	if result.Error != "" {
		results.Errors = append(results.Errors, result)
	} else {
		results.Successes = append(results.Successes, result)
	}
}