diff --git a/internal/plugin/fun/hltb.go b/internal/plugin/fun/hltb.go index 4c92052..7da29e6 100644 --- a/internal/plugin/fun/hltb.go +++ b/internal/plugin/fun/hltb.go @@ -1,12 +1,13 @@ package fun import ( - "bytes" "encoding/json" "fmt" "io" "net/http" + "net/url" "regexp" + "strconv" "strings" "time" @@ -20,25 +21,26 @@ type HLTBPlugin struct { httpClient *http.Client } +// HLTBNextData represents the Next.js data structure from the page +type HLTBNextData struct { + Props struct { + PageProps struct { + SearchResults []HLTBGame `json:"searchResults"` + } `json:"pageProps"` + } `json:"props"` +} + // HLTBGame represents a game from HowLongToBeat type HLTBGame struct { - ID int `json:"game_id"` - Name string `json:"game_name"` - GameAlias string `json:"game_alias"` - GameImage string `json:"game_image"` - CompMain int `json:"comp_main"` - CompPlus int `json:"comp_plus"` - CompComplete int `json:"comp_complete"` - CompAll int `json:"comp_all"` - InvestedCo int `json:"invested_co"` - InvestedMp int `json:"invested_mp"` - CountComp int `json:"count_comp"` - CountSpeedruns int `json:"count_speedruns"` - CountBacklog int `json:"count_backlog"` - CountReview int `json:"count_review"` - ReviewScore int `json:"review_score"` - CountPlaying int `json:"count_playing"` - CountRetired int `json:"count_retired"` + ID int `json:"game_id"` + Name string `json:"game_name"` + GameAlias string `json:"game_alias"` + GameImage string `json:"game_image"` + CompMain int `json:"comp_main"` + CompPlus int `json:"comp_plus"` + CompComplete int `json:"comp_100"` + CompAll int `json:"comp_all"` + ReviewScore int `json:"review_score"` } // NewHLTB creates a new HLTBPlugin instance @@ -130,95 +132,27 @@ func (p *HLTBPlugin) OnMessage(msg *model.Message, config map[string]interface{} return []*model.MessageAction{action} } -// searchGame searches for a game on HowLongToBeat using the API +// searchGame searches for a game on HowLongToBeat using the new web scraping approach func (p *HLTBPlugin) searchGame(gameName string) ([]HLTBGame, error) { - // Only the seek token endpoint works now - return p.searchWithSeekToken(gameName) -} - -// searchWithSeekToken attempts to search using the seek token approach -func (p *HLTBPlugin) searchWithSeekToken(gameName string) ([]HLTBGame, error) { - // Get the seek token from the main page - seekToken, err := p.getSeekToken() - if err != nil { - return nil, fmt.Errorf("failed to get seek token: %w", err) - } - - // Split search terms by words - searchTerms := strings.Fields(gameName) - - // Create search URL with seek token - searchURL := fmt.Sprintf("https://howlongtobeat.com/api/seek/%s", seekToken) - - // Prepare search request - searchRequest := map[string]interface{}{ - "searchType": "games", - "searchTerms": searchTerms, - "searchPage": 1, - "size": 20, - "searchOptions": map[string]interface{}{ - "games": map[string]interface{}{ - "userId": 0, - "platform": "", - "sortCategory": "popular", - "rangeCategory": "main", - "rangeTime": map[string]interface{}{ - "min": nil, - "max": nil, - }, - "gameplay": map[string]interface{}{ - "perspective": "", - "flow": "", - "genre": "", - "difficulty": "", - }, - "rangeYear": map[string]interface{}{ - "min": "", - "max": "", - }, - "modifier": "", - }, - "users": map[string]interface{}{ - "sortCategory": "postcount", - }, - "lists": map[string]interface{}{ - "sortCategory": "follows", - }, - "filter": "", - "sort": 0, - "randomizer": 0, - }, - "useCache": true, - } - - return p.performAPISearch(searchURL, searchRequest) -} - -// performAPISearch performs the actual API search request -func (p *HLTBPlugin) performAPISearch(searchURL string, searchRequest map[string]interface{}) ([]HLTBGame, error) { - // Convert to JSON - jsonData, err := json.Marshal(searchRequest) - if err != nil { - return nil, fmt.Errorf("failed to marshal search request: %w", err) - } + // Create search URL with query parameter + searchURL := fmt.Sprintf("https://howlongtobeat.com/?q=%s", url.QueryEscape(gameName)) // Create HTTP request - req, err := http.NewRequest("POST", searchURL, bytes.NewBuffer(jsonData)) + req, err := http.NewRequest("GET", searchURL, nil) if err != nil { return nil, fmt.Errorf("failed to create request: %w", err) } - // Set headers to match the working curl request - req.Header.Set("Accept", "*/*") + // Set headers to match browser request + req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8") req.Header.Set("Accept-Language", "en-US,en;q=0.9") req.Header.Set("Cache-Control", "no-cache") - req.Header.Set("Content-Type", "application/json") - req.Header.Set("Origin", "https://howlongtobeat.com") req.Header.Set("Pragma", "no-cache") - req.Header.Set("Referer", "https://howlongtobeat.com/") - req.Header.Set("Sec-Fetch-Dest", "empty") - req.Header.Set("Sec-Fetch-Mode", "cors") - req.Header.Set("Sec-Fetch-Site", "same-origin") + req.Header.Set("Sec-Fetch-Dest", "document") + req.Header.Set("Sec-Fetch-Mode", "navigate") + req.Header.Set("Sec-Fetch-Site", "none") + req.Header.Set("Sec-Fetch-User", "?1") + req.Header.Set("Upgrade-Insecure-Requests", "1") req.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36") // Send request @@ -231,7 +165,7 @@ func (p *HLTBPlugin) performAPISearch(searchURL string, searchRequest map[string }() if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("API returned status code: %d", resp.StatusCode) + return nil, fmt.Errorf("HTTP request failed with status code: %d", resp.StatusCode) } // Read response body @@ -240,24 +174,105 @@ func (p *HLTBPlugin) performAPISearch(searchURL string, searchRequest map[string return nil, fmt.Errorf("failed to read response: %w", err) } - // Parse response - var searchResponse struct { - Color string `json:"color"` - Title string `json:"title"` - Category string `json:"category"` - Count int `json:"count"` - Pagecurrent int `json:"pagecurrent"` - Pagesize int `json:"pagesize"` - Pagetotal int `json:"pagetotal"` - SearchTerm string `json:"searchTerm"` - SearchResults []HLTBGame `json:"data"` + // Parse games from HTML + games, err := p.parseGamesFromHTML(string(body)) + if err != nil { + return nil, fmt.Errorf("failed to parse games from HTML: %w", err) } - if err := json.Unmarshal(body, &searchResponse); err != nil { - return nil, fmt.Errorf("failed to parse response: %w", err) + return games, nil +} + +// parseGamesFromHTML extracts game information from the HTML response +func (p *HLTBPlugin) parseGamesFromHTML(html string) ([]HLTBGame, error) { + var games []HLTBGame + + // First, try to parse from the __NEXT_DATA__ JSON + jsonDataPattern := `` + jsonRegex := regexp.MustCompile(jsonDataPattern) + jsonMatches := jsonRegex.FindStringSubmatch(html) + + if len(jsonMatches) > 1 { + var nextData HLTBNextData + if err := json.Unmarshal([]byte(jsonMatches[1]), &nextData); err == nil { + // Try to extract search results from the Next.js data + if nextData.Props.PageProps.SearchResults != nil { + return nextData.Props.PageProps.SearchResults, nil + } + } } - return searchResponse.SearchResults, nil + // Fallback to HTML parsing for game cards + // Pattern to match game cards in the HTML + gameCardPattern := `
  • ]*>.*?href="/game/(\d+)"[^>]*>([^<]+).*?
  • ` + gameCardRegex := regexp.MustCompile(gameCardPattern) + gameCards := gameCardRegex.FindAllStringSubmatch(html, -1) + + for _, match := range gameCards { + if len(match) >= 3 { + gameID, err := strconv.Atoi(match[1]) + if err != nil { + continue + } + + gameName := strings.TrimSpace(match[2]) + if gameName == "" { + continue + } + + // Extract completion times from the game card + gameCardHTML := match[0] + compMain := p.extractTimeFromHTML(gameCardHTML, "Main Story") + compPlus := p.extractTimeFromHTML(gameCardHTML, "Main + Extra") + compComplete := p.extractTimeFromHTML(gameCardHTML, "Completionist") + + // Extract game image + gameImage := p.extractGameImage(gameCardHTML) + + game := HLTBGame{ + ID: gameID, + Name: gameName, + GameImage: gameImage, + CompMain: compMain, + CompPlus: compPlus, + CompComplete: compComplete, + } + + games = append(games, game) + } + } + + return games, nil +} + +// extractTimeFromHTML extracts time values from HTML content +func (p *HLTBPlugin) extractTimeFromHTML(html, category string) int { + // Pattern to match time values after category labels + pattern := fmt.Sprintf(`%s.*?(\d+(?:½)?)\s*Hours?`, regexp.QuoteMeta(category)) + timeRegex := regexp.MustCompile(pattern) + match := timeRegex.FindStringSubmatch(html) + + if len(match) > 1 { + timeStr := strings.ReplaceAll(match[1], "½", ".5") + if timeFloat, err := strconv.ParseFloat(timeStr, 64); err == nil { + return int(timeFloat * 3600) // Convert to seconds + } + } + + return 0 +} + +// extractGameImage extracts the game image from HTML content +func (p *HLTBPlugin) extractGameImage(html string) string { + imagePattern := `src="https://howlongtobeat\.com/games/([^"]+)"` + imageRegex := regexp.MustCompile(imagePattern) + match := imageRegex.FindStringSubmatch(html) + + if len(match) > 1 { + return match[1] + } + + return "" } // formatGameInfo formats game information for display @@ -324,202 +339,6 @@ func (p *HLTBPlugin) getFullImageURL(imagePath string) string { return fmt.Sprintf("https://howlongtobeat.com/games/%s", imagePath) } -// getSeekToken retrieves the seek token from HowLongToBeat -func (p *HLTBPlugin) getSeekToken() (string, error) { - // Get the main page to extract buildId - req, err := http.NewRequest("GET", "https://howlongtobeat.com", nil) - if err != nil { - return "", fmt.Errorf("failed to create token request: %w", err) - } - - req.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36") - - resp, err := p.httpClient.Do(req) - if err != nil { - return "", fmt.Errorf("failed to fetch token: %w", err) - } - defer func() { - _ = resp.Body.Close() - }() - - body, err := io.ReadAll(resp.Body) - if err != nil { - return "", fmt.Errorf("failed to read token response: %w", err) - } - - bodyStr := string(body) - - // First, try to find buildId in the __NEXT_DATA__ or page source - buildIdPatterns := []string{ - `"buildId":"([a-zA-Z0-9_-]+)"`, - `buildId":"([a-zA-Z0-9_-]+)"`, - `/_next/static/([a-zA-Z0-9_-]+)/_buildManifest`, - } - - for _, pattern := range buildIdPatterns { - re := regexp.MustCompile(pattern) - matches := re.FindStringSubmatch(bodyStr) - if len(matches) > 1 { - buildId := matches[1] - // Now try to get the seek token from the JavaScript files using buildId - if token, err := p.getSeekTokenFromBuildId(buildId); err == nil { - return token, nil - } - } - } - - // If we can't find buildId, look for direct seek token patterns - seekPatterns := []string{ - `/api/seek/([a-f0-9]{16})`, - `"seek/([a-f0-9]{16})"`, - `api/seek/([a-f0-9]{16})`, - `seek/([a-f0-9]{12,})`, - } - - for _, pattern := range seekPatterns { - re := regexp.MustCompile(pattern) - matches := re.FindStringSubmatch(bodyStr) - if len(matches) > 1 { - return matches[1], nil - } - } - - // Last resort: try multiple known working tokens - knownTokens := []string{ - "6e17f7a193ef3188", // From your curl example - "d4b2e330db04dbf3", // Common fallback - } - - for _, token := range knownTokens { - if p.testSeekToken(token) { - return token, nil - } - } - - // Generate a token as last resort - return p.generateSeekToken(), nil -} - -// getSeekTokenFromBuildId attempts to extract seek token from build-specific files -func (p *HLTBPlugin) getSeekTokenFromBuildId(buildId string) (string, error) { - // Common build file patterns where seek tokens might be stored - fileURLs := []string{ - fmt.Sprintf("https://howlongtobeat.com/_next/static/%s/_buildManifest.js", buildId), - fmt.Sprintf("https://howlongtobeat.com/_next/static/%s/_ssgManifest.js", buildId), - fmt.Sprintf("https://howlongtobeat.com/_next/static/chunks/pages/index-%s.js", buildId[:12]), - } - - for _, fileURL := range fileURLs { - if token, err := p.extractSeekTokenFromFile(fileURL); err == nil && token != "" { - return token, nil - } - } - - return "", fmt.Errorf("no seek token found in build files") -} - -// extractSeekTokenFromFile downloads and searches a file for seek token -func (p *HLTBPlugin) extractSeekTokenFromFile(fileURL string) (string, error) { - req, err := http.NewRequest("GET", fileURL, nil) - if err != nil { - return "", err - } - - req.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36") - - resp, err := p.httpClient.Do(req) - if err != nil { - return "", err - } - defer func() { - _ = resp.Body.Close() - }() - - if resp.StatusCode != http.StatusOK { - return "", fmt.Errorf("failed to fetch file: %d", resp.StatusCode) - } - - body, err := io.ReadAll(resp.Body) - if err != nil { - return "", err - } - - bodyStr := string(body) - patterns := []string{ - `seek/([a-f0-9]{16})`, - `"([a-f0-9]{16})"`, - `'([a-f0-9]{16})'`, - } - - for _, pattern := range patterns { - re := regexp.MustCompile(pattern) - matches := re.FindStringSubmatch(bodyStr) - if len(matches) > 1 { - return matches[1], nil - } - } - - return "", fmt.Errorf("no seek token found in file") -} - -// testSeekToken tests if a seek token works by making a simple API call -func (p *HLTBPlugin) testSeekToken(token string) bool { - searchURL := fmt.Sprintf("https://howlongtobeat.com/api/seek/%s", token) - searchRequest := map[string]interface{}{ - "searchType": "games", - "searchTerms": []string{"test"}, - "searchPage": 1, - "size": 1, - "searchOptions": map[string]interface{}{ - "games": map[string]interface{}{ - "userId": 0, - "platform": "", - "sortCategory": "popular", - "rangeCategory": "main", - "rangeTime": map[string]interface{}{ - "min": nil, - "max": nil, - }, - "gameplay": map[string]interface{}{ - "perspective": "", - "flow": "", - "genre": "", - "difficulty": "", - }, - "rangeYear": map[string]interface{}{ - "min": "", - "max": "", - }, - "modifier": "", - }, - "users": map[string]interface{}{ - "sortCategory": "postcount", - }, - "lists": map[string]interface{}{ - "sortCategory": "follows", - }, - "filter": "", - "sort": 0, - "randomizer": 0, - }, - "useCache": true, - } - - // Test the token with a simple search - if _, err := p.performAPISearch(searchURL, searchRequest); err == nil { - return true - } - return false -} - -// generateSeekToken generates a seek token based on current time -func (p *HLTBPlugin) generateSeekToken() string { - // Use a simple hash-like approach with current timestamp - // This is a fallback approach since the real token generation is unknown - now := time.Now().Unix() - return fmt.Sprintf("%x", now%0xffffffff)[:16] -} - // createErrorResponse creates an error response message func (p *HLTBPlugin) createErrorResponse(msg *model.Message, errorText string) []*model.MessageAction { response := &model.Message{