diff --git a/internal/plugin/fun/hltb.go b/internal/plugin/fun/hltb.go index 7da29e6..4c92052 100644 --- a/internal/plugin/fun/hltb.go +++ b/internal/plugin/fun/hltb.go @@ -1,13 +1,12 @@ package fun import ( + "bytes" "encoding/json" "fmt" "io" "net/http" - "net/url" "regexp" - "strconv" "strings" "time" @@ -21,26 +20,25 @@ type HLTBPlugin struct { httpClient *http.Client } -// HLTBNextData represents the Next.js data structure from the page -type HLTBNextData struct { - Props struct { - PageProps struct { - SearchResults []HLTBGame `json:"searchResults"` - } `json:"pageProps"` - } `json:"props"` -} - // HLTBGame represents a game from HowLongToBeat type HLTBGame struct { - ID int `json:"game_id"` - Name string `json:"game_name"` - GameAlias string `json:"game_alias"` - GameImage string `json:"game_image"` - CompMain int `json:"comp_main"` - CompPlus int `json:"comp_plus"` - CompComplete int `json:"comp_100"` - CompAll int `json:"comp_all"` - ReviewScore int `json:"review_score"` + ID int `json:"game_id"` + Name string `json:"game_name"` + GameAlias string `json:"game_alias"` + GameImage string `json:"game_image"` + CompMain int `json:"comp_main"` + CompPlus int `json:"comp_plus"` + CompComplete int `json:"comp_complete"` + CompAll int `json:"comp_all"` + InvestedCo int `json:"invested_co"` + InvestedMp int `json:"invested_mp"` + CountComp int `json:"count_comp"` + CountSpeedruns int `json:"count_speedruns"` + CountBacklog int `json:"count_backlog"` + CountReview int `json:"count_review"` + ReviewScore int `json:"review_score"` + CountPlaying int `json:"count_playing"` + CountRetired int `json:"count_retired"` } // NewHLTB creates a new HLTBPlugin instance @@ -132,27 +130,95 @@ func (p *HLTBPlugin) OnMessage(msg *model.Message, config map[string]interface{} return []*model.MessageAction{action} } -// searchGame searches for a game on HowLongToBeat using the new web scraping approach +// searchGame searches for a game on HowLongToBeat using the API func (p *HLTBPlugin) searchGame(gameName string) ([]HLTBGame, error) { - // Create search URL with query parameter - searchURL := fmt.Sprintf("https://howlongtobeat.com/?q=%s", url.QueryEscape(gameName)) + // Only the seek token endpoint works now + return p.searchWithSeekToken(gameName) +} + +// searchWithSeekToken attempts to search using the seek token approach +func (p *HLTBPlugin) searchWithSeekToken(gameName string) ([]HLTBGame, error) { + // Get the seek token from the main page + seekToken, err := p.getSeekToken() + if err != nil { + return nil, fmt.Errorf("failed to get seek token: %w", err) + } + + // Split search terms by words + searchTerms := strings.Fields(gameName) + + // Create search URL with seek token + searchURL := fmt.Sprintf("https://howlongtobeat.com/api/seek/%s", seekToken) + + // Prepare search request + searchRequest := map[string]interface{}{ + "searchType": "games", + "searchTerms": searchTerms, + "searchPage": 1, + "size": 20, + "searchOptions": map[string]interface{}{ + "games": map[string]interface{}{ + "userId": 0, + "platform": "", + "sortCategory": "popular", + "rangeCategory": "main", + "rangeTime": map[string]interface{}{ + "min": nil, + "max": nil, + }, + "gameplay": map[string]interface{}{ + "perspective": "", + "flow": "", + "genre": "", + "difficulty": "", + }, + "rangeYear": map[string]interface{}{ + "min": "", + "max": "", + }, + "modifier": "", + }, + "users": map[string]interface{}{ + "sortCategory": "postcount", + }, + "lists": map[string]interface{}{ + "sortCategory": "follows", + }, + "filter": "", + "sort": 0, + "randomizer": 0, + }, + "useCache": true, + } + + return p.performAPISearch(searchURL, searchRequest) +} + +// performAPISearch performs the actual API search request +func (p *HLTBPlugin) performAPISearch(searchURL string, searchRequest map[string]interface{}) ([]HLTBGame, error) { + // Convert to JSON + jsonData, err := json.Marshal(searchRequest) + if err != nil { + return nil, fmt.Errorf("failed to marshal search request: %w", err) + } // Create HTTP request - req, err := http.NewRequest("GET", searchURL, nil) + req, err := http.NewRequest("POST", searchURL, bytes.NewBuffer(jsonData)) if err != nil { return nil, fmt.Errorf("failed to create request: %w", err) } - // Set headers to match browser request - req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8") + // Set headers to match the working curl request + req.Header.Set("Accept", "*/*") req.Header.Set("Accept-Language", "en-US,en;q=0.9") req.Header.Set("Cache-Control", "no-cache") + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Origin", "https://howlongtobeat.com") req.Header.Set("Pragma", "no-cache") - req.Header.Set("Sec-Fetch-Dest", "document") - req.Header.Set("Sec-Fetch-Mode", "navigate") - req.Header.Set("Sec-Fetch-Site", "none") - req.Header.Set("Sec-Fetch-User", "?1") - req.Header.Set("Upgrade-Insecure-Requests", "1") + req.Header.Set("Referer", "https://howlongtobeat.com/") + req.Header.Set("Sec-Fetch-Dest", "empty") + req.Header.Set("Sec-Fetch-Mode", "cors") + req.Header.Set("Sec-Fetch-Site", "same-origin") req.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36") // Send request @@ -165,7 +231,7 @@ func (p *HLTBPlugin) searchGame(gameName string) ([]HLTBGame, error) { }() if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("HTTP request failed with status code: %d", resp.StatusCode) + return nil, fmt.Errorf("API returned status code: %d", resp.StatusCode) } // Read response body @@ -174,105 +240,24 @@ func (p *HLTBPlugin) searchGame(gameName string) ([]HLTBGame, error) { return nil, fmt.Errorf("failed to read response: %w", err) } - // Parse games from HTML - games, err := p.parseGamesFromHTML(string(body)) - if err != nil { - return nil, fmt.Errorf("failed to parse games from HTML: %w", err) + // Parse response + var searchResponse struct { + Color string `json:"color"` + Title string `json:"title"` + Category string `json:"category"` + Count int `json:"count"` + Pagecurrent int `json:"pagecurrent"` + Pagesize int `json:"pagesize"` + Pagetotal int `json:"pagetotal"` + SearchTerm string `json:"searchTerm"` + SearchResults []HLTBGame `json:"data"` } - return games, nil -} - -// parseGamesFromHTML extracts game information from the HTML response -func (p *HLTBPlugin) parseGamesFromHTML(html string) ([]HLTBGame, error) { - var games []HLTBGame - - // First, try to parse from the __NEXT_DATA__ JSON - jsonDataPattern := `` - jsonRegex := regexp.MustCompile(jsonDataPattern) - jsonMatches := jsonRegex.FindStringSubmatch(html) - - if len(jsonMatches) > 1 { - var nextData HLTBNextData - if err := json.Unmarshal([]byte(jsonMatches[1]), &nextData); err == nil { - // Try to extract search results from the Next.js data - if nextData.Props.PageProps.SearchResults != nil { - return nextData.Props.PageProps.SearchResults, nil - } - } + if err := json.Unmarshal(body, &searchResponse); err != nil { + return nil, fmt.Errorf("failed to parse response: %w", err) } - // Fallback to HTML parsing for game cards - // Pattern to match game cards in the HTML - gameCardPattern := `
  • ]*>.*?href="/game/(\d+)"[^>]*>([^<]+).*?
  • ` - gameCardRegex := regexp.MustCompile(gameCardPattern) - gameCards := gameCardRegex.FindAllStringSubmatch(html, -1) - - for _, match := range gameCards { - if len(match) >= 3 { - gameID, err := strconv.Atoi(match[1]) - if err != nil { - continue - } - - gameName := strings.TrimSpace(match[2]) - if gameName == "" { - continue - } - - // Extract completion times from the game card - gameCardHTML := match[0] - compMain := p.extractTimeFromHTML(gameCardHTML, "Main Story") - compPlus := p.extractTimeFromHTML(gameCardHTML, "Main + Extra") - compComplete := p.extractTimeFromHTML(gameCardHTML, "Completionist") - - // Extract game image - gameImage := p.extractGameImage(gameCardHTML) - - game := HLTBGame{ - ID: gameID, - Name: gameName, - GameImage: gameImage, - CompMain: compMain, - CompPlus: compPlus, - CompComplete: compComplete, - } - - games = append(games, game) - } - } - - return games, nil -} - -// extractTimeFromHTML extracts time values from HTML content -func (p *HLTBPlugin) extractTimeFromHTML(html, category string) int { - // Pattern to match time values after category labels - pattern := fmt.Sprintf(`%s.*?(\d+(?:½)?)\s*Hours?`, regexp.QuoteMeta(category)) - timeRegex := regexp.MustCompile(pattern) - match := timeRegex.FindStringSubmatch(html) - - if len(match) > 1 { - timeStr := strings.ReplaceAll(match[1], "½", ".5") - if timeFloat, err := strconv.ParseFloat(timeStr, 64); err == nil { - return int(timeFloat * 3600) // Convert to seconds - } - } - - return 0 -} - -// extractGameImage extracts the game image from HTML content -func (p *HLTBPlugin) extractGameImage(html string) string { - imagePattern := `src="https://howlongtobeat\.com/games/([^"]+)"` - imageRegex := regexp.MustCompile(imagePattern) - match := imageRegex.FindStringSubmatch(html) - - if len(match) > 1 { - return match[1] - } - - return "" + return searchResponse.SearchResults, nil } // formatGameInfo formats game information for display @@ -339,6 +324,202 @@ func (p *HLTBPlugin) getFullImageURL(imagePath string) string { return fmt.Sprintf("https://howlongtobeat.com/games/%s", imagePath) } +// getSeekToken retrieves the seek token from HowLongToBeat +func (p *HLTBPlugin) getSeekToken() (string, error) { + // Get the main page to extract buildId + req, err := http.NewRequest("GET", "https://howlongtobeat.com", nil) + if err != nil { + return "", fmt.Errorf("failed to create token request: %w", err) + } + + req.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36") + + resp, err := p.httpClient.Do(req) + if err != nil { + return "", fmt.Errorf("failed to fetch token: %w", err) + } + defer func() { + _ = resp.Body.Close() + }() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return "", fmt.Errorf("failed to read token response: %w", err) + } + + bodyStr := string(body) + + // First, try to find buildId in the __NEXT_DATA__ or page source + buildIdPatterns := []string{ + `"buildId":"([a-zA-Z0-9_-]+)"`, + `buildId":"([a-zA-Z0-9_-]+)"`, + `/_next/static/([a-zA-Z0-9_-]+)/_buildManifest`, + } + + for _, pattern := range buildIdPatterns { + re := regexp.MustCompile(pattern) + matches := re.FindStringSubmatch(bodyStr) + if len(matches) > 1 { + buildId := matches[1] + // Now try to get the seek token from the JavaScript files using buildId + if token, err := p.getSeekTokenFromBuildId(buildId); err == nil { + return token, nil + } + } + } + + // If we can't find buildId, look for direct seek token patterns + seekPatterns := []string{ + `/api/seek/([a-f0-9]{16})`, + `"seek/([a-f0-9]{16})"`, + `api/seek/([a-f0-9]{16})`, + `seek/([a-f0-9]{12,})`, + } + + for _, pattern := range seekPatterns { + re := regexp.MustCompile(pattern) + matches := re.FindStringSubmatch(bodyStr) + if len(matches) > 1 { + return matches[1], nil + } + } + + // Last resort: try multiple known working tokens + knownTokens := []string{ + "6e17f7a193ef3188", // From your curl example + "d4b2e330db04dbf3", // Common fallback + } + + for _, token := range knownTokens { + if p.testSeekToken(token) { + return token, nil + } + } + + // Generate a token as last resort + return p.generateSeekToken(), nil +} + +// getSeekTokenFromBuildId attempts to extract seek token from build-specific files +func (p *HLTBPlugin) getSeekTokenFromBuildId(buildId string) (string, error) { + // Common build file patterns where seek tokens might be stored + fileURLs := []string{ + fmt.Sprintf("https://howlongtobeat.com/_next/static/%s/_buildManifest.js", buildId), + fmt.Sprintf("https://howlongtobeat.com/_next/static/%s/_ssgManifest.js", buildId), + fmt.Sprintf("https://howlongtobeat.com/_next/static/chunks/pages/index-%s.js", buildId[:12]), + } + + for _, fileURL := range fileURLs { + if token, err := p.extractSeekTokenFromFile(fileURL); err == nil && token != "" { + return token, nil + } + } + + return "", fmt.Errorf("no seek token found in build files") +} + +// extractSeekTokenFromFile downloads and searches a file for seek token +func (p *HLTBPlugin) extractSeekTokenFromFile(fileURL string) (string, error) { + req, err := http.NewRequest("GET", fileURL, nil) + if err != nil { + return "", err + } + + req.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36") + + resp, err := p.httpClient.Do(req) + if err != nil { + return "", err + } + defer func() { + _ = resp.Body.Close() + }() + + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("failed to fetch file: %d", resp.StatusCode) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return "", err + } + + bodyStr := string(body) + patterns := []string{ + `seek/([a-f0-9]{16})`, + `"([a-f0-9]{16})"`, + `'([a-f0-9]{16})'`, + } + + for _, pattern := range patterns { + re := regexp.MustCompile(pattern) + matches := re.FindStringSubmatch(bodyStr) + if len(matches) > 1 { + return matches[1], nil + } + } + + return "", fmt.Errorf("no seek token found in file") +} + +// testSeekToken tests if a seek token works by making a simple API call +func (p *HLTBPlugin) testSeekToken(token string) bool { + searchURL := fmt.Sprintf("https://howlongtobeat.com/api/seek/%s", token) + searchRequest := map[string]interface{}{ + "searchType": "games", + "searchTerms": []string{"test"}, + "searchPage": 1, + "size": 1, + "searchOptions": map[string]interface{}{ + "games": map[string]interface{}{ + "userId": 0, + "platform": "", + "sortCategory": "popular", + "rangeCategory": "main", + "rangeTime": map[string]interface{}{ + "min": nil, + "max": nil, + }, + "gameplay": map[string]interface{}{ + "perspective": "", + "flow": "", + "genre": "", + "difficulty": "", + }, + "rangeYear": map[string]interface{}{ + "min": "", + "max": "", + }, + "modifier": "", + }, + "users": map[string]interface{}{ + "sortCategory": "postcount", + }, + "lists": map[string]interface{}{ + "sortCategory": "follows", + }, + "filter": "", + "sort": 0, + "randomizer": 0, + }, + "useCache": true, + } + + // Test the token with a simple search + if _, err := p.performAPISearch(searchURL, searchRequest); err == nil { + return true + } + return false +} + +// generateSeekToken generates a seek token based on current time +func (p *HLTBPlugin) generateSeekToken() string { + // Use a simple hash-like approach with current timestamp + // This is a fallback approach since the real token generation is unknown + now := time.Now().Unix() + return fmt.Sprintf("%x", now%0xffffffff)[:16] +} + // createErrorResponse creates an error response message func (p *HLTBPlugin) createErrorResponse(msg *model.Message, errorText string) []*model.MessageAction { response := &model.Message{