diff --git a/internal/reader/processor/processor.go b/internal/reader/processor/processor.go index 74750d76039..f07fd93668c 100644 --- a/internal/reader/processor/processor.go +++ b/internal/reader/processor/processor.go @@ -4,6 +4,7 @@ package processor import ( + "encoding/json" "errors" "fmt" "log/slog" @@ -33,8 +34,8 @@ var ( youtubeRegex = regexp.MustCompile(`youtube\.com/watch\?v=(.*)$`) nebulaRegex = regexp.MustCompile(`^https://nebula\.tv`) odyseeRegex = regexp.MustCompile(`^https://odysee\.com`) - bilibiliRegex = regexp.MustCompile(`bilibili\.com/video/(.*)$`) - timelengthRegex = regexp.MustCompile(`"timelength":\s*(\d+)`) + bilibiliURLRegex = regexp.MustCompile(`bilibili\.com/video/(.*)$`) + bilibiliVideoIdRegex = regexp.MustCompile(`/video/(?:av(\d+)|BV([a-zA-Z0-9]+))`) iso8601Regex = regexp.MustCompile(`^P((?P<year>\d+)Y)?((?P<month>\d+)M)?((?P<week>\d+)W)?((?P<day>\d+)D)?(T((?P<hour>\d+)H)?((?P<minute>\d+)M)?((?P<second>\d+)S)?)?$`) customReplaceRuleRegex = regexp.MustCompile(`rewrite\("(.*)"\|"(.*)"\)`) ) @@ -474,7 +475,7 @@ func shouldFetchBilibiliWatchTime(entry *model.Entry) bool { if !config.Opts.FetchBilibiliWatchTime() { return false } - matches := bilibiliRegex.FindStringSubmatch(entry.URL) + matches := bilibiliURLRegex.FindStringSubmatch(entry.URL) urlMatchesBilibiliPattern := len(matches) == 2 return urlMatchesBilibiliPattern } @@ -574,41 +575,66 @@ func fetchOdyseeWatchTime(websiteURL string) (int, error) { return int(dur / 60), nil } +func extractBilibiliVideoID(websiteURL string) (string, string, error) { + matches := bilibiliVideoIdRegex.FindStringSubmatch(websiteURL) + if matches == nil { + return "", "", fmt.Errorf("no video ID found in URL: %s", websiteURL) + } + if matches[1] != "" { + return "aid", matches[1], nil + } + if matches[2] != "" { + return "bvid", matches[2], nil + } + return "", "", fmt.Errorf("unexpected regex match result for URL: %s", websiteURL) +} + func fetchBilibiliWatchTime(websiteURL string) (int, error) { requestBuilder := fetcher.NewRequestBuilder() 
requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout()) requestBuilder.WithProxy(config.Opts.HTTPClientProxy()) - responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(websiteURL)) + idType, videoID, extractErr := extractBilibiliVideoID(websiteURL) + if extractErr != nil { + return 0, extractErr + } + bilibiliApiURL := fmt.Sprintf("https://api.bilibili.com/x/web-interface/view?%s=%s", idType, videoID) + + responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(bilibiliApiURL)) defer responseHandler.Close() if localizedError := responseHandler.LocalizedError(); localizedError != nil { - slog.Warn("Unable to fetch Bilibili page", slog.String("website_url", websiteURL), slog.Any("error", localizedError.Error())) + slog.Warn("Unable to fetch Bilibili API", + slog.String("website_url", bilibiliApiURL), + slog.Any("error", localizedError.Error())) return 0, localizedError.Error() } - doc, docErr := goquery.NewDocumentFromReader(responseHandler.Body(config.Opts.HTTPClientMaxBodySize())) - if docErr != nil { - return 0, docErr + var result map[string]interface{} + doc := json.NewDecoder(responseHandler.Body(config.Opts.HTTPClientMaxBodySize())) + if docErr := doc.Decode(&result); docErr != nil { + return 0, fmt.Errorf("failed to decode API response: %v", docErr) } - timelengthMatches := timelengthRegex.FindStringSubmatch(doc.Text()) - if len(timelengthMatches) < 2 { - return 0, errors.New("duration has not found") + if code, ok := result["code"].(float64); !ok || code != 0 { + return 0, fmt.Errorf("API returned error code: %v", result["code"]) } - durationMs, err := strconv.ParseInt(timelengthMatches[1], 10, 64) - if err != nil { - return 0, fmt.Errorf("unable to parse duration %s: %v", timelengthMatches[1], err) + data, ok := result["data"].(map[string]interface{}) + if !ok { + return 0, fmt.Errorf("data field not found or not an object") } - durationSec := durationMs / 1000 - durationMin := durationSec / 60 - if 
durationSec%60 != 0 { + duration, ok := data["duration"].(float64) + if !ok { + return 0, fmt.Errorf("duration not found or not a number") + } + intDuration := int(duration) + durationMin := intDuration / 60 + if intDuration%60 != 0 { durationMin++ } - - return int(durationMin), nil + return durationMin, nil } // parseISO8601 parses an ISO 8601 duration string.