如何通过抓取某些内容页面在Twitch上获得流媒体的名称

本文关键字:Twitch 流媒体 抓取 何通过 | 更新日期: 2023-09-27 18:00:13

我想随机获取一位Twitch主播(streamer)。我原以为最简单的做法是解析"http://twitch.tv/directory/random"的HTML,但不幸的是,这些元素并不在页面源码中,而是隐藏在别处。在C#中,最好的实现方法是什么?

希望你能帮助我,谢谢!

编辑:我的主要问题是,在源HTML中看不到twitch.tv/directory/random上显示的主播名称。因此,既然无法通过直接下载该网址得到这些内容,我需要另一种方式来获取该网站上的数据。也就是说,我正在寻找一种用C#对twitch.tv/directory/random进行网络抓取的方法,以获得该类别中热门主播的名称。

如何通过抓取某些内容页面在Twitch上获得流媒体的名称

您要做的是web抓取。通常情况下,服务提供商并不欣赏这一点。

我在github上发现了这个Twitch-API,它可能会帮助你实现你的目标。如果你想在C#中使用API,我建议RestSharp(也在nuget上)实现对API的HTTP调用。

在快速查看了API公开的方法之后,这一方法可能会有所帮助:

GET /search/channels

它返回一个JSON对象,如下所示:

{
  "channels": [
    {
      "mature": false,
      "status": "test status",
      "broadcaster_language": "en",
      "display_name": "test_channel",
      "game": "StarCraft II: Heart of the Swarm",
      "delay": 0,
      "language": "en",
      "_id": 12345,
      "name": "test_channel",
      "created_at": "2007-05-22T10:39:54Z",
      "updated_at": "2015-02-12T04:15:49Z",
      "logo": "http://static-cdn.jtvnw.net/jtv_user_pictures/test_channel-profile_image-94a42b3a13c31c02-300x300.jpeg",
      "banner": "http://static-cdn.jtvnw.net/jtv_user_pictures/test_channel-channel_header_image-08dd874c17f39837-640x125.png",
      "video_banner": "http://static-cdn.jtvnw.net/jtv_user_pictures/test_channel-channel_offline_image-b314c834d210dc1a-640x360.png",
      "background": null,
      "profile_banner": "http://static-cdn.jtvnw.net/jtv_user_pictures/test_channel-profile_banner-6936c61353e4aeed-480.png",
      "profile_banner_background_color": "null",
      "partner": true,
      "url": "http://www.twitch.tv/test_channel",
      "views": 49144894,
      "followers": 215780,
      "_links": {
        "self": "https://api.twitch.tv/kraken/channels/test_channel",
        "follows": "https://api.twitch.tv/kraken/channels/test_channel/follows",
        "commercial": "https://api.twitch.tv/kraken/channels/test_channel/commercial",
        "stream_key": "https://api.twitch.tv/kraken/channels/test_channel/stream_key",
        "chat": "https://api.twitch.tv/kraken/chat/test_channel",
        "features": "https://api.twitch.tv/kraken/channels/test_channel/features",
        "subscriptions": "https://api.twitch.tv/kraken/channels/test_channel/subscriptions",
        "editors": "https://api.twitch.tv/kraken/channels/test_channel/editors",
        "teams": "https://api.twitch.tv/kraken/channels/test_channel/teams",
        "videos": "https://api.twitch.tv/kraken/channels/test_channel/videos"
      }
    }
  ],
  "_total": 42679, 
  "_links": {
    "self": "https://api.twitch.tv/kraken/search/channels?limit=10&offset=0&q=starcraft", 
    "next": "https://api.twitch.tv/kraken/search/channels?limit=10&offset=10&q=starcraft"
  }
}

现在,通过使用json2csharp,我们可以将其转换为类层次结构:

/// <summary>
/// Hypermedia links of a single channel. Maps the "_links" object nested inside
/// each entry of the "channels" array in the sample search response.
/// </summary>
public class Links
{
    // Property names mirror the JSON keys of the sample response one-to-one;
    // every value in the sample is a URL string.
    public string self { get; set; }
    public string follows { get; set; }
    public string commercial { get; set; }
    public string stream_key { get; set; }
    public string chat { get; set; }
    public string features { get; set; }
    public string subscriptions { get; set; }
    public string editors { get; set; }
    public string teams { get; set; }
    public string videos { get; set; }
}
/// <summary>
/// One entry of the "channels" array in the sample search response.
/// Property names mirror the JSON keys of that sample one-to-one.
/// </summary>
public class Channel
{
    public bool mature { get; set; }
    public string status { get; set; }
    public string broadcaster_language { get; set; }
    // Display name of the channel; this is the value the sample client prints.
    public string display_name { get; set; }
    public string game { get; set; }
    public int delay { get; set; }
    public string language { get; set; }
    public int _id { get; set; }
    public string name { get; set; }
    // Timestamps are kept as raw strings (the sample shows e.g. "2007-05-22T10:39:54Z").
    public string created_at { get; set; }
    public string updated_at { get; set; }
    public string logo { get; set; }
    public string banner { get; set; }
    public string video_banner { get; set; }
    // Typed as object because the sample response only shows null for this key.
    public object background { get; set; }
    public string profile_banner { get; set; }
    // The sample carries the literal string "null" here, not a JSON null.
    public string profile_banner_background_color { get; set; }
    public bool partner { get; set; }
    public string url { get; set; }
    public int views { get; set; }
    public int followers { get; set; }
    // Per-channel links (the nested "_links" object).
    public Links _links { get; set; }
}
/// <summary>
/// Links of the search result as a whole. Maps the top-level "_links" object of
/// the sample search response, which carries the URL of the current query
/// ("self") and of the following offset ("next").
/// </summary>
public class Links2
{
    public string self { get; set; }
    public string next { get; set; }
}
/// <summary>
/// Root object of the sample search response: the list of channel entries,
/// the "_total" count and the top-level "_links" object.
/// </summary>
public class Channels
{
    // Entries of the "channels" array.
    public List<Channel> channels { get; set; }
    // Value of the "_total" key (42679 in the sample, while only one entry is listed).
    public int _total { get; set; }
    // Top-level "_links" object (self / next).
    public Links2 _links { get; set; }
}

安装RestSharp和Json.NET后,对该特定端点的调用可能如下所示:

/// <summary>
/// Sample client that authenticates against the Twitch "kraken" API and searches
/// channels by a query term, using RestSharp for HTTP and Json.NET for parsing.
/// </summary>
public class TwitchSampleImplementation
{
    // reference: baseUrl => https://github.com/justintv/Twitch-API#formats
    private const string ApiBaseUrl = "https://api.twitch.tv/kraken";

    // Cookies collected by Authenticate and replayed by GetChannel. Starts out
    // empty (not null) so GetChannel can be called before any authentication.
    private IList<RestResponseCookie> _cookies = new List<RestResponseCookie>();

    /// <summary>
    /// Authenticates with placeholder credentials, searches for "popularChannel"
    /// and writes the display names of the returned channels to the console.
    /// </summary>
    public void AuthenticateAndGetChannels()
    {
        // this is where your login credentials go which can be acquired here
        // => https://github.com/justintv/Twitch-API/blob/master/authentication.md#developer-setup
        Authenticate("yourClientId", "thatUrl", new List<string>(), "thatState");
        var channels = GetChannel("popularChannel");
        // The format string needs the {0} placeholder, otherwise the names are dropped.
        Console.WriteLine(String.Format("Channels: {0}",
            String.Join(",", channels.channels.Select(c => c.display_name))));
    }

    /// <summary>
    /// Sends the authorization request and stores the cookies of a successful response.
    /// </summary>
    /// <param name="clientId">Value sent as "client_id".</param>
    /// <param name="registeredRedirectURI">Value sent as "redirect_uri".</param>
    /// <param name="scopes">Scopes to request; null is treated as an empty list.</param>
    /// <param name="state">Value sent as "state".</param>
    /// <exception cref="HttpRequestException">The response status is not 200 OK.</exception>
    public void Authenticate(string clientId,
        string registeredRedirectURI, List<string> scopes, string state)
    {
        // reference: https://github.com/justintv/Twitch-API/blob/master/authentication.md
        // NOTE(review): the scopes are joined with ',' as in the original sample;
        // confirm the separator the authorize endpoint expects.
        var scopeValue = String.Join(",", scopes ?? new List<string>());

        var client = new RestClient(ApiBaseUrl);
        var request = new RestRequest("oauth2/authorize", Method.POST); // try Method.GET if that doesn't work
        request.AddParameter("client_id", clientId, ParameterType.GetOrPost);
        request.AddParameter("redirect_uri", registeredRedirectURI, ParameterType.GetOrPost);
        request.AddParameter("scope", scopeValue, ParameterType.GetOrPost);
        request.AddParameter("state", state, ParameterType.GetOrPost);
        // reference: https://github.com/justintv/Twitch-API#api-versions-and-mime-types
        request.RequestFormat = DataFormat.Json;

        var response = client.Execute(request);
        if (response.StatusCode != System.Net.HttpStatusCode.OK)
        {
            handleException(response);
            return;
        }

        foreach (var cookie in response.Cookies)
        {
            _cookies.Add(cookie);
        }
    }

    /// <summary>
    /// Calls "search/channels" with the given term and deserializes the JSON reply.
    /// </summary>
    /// <param name="searchTerm">Search text, sent as the "q" parameter; null is sent as empty.</param>
    /// <returns>The deserialized search result.</returns>
    /// <exception cref="HttpRequestException">The response status is not 200 OK.</exception>
    public Channels GetChannel(string searchTerm)
    {
        var client = new RestClient(ApiBaseUrl);
        // reference: https://github.com/justintv/Twitch-API/blob/master/v3_resources/search.md#get-searchchannels
        // reference: http://restsharp.org/
        var request = new RestRequest("search/channels");
        // Passed as a request parameter instead of being spliced into the resource
        // string, so RestSharp takes care of URL-encoding the term.
        request.AddParameter("q", searchTerm ?? String.Empty, ParameterType.GetOrPost);
        foreach (var cookie in _cookies)
        {
            request.AddCookie(cookie.Name, cookie.Value);
        }

        var response = client.Execute(request);
        if (response.StatusCode == System.Net.HttpStatusCode.OK)
        {
            return JsonConvert.DeserializeObject<Channels>(response.Content);
        }
        handleException(response);
        return null;
    }

    /// <summary>
    /// Throws an <see cref="HttpRequestException"/> built from the response body and status code.
    /// </summary>
    private void handleException(IRestResponse response)
    {
        // Formatting the enum value directly yields its name, or the number when the
        // code has no named member (Enum.GetName would return null in that case).
        throw new HttpRequestException(String.Format("Exception '{0}' with status code '{1}' occurred.",
            response.Content, response.StatusCode));
    }
}

我还没有试过调用那个代码,但它应该会给你一些解决方案。

否则,在谷歌上搜索"twitch scrapers"也可能找到所需的结果。祝你搜寻顺利,好运。希望这能有所帮助。