Add support for playlists and channels in search

This commit is contained in:
Omar Roth 2018-09-20 09:36:09 -05:00
parent 1627cfc2fa
commit 62380933b2
11 changed files with 414 additions and 210 deletions

View File

@ -434,6 +434,7 @@ get "/search" do |env|
ucids ||= [] of String ucids ||= [] of String
channel = nil channel = nil
content_type = "all"
date = "" date = ""
duration = "" duration = ""
features = [] of String features = [] of String
@ -447,6 +448,8 @@ get "/search" do |env|
case key case key
when "channel", "user" when "channel", "user"
channel = value channel = value
when "content_type", "type"
content_type = value
when "date" when "date"
date = value date = value
when "duration" when "duration"
@ -475,7 +478,7 @@ get "/search" do |env|
count = videos.size count = videos.size
else else
begin begin
search_params = produce_search_params(sort: sort, date: date, content_type: "video", search_params = produce_search_params(sort: sort, date: date, content_type: content_type,
duration: duration, features: features) duration: duration, features: features)
rescue ex rescue ex
error_message = ex.message error_message = ex.message
@ -2540,7 +2543,7 @@ get "/api/v1/channels/:ucid" do |env|
json.field "authorThumbnails" do json.field "authorThumbnails" do
json.array do json.array do
qualities = [32, 48, 76, 100, 512] qualities = [32, 48, 76, 100, 176, 512]
qualities.each do |quality| qualities.each do |quality|
json.object do json.object do
@ -2699,7 +2702,7 @@ end
env.response.content_type = "application/json" env.response.content_type = "application/json"
result result
end end
end end
get "/api/v1/search" do |env| get "/api/v1/search" do |env|
@ -2722,13 +2725,15 @@ get "/api/v1/search" do |env|
features ||= [] of String features ||= [] of String
# TODO: Support other content types # TODO: Support other content types
content_type = "video" content_type = env.params.query["type"]?.try &.downcase
content_type ||= "video"
env.response.content_type = "application/json" env.response.content_type = "application/json"
begin begin
search_params = produce_search_params(sort_by, date, content_type, duration, features) search_params = produce_search_params(sort_by, date, content_type, duration, features)
rescue ex rescue ex
env.response.status_code = 400
next JSON.build do |json| next JSON.build do |json|
json.object do json.object do
json.field "error", ex.message json.field "error", ex.message
@ -2739,26 +2744,79 @@ get "/api/v1/search" do |env|
response = JSON.build do |json| response = JSON.build do |json|
json.array do json.array do
count, search_results = search(query, page, search_params).as(Tuple) count, search_results = search(query, page, search_params).as(Tuple)
search_results.each do |video| search_results.each do |item|
json.object do
case item
when SearchVideo
json.field "type", "video"
json.field "title", item.title
json.field "videoId", item.id
json.field "author", item.author
json.field "authorId", item.ucid
json.field "authorUrl", "/channel/#{item.ucid}"
json.field "videoThumbnails" do
generate_thumbnails(json, item.id)
end
json.field "description", item.description
json.field "descriptionHtml", item.description_html
json.field "viewCount", item.views
json.field "published", item.published.epoch
json.field "publishedText", "#{recode_date(item.published)} ago"
json.field "lengthSeconds", item.length_seconds
json.field "liveNow", item.live_now
when SearchPlaylist
json.field "type", "playlist"
json.field "title", item.title
json.field "playlistId", item.id
json.field "author", item.author
json.field "authorId", item.ucid
json.field "authorUrl", "/channel/#{item.ucid}"
json.field "videos" do
json.array do
item.videos.each do |video|
json.object do json.object do
json.field "title", video.title json.field "title", video.title
json.field "videoId", video.id json.field "videoId", video.id
json.field "lengthSeconds", video.length_seconds
json.field "author", video.author
json.field "authorId", video.ucid
json.field "authorUrl", "/channel/#{video.ucid}"
json.field "videoThumbnails" do json.field "videoThumbnails" do
generate_thumbnails(json, video.id) generate_thumbnails(json, video.id)
end end
end
end
end
end
when SearchChannel
json.field "type", "channel"
json.field "author", item.author
json.field "authorId", item.ucid
json.field "authorUrl", "/channel/#{item.ucid}"
json.field "description", video.description json.field "authorThumbnails" do
json.field "descriptionHtml", video.description_html json.array do
qualities = [32, 48, 76, 100, 176, 512]
json.field "viewCount", video.views qualities.each do |quality|
json.field "published", video.published.epoch json.object do
json.field "publishedText", "#{recode_date(video.published)} ago" json.field "url", item.author_thumbnail.gsub("=s176-", "=s#{quality}-")
json.field "lengthSeconds", video.length_seconds json.field "width", quality
json.field "height", quality
end
end
end
end
json.field "subCount", item.subscriber_count
json.field "videoCount", item.video_count
json.field "description", item.description
json.field "descriptionHtml", item.description_html
end
end end
end end
end end

View File

@ -196,8 +196,14 @@ def html_to_content(description_html)
end end
def extract_videos(nodeset, ucid = nil) def extract_videos(nodeset, ucid = nil)
videos = extract_items(nodeset, ucid)
videos.select! { |item| !item.is_a?(SearchChannel | SearchPlaylist) }
videos.map { |video| video.as(SearchVideo) }
end
def extract_items(nodeset, ucid = nil)
# TODO: Make this a 'common', so it makes more sense to be used here # TODO: Make this a 'common', so it makes more sense to be used here
videos = [] of SearchVideo items = [] of SearchItem
nodeset.each do |node| nodeset.each do |node|
anchor = node.xpath_node(%q(.//h3[contains(@class,"yt-lockup-title")]/a)) anchor = node.xpath_node(%q(.//h3[contains(@class,"yt-lockup-title")]/a))
@ -209,29 +215,92 @@ def extract_videos(nodeset, ucid = nil)
next next
end end
case node.xpath_node(%q(.//div)).not_nil!["class"] anchor = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-byline")]/a))
when .includes? "yt-lockup-playlist" if !anchor
next
when .includes? "yt-lockup-channel"
next
end
title = anchor.content.strip
id = anchor["href"].lchop("/watch?v=")
if ucid
author = "" author = ""
author_id = "" author_id = ""
else else
anchor = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-byline")]/a))
if !anchor
next
end
author = anchor.content author = anchor.content
author_id = anchor["href"].split("/")[-1] author_id = anchor["href"].split("/")[-1]
end end
anchor = node.xpath_node(%q(.//h3[contains(@class, "yt-lockup-title")]/a))
if !anchor
next
end
title = anchor.content.strip
id = anchor["href"]
description_html = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")]))
description_html, description = html_to_content(description_html)
case node.xpath_node(%q(.//div)).not_nil!["class"]
when .includes? "yt-lockup-playlist"
plid = HTTP::Params.parse(URI.parse(id).query.not_nil!)["list"]
anchor = node.xpath_node(%q(.//ul[@class="yt-lockup-meta-info"]/li/a))
if anchor
video_count = anchor.content.match(/View full playlist \((?<count>\d+)/).try &.["count"].to_i?
end
video_count ||= 0
videos = [] of SearchPlaylistVideo
node.xpath_nodes(%q(.//ol[contains(@class, "yt-lockup-playlist-items")]/li)).each do |video|
anchor = video.xpath_node(%q(.//a))
if anchor
video_title = anchor.content
id = HTTP::Params.parse(URI.parse(anchor["href"]).query.not_nil!)["v"]
end
video_title ||= ""
id ||= ""
anchor = video.xpath_node(%q(.//span/span))
if anchor
length_seconds = decode_length_seconds(anchor.content)
end
length_seconds ||= 0
videos << SearchPlaylistVideo.new(
video_title,
id,
length_seconds
)
end
items << SearchPlaylist.new(
title,
plid,
author,
author_id,
video_count,
videos
)
when .includes? "yt-lockup-channel"
author = title
ucid = id.split("/")[-1]
author_thumbnail = node.xpath_node(%q(.//div/span/img)).try &.["data-thumb"]?
author_thumbnail ||= node.xpath_node(%q(.//div/span/img)).try &.["src"]
author_thumbnail ||= ""
subscriber_count = node.xpath_node(%q(.//span[contains(@class, "yt-subscriber-count")])).try &.["title"].delete(",").to_i?
subscriber_count ||= 0
video_count = node.xpath_node(%q(.//ul[@class="yt-lockup-meta-info"]/li)).try &.content.split(" ")[0].delete(",").to_i?
video_count ||= 0
items << SearchChannel.new(
author,
ucid,
author_thumbnail,
subscriber_count,
video_count,
description,
description_html
)
else
id = id.lchop("/watch?v=")
metadata = node.xpath_nodes(%q(.//div[contains(@class,"yt-lockup-meta")]/ul/li)) metadata = node.xpath_nodes(%q(.//div[contains(@class,"yt-lockup-meta")]/ul/li))
if metadata.empty? if metadata.empty?
next next
@ -259,9 +328,6 @@ def extract_videos(nodeset, ucid = nil)
end end
view_count ||= 0_i64 view_count ||= 0_i64
description_html = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")]))
description_html, description = html_to_content(description_html)
length_seconds = node.xpath_node(%q(.//span[@class="video-time"])) length_seconds = node.xpath_node(%q(.//span[@class="video-time"]))
if length_seconds if length_seconds
length_seconds = decode_length_seconds(length_seconds.content) length_seconds = decode_length_seconds(length_seconds.content)
@ -269,7 +335,14 @@ def extract_videos(nodeset, ucid = nil)
length_seconds = -1 length_seconds = -1
end end
videos << SearchVideo.new( live_now = node.xpath_node(%q(.//span[contains(@class, "yt-badge-live")]))
if live_now
live_now = true
else
live_now = false
end
items << SearchVideo.new(
title, title,
id, id,
author, author,
@ -279,8 +352,10 @@ def extract_videos(nodeset, ucid = nil)
description, description,
description_html, description_html,
length_seconds, length_seconds,
live_now
) )
end end
end
return videos return items
end end

View File

@ -3,15 +3,19 @@ def crawl_videos(db)
random = Random.new random = Random.new
search(random.base64(3)).as(Tuple)[1].each do |video| search(random.base64(3)).as(Tuple)[1].each do |video|
if video.is_a?(SearchVideo)
ids << video.id ids << video.id
end end
end
loop do loop do
if ids.empty? if ids.empty?
search(random.base64(3)).as(Tuple)[1].each do |video| search(random.base64(3)).as(Tuple)[1].each do |video|
if video.is_a?(SearchVideo)
ids << video.id ids << video.id
end end
end end
end
begin begin
id = ids[0] id = ids[0]

View File

@ -9,9 +9,43 @@ class SearchVideo
description: String, description: String,
description_html: String, description_html: String,
length_seconds: Int32, length_seconds: Int32,
live_now: Bool,
}) })
end end
# A single video entry listed under a playlist search result.
#
# Fields are declared through `add_mapping`, which is defined outside this view.
# NOTE(review): `extract_items` builds instances positionally as
# (title, id, length_seconds); presumably the field order below has to stay in
# sync with that call - confirm against `add_mapping` before reordering.
class SearchPlaylistVideo
add_mapping({
# Text content of the entry's link, as collected by `extract_items`.
title: String,
# Video id; `extract_items` reads it from the "v" query parameter of the entry's link.
id: String,
# Duration produced by `decode_length_seconds`; `extract_items` uses 0 when no duration label is found.
length_seconds: Int32,
})
end
# A playlist entry in a search result list.
#
# Fields are declared through `add_mapping`, which is defined outside this view.
# NOTE(review): `extract_items` builds instances positionally as
# (title, plid, author, author_id, video_count, videos); presumably the field
# order below has to stay in sync with that call - confirm against `add_mapping`.
class SearchPlaylist
add_mapping({
# Stripped text of the result's title link.
title: String,
# Playlist id; `extract_items` reads it from the "list" query parameter of the title link.
id: String,
# Display name of the playlist owner ("" when `extract_items` is given a ucid).
author: String,
# Owner's channel id: last path segment of the byline link ("" when `extract_items` is given a ucid).
ucid: String,
# Number parsed from the "View full playlist (N" text; 0 when that text is absent or unparsable.
video_count: Int32,
# Preview entries scraped from the result's playlist item list.
# NOTE(review): nothing visible here guarantees this is non-empty, yet the
# item template indexes `videos[0]` - verify that empty playlists cannot occur.
videos: Array(SearchPlaylistVideo),
})
end
# A channel entry in a search result list.
#
# Fields are declared through `add_mapping`, which is defined outside this view.
# NOTE(review): `extract_items` builds instances positionally as
# (author, ucid, author_thumbnail, subscriber_count, video_count, description,
# description_html); presumably the field order below has to stay in sync with
# that call - confirm against `add_mapping`.
class SearchChannel
add_mapping({
# Channel name; `extract_items` uses the result's title text.
author: String,
# Channel id: last path segment of the result's title link.
ucid: String,
# Thumbnail URL taken from the image's "data-thumb" attribute, else "src", else "".
# The search API rewrites the "=s176-" segment of this URL to produce other sizes.
author_thumbnail: String,
# Subscriber count parsed from the subscriber badge's title; 0 when missing or unparsable.
subscriber_count: Int32,
# Leading number of the first meta-info item; 0 when missing or unparsable.
video_count: Int32,
# Description pair returned by `html_to_content` for the result's description node.
description: String,
description_html: String,
})
end
# Union of every result kind a search can yield; `extract_items`, `search` and
# `channel_search` return arrays of this type, and callers branch on the
# concrete class (e.g. `case item when SearchVideo`).
alias SearchItem = SearchVideo | SearchChannel | SearchPlaylist
def channel_search(query, page, channel) def channel_search(query, page, channel)
client = make_client(YT_URL) client = make_client(YT_URL)
@ -26,7 +60,7 @@ def channel_search(query, page, channel)
end end
if !canonical if !canonical
return 0, [] of SearchVideo return 0, [] of SearchItem
end end
ucid = canonical["href"].split("/")[-1] ucid = canonical["href"].split("/")[-1]
@ -40,31 +74,31 @@ def channel_search(query, page, channel)
nodeset = document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")])) nodeset = document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")]))
count = nodeset.size count = nodeset.size
videos = extract_videos(nodeset) items = extract_items(nodeset)
else else
count = 0 count = 0
videos = [] of SearchVideo items = [] of SearchItem
end end
return count, videos return count, items
end end
def search(query, page = 1, search_params = produce_search_params(content_type: "video")) def search(query, page = 1, search_params = produce_search_params(content_type: "all"))
client = make_client(YT_URL) client = make_client(YT_URL)
if query.empty? if query.empty?
return {0, [] of SearchVideo} return {0, [] of SearchItem}
end end
html = client.get("/results?q=#{URI.escape(query)}&page=#{page}&sp=#{search_params}&disable_polymer=1").body html = client.get("/results?q=#{URI.escape(query)}&page=#{page}&sp=#{search_params}&disable_polymer=1").body
if html.empty? if html.empty?
return {0, [] of SearchVideo} return {0, [] of SearchItem}
end end
html = XML.parse_html(html) html = XML.parse_html(html)
nodeset = html.xpath_nodes(%q(//ol[@class="item-section"]/li)) nodeset = html.xpath_nodes(%q(//ol[@class="item-section"]/li))
videos = extract_videos(nodeset) items = extract_items(nodeset)
return {nodeset.size, videos} return {nodeset.size, items}
end end
def produce_search_params(sort : String = "relevance", date : String = "", content_type : String = "", def produce_search_params(sort : String = "relevance", date : String = "", content_type : String = "",
@ -110,8 +144,10 @@ def produce_search_params(sort : String = "relevance", date : String = "", conte
"\x10\x04" "\x10\x04"
when "show" when "show"
"\x10\x05" "\x10\x05"
else when "all"
"" ""
else
"\x10\x01"
end end
body += case duration body += case duration

View File

@ -37,8 +37,8 @@
<% videos.each_slice(4) do |slice| %> <% videos.each_slice(4) do |slice| %>
<div class="pure-g"> <div class="pure-g">
<% slice.each do |video| %> <% slice.each do |item| %>
<%= rendered "components/video" %> <%= rendered "components/item" %>
<% end %> <% end %>
</div> </div>
<% end %> <% end %>

View File

@ -0,0 +1,54 @@
<div class="pure-u-1 pure-u-md-1-4">
<div class="h-box">
<% case item when %>
<% when SearchChannel %>
<a style="width:100%;" href="/channel/<%= item.ucid %>">
<% if env.get?("user") && env.get("user").as(User).preferences.thin_mode %>
<% else %>
<center>
<img style="width:56.25%;" src="/ggpht<%= URI.parse(item.author_thumbnail).full_path %>"/>
</center>
<% end %>
<p><%= item.author %></p>
</a>
<p><%= number_with_separator(item.subscriber_count) %> subscribers</p>
<h5><%= item.description_html %></h5>
<% when SearchPlaylist %>
<a style="width:100%;" href="/playlist?list=<%= item.id %>">
<% if env.get?("user") && env.get("user").as(User).preferences.thin_mode %>
<% else %>
<img style="width:100%;" src="/vi/<%= item.videos[0].id %>/mqdefault.jpg"/>
<% end %>
<p><%= item.title %></p>
</a>
<p>
<b><a style="width:100%;" href="/channel/<%= item.ucid %>"><%= item.author %></a></b>
</p>
<p><%= number_with_separator(item.video_count) %> videos</p>
<p>PLAYLIST</p>
<% else %>
<% if item.responds_to?(:playlists) && !item.playlists.empty? %>
<% params = "&list=#{item.playlists[0]}" %>
<% else %>
<% params = nil %>
<% end %>
<a style="width:100%;" href="/watch?v=<%= item.id %><%= params %>">
<% if env.get?("user") && env.get("user").as(User).preferences.thin_mode %>
<% else %>
<img style="width:100%;" src="/vi/<%= item.id %>/mqdefault.jpg"/>
<% end %>
<p><%= item.title %></p>
</a>
<% if item.responds_to?(:live_now) && item.live_now %>
<p>LIVE</p>
<% end %>
<p>
<b><a style="width:100%;" href="/channel/<%= item.ucid %>"><%= item.author %></a></b>
</p>
<% if Time.now - item.published > 1.minute %>
<h5>Shared <%= recode_date(item.published) %> ago</h5>
<% end %>
<% end %>
</div>
</div>

View File

@ -1,23 +0,0 @@
<div class="pure-u-1 pure-u-md-1-4">
<div class="h-box">
<% if video.responds_to?(:playlists) && !video.playlists.empty? %>
<% params = "&list=#{video.playlists[0]}" %>
<% else %>
<% params = nil %>
<% end %>
<a style="width:100%;" href="/watch?v=<%= video.id %><%= params %>">
<% if env.get?("user") && env.get("user").as(User).preferences.thin_mode %>
<% else %>
<img style="width:100%;" src="/vi/<%= video.id %>/mqdefault.jpg"/>
<% end %>
<p><%= video.title %></p>
</a>
<p>
<b><a style="width:100%;" href="/channel/<%= video.ucid %>"><%= video.author %></a></b>
</p>
<% if Time.now - video.published > 1.minute %>
<h5>Shared <%= recode_date(video.published) %> ago</h5>
<% end %>
</div>
</div>

View File

@ -4,8 +4,8 @@
<% top_videos.each_slice(4) do |slice| %> <% top_videos.each_slice(4) do |slice| %>
<div class="pure-g"> <div class="pure-g">
<% slice.each do |video| %> <% slice.each do |item| %>
<%= rendered "components/video" %> <%= rendered "components/item" %>
<% end %> <% end %>
</div> </div>
<% end %> <% end %>

View File

@ -26,8 +26,8 @@
<% videos.each_slice(4) do |slice| %> <% videos.each_slice(4) do |slice| %>
<div class="pure-g"> <div class="pure-g">
<% slice.each do |video| %> <% slice.each do |item| %>
<%= rendered "components/video" %> <%= rendered "components/item" %>
<% end %> <% end %>
</div> </div>
<% end %> <% end %>

View File

@ -4,8 +4,8 @@
<% videos.each_slice(4) do |slice| %> <% videos.each_slice(4) do |slice| %>
<div class="pure-g"> <div class="pure-g">
<% slice.each do |video| %> <% slice.each do |item| %>
<%= rendered "components/video" %> <%= rendered "components/item" %>
<% end %> <% end %>
</div> </div>
<% end %> <% end %>

View File

@ -25,8 +25,8 @@
<% notifications.each_slice(4) do |slice| %> <% notifications.each_slice(4) do |slice| %>
<div class="pure-g"> <div class="pure-g">
<% slice.each do |video| %> <% slice.each do |item| %>
<%= rendered "components/video" %> <%= rendered "components/item" %>
<% end %> <% end %>
</div> </div>
<% end %> <% end %>
@ -37,8 +37,8 @@
<% videos.each_slice(4) do |slice| %> <% videos.each_slice(4) do |slice| %>
<div class="pure-g"> <div class="pure-g">
<% slice.each do |video| %> <% slice.each do |item| %>
<%= rendered "components/video" %> <%= rendered "components/item" %>
<% end %> <% end %>
</div> </div>
<% end %> <% end %>