[^)]+)\)/).not_nil!["url"]
+ # Raises a KeyError on failure.
+ banners = initdata["header"]["c4TabbedHeaderRenderer"]?.try &.["banner"]?.try &.["thumbnails"]?
+ banner = banners.try &.[-1]?.try &.["url"].as_s?
- if banner.includes? "channels/c4/default_banner"
- banner = nil
- end
+ # if banner.includes? "channels/c4/default_banner"
+ # banner = nil
+ # end
- description_html = about.xpath_node(%q(//div[contains(@class,"about-description")])).try &.to_s ||
- %()
+ description = initdata["metadata"]["channelMetadataRenderer"]?.try &.["description"]?.try &.as_s? || ""
+  description_html = HTML.escape(description).gsub("\n", "<br>")
paid = about.xpath_node(%q(//meta[@itemprop="paid"])).not_nil!["content"] == "True"
is_family_friendly = about.xpath_node(%q(//meta[@itemprop="isFamilyFriendly"])).not_nil!["content"] == "True"
allowed_regions = about.xpath_node(%q(//meta[@itemprop="regionsAllowed"])).not_nil!["content"].split(",")
- related_channels = about.xpath_nodes(%q(//div[contains(@class, "branded-page-related-channels")]/ul/li))
- related_channels = related_channels.map do |node|
- related_id = node["data-external-id"]?
- related_id ||= ""
+ related_channels = initdata["contents"]["twoColumnBrowseResultsRenderer"]
+ .["secondaryContents"]?.try &.["browseSecondaryContentsRenderer"]["contents"][0]?
+ .try &.["verticalChannelSectionRenderer"]?.try &.["items"]?.try &.as_a.map do |node|
+ renderer = node["miniChannelRenderer"]?
+ related_id = renderer.try &.["channelId"]?.try &.as_s?
+ related_id ||= ""
- anchor = node.xpath_node(%q(.//h3[contains(@class, "yt-lockup-title")]/a))
- related_title = anchor.try &.["title"]
- related_title ||= ""
+ related_title = renderer.try &.["title"]?.try &.["simpleText"]?.try &.as_s?
+ related_title ||= ""
- related_author_url = anchor.try &.["href"]
- related_author_url ||= ""
+ related_author_url = renderer.try &.["navigationEndpoint"]?.try &.["commandMetadata"]?.try &.["webCommandMetadata"]?
+ .try &.["url"]?.try &.as_s?
+ related_author_url ||= ""
- related_author_thumbnail = node.xpath_node(%q(.//img)).try &.["data-thumb"]
- related_author_thumbnail ||= ""
+ related_author_thumbnails = renderer.try &.["thumbnail"]?.try &.["thumbnails"]?.try &.as_a?
+ related_author_thumbnails ||= [] of JSON::Any
- AboutRelatedChannel.new(
- ucid: related_id,
- author: related_title,
- author_url: related_author_url,
- author_thumbnail: related_author_thumbnail,
- )
- end
+ related_author_thumbnail = ""
+ if related_author_thumbnails.size > 0
+ related_author_thumbnail = related_author_thumbnails[-1]["url"]?.try &.as_s?
+ related_author_thumbnail ||= ""
+ end
- joined = about.xpath_node(%q(//span[contains(., "Joined")]))
- .try &.content.try { |text| Time.parse(text, "Joined %b %-d, %Y", Time::Location.local) } || Time.unix(0)
+ AboutRelatedChannel.new({
+ ucid: related_id,
+ author: related_title,
+ author_url: related_author_url,
+ author_thumbnail: related_author_thumbnail,
+ })
+ end
+ related_channels ||= [] of AboutRelatedChannel
- total_views = about.xpath_node(%q(//span[contains(., "views")]/b))
- .try &.content.try &.gsub(/\D/, "").to_i64? || 0_i64
-
- sub_count = about.xpath_node(%q(.//span[contains(@class, "subscriber-count")]))
- .try &.["title"].try { |text| short_text_to_number(text) } || 0
-
- # Auto-generated channels
- # https://support.google.com/youtube/answer/2579942
+ total_views = 0_i64
+ joined = Time.unix(0)
+ tabs = [] of String
auto_generated = false
- if about.xpath_node(%q(//ul[@class="about-custom-links"]/li/a[@title="Auto-generated by YouTube"])) ||
- about.xpath_node(%q(//span[@class="qualified-channel-title-badge"]/span[@title="Auto-generated by YouTube"]))
- auto_generated = true
+
+ tabs_json = initdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]?.try &.as_a?
+ if !tabs_json.nil?
+ # Retrieve information from the tabs array. The index we are looking for varies between channels.
+ tabs_json.each do |node|
+      # Try to find the about section, which is located in only one of the tabs.
+ channel_about_meta = node["tabRenderer"]?.try &.["content"]?.try &.["sectionListRenderer"]?
+ .try &.["contents"]?.try &.[0]?.try &.["itemSectionRenderer"]?.try &.["contents"]?
+ .try &.[0]?.try &.["channelAboutFullMetadataRenderer"]?
+
+ if !channel_about_meta.nil?
+ total_views = channel_about_meta["viewCountText"]?.try &.["simpleText"]?.try &.as_s.gsub(/\D/, "").to_i64? || 0_i64
+
+        # The joined text is split into several substrings; the reduce joins them before parsing the date.
+ joined = channel_about_meta["joinedDateText"]?.try &.["runs"]?.try &.as_a.reduce("") { |acc, node| acc + node["text"].as_s }
+ .try { |text| Time.parse(text, "Joined %b %-d, %Y", Time::Location.local) } || Time.unix(0)
+
+ # Auto-generated channels
+ # https://support.google.com/youtube/answer/2579942
+ # For auto-generated channels, channel_about_meta only has ["description"]["simpleText"] and ["primaryLinks"][0]["title"]["simpleText"]
+ if (channel_about_meta["primaryLinks"]?.try &.size || 0) == 1 && (channel_about_meta["primaryLinks"][0]?) &&
+ (channel_about_meta["primaryLinks"][0]["title"]?.try &.["simpleText"]?.try &.as_s? || "") == "Auto-generated by YouTube"
+ auto_generated = true
+ end
+ end
+ end
+ tabs = tabs_json.reject { |node| node["tabRenderer"]?.nil? }.map { |node| node["tabRenderer"]["title"].as_s.downcase }
end
- tabs = about.xpath_nodes(%q(//ul[@id="channel-navigation-menu"]/li/a/span)).map { |node| node.content.downcase }
+ sub_count = initdata["header"]["c4TabbedHeaderRenderer"]?.try &.["subscriberCountText"]?.try &.["simpleText"]?.try &.as_s?
+ .try { |text| short_text_to_number(text.split(" ")[0]) } || 0
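A note on the helper used above: short_text_to_number is an existing Invidious utility; the sketch below shows its assumed behavior of expanding YouTube's abbreviated counts (exact rounding is an assumption, not verified here):

# Assumed behavior of the existing short_text_to_number helper: it expands
# abbreviated counts like those in subscriberCountText into integers.
short_text_to_number("324K") # => 324000 (assumed)
short_text_to_number("1.4M") # => 1400000 (assumed)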
- AboutChannel.new(
- ucid: ucid,
- author: author,
- auto_generated: auto_generated,
- author_url: author_url,
- author_thumbnail: author_thumbnail,
- banner: banner,
- description_html: description_html,
- paid: paid,
- total_views: total_views,
- sub_count: sub_count,
- joined: joined,
+ AboutChannel.new({
+ ucid: ucid,
+ author: author,
+ auto_generated: auto_generated,
+ author_url: author_url,
+ author_thumbnail: author_thumbnail,
+ banner: banner,
+ description_html: description_html,
+ paid: paid,
+ total_views: total_views,
+ sub_count: sub_count,
+ joined: joined,
is_family_friendly: is_family_friendly,
- allowed_regions: allowed_regions,
- related_channels: related_channels,
- tabs: tabs
- )
+ allowed_regions: allowed_regions,
+ related_channels: related_channels,
+ tabs: tabs,
+ })
+end
+
+def get_channel_videos_response(ucid, page = 1, auto_generated = nil, sort_by = "newest")
+ url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated, sort_by: sort_by, v2: true)
+ return YT_POOL.client &.get(url)
end
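A minimal usage sketch of the new helper, mirroring the call sites below (the channel ID is illustrative):

# Fetch page 2 of a channel's uploads, newest first, and locate the
# polymer JSON payload inside the response body.
response = get_channel_videos_response("UCXuqSBlHAE6Xw-yeJA0Tunw", 2, sort_by: "newest")
initial_data = JSON.parse(response.body).as_a.find &.["response"]?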
def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest")
- count = 0
videos = [] of SearchVideo
2.times do |i|
- url = produce_channel_videos_url(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by)
- response = YT_POOL.client &.get(url)
- json = JSON.parse(response.body)
-
- if json["content_html"]? && !json["content_html"].as_s.empty?
- document = XML.parse_html(json["content_html"].as_s)
- nodeset = document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")]))
-
- if !json["load_more_widget_html"]?.try &.as_s.empty?
- count += 30
- end
-
- if auto_generated
- videos += extract_videos(nodeset)
- else
- videos += extract_videos(nodeset, ucid, author)
- end
- else
- break
- end
+ response = get_channel_videos_response(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by)
+ initial_data = JSON.parse(response.body).as_a.find &.["response"]?
+ break if !initial_data
+ videos.concat extract_videos(initial_data.as_h, author, ucid)
end
- return videos, count
+ return videos.size, videos
end
def get_latest_videos(ucid)
- videos = [] of SearchVideo
+ response = get_channel_videos_response(ucid, 1)
+ initial_data = JSON.parse(response.body).as_a.find &.["response"]?
+ return [] of SearchVideo if !initial_data
+ author = initial_data["response"]?.try &.["metadata"]?.try &.["channelMetadataRenderer"]?.try &.["title"]?.try &.as_s
+ items = extract_videos(initial_data.as_h, author, ucid)
- url = produce_channel_videos_url(ucid, 0)
- response = YT_POOL.client &.get(url)
- json = JSON.parse(response.body)
-
- if json["content_html"]? && !json["content_html"].as_s.empty?
- document = XML.parse_html(json["content_html"].as_s)
- nodeset = document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")]))
-
- videos = extract_videos(nodeset, ucid)
- end
-
- return videos
+ return items
end
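The JSON digging throughout this file leans on Crystal's nilable subscript ([]?) plus try; a standalone sketch of the idiom, with made-up data:

require "json"

data = JSON.parse(%({"header": {"title": {"simpleText": "Example"}}}))

# Each []? lookup returns nil instead of raising, and try short-circuits the
# rest of the chain, so any missing key falls through to the fallback.
title = data["header"]?.try &.["title"]?.try &.["simpleText"]?.try &.as_s? || ""
title # => "Example"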
diff --git a/src/invidious/comments.cr b/src/invidious/comments.cr
index 2d7bc1cf..407cef78 100644
--- a/src/invidious/comments.cr
+++ b/src/invidious/comments.cr
@@ -1,11 +1,23 @@
class RedditThing
- JSON.mapping({
- kind: String,
- data: RedditComment | RedditLink | RedditMore | RedditListing,
- })
+ include JSON::Serializable
+
+ property kind : String
+ property data : RedditComment | RedditLink | RedditMore | RedditListing
end
class RedditComment
+ include JSON::Serializable
+
+ property author : String
+ property body_html : String
+ property replies : RedditThing | String
+ property score : Int32
+ property depth : Int32
+ property permalink : String
+
+ @[JSON::Field(converter: RedditComment::TimeConverter)]
+ property created_utc : Time
+
module TimeConverter
def self.from_json(value : JSON::PullParser) : Time
Time.unix(value.read_float.to_i)
@@ -15,51 +27,38 @@ class RedditComment
json.number(value.to_unix)
end
end
-
- JSON.mapping({
- author: String,
- body_html: String,
- replies: RedditThing | String,
- score: Int32,
- depth: Int32,
- permalink: String,
- created_utc: {
- type: Time,
- converter: RedditComment::TimeConverter,
- },
- })
end
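A hedged round-trip through the converter; the payload values below are made up:

# created_utc arrives from Reddit as a float of Unix seconds and is turned
# into a Time by RedditComment::TimeConverter.
comment = RedditComment.from_json(%({
  "author": "someone", "body_html": "<p>hi</p>", "replies": "",
  "score": 1, "depth": 0, "permalink": "/r/crystal/abc",
  "created_utc": 1577836800.0
}))
comment.created_utc # => 2020-01-01 00:00:00 UTC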
struct RedditLink
- JSON.mapping({
- author: String,
- score: Int32,
- subreddit: String,
- num_comments: Int32,
- id: String,
- permalink: String,
- title: String,
- })
+ include JSON::Serializable
+
+ property author : String
+ property score : Int32
+ property subreddit : String
+ property num_comments : Int32
+ property id : String
+ property permalink : String
+ property title : String
end
struct RedditMore
- JSON.mapping({
- children: Array(String),
- count: Int32,
- depth: Int32,
- })
+ include JSON::Serializable
+
+ property children : Array(String)
+ property count : Int32
+ property depth : Int32
end
class RedditListing
- JSON.mapping({
- children: Array(RedditThing),
- modhash: String,
- })
+ include JSON::Serializable
+
+ property children : Array(RedditThing)
+ property modhash : String
end
def fetch_youtube_comments(id, db, cursor, format, locale, thin_mode, region, sort_by = "top")
video = get_video(id, db, region: region)
- session_token = video.info["session_token"]?
+ session_token = video.session_token
case cursor
when nil, ""
@@ -85,17 +84,9 @@ def fetch_youtube_comments(id, db, cursor, format, locale, thin_mode, region, so
session_token: session_token,
}
- headers = HTTP::Headers.new
-
- headers["content-type"] = "application/x-www-form-urlencoded"
- headers["cookie"] = video.info["cookie"]
-
- headers["x-client-data"] = "CIi2yQEIpbbJAQipncoBCNedygEIqKPKAQ=="
- headers["x-spf-previous"] = "https://www.youtube.com/watch?v=#{id}&gl=US&hl=en&disable_polymer=1&has_verified=1&bpctr=9999999999"
- headers["x-spf-referer"] = "https://www.youtube.com/watch?v=#{id}&gl=US&hl=en&disable_polymer=1&has_verified=1&bpctr=9999999999"
-
- headers["x-youtube-client-name"] = "1"
- headers["x-youtube-client-version"] = "2.20180719"
+ headers = HTTP::Headers{
+ "cookie" => video.cookie,
+ }
response = YT_POOL.client(region, &.post("/comment_service_ajax?action_get_comments=1&hl=en&gl=US", headers, form: post_req))
response = JSON.parse(response.body)
@@ -150,8 +141,7 @@ def fetch_youtube_comments(id, db, cursor, format, locale, thin_mode, region, so
node_comment = node["commentRenderer"]
end
- content_html = node_comment["contentText"]["simpleText"]?.try &.as_s.rchop('\ufeff').try { |block| HTML.escape(block) }.to_s ||
- content_to_comment_html(node_comment["contentText"]["runs"].as_a).try &.to_s || ""
+ content_html = node_comment["contentText"]?.try { |t| parse_content(t) } || ""
author = node_comment["authorText"]?.try &.["simpleText"]? || ""
json.field "author", author
@@ -294,7 +284,7 @@ def template_youtube_comments(comments, locale, thin_mode)
@@ -347,7 +337,7 @@ def template_youtube_comments(comments, locale, thin_mode)
END_HTML
else
html << <<-END_HTML
-
+
END_HTML
end
@@ -356,6 +346,7 @@ def template_youtube_comments(comments, locale, thin_mode)
END_HTML
+ else nil # Ignore
end
end
@@ -413,7 +404,7 @@ def template_youtube_comments(comments, locale, thin_mode)
@@ -451,7 +442,7 @@ def template_reddit_comments(root, locale)
html << <<-END_HTML
- [ - ]
+ [ - ]
#{child.author}
#{translate(locale, "`x` points", number_with_separator(child.score))}
#{translate(locale, "`x` ago", recode_date(child.created_utc, locale))}
@@ -522,6 +513,11 @@ def fill_links(html, scheme, host)
return html.to_xml(options: XML::SaveOptions::NO_DECL)
end
+def parse_content(content : JSON::Any) : String
+  content["simpleText"]?.try &.as_s.rchop('\ufeff').try { |b| HTML.escape(b) } ||
+ content["runs"]?.try &.as_a.try { |r| content_to_comment_html(r).try &.to_s } || ""
+end
+
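Both text shapes YouTube returns are handled by the same call; a quick sketch with made-up fragments:

simple = JSON.parse(%({"simpleText": "Nice video!"}))
runs   = JSON.parse(%({"runs": [{"text": "Nice "}, {"text": "video!"}]}))
parse_content(simple) # => "Nice video!"
parse_content(runs)   # => "Nice video!"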
def content_to_comment_html(content)
comment_html = content.map do |run|
text = HTML.escape(run["text"].as_s)
@@ -556,7 +552,7 @@ def content_to_comment_html(content)
video_id = watch_endpoint["videoId"].as_s
if length_seconds
- text = %(#{text})
+ text = %(#{text})
else
text = %(#{text})
end
@@ -609,6 +605,8 @@ def produce_comment_continuation(video_id, cursor = "", sort_by = "top")
object["6:embedded"].as(Hash)["4:embedded"].as(Hash)["6:varint"] = 0_i64
when "new", "newest"
object["6:embedded"].as(Hash)["4:embedded"].as(Hash)["6:varint"] = 1_i64
+ else # top
+ object["6:embedded"].as(Hash)["4:embedded"].as(Hash)["6:varint"] = 0_i64
end
continuation = object.try { |i| Protodec::Any.cast_json(object) }
diff --git a/src/invidious/helpers/handlers.cr b/src/invidious/helpers/handlers.cr
index 456618cf..045b6701 100644
--- a/src/invidious/helpers/handlers.cr
+++ b/src/invidious/helpers/handlers.cr
@@ -61,7 +61,7 @@ class Kemal::ExceptionHandler
end
class FilteredCompressHandler < Kemal::Handler
- exclude ["/videoplayback", "/videoplayback/*", "/vi/*", "/ggpht/*", "/api/v1/auth/notifications"]
+ exclude ["/videoplayback", "/videoplayback/*", "/vi/*", "/sb/*", "/ggpht/*", "/api/v1/auth/notifications"]
exclude ["/api/v1/auth/notifications", "/data_control"], "POST"
def call(env)
@@ -74,10 +74,10 @@ class FilteredCompressHandler < Kemal::Handler
if request_headers.includes_word?("Accept-Encoding", "gzip")
env.response.headers["Content-Encoding"] = "gzip"
- env.response.output = Gzip::Writer.new(env.response.output, sync_close: true)
+ env.response.output = Compress::Gzip::Writer.new(env.response.output, sync_close: true)
elsif request_headers.includes_word?("Accept-Encoding", "deflate")
env.response.headers["Content-Encoding"] = "deflate"
- env.response.output = Flate::Writer.new(env.response.output, sync_close: true)
+ env.response.output = Compress::Deflate::Writer.new(env.response.output, sync_close: true)
end
call_next env
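For reference, a standalone sketch of the renamed stdlib API (Crystal 0.35 moved Gzip and Flate under the Compress namespace, with Flate renamed to Deflate):

require "compress/gzip"

io = IO::Memory.new
Compress::Gzip::Writer.open(io) do |gzip|
  gzip.print "hello" # compressed into io; Compress::Deflate::Writer works the same way
end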
@@ -212,29 +212,3 @@ class DenyFrame < Kemal::Handler
call_next env
end
end
-
-# Temp fixes for https://github.com/crystal-lang/crystal/issues/7383
-class HTTP::UnknownLengthContent
- def read_byte
- ensure_send_continue
- if @io.is_a?(OpenSSL::SSL::Socket::Client)
- return if @io.as(OpenSSL::SSL::Socket::Client).@in_buffer_rem.empty?
- end
- @io.read_byte
- end
-end
-
-class HTTP::Client
- private def handle_response(response)
- if @socket.is_a?(OpenSSL::SSL::Socket::Client) && @host.ends_with?("googlevideo.com")
- close unless response.keep_alive? || @socket.as(OpenSSL::SSL::Socket::Client).@in_buffer_rem.empty?
-
- if @socket.as(OpenSSL::SSL::Socket::Client).@in_buffer_rem.empty?
- @socket = nil
- end
- else
- close unless response.keep_alive?
- end
- response
- end
-end
diff --git a/src/invidious/helpers/helpers.cr b/src/invidious/helpers/helpers.cr
index 2341d3be..62c24f3e 100644
--- a/src/invidious/helpers/helpers.cr
+++ b/src/invidious/helpers/helpers.cr
@@ -1,217 +1,100 @@
require "./macros"
struct Nonce
- db_mapping({
- nonce: String,
- expire: Time,
- })
+ include DB::Serializable
+
+ property nonce : String
+ property expire : Time
end
struct SessionId
- db_mapping({
- id: String,
- email: String,
- issued: String,
- })
+ include DB::Serializable
+
+ property id : String
+ property email : String
+ property issued : String
end
struct Annotation
- db_mapping({
- id: String,
- annotations: String,
- })
+ include DB::Serializable
+
+ property id : String
+ property annotations : String
end
struct ConfigPreferences
- module StringToArray
- def self.to_json(value : Array(String), json : JSON::Builder)
- json.array do
- value.each do |element|
- json.string element
- end
- end
- end
+ include YAML::Serializable
- def self.from_json(value : JSON::PullParser) : Array(String)
- begin
- result = [] of String
- value.read_array do
- result << HTML.escape(value.read_string[0, 100])
- end
- rescue ex
- result = [HTML.escape(value.read_string[0, 100]), ""]
- end
+ property annotations : Bool = false
+ property annotations_subscribed : Bool = false
+ property autoplay : Bool = false
+ property captions : Array(String) = ["", "", ""]
+ property comments : Array(String) = ["youtube", ""]
+ property continue : Bool = false
+ property continue_autoplay : Bool = true
+ property dark_mode : String = ""
+ property latest_only : Bool = false
+ property listen : Bool = false
+ property local : Bool = false
+ property locale : String = "en-US"
+ property max_results : Int32 = 40
+ property notifications_only : Bool = false
+ property player_style : String = "invidious"
+ property quality : String = "hd720"
+ property default_home : String = "Popular"
+ property feed_menu : Array(String) = ["Popular", "Trending", "Subscriptions", "Playlists"]
+ property related_videos : Bool = true
+ property sort : String = "published"
+ property speed : Float32 = 1.0_f32
+ property thin_mode : Bool = false
+ property unseen_only : Bool = false
+ property video_loop : Bool = false
+ property volume : Int32 = 100
- result
- end
-
- def self.to_yaml(value : Array(String), yaml : YAML::Nodes::Builder)
- yaml.sequence do
- value.each do |element|
- yaml.scalar element
- end
- end
- end
-
- def self.from_yaml(ctx : YAML::ParseContext, node : YAML::Nodes::Node) : Array(String)
- begin
- unless node.is_a?(YAML::Nodes::Sequence)
- node.raise "Expected sequence, not #{node.class}"
- end
-
- result = [] of String
- node.nodes.each do |item|
- unless item.is_a?(YAML::Nodes::Scalar)
- node.raise "Expected scalar, not #{item.class}"
- end
-
- result << HTML.escape(item.value[0, 100])
- end
- rescue ex
- if node.is_a?(YAML::Nodes::Scalar)
- result = [HTML.escape(node.value[0, 100]), ""]
- else
- result = ["", ""]
- end
- end
-
- result
- end
+ def to_tuple
+ {% begin %}
+ {
+ {{*@type.instance_vars.map { |var| "#{var.name}: #{var.name}".id }}}
+ }
+ {% end %}
end
-
- module BoolToString
- def self.to_json(value : String, json : JSON::Builder)
- json.string value
- end
-
- def self.from_json(value : JSON::PullParser) : String
- begin
- result = value.read_string
-
- if result.empty?
- CONFIG.default_user_preferences.dark_mode
- else
- result
- end
- rescue ex
- if value.read_bool
- "dark"
- else
- "light"
- end
- end
- end
-
- def self.to_yaml(value : String, yaml : YAML::Nodes::Builder)
- yaml.scalar value
- end
-
- def self.from_yaml(ctx : YAML::ParseContext, node : YAML::Nodes::Node) : String
- unless node.is_a?(YAML::Nodes::Scalar)
- node.raise "Expected scalar, not #{node.class}"
- end
-
- case node.value
- when "true"
- "dark"
- when "false"
- "light"
- when ""
- CONFIG.default_user_preferences.dark_mode
- else
- node.value
- end
- end
- end
-
- yaml_mapping({
- annotations: {type: Bool, default: false},
- annotations_subscribed: {type: Bool, default: false},
- autoplay: {type: Bool, default: false},
- captions: {type: Array(String), default: ["", "", ""], converter: StringToArray},
- comments: {type: Array(String), default: ["youtube", ""], converter: StringToArray},
- continue: {type: Bool, default: false},
- continue_autoplay: {type: Bool, default: true},
- dark_mode: {type: String, default: "", converter: BoolToString},
- latest_only: {type: Bool, default: false},
- listen: {type: Bool, default: false},
- local: {type: Bool, default: false},
- locale: {type: String, default: "en-US"},
- max_results: {type: Int32, default: 40},
- notifications_only: {type: Bool, default: false},
- player_style: {type: String, default: "invidious"},
- quality: {type: String, default: "hd720"},
- default_home: {type: String, default: "Popular"},
- feed_menu: {type: Array(String), default: ["Popular", "Trending", "Subscriptions", "Playlists"]},
- related_videos: {type: Bool, default: true},
- sort: {type: String, default: "published"},
- speed: {type: Float32, default: 1.0_f32},
- thin_mode: {type: Bool, default: false},
- unseen_only: {type: Bool, default: false},
- video_loop: {type: Bool, default: false},
- volume: {type: Int32, default: 100},
- })
end
struct Config
- module ConfigPreferencesConverter
- def self.to_yaml(value : Preferences, yaml : YAML::Nodes::Builder)
- value.to_yaml(yaml)
- end
+ include YAML::Serializable
- def self.from_yaml(ctx : YAML::ParseContext, node : YAML::Nodes::Node) : Preferences
- Preferences.new(*ConfigPreferences.new(ctx, node).to_tuple)
- end
- end
+ property channel_threads : Int32 # Number of threads to use for crawling videos from channels (for updating subscriptions)
+ property feed_threads : Int32 # Number of threads to use for updating feeds
+ property db : DBConfig # Database configuration
+ property full_refresh : Bool # Used for crawling channels: threads should check all videos uploaded by a channel
+ property https_only : Bool? # Used to tell Invidious it is behind a proxy, so links to resources should be https://
+ property hmac_key : String? # HMAC signing key for CSRF tokens and verifying pubsub subscriptions
+ property domain : String? # Domain to be used for links to resources on the site where an absolute URL is required
+ property use_pubsub_feeds : Bool | Int32 = false # Subscribe to channels using PubSubHubbub (requires domain, hmac_key)
+ property captcha_enabled : Bool = true
+ property login_enabled : Bool = true
+ property registration_enabled : Bool = true
+ property statistics_enabled : Bool = false
+ property admins : Array(String) = [] of String
+ property external_port : Int32? = nil
+ property default_user_preferences : ConfigPreferences = ConfigPreferences.from_yaml("")
+ property dmca_content : Array(String) = [] of String # For compliance with DMCA, disables download widget using list of video IDs
+ property check_tables : Bool = false # Check table integrity, automatically try to add any missing columns, create tables, etc.
+ property cache_annotations : Bool = false # Cache annotations requested from IA, will not cache empty annotations or annotations that only contain cards
+ property banner : String? = nil # Optional banner to be displayed along top of page for announcements, etc.
+ property hsts : Bool? = true # Enables 'Strict-Transport-Security'. Ensure that `domain` and all subdomains are served securely
+ property disable_proxy : Bool? | Array(String)? = false # Disable proxying server-wide: options: 'dash', 'livestreams', 'downloads', 'local'
- module FamilyConverter
- def self.to_yaml(value : Socket::Family, yaml : YAML::Nodes::Builder)
- case value
- when Socket::Family::UNSPEC
- yaml.scalar nil
- when Socket::Family::INET
- yaml.scalar "ipv4"
- when Socket::Family::INET6
- yaml.scalar "ipv6"
- end
- end
+ @[YAML::Field(converter: Preferences::FamilyConverter)]
+  property force_resolve : Socket::Family = Socket::Family::UNSPEC # Connect to YouTube over 'ipv6', 'ipv4'. Will sometimes fix issues with rate-limiting (see https://github.com/ytdl-org/youtube-dl/issues/21729)
+  property port : Int32 = 3000 # Port to listen for connections (overridden by command line argument)
+  property host_binding : String = "0.0.0.0" # Host to bind (overridden by command line argument)
+ property pool_size : Int32 = 100 # Pool size for HTTP requests to youtube.com and ytimg.com (each domain has a separate pool of `pool_size`)
+ property admin_email : String = "omarroth@protonmail.com" # Email for bug reports
- def self.from_yaml(ctx : YAML::ParseContext, node : YAML::Nodes::Node) : Socket::Family
- if node.is_a?(YAML::Nodes::Scalar)
- case node.value.downcase
- when "ipv4"
- Socket::Family::INET
- when "ipv6"
- Socket::Family::INET6
- else
- Socket::Family::UNSPEC
- end
- else
- node.raise "Expected scalar, not #{node.class}"
- end
- end
- end
-
- module StringToCookies
- def self.to_yaml(value : HTTP::Cookies, yaml : YAML::Nodes::Builder)
- (value.map { |c| "#{c.name}=#{c.value}" }).join("; ").to_yaml(yaml)
- end
-
- def self.from_yaml(ctx : YAML::ParseContext, node : YAML::Nodes::Node) : HTTP::Cookies
- unless node.is_a?(YAML::Nodes::Scalar)
- node.raise "Expected scalar, not #{node.class}"
- end
-
- cookies = HTTP::Cookies.new
- node.value.split(";").each do |cookie|
- next if cookie.strip.empty?
- name, value = cookie.split("=", 2)
- cookies << HTTP::Cookie.new(name.strip, value.strip)
- end
-
- cookies
- end
- end
+ @[YAML::Field(converter: Preferences::StringToCookies)]
+ property cookies : HTTP::Cookies = HTTP::Cookies.new # Saved cookies in "name1=value1; name2=value2..." format
+ property captcha_key : String? = nil # Key for Anti-Captcha
def disabled?(option)
case disabled = CONFIG.disable_proxy
@@ -223,77 +106,20 @@ struct Config
else
return false
end
+ else
+ return false
end
end
-
- YAML.mapping({
- channel_threads: Int32, # Number of threads to use for crawling videos from channels (for updating subscriptions)
- feed_threads: Int32, # Number of threads to use for updating feeds
- db: DBConfig, # Database configuration
- full_refresh: Bool, # Used for crawling channels: threads should check all videos uploaded by a channel
- https_only: Bool?, # Used to tell Invidious it is behind a proxy, so links to resources should be https://
- hmac_key: String?, # HMAC signing key for CSRF tokens and verifying pubsub subscriptions
- domain: String?, # Domain to be used for links to resources on the site where an absolute URL is required
- use_pubsub_feeds: {type: Bool | Int32, default: false}, # Subscribe to channels using PubSubHubbub (requires domain, hmac_key)
- top_enabled: {type: Bool, default: true},
- captcha_enabled: {type: Bool, default: true},
- login_enabled: {type: Bool, default: true},
- registration_enabled: {type: Bool, default: true},
- statistics_enabled: {type: Bool, default: false},
- admins: {type: Array(String), default: [] of String},
- external_port: {type: Int32?, default: nil},
- default_user_preferences: {type: Preferences,
- default: Preferences.new(*ConfigPreferences.from_yaml("").to_tuple),
- converter: ConfigPreferencesConverter,
- },
- dmca_content: {type: Array(String), default: [] of String}, # For compliance with DMCA, disables download widget using list of video IDs
- check_tables: {type: Bool, default: false}, # Check table integrity, automatically try to add any missing columns, create tables, etc.
- cache_annotations: {type: Bool, default: false}, # Cache annotations requested from IA, will not cache empty annotations or annotations that only contain cards
- banner: {type: String?, default: nil}, # Optional banner to be displayed along top of page for announcements, etc.
- hsts: {type: Bool?, default: true}, # Enables 'Strict-Transport-Security'. Ensure that `domain` and all subdomains are served securely
- disable_proxy: {type: Bool? | Array(String)?, default: false}, # Disable proxying server-wide: options: 'dash', 'livestreams', 'downloads', 'local'
- force_resolve: {type: Socket::Family, default: Socket::Family::UNSPEC, converter: FamilyConverter}, # Connect to YouTube over 'ipv6', 'ipv4'. Will sometimes resolve fix issues with rate-limiting (see https://github.com/ytdl-org/youtube-dl/issues/21729)
- port: {type: Int32, default: 3000}, # Port to listen for connections (overrided by command line argument)
- host_binding: {type: String, default: "0.0.0.0"}, # Host to bind (overrided by command line argument)
- pool_size: {type: Int32, default: 100}, # Pool size for HTTP requests to youtube.com and ytimg.com (each domain has a separate pool of `pool_size`)
- admin_email: {type: String, default: "omarroth@protonmail.com"}, # Email for bug reports
- cookies: {type: HTTP::Cookies, default: HTTP::Cookies.new, converter: StringToCookies}, # Saved cookies in "name1=value1; name2=value2..." format
- captcha_key: {type: String?, default: nil}, # Key for Anti-Captcha
- })
end
struct DBConfig
- yaml_mapping({
- user: String,
- password: String,
- host: String,
- port: Int32,
- dbname: String,
- })
-end
+ include YAML::Serializable
-def rank_videos(db, n)
- top = [] of {Float64, String}
-
- db.query("SELECT id, wilson_score, published FROM videos WHERE views > 5000 ORDER BY published DESC LIMIT 1000") do |rs|
- rs.each do
- id = rs.read(String)
- wilson_score = rs.read(Float64)
- published = rs.read(Time)
-
- # Exponential decay, older videos tend to rank lower
- temperature = wilson_score * Math.exp(-0.000005*((Time.utc - published).total_minutes))
- top << {temperature, id}
- end
- end
-
- top.sort!
-
- # Make hottest come first
- top.reverse!
- top = top.map { |a, b| b }
-
- return top[0..n - 1]
+ property user : String
+ property password : String
+ property host : String
+ property port : Int32
+ property dbname : String
end
def login_req(f_req)
@@ -334,293 +160,179 @@ def html_to_content(description_html : String)
return description
end
-def extract_videos(nodeset, ucid = nil, author_name = nil)
- videos = extract_items(nodeset, ucid, author_name)
- videos.select { |item| item.is_a?(SearchVideo) }.map { |video| video.as(SearchVideo) }
+def extract_videos(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil)
+ extract_items(initial_data, author_fallback, author_id_fallback).select(&.is_a?(SearchVideo)).map(&.as(SearchVideo))
end
-def extract_items(nodeset, ucid = nil, author_name = nil)
- # TODO: Make this a 'common', so it makes more sense to be used here
+def extract_item(item : JSON::Any, author_fallback : String? = nil, author_id_fallback : String? = nil)
+ if i = (item["videoRenderer"]? || item["gridVideoRenderer"]?)
+ video_id = i["videoId"].as_s
+ title = i["title"].try { |t| t["simpleText"]?.try &.as_s || t["runs"]?.try &.as_a.map(&.["text"].as_s).join("") } || ""
+
+ author_info = i["ownerText"]?.try &.["runs"].as_a[0]?
+ author = author_info.try &.["text"].as_s || author_fallback || ""
+ author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || author_id_fallback || ""
+
+ published = i["publishedTimeText"]?.try &.["simpleText"]?.try { |t| decode_date(t.as_s) } || Time.local
+ view_count = i["viewCountText"]?.try &.["simpleText"]?.try &.as_s.gsub(/\D+/, "").to_i64? || 0_i64
+ description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || ""
+ length_seconds = i["lengthText"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } ||
+ i["thumbnailOverlays"]?.try &.as_a.find(&.["thumbnailOverlayTimeStatusRenderer"]?).try &.["thumbnailOverlayTimeStatusRenderer"]?
+ .try &.["text"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } || 0
+
+ live_now = false
+ paid = false
+ premium = false
+
+ premiere_timestamp = i["upcomingEventData"]?.try &.["startTime"]?.try { |t| Time.unix(t.as_s.to_i64) }
+
+ i["badges"]?.try &.as_a.each do |badge|
+ b = badge["metadataBadgeRenderer"]
+ case b["label"].as_s
+ when "LIVE NOW"
+ live_now = true
+ when "New", "4K", "CC"
+ # TODO
+ when "Premium"
+ paid = true
+
+ # TODO: Potentially available as i["topStandaloneBadge"]["metadataBadgeRenderer"]
+ premium = true
+ else nil # Ignore
+ end
+ end
+
+ SearchVideo.new({
+ title: title,
+ id: video_id,
+ author: author,
+ ucid: author_id,
+ published: published,
+ views: view_count,
+ description_html: description_html,
+ length_seconds: length_seconds,
+ live_now: live_now,
+ paid: paid,
+ premium: premium,
+ premiere_timestamp: premiere_timestamp,
+ })
+ elsif i = item["channelRenderer"]?
+ author = i["title"]["simpleText"]?.try &.as_s || author_fallback || ""
+ author_id = i["channelId"]?.try &.as_s || author_id_fallback || ""
+
+ author_thumbnail = i["thumbnail"]["thumbnails"]?.try &.as_a[0]?.try { |u| "https:#{u["url"]}" } || ""
+ subscriber_count = i["subscriberCountText"]?.try &.["simpleText"]?.try &.as_s.try { |s| short_text_to_number(s.split(" ")[0]) } || 0
+
+ auto_generated = false
+ auto_generated = true if !i["videoCountText"]?
+ video_count = i["videoCountText"]?.try &.["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0
+ description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || ""
+
+ SearchChannel.new({
+ author: author,
+ ucid: author_id,
+ author_thumbnail: author_thumbnail,
+ subscriber_count: subscriber_count,
+ video_count: video_count,
+ description_html: description_html,
+ auto_generated: auto_generated,
+ })
+ elsif i = item["gridPlaylistRenderer"]?
+ title = i["title"]["runs"].as_a[0]?.try &.["text"].as_s || ""
+ plid = i["playlistId"]?.try &.as_s || ""
+
+ video_count = i["videoCountText"]["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0
+ playlist_thumbnail = i["thumbnail"]["thumbnails"][0]?.try &.["url"]?.try &.as_s || ""
+
+ SearchPlaylist.new({
+ title: title,
+ id: plid,
+ author: author_fallback || "",
+ ucid: author_id_fallback || "",
+ video_count: video_count,
+ videos: [] of SearchPlaylistVideo,
+ thumbnail: playlist_thumbnail,
+ })
+ elsif i = item["playlistRenderer"]?
+ title = i["title"]["simpleText"]?.try &.as_s || ""
+ plid = i["playlistId"]?.try &.as_s || ""
+
+ video_count = i["videoCount"]?.try &.as_s.to_i || 0
+ playlist_thumbnail = i["thumbnails"].as_a[0]?.try &.["thumbnails"]?.try &.as_a[0]?.try &.["url"].as_s || ""
+
+ author_info = i["shortBylineText"]?.try &.["runs"].as_a[0]?
+ author = author_info.try &.["text"].as_s || author_fallback || ""
+ author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || author_id_fallback || ""
+
+ videos = i["videos"]?.try &.as_a.map do |v|
+ v = v["childVideoRenderer"]
+ v_title = v["title"]["simpleText"]?.try &.as_s || ""
+ v_id = v["videoId"]?.try &.as_s || ""
+ v_length_seconds = v["lengthText"]?.try &.["simpleText"]?.try { |t| decode_length_seconds(t.as_s) } || 0
+ SearchPlaylistVideo.new({
+ title: v_title,
+ id: v_id,
+ length_seconds: v_length_seconds,
+ })
+ end || [] of SearchPlaylistVideo
+
+ # TODO: i["publishedTimeText"]?
+
+ SearchPlaylist.new({
+ title: title,
+ id: plid,
+ author: author,
+ ucid: author_id,
+ video_count: video_count,
+ videos: videos,
+ thumbnail: playlist_thumbnail,
+ })
+ elsif i = item["radioRenderer"]? # Mix
+ # TODO
+ elsif i = item["showRenderer"]? # Show
+ # TODO
+ elsif i = item["shelfRenderer"]?
+ elsif i = item["horizontalCardListRenderer"]?
+ elsif i = item["searchPyvRenderer"]? # Ad
+ end
+end
+
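A hedged sketch of feeding extract_item a single renderer; the fragment below is made up and heavily trimmed (the video and channel IDs are the real "Me at the zoo" ones, used purely for illustration):

item = JSON.parse(%({
  "videoRenderer": {
    "videoId": "jNQXAC9IVRw",
    "title": {"simpleText": "Me at the zoo"},
    "viewCountText": {"simpleText": "1,234 views"}
  }
}))
# ownerText is absent here, so author and ucid fall back to the supplied values.
extract_item(item, "jawed", "UC4QobU6STFB0P71PMvOGN5A") # => SearchVideo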
+def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil)
items = [] of SearchItem
- nodeset.each do |node|
- anchor = node.xpath_node(%q(.//h3[contains(@class, "yt-lockup-title")]/a))
- if !anchor
- next
- end
- title = anchor.content.strip
- id = anchor["href"]
+ channel_v2_response = initial_data
+ .try &.["response"]?
+ .try &.["continuationContents"]?
+ .try &.["gridContinuation"]?
+ .try &.["items"]?
- if anchor["href"].starts_with? "https://www.googleadservices.com"
- next
- end
-
- author_id = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-byline")]/a)).try &.["href"].split("/")[-1] || ucid || ""
- author = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-byline")]/a)).try &.content.strip || author_name || ""
- description_html = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")])).try &.to_s || ""
-
- tile = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-tile")]))
- if !tile
- next
- end
-
- case tile["class"]
- when .includes? "yt-lockup-playlist"
- plid = HTTP::Params.parse(URI.parse(id).query.not_nil!)["list"]
-
- anchor = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-meta")]/a))
-
- if !anchor
- anchor = node.xpath_node(%q(.//ul[@class="yt-lockup-meta-info"]/li/a))
- end
-
- video_count = node.xpath_node(%q(.//span[@class="formatted-video-count-label"]/b)) ||
- node.xpath_node(%q(.//span[@class="formatted-video-count-label"]))
- if video_count
- video_count = video_count.content
-
- if video_count == "50+"
- author = "YouTube"
- author_id = "UC-9-kyTW8ZkZNDHQJ6FgpwQ"
- end
-
- video_count = video_count.gsub(/\D/, "").to_i?
- end
- video_count ||= 0
-
- videos = [] of SearchPlaylistVideo
- node.xpath_nodes(%q(.//*[contains(@class, "yt-lockup-playlist-items")]/li)).each do |video|
- anchor = video.xpath_node(%q(.//a))
- if anchor
- video_title = anchor.content.strip
- id = HTTP::Params.parse(URI.parse(anchor["href"]).query.not_nil!)["v"]
- end
- video_title ||= ""
- id ||= ""
-
- anchor = video.xpath_node(%q(.//span/span))
- if anchor
- length_seconds = decode_length_seconds(anchor.content)
- end
- length_seconds ||= 0
-
- videos << SearchPlaylistVideo.new(
- video_title,
- id,
- length_seconds
- )
- end
-
- playlist_thumbnail = node.xpath_node(%q(.//span/img)).try &.["data-thumb"]?
- playlist_thumbnail ||= node.xpath_node(%q(.//span/img)).try &.["src"]
-
- items << SearchPlaylist.new(
- title: title,
- id: plid,
- author: author,
- ucid: author_id,
- video_count: video_count,
- videos: videos,
- thumbnail: playlist_thumbnail
- )
- when .includes? "yt-lockup-channel"
- author = title.strip
-
- ucid = node.xpath_node(%q(.//button[contains(@class, "yt-uix-subscription-button")])).try &.["data-channel-external-id"]?
- ucid ||= id.split("/")[-1]
-
- author_thumbnail = node.xpath_node(%q(.//div/span/img)).try &.["data-thumb"]?
- author_thumbnail ||= node.xpath_node(%q(.//div/span/img)).try &.["src"]
- if author_thumbnail
- author_thumbnail = URI.parse(author_thumbnail)
- author_thumbnail.scheme = "https"
- author_thumbnail = author_thumbnail.to_s
- end
-
- author_thumbnail ||= ""
-
- subscriber_count = node.xpath_node(%q(.//span[contains(@class, "subscriber-count")]))
- .try &.["title"].try { |text| short_text_to_number(text) } || 0
-
- video_count = node.xpath_node(%q(.//ul[@class="yt-lockup-meta-info"]/li)).try &.content.split(" ")[0].gsub(/\D/, "").to_i?
-
- items << SearchChannel.new(
- author: author,
- ucid: ucid,
- author_thumbnail: author_thumbnail,
- subscriber_count: subscriber_count,
- video_count: video_count || 0,
- description_html: description_html,
- auto_generated: video_count ? false : true,
- )
- else
- id = id.lchop("/watch?v=")
-
- metadata = node.xpath_node(%q(.//div[contains(@class,"yt-lockup-meta")]/ul))
-
- published = metadata.try &.xpath_node(%q(.//li[contains(text(), " ago")])).try { |node| decode_date(node.content.sub(/^[a-zA-Z]+ /, "")) }
- published ||= metadata.try &.xpath_node(%q(.//span[@data-timestamp])).try { |node| Time.unix(node["data-timestamp"].to_i64) }
- published ||= Time.utc
-
- view_count = metadata.try &.xpath_node(%q(.//li[contains(text(), " views")])).try &.content.gsub(/\D/, "").to_i64?
- view_count ||= 0_i64
-
- length_seconds = node.xpath_node(%q(.//span[@class="video-time"])).try { |node| decode_length_seconds(node.content) }
- length_seconds ||= -1
-
- live_now = node.xpath_node(%q(.//span[contains(@class, "yt-badge-live")])) ? true : false
- premium = node.xpath_node(%q(.//span[text()="Premium"])) ? true : false
-
- if !premium || node.xpath_node(%q(.//span[contains(text(), "Free episode")]))
- paid = false
- else
- paid = true
- end
-
- premiere_timestamp = node.xpath_node(%q(.//ul[@class="yt-lockup-meta-info"]/li/span[@class="localized-date"])).try &.["data-timestamp"]?.try &.to_i64
- if premiere_timestamp
- premiere_timestamp = Time.unix(premiere_timestamp)
- end
-
- items << SearchVideo.new(
- title: title,
- id: id,
- author: author,
- ucid: author_id,
- published: published,
- views: view_count,
- description_html: description_html,
- length_seconds: length_seconds,
- live_now: live_now,
- paid: paid,
- premium: premium,
- premiere_timestamp: premiere_timestamp
- )
- end
+ if channel_v2_response
+ channel_v2_response.try &.as_a.each { |item|
+ extract_item(item, author_fallback, author_id_fallback)
+ .try { |t| items << t }
+ }
+ else
+ initial_data.try { |t| t["contents"]? || t["response"]? }
+ .try { |t| t["twoColumnBrowseResultsRenderer"]?.try &.["tabs"].as_a.select(&.["tabRenderer"]?.try &.["selected"].as_bool)[0]?.try &.["tabRenderer"]["content"] ||
+ t["twoColumnSearchResultsRenderer"]?.try &.["primaryContents"] ||
+ t["continuationContents"]? }
+ .try { |t| t["sectionListRenderer"]? || t["sectionListContinuation"]? }
+ .try &.["contents"].as_a
+ .each { |c| c.try &.["itemSectionRenderer"]?.try &.["contents"].as_a
+ .try { |t| t[0]?.try &.["shelfRenderer"]?.try &.["content"]["expandedShelfContentsRenderer"]?.try &.["items"].as_a ||
+ t[0]?.try &.["gridRenderer"]?.try &.["items"].as_a || t }
+ .each { |item|
+ extract_item(item, author_fallback, author_id_fallback)
+ .try { |t| items << t }
+ } }
end
- return items
-end
-
-def extract_shelf_items(nodeset, ucid = nil, author_name = nil)
- items = [] of SearchPlaylist
-
- nodeset.each do |shelf|
- shelf_anchor = shelf.xpath_node(%q(.//h2[contains(@class, "branded-page-module-title")]))
- next if !shelf_anchor
-
- title = shelf_anchor.xpath_node(%q(.//span[contains(@class, "branded-page-module-title-text")])).try &.content.strip
- title ||= ""
-
- id = shelf_anchor.xpath_node(%q(.//a)).try &.["href"]
- next if !id
-
- shelf_is_playlist = false
- videos = [] of SearchPlaylistVideo
-
- shelf.xpath_nodes(%q(.//ul[contains(@class, "yt-uix-shelfslider-list") or contains(@class, "expanded-shelf-content-list")]/li)).each do |child_node|
- type = child_node.xpath_node(%q(./div))
- if !type
- next
- end
-
- case type["class"]
- when .includes? "yt-lockup-video"
- shelf_is_playlist = true
-
- anchor = child_node.xpath_node(%q(.//h3[contains(@class, "yt-lockup-title")]/a))
- if anchor
- video_title = anchor.content.strip
- video_id = HTTP::Params.parse(URI.parse(anchor["href"]).query.not_nil!)["v"]
- end
- video_title ||= ""
- video_id ||= ""
-
- anchor = child_node.xpath_node(%q(.//span[@class="video-time"]))
- if anchor
- length_seconds = decode_length_seconds(anchor.content)
- end
- length_seconds ||= 0
-
- videos << SearchPlaylistVideo.new(
- video_title,
- video_id,
- length_seconds
- )
- when .includes? "yt-lockup-playlist"
- anchor = child_node.xpath_node(%q(.//h3[contains(@class, "yt-lockup-title")]/a))
- if anchor
- playlist_title = anchor.content.strip
- params = HTTP::Params.parse(URI.parse(anchor["href"]).query.not_nil!)
- plid = params["list"]
- end
- playlist_title ||= ""
- plid ||= ""
-
- playlist_thumbnail = child_node.xpath_node(%q(.//span/img)).try &.["data-thumb"]?
- playlist_thumbnail ||= child_node.xpath_node(%q(.//span/img)).try &.["src"]
-
- video_count = child_node.xpath_node(%q(.//span[@class="formatted-video-count-label"]/b)) ||
- child_node.xpath_node(%q(.//span[@class="formatted-video-count-label"]))
- if video_count
- video_count = video_count.content.gsub(/\D/, "").to_i?
- end
- video_count ||= 50
-
- videos = [] of SearchPlaylistVideo
- child_node.xpath_nodes(%q(.//*[contains(@class, "yt-lockup-playlist-items")]/li)).each do |video|
- anchor = video.xpath_node(%q(.//a))
- if anchor
- video_title = anchor.content.strip
- id = HTTP::Params.parse(URI.parse(anchor["href"]).query.not_nil!)["v"]
- end
- video_title ||= ""
- id ||= ""
-
- anchor = video.xpath_node(%q(.//span/span))
- if anchor
- length_seconds = decode_length_seconds(anchor.content)
- end
- length_seconds ||= 0
-
- videos << SearchPlaylistVideo.new(
- video_title,
- id,
- length_seconds
- )
- end
-
- items << SearchPlaylist.new(
- title: playlist_title,
- id: plid,
- author: author_name,
- ucid: ucid,
- video_count: video_count,
- videos: videos,
- thumbnail: playlist_thumbnail
- )
- end
- end
-
- if shelf_is_playlist
- plid = HTTP::Params.parse(URI.parse(id).query.not_nil!)["list"]
-
- items << SearchPlaylist.new(
- title: title,
- id: plid,
- author: author_name,
- ucid: ucid,
- video_count: videos.size,
- videos: videos,
- thumbnail: "https://i.ytimg.com/vi/#{videos[0].id}/mqdefault.jpg"
- )
- end
- end
-
- return items
+ items
end
def check_enum(db, logger, enum_name, struct_type = nil)
+ return # TODO
+
if !db.query_one?("SELECT true FROM pg_type WHERE typname = $1", enum_name, as: Bool)
logger.puts("CREATE TYPE #{enum_name}")
@@ -642,18 +354,14 @@ def check_table(db, logger, table_name, struct_type = nil)
end
end
- if !struct_type
- return
- end
+ return if !struct_type
- struct_array = struct_type.to_type_tuple
+ struct_array = struct_type.type_array
column_array = get_column_array(db, table_name)
  column_types = File.read("config/sql/#{table_name}.sql").match(/CREATE TABLE public\.#{table_name}\n\((?<types>[\d\D]*?)\);/)
- .try &.["types"].split(",").map { |line| line.strip }
+ .try &.["types"].split(",").map { |line| line.strip }.reject &.starts_with?("CONSTRAINT")
- if !column_types
- return
- end
+ return if !column_types
struct_array.each_with_index do |name, i|
if name != column_array[i]?
@@ -704,6 +412,15 @@ def check_table(db, logger, table_name, struct_type = nil)
end
end
end
+
+ return if column_array.size <= struct_array.size
+
+ column_array.each do |column|
+ if !struct_array.includes? column
+ logger.puts("ALTER TABLE #{table_name} DROP COLUMN #{column} CASCADE")
+ db.exec("ALTER TABLE #{table_name} DROP COLUMN #{column} CASCADE")
+ end
+ end
end
class PG::ResultSet
@@ -732,9 +449,7 @@ def cache_annotation(db, id, annotations)
body = XML.parse(annotations)
nodeset = body.xpath_nodes(%q(/document/annotations/annotation))
- if nodeset == 0
- return
- end
+ return if nodeset == 0
has_legacy_annotations = false
nodeset.each do |node|
@@ -744,13 +459,10 @@ def cache_annotation(db, id, annotations)
end
end
- if has_legacy_annotations
- # TODO: Update on conflict?
- db.exec("INSERT INTO annotations VALUES ($1, $2) ON CONFLICT DO NOTHING", id, annotations)
- end
+ db.exec("INSERT INTO annotations VALUES ($1, $2) ON CONFLICT DO NOTHING", id, annotations) if has_legacy_annotations
end
-def create_notification_stream(env, config, kemal_config, decrypt_function, topics, connection_channel)
+def create_notification_stream(env, topics, connection_channel)
connection = Channel(PQ::Notification).new(8)
connection_channel.send({true, connection})
@@ -765,12 +477,12 @@ def create_notification_stream(env, config, kemal_config, decrypt_function, topi
loop do
time_span = [0, 0, 0, 0]
time_span[rand(4)] = rand(30) + 5
- published = Time.utc - Time::Span.new(time_span[0], time_span[1], time_span[2], time_span[3])
+ published = Time.utc - Time::Span.new(days: time_span[0], hours: time_span[1], minutes: time_span[2], seconds: time_span[3])
video_id = TEST_IDS[rand(TEST_IDS.size)]
video = get_video(video_id, PG_DB)
video.published = published
- response = JSON.parse(video.to_json(locale, config, kemal_config, decrypt_function))
+ response = JSON.parse(video.to_json(locale))
if fields_text = env.params.query["fields"]?
begin
@@ -804,7 +516,7 @@ def create_notification_stream(env, config, kemal_config, decrypt_function, topi
when .match(/UC[A-Za-z0-9_-]{22}/)
PG_DB.query_all("SELECT * FROM channel_videos WHERE ucid = $1 AND published > $2 ORDER BY published DESC LIMIT 15",
topic, Time.unix(since.not_nil!), as: ChannelVideo).each do |video|
- response = JSON.parse(video.to_json(locale, config, Kemal.config))
+ response = JSON.parse(video.to_json(locale))
if fields_text = env.params.query["fields"]?
begin
@@ -846,7 +558,7 @@ def create_notification_stream(env, config, kemal_config, decrypt_function, topi
video = get_video(video_id, PG_DB)
video.published = Time.unix(published)
- response = JSON.parse(video.to_json(locale, config, Kemal.config, decrypt_function))
+ response = JSON.parse(video.to_json(locale))
if fields_text = env.params.query["fields"]?
begin
@@ -884,26 +596,46 @@ def create_notification_stream(env, config, kemal_config, decrypt_function, topi
end
end
-def extract_initial_data(body)
-  initial_data = body.match(/window\["ytInitialData"\] = (?<info>.*?);\n/).try &.["info"] || "{}"
+def extract_initial_data(body) : Hash(String, JSON::Any)
+  initial_data = body.match(/(window\["ytInitialData"\]|var\s+ytInitialData)\s*=\s*(?<info>.*?);+\s*\n/).try &.["info"] || "{}"
if initial_data.starts_with?("JSON.parse(\"")
- return JSON.parse(JSON.parse(%({"initial_data":"#{initial_data[12..-3]}"}))["initial_data"].as_s)
+ return JSON.parse(JSON.parse(%({"initial_data":"#{initial_data[12..-3]}"}))["initial_data"].as_s).as_h
else
- return JSON.parse(initial_data)
+ return JSON.parse(initial_data).as_h
end
end
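The broadened regex accepts both script shapes YouTube serves; a quick sketch against an assumed page fragment:

body = %(<script>var ytInitialData = {"contents": {}};\n</script>)
extract_initial_data(body) # => {"contents" => {}}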
def proxy_file(response, env)
if response.headers.includes_word?("Content-Encoding", "gzip")
- Gzip::Writer.open(env.response) do |deflate|
- response.pipe(deflate)
+ Compress::Gzip::Writer.open(env.response) do |deflate|
+ IO.copy response.body_io, deflate
end
elsif response.headers.includes_word?("Content-Encoding", "deflate")
- Flate::Writer.open(env.response) do |deflate|
- response.pipe(deflate)
+ Compress::Deflate::Writer.open(env.response) do |deflate|
+ IO.copy response.body_io, deflate
end
else
- response.pipe(env.response)
+ IO.copy response.body_io, env.response
+ end
+end
+
+# See https://github.com/kemalcr/kemal/pull/576
+class HTTP::Server::Response::Output
+ def close
+ return if closed?
+
+ unless response.wrote_headers?
+ response.content_length = @out_count
+ end
+
+ ensure_headers_written
+
+ super
+
+ if @chunked
+ @io << "0\r\n\r\n"
+ @io.flush
+ end
end
end
diff --git a/src/invidious/helpers/i18n.cr b/src/invidious/helpers/i18n.cr
index 4c9bb2d6..0faa2e32 100644
--- a/src/invidious/helpers/i18n.cr
+++ b/src/invidious/helpers/i18n.cr
@@ -24,6 +24,8 @@ def translate(locale : Hash(String, JSON::Any) | Nil, translation : String, text
if !locale[translation].as_s.empty?
translation = locale[translation].as_s
end
+ else
+ raise "Invalid translation #{translation}"
end
end
diff --git a/src/invidious/helpers/jobs.cr b/src/invidious/helpers/jobs.cr
deleted file mode 100644
index f368d6df..00000000
--- a/src/invidious/helpers/jobs.cr
+++ /dev/null
@@ -1,370 +0,0 @@
-def refresh_channels(db, logger, config)
- max_channel = Channel(Int32).new
-
- spawn do
- max_threads = max_channel.receive
- active_threads = 0
- active_channel = Channel(Bool).new
-
- loop do
- db.query("SELECT id FROM channels ORDER BY updated") do |rs|
- rs.each do
- id = rs.read(String)
-
- if active_threads >= max_threads
- if active_channel.receive
- active_threads -= 1
- end
- end
-
- active_threads += 1
- spawn do
- begin
- channel = fetch_channel(id, db, config.full_refresh)
-
- db.exec("UPDATE channels SET updated = $1, author = $2, deleted = false WHERE id = $3", Time.utc, channel.author, id)
- rescue ex
- if ex.message == "Deleted or invalid channel"
- db.exec("UPDATE channels SET updated = $1, deleted = true WHERE id = $2", Time.utc, id)
- end
- logger.puts("#{id} : #{ex.message}")
- end
-
- active_channel.send(true)
- end
- end
- end
-
- sleep 1.minute
- Fiber.yield
- end
- end
-
- max_channel.send(config.channel_threads)
-end
-
-def refresh_feeds(db, logger, config)
- max_channel = Channel(Int32).new
- spawn do
- max_threads = max_channel.receive
- active_threads = 0
- active_channel = Channel(Bool).new
-
- loop do
- db.query("SELECT email FROM users WHERE feed_needs_update = true OR feed_needs_update IS NULL") do |rs|
- rs.each do
- email = rs.read(String)
- view_name = "subscriptions_#{sha256(email)}"
-
- if active_threads >= max_threads
- if active_channel.receive
- active_threads -= 1
- end
- end
-
- active_threads += 1
- spawn do
- begin
- # Drop outdated views
- column_array = get_column_array(db, view_name)
- ChannelVideo.to_type_tuple.each_with_index do |name, i|
- if name != column_array[i]?
- logger.puts("DROP MATERIALIZED VIEW #{view_name}")
- db.exec("DROP MATERIALIZED VIEW #{view_name}")
- raise "view does not exist"
- end
- end
-
- if !db.query_one("SELECT pg_get_viewdef('#{view_name}')", as: String).includes? "WHERE ((cv.ucid = ANY (u.subscriptions))"
- logger.puts("Materialized view #{view_name} is out-of-date, recreating...")
- db.exec("DROP MATERIALIZED VIEW #{view_name}")
- end
-
- db.exec("REFRESH MATERIALIZED VIEW #{view_name}")
- db.exec("UPDATE users SET feed_needs_update = false WHERE email = $1", email)
- rescue ex
- # Rename old views
- begin
- legacy_view_name = "subscriptions_#{sha256(email)[0..7]}"
-
- db.exec("SELECT * FROM #{legacy_view_name} LIMIT 0")
- logger.puts("RENAME MATERIALIZED VIEW #{legacy_view_name}")
- db.exec("ALTER MATERIALIZED VIEW #{legacy_view_name} RENAME TO #{view_name}")
- rescue ex
- begin
- # While iterating through, we may have an email stored from a deleted account
- if db.query_one?("SELECT true FROM users WHERE email = $1", email, as: Bool)
- logger.puts("CREATE #{view_name}")
- db.exec("CREATE MATERIALIZED VIEW #{view_name} AS #{MATERIALIZED_VIEW_SQL.call(email)}")
- db.exec("UPDATE users SET feed_needs_update = false WHERE email = $1", email)
- end
- rescue ex
- logger.puts("REFRESH #{email} : #{ex.message}")
- end
- end
- end
-
- active_channel.send(true)
- end
- end
- end
-
- sleep 5.seconds
- Fiber.yield
- end
- end
-
- max_channel.send(config.feed_threads)
-end
-
-def subscribe_to_feeds(db, logger, key, config)
- if config.use_pubsub_feeds
- case config.use_pubsub_feeds
- when Bool
- max_threads = config.use_pubsub_feeds.as(Bool).to_unsafe
- when Int32
- max_threads = config.use_pubsub_feeds.as(Int32)
- end
- max_channel = Channel(Int32).new
-
- spawn do
- max_threads = max_channel.receive
- active_threads = 0
- active_channel = Channel(Bool).new
-
- loop do
- db.query_all("SELECT id FROM channels WHERE CURRENT_TIMESTAMP - subscribed > interval '4 days' OR subscribed IS NULL") do |rs|
- rs.each do
- ucid = rs.read(String)
-
- if active_threads >= max_threads.as(Int32)
- if active_channel.receive
- active_threads -= 1
- end
- end
-
- active_threads += 1
-
- spawn do
- begin
- response = subscribe_pubsub(ucid, key, config)
-
- if response.status_code >= 400
- logger.puts("#{ucid} : #{response.body}")
- end
- rescue ex
- logger.puts("#{ucid} : #{ex.message}")
- end
-
- active_channel.send(true)
- end
- end
- end
-
- sleep 1.minute
- Fiber.yield
- end
- end
-
- max_channel.send(max_threads.as(Int32))
- end
-end
-
-def pull_top_videos(config, db)
- loop do
- begin
- top = rank_videos(db, 40)
- rescue ex
- sleep 1.minute
- Fiber.yield
-
- next
- end
-
- if top.size == 0
- sleep 1.minute
- Fiber.yield
-
- next
- end
-
- videos = [] of Video
-
- top.each do |id|
- begin
- videos << get_video(id, db)
- rescue ex
- next
- end
- end
-
- yield videos
-
- sleep 1.minute
- Fiber.yield
- end
-end
-
-def pull_popular_videos(db)
- loop do
- videos = db.query_all("SELECT DISTINCT ON (ucid) * FROM channel_videos WHERE ucid IN \
- (SELECT channel FROM (SELECT UNNEST(subscriptions) AS channel FROM users) AS d \
- GROUP BY channel ORDER BY COUNT(channel) DESC LIMIT 40) \
- ORDER BY ucid, published DESC", as: ChannelVideo).sort_by { |video| video.published }.reverse
-
- yield videos
-
- sleep 1.minute
- Fiber.yield
- end
-end
-
-def update_decrypt_function
- loop do
- begin
- decrypt_function = fetch_decrypt_function
- yield decrypt_function
- rescue ex
- next
- ensure
- sleep 1.minute
- Fiber.yield
- end
- end
-end
-
-def bypass_captcha(captcha_key, logger)
- loop do
- begin
- response = YT_POOL.client &.get("/watch?v=CvFH_6DNRCY&gl=US&hl=en&disable_polymer=1&has_verified=1&bpctr=9999999999")
- if response.body.includes?("To continue with your YouTube experience, please fill out the form below.")
- html = XML.parse_html(response.body)
- form = html.xpath_node(%(//form[@action="/das_captcha"])).not_nil!
- site_key = form.xpath_node(%(.//div[@class="g-recaptcha"])).try &.["data-sitekey"]
-
- inputs = {} of String => String
- form.xpath_nodes(%(.//input[@name])).map do |node|
- inputs[node["name"]] = node["value"]
- end
-
- headers = response.cookies.add_request_headers(HTTP::Headers.new)
-
- response = JSON.parse(HTTP::Client.post("https://api.anti-captcha.com/createTask", body: {
- "clientKey" => CONFIG.captcha_key,
- "task" => {
- "type" => "NoCaptchaTaskProxyless",
- # "type" => "NoCaptchaTask",
- "websiteURL" => "https://www.youtube.com/watch?v=CvFH_6DNRCY&gl=US&hl=en&disable_polymer=1&has_verified=1&bpctr=9999999999",
- "websiteKey" => site_key,
- # "proxyType" => "http",
- # "proxyAddress" => CONFIG.proxy_address,
- # "proxyPort" => CONFIG.proxy_port,
- # "proxyLogin" => CONFIG.proxy_user,
- # "proxyPassword" => CONFIG.proxy_pass,
- # "userAgent" => "User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36",
- },
- }.to_json).body)
-
- if response["error"]?
- raise response["error"].as_s
- end
-
- task_id = response["taskId"].as_i
-
- loop do
- sleep 10.seconds
-
- response = JSON.parse(HTTP::Client.post("https://api.anti-captcha.com/getTaskResult", body: {
- "clientKey" => CONFIG.captcha_key,
- "taskId" => task_id,
- }.to_json).body)
-
- if response["status"]?.try &.== "ready"
- break
- elsif response["errorId"]?.try &.as_i != 0
- raise response["errorDescription"].as_s
- end
- end
-
- inputs["g-recaptcha-response"] = response["solution"]["gRecaptchaResponse"].as_s
- response = YT_POOL.client &.post("/das_captcha", headers, form: inputs)
-
- yield response.cookies.select { |cookie| cookie.name != "PREF" }
- elsif response.headers["Location"]?.try &.includes?("/sorry/index")
- location = response.headers["Location"].try { |u| URI.parse(u) }
- client = QUIC::Client.new(location.host.not_nil!)
- response = client.get(location.full_path)
-
- html = XML.parse_html(response.body)
- form = html.xpath_node(%(//form[@action="index"])).not_nil!
- site_key = form.xpath_node(%(.//div[@class="g-recaptcha"])).try &.["data-sitekey"]
-
- inputs = {} of String => String
- form.xpath_nodes(%(.//input[@name])).map do |node|
- inputs[node["name"]] = node["value"]
- end
-
- response = JSON.parse(HTTP::Client.post("https://api.anti-captcha.com/createTask", body: {
- "clientKey" => CONFIG.captcha_key,
- "task" => {
- "type" => "NoCaptchaTaskProxyless",
- "websiteURL" => location.to_s,
- "websiteKey" => site_key,
- },
- }.to_json).body)
-
- if response["error"]?
- raise response["error"].as_s
- end
-
- task_id = response["taskId"].as_i
-
- loop do
- sleep 10.seconds
-
- response = JSON.parse(HTTP::Client.post("https://api.anti-captcha.com/getTaskResult", body: {
- "clientKey" => CONFIG.captcha_key,
- "taskId" => task_id,
- }.to_json).body)
-
- if response["status"]?.try &.== "ready"
- break
- elsif response["errorId"]?.try &.as_i != 0
- raise response["errorDescription"].as_s
- end
- end
-
- inputs["g-recaptcha-response"] = response["solution"]["gRecaptchaResponse"].as_s
- client.close
- client = QUIC::Client.new("www.google.com")
- response = client.post(location.full_path, form: inputs)
- headers = HTTP::Headers{
- "Cookie" => URI.parse(response.headers["location"]).query_params["google_abuse"].split(";")[0],
- }
- cookies = HTTP::Cookies.from_headers(headers)
-
- yield cookies
- end
- rescue ex
- logger.puts("Exception: #{ex.message}")
- ensure
- sleep 1.minute
- Fiber.yield
- end
- end
-end
-
-def find_working_proxies(regions)
- loop do
- regions.each do |region|
- proxies = get_proxies(region).first(20)
- proxies = proxies.map { |proxy| {ip: proxy[:ip], port: proxy[:port]} }
- # proxies = filter_proxies(proxies)
-
- yield region, proxies
- end
-
- sleep 1.minute
- Fiber.yield
- end
-end
diff --git a/src/invidious/helpers/macros.cr b/src/invidious/helpers/macros.cr
index ddfb9f8e..8b74bc86 100644
--- a/src/invidious/helpers/macros.cr
+++ b/src/invidious/helpers/macros.cr
@@ -1,43 +1,51 @@
-macro db_mapping(mapping)
- def initialize({{*mapping.keys.map { |id| "@#{id}".id }}})
- end
+module DB::Serializable
+ macro included
+ {% verbatim do %}
+ macro finished
+ def self.type_array
+ \{{ @type.instance_vars
+ .reject { |var| var.annotation(::DB::Field) && var.annotation(::DB::Field)[:ignore] }
+ .map { |name| name.stringify }
+ }}
+ end
- def to_a
- return [ {{*mapping.keys.map { |id| "@#{id}".id }}} ]
- end
+ def initialize(tuple)
+ \{% for var in @type.instance_vars %}
+ \{% ann = var.annotation(::DB::Field) %}
+ \{% if ann && ann[:ignore] %}
+ \{% else %}
+ @\{{var.name}} = tuple[:\{{var.name.id}}]
+ \{% end %}
+ \{% end %}
+ end
- def self.to_type_tuple
- return { {{*mapping.keys.map { |id| "#{id}" }}} }
+ def to_a
+ \{{ @type.instance_vars
+ .reject { |var| var.annotation(::DB::Field) && var.annotation(::DB::Field)[:ignore] }
+ .map { |name| name }
+ }}
+ end
+ end
+ {% end %}
end
-
- DB.mapping( {{mapping}} )
end
-macro json_mapping(mapping)
- def initialize({{*mapping.keys.map { |id| "@#{id}".id }}})
+module JSON::Serializable
+ macro included
+ {% verbatim do %}
+ macro finished
+ def initialize(tuple)
+ \{% for var in @type.instance_vars %}
+ \{% ann = var.annotation(::JSON::Field) %}
+ \{% if ann && ann[:ignore] %}
+ \{% else %}
+ @\{{var.name}} = tuple[:\{{var.name.id}}]
+ \{% end %}
+ \{% end %}
+ end
+ end
+ {% end %}
end
-
- def to_a
- return [ {{*mapping.keys.map { |id| "@#{id}".id }}} ]
- end
-
- patched_json_mapping( {{mapping}} )
- YAML.mapping( {{mapping}} )
-end
-
-macro yaml_mapping(mapping)
- def initialize({{*mapping.keys.map { |id| "@#{id}".id }}})
- end
-
- def to_a
- return [ {{*mapping.keys.map { |id| "@#{id}".id }}} ]
- end
-
- def to_tuple
- return { {{*mapping.keys.map { |id| "@#{id}".id }}} }
- end
-
- YAML.mapping({{mapping}})
end
macro templated(filename, template = "template")
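
(Sketch, not part of the patch: how a struct opts into the rewritten DB::Serializable. ExampleRow and its fields are illustrative; the generated initializer takes a NamedTuple instead of positional arguments, and fields annotated with @[DB::Field(ignore: true)] are excluded from initialize, type_array, and to_a.)

    struct ExampleRow
      include DB::Serializable

      property id : String
      property title : String

      @[DB::Field(ignore: true)]
      property cached_html : String? # excluded from initialize/type_array/to_a
    end

    row = ExampleRow.new({id: "abc", title: "Hello"})
    ExampleRow.type_array # => ["id", "title"]
    row.to_a              # => ["abc", "Hello"]
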
diff --git a/src/invidious/helpers/patch_mapping.cr b/src/invidious/helpers/patch_mapping.cr
deleted file mode 100644
index 19bd8ca1..00000000
--- a/src/invidious/helpers/patch_mapping.cr
+++ /dev/null
@@ -1,166 +0,0 @@
-# Overloads https://github.com/crystal-lang/crystal/blob/0.28.0/src/json/from_json.cr#L24
-def Object.from_json(string_or_io, default) : self
- parser = JSON::PullParser.new(string_or_io)
- new parser, default
-end
-
-# Adds configurable 'default'
-macro patched_json_mapping(_properties_, strict = false)
- {% for key, value in _properties_ %}
- {% _properties_[key] = {type: value} unless value.is_a?(HashLiteral) || value.is_a?(NamedTupleLiteral) %}
- {% end %}
-
- {% for key, value in _properties_ %}
- {% _properties_[key][:key_id] = key.id.gsub(/\?$/, "") %}
- {% end %}
-
- {% for key, value in _properties_ %}
- @{{value[:key_id]}} : {{value[:type]}}{{ (value[:nilable] ? "?" : "").id }}
-
- {% if value[:setter] == nil ? true : value[:setter] %}
- def {{value[:key_id]}}=(_{{value[:key_id]}} : {{value[:type]}}{{ (value[:nilable] ? "?" : "").id }})
- @{{value[:key_id]}} = _{{value[:key_id]}}
- end
- {% end %}
-
- {% if value[:getter] == nil ? true : value[:getter] %}
- def {{key.id}} : {{value[:type]}}{{ (value[:nilable] ? "?" : "").id }}
- @{{value[:key_id]}}
- end
- {% end %}
-
- {% if value[:presence] %}
- @{{value[:key_id]}}_present : Bool = false
-
- def {{value[:key_id]}}_present?
- @{{value[:key_id]}}_present
- end
- {% end %}
- {% end %}
-
- def initialize(%pull : ::JSON::PullParser, default = nil)
- {% for key, value in _properties_ %}
- %var{key.id} = nil
- %found{key.id} = false
- {% end %}
-
- %location = %pull.location
- begin
- %pull.read_begin_object
- rescue exc : ::JSON::ParseException
- raise ::JSON::MappingError.new(exc.message, self.class.to_s, nil, *%location, exc)
- end
- until %pull.kind.end_object?
- %key_location = %pull.location
- key = %pull.read_object_key
- case key
- {% for key, value in _properties_ %}
- when {{value[:key] || value[:key_id].stringify}}
- %found{key.id} = true
- begin
- %var{key.id} =
- {% if value[:nilable] || value[:default] != nil %} %pull.read_null_or { {% end %}
-
- {% if value[:root] %}
- %pull.on_key!({{value[:root]}}) do
- {% end %}
-
- {% if value[:converter] %}
- {{value[:converter]}}.from_json(%pull)
- {% elsif value[:type].is_a?(Path) || value[:type].is_a?(Generic) %}
- {{value[:type]}}.new(%pull)
- {% else %}
- ::Union({{value[:type]}}).new(%pull)
- {% end %}
-
- {% if value[:root] %}
- end
- {% end %}
-
- {% if value[:nilable] || value[:default] != nil %} } {% end %}
- rescue exc : ::JSON::ParseException
- raise ::JSON::MappingError.new(exc.message, self.class.to_s, {{value[:key] || value[:key_id].stringify}}, *%key_location, exc)
- end
- {% end %}
- else
- {% if strict %}
- raise ::JSON::MappingError.new("Unknown JSON attribute: #{key}", self.class.to_s, nil, *%key_location, nil)
- {% else %}
- %pull.skip
- {% end %}
- end
- end
- %pull.read_next
-
- {% for key, value in _properties_ %}
- {% unless value[:nilable] || value[:default] != nil %}
- if %var{key.id}.nil? && !%found{key.id} && !::Union({{value[:type]}}).nilable?
- raise ::JSON::MappingError.new("Missing JSON attribute: {{(value[:key] || value[:key_id]).id}}", self.class.to_s, nil, *%location, nil)
- end
- {% end %}
-
- {% if value[:nilable] %}
- {% if value[:default] != nil %}
- @{{value[:key_id]}} = %found{key.id} ? %var{key.id} : (default.responds_to?(:{{value[:key_id]}}) ? default.{{value[:key_id]}} : {{value[:default]}})
- {% else %}
- @{{value[:key_id]}} = %var{key.id}
- {% end %}
- {% elsif value[:default] != nil %}
- @{{value[:key_id]}} = %var{key.id}.nil? ? (default.responds_to?(:{{value[:key_id]}}) ? default.{{value[:key_id]}} : {{value[:default]}}) : %var{key.id}
- {% else %}
- @{{value[:key_id]}} = (%var{key.id}).as({{value[:type]}})
- {% end %}
-
- {% if value[:presence] %}
- @{{value[:key_id]}}_present = %found{key.id}
- {% end %}
- {% end %}
- end
-
- def to_json(json : ::JSON::Builder)
- json.object do
- {% for key, value in _properties_ %}
- _{{value[:key_id]}} = @{{value[:key_id]}}
-
- {% unless value[:emit_null] %}
- unless _{{value[:key_id]}}.nil?
- {% end %}
-
- json.field({{value[:key] || value[:key_id].stringify}}) do
- {% if value[:root] %}
- {% if value[:emit_null] %}
- if _{{value[:key_id]}}.nil?
- nil.to_json(json)
- else
- {% end %}
-
- json.object do
- json.field({{value[:root]}}) do
- {% end %}
-
- {% if value[:converter] %}
- if _{{value[:key_id]}}
- {{ value[:converter] }}.to_json(_{{value[:key_id]}}, json)
- else
- nil.to_json(json)
- end
- {% else %}
- _{{value[:key_id]}}.to_json(json)
- {% end %}
-
- {% if value[:root] %}
- {% if value[:emit_null] %}
- end
- {% end %}
- end
- end
- {% end %}
- end
-
- {% unless value[:emit_null] %}
- end
- {% end %}
- {% end %}
- end
- end
-end
diff --git a/src/invidious/helpers/signatures.cr b/src/invidious/helpers/signatures.cr
index 1d238576..f811500f 100644
--- a/src/invidious/helpers/signatures.cr
+++ b/src/invidious/helpers/signatures.cr
@@ -1,69 +1,53 @@
+alias SigProc = Proc(Array(String), Int32, Array(String))
+
def fetch_decrypt_function(id = "CvFH_6DNRCY")
- document = YT_POOL.client &.get("/watch?v=#{id}&gl=US&hl=en&disable_polymer=1").body
- url = document.match(/src="(?<url>\/yts\/jsbin\/player_ias-.{9}\/en_US\/base.js)"/).not_nil!["url"]
+ document = YT_POOL.client &.get("/watch?v=#{id}&gl=US&hl=en").body
+ url = document.match(/src="(?<url>\/s\/player\/[^\/]+\/player_ias[^\/]+\/en_US\/base.js)"/).not_nil!["url"]
player = YT_POOL.client &.get(url).body
- function_name = player.match(/^(?<name>[^=]+)=function\(a\){a=a\.split\(""\)/m).not_nil!["name"]
- function_body = player.match(/^#{Regex.escape(function_name)}=function\(a\){(?<body>[^}]+)}/m).not_nil!["body"]
+ function_name = player.match(/^(?<name>[^=]+)=function\(\w\){\w=\w\.split\(""\);[^\. ]+\.[^( ]+/m).not_nil!["name"]
+ function_body = player.match(/^#{Regex.escape(function_name)}=function\(\w\){(?<body>[^}]+)}/m).not_nil!["body"]
function_body = function_body.split(";")[1..-2]
var_name = function_body[0][0, 2]
var_body = player.delete("\n").match(/var #{Regex.escape(var_name)}={(?<body>(.*?))};/).not_nil!["body"]
- operations = {} of String => String
+ operations = {} of String => SigProc
var_body.split("},").each do |operation|
op_name = operation.match(/^[^:]+/).not_nil![0]
op_body = operation.match(/\{[^}]+/).not_nil![0]
case op_body
when "{a.reverse()"
- operations[op_name] = "a"
+ operations[op_name] = ->(a : Array(String), b : Int32) { a.reverse }
when "{a.splice(0,b)"
- operations[op_name] = "b"
+ operations[op_name] = ->(a : Array(String), b : Int32) { a.delete_at(0..(b - 1)); a }
else
- operations[op_name] = "c"
+ operations[op_name] = ->(a : Array(String), b : Int32) { c = a[0]; a[0] = a[b % a.size]; a[b % a.size] = c; a }
end
end
- decrypt_function = [] of {name: String, value: Int32}
+ decrypt_function = [] of {SigProc, Int32}
function_body.each do |function|
function = function.lchop(var_name).delete("[].")
op_name = function.match(/[^\(]+/).not_nil![0]
- value = function.match(/\(a,(?<value>[\d]+)\)/).not_nil!["value"].to_i
+ value = function.match(/\(\w,(?<value>[\d]+)\)/).not_nil!["value"].to_i
- decrypt_function << {name: operations[op_name], value: value}
+ decrypt_function << {operations[op_name], value}
end
return decrypt_function
end
-def decrypt_signature(fmt, code)
- if !fmt["s"]?
- return ""
+def decrypt_signature(fmt : Hash(String, JSON::Any))
+ return "" if !fmt["s"]? || !fmt["sp"]?
+
+ sp = fmt["sp"].as_s
+ sig = fmt["s"].as_s.split("")
+ DECRYPT_FUNCTION.each do |proc, value|
+ sig = proc.call(sig, value)
end
- a = fmt["s"]
- a = a.split("")
-
- code.each do |item|
- case item[:name]
- when "a"
- a.reverse!
- when "b"
- a.delete_at(0..(item[:value] - 1))
- when "c"
- a = splice(a, item[:value])
- end
- end
-
- signature = a.join("")
- return "{fmt["sp"]?}=#{signature}"
-end
-
-def splice(a, b)
- c = a[0]
- a[0] = a[b % a.size]
- a[b % a.size] = c
- return a
+ return "{sp}=#{sig.join("")}"
end
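
(Sketch, not part of the patch: the old decrypt_signature dispatched on string tags "a"/"b"/"c", while the new table stores the procs themselves, so applying the cipher is just a fold over {proc, value} pairs. Operation order and values below are illustrative.)

    ops = [
      {->(a : Array(String), b : Int32) { a.reverse }, 0},                  # reverse
      {->(a : Array(String), b : Int32) { a.delete_at(0..(b - 1)); a }, 2}, # splice
    ]
    sig = "abcdef".split("")
    ops.each { |proc, value| sig = proc.call(sig, value) }
    sig.join # => "dcba"
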
diff --git a/src/invidious/helpers/static_file_handler.cr b/src/invidious/helpers/static_file_handler.cr
index 20d92b9c..be9d36ab 100644
--- a/src/invidious/helpers/static_file_handler.cr
+++ b/src/invidious/helpers/static_file_handler.cr
@@ -81,12 +81,12 @@ def send_file(env : HTTP::Server::Context, file_path : String, data : Slice(UInt
condition = config.is_a?(Hash) && config["gzip"]? == true && filesize > minsize && Kemal::Utils.zip_types(file_path)
if condition && request_headers.includes_word?("Accept-Encoding", "gzip")
env.response.headers["Content-Encoding"] = "gzip"
- Gzip::Writer.open(env.response) do |deflate|
+ Compress::Gzip::Writer.open(env.response) do |deflate|
IO.copy(file, deflate)
end
elsif condition && request_headers.includes_word?("Accept-Encoding", "deflate")
env.response.headers["Content-Encoding"] = "deflate"
- Flate::Writer.open(env.response) do |deflate|
+ Compress::Deflate::Writer.open(env.response) do |deflate|
IO.copy(file, deflate)
end
else
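
(Sketch, not part of the patch: this hunk only tracks Crystal 0.35's move of Gzip and Flate under the Compress namespace. A quick round-trip:)

    require "compress/gzip"

    io = IO::Memory.new
    Compress::Gzip::Writer.open(io) { |gzip| gzip.print "hello" }
    io.rewind
    Compress::Gzip::Reader.open(io) { |gzip| puts gzip.gets_to_end } # => hello
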
diff --git a/src/invidious/helpers/tokens.cr b/src/invidious/helpers/tokens.cr
index 30f7d4f4..39aae367 100644
--- a/src/invidious/helpers/tokens.cr
+++ b/src/invidious/helpers/tokens.cr
@@ -1,3 +1,5 @@
+require "crypto/subtle"
+
def generate_token(email, scopes, expire, key, db)
session = "v1:#{Base64.urlsafe_encode(Random::Secure.random_bytes(32))}"
PG_DB.exec("INSERT INTO session_ids VALUES ($1, $2, $3)", session, email, Time.utc)
@@ -41,15 +43,10 @@ def sign_token(key, hash)
string_to_sign = [] of String
hash.each do |key, value|
- if key == "signature"
- next
- end
+ next if key == "signature"
- if value.is_a?(JSON::Any)
- case value
- when .as_a?
- value = value.as_a.map { |item| item.as_s }
- end
+ if value.is_a?(JSON::Any) && value.as_a?
+ value = value.as_a.map { |i| i.as_s }
end
case value
@@ -76,14 +73,25 @@ def validate_request(token, session, request, key, db, locale = nil)
raise translate(locale, "Hidden field \"token\" is a required field")
end
- if token["signature"] != sign_token(key, token)
- raise translate(locale, "Invalid signature")
+ expire = token["expire"]?.try &.as_i
+ if expire.try &.< Time.utc.to_unix
+ raise translate(locale, "Token is expired, please try again")
end
if token["session"] != session
raise translate(locale, "Erroneous token")
end
+ scopes = token["scopes"].as_a.map { |v| v.as_s }
+ scope = "#{request.method}:#{request.path.lchop("/api/v1/auth/").lstrip("/")}"
+ if !scopes_include_scope(scopes, scope)
+ raise translate(locale, "Invalid scope")
+ end
+
+ if !Crypto::Subtle.constant_time_compare(token["signature"].to_s, sign_token(key, token))
+ raise translate(locale, "Invalid signature")
+ end
+
if token["nonce"]? && (nonce = db.query_one?("SELECT * FROM nonces WHERE nonce = $1", token["nonce"], as: {String, Time}))
if nonce[1] > Time.utc
db.exec("UPDATE nonces SET expire = $1 WHERE nonce = $2", Time.utc(1990, 1, 1), nonce[0])
@@ -92,18 +100,6 @@ def validate_request(token, session, request, key, db, locale = nil)
end
end
- scopes = token["scopes"].as_a.map { |v| v.as_s }
- scope = "#{request.method}:#{request.path.lchop("/api/v1/auth/").lstrip("/")}"
-
- if !scopes_include_scope(scopes, scope)
- raise translate(locale, "Invalid scope")
- end
-
- expire = token["expire"]?.try &.as_i
- if expire.try &.< Time.utc.to_unix
- raise translate(locale, "Token is expired, please try again")
- end
-
return {scopes, expire, token["signature"].as_s}
end
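
(Sketch, not part of the patch: the reordering above rejects expired or out-of-scope tokens before touching the signature, and the signature itself is now compared in constant time so a forged value can't be refined byte-by-byte via response timing.)

    require "crypto/subtle"

    supplied = "deadbeef"
    expected = "deadbeee"
    # Unlike ==, runtime does not depend on the length of the matching prefix:
    Crypto::Subtle.constant_time_compare(supplied, expected) # => false
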
diff --git a/src/invidious/helpers/utils.cr b/src/invidious/helpers/utils.cr
index 6fcfa8d2..a51f15ce 100644
--- a/src/invidious/helpers/utils.cr
+++ b/src/invidious/helpers/utils.cr
@@ -2,13 +2,16 @@ require "lsquic"
require "pool/connection"
def add_yt_headers(request)
- request.headers["x-youtube-client-name"] ||= "1"
- request.headers["x-youtube-client-version"] ||= "1.20180719"
request.headers["user-agent"] ||= "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36"
request.headers["accept-charset"] ||= "ISO-8859-1,utf-8;q=0.7,*;q=0.7"
request.headers["accept"] ||= "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
request.headers["accept-language"] ||= "en-us,en;q=0.5"
- request.headers["cookie"] = "#{(CONFIG.cookies.map { |c| "#{c.name}=#{c.value}" }).join("; ")}; #{request.headers["cookie"]?}"
+ return if request.resource.starts_with? "/sorry/index"
+ request.headers["x-youtube-client-name"] ||= "1"
+ request.headers["x-youtube-client-version"] ||= "2.20200609"
+ if !CONFIG.cookies.empty?
+ request.headers["cookie"] = "#{(CONFIG.cookies.map { |c| "#{c.name}=#{c.value}" }).join("; ")}; #{request.headers["cookie"]?}"
+ end
end
struct QUICPool
@@ -77,7 +80,8 @@ def elapsed_text(elapsed)
end
def make_client(url : URI, region = nil)
- client = HTTPClient.new(url)
+ # TODO: Migrate any applicable endpoints to QUIC
+ client = HTTPClient.new(url, OpenSSL::SSL::Context::Client.insecure)
client.family = (url.host == "www.youtube.com") ? CONFIG.force_resolve : Socket::Family::UNSPEC
client.read_timeout = 10.seconds
client.connect_timeout = 10.seconds
@@ -99,7 +103,7 @@ end
def decode_length_seconds(string)
length_seconds = string.gsub(/[^0-9:]/, "").split(":").map &.to_i
length_seconds = [0] * (3 - length_seconds.size) + length_seconds
- length_seconds = Time::Span.new(length_seconds[0], length_seconds[1], length_seconds[2])
+ length_seconds = Time::Span.new hours: length_seconds[0], minutes: length_seconds[1], seconds: length_seconds[2]
length_seconds = length_seconds.total_seconds.to_i
return length_seconds
@@ -161,6 +165,7 @@ def decode_date(string : String)
return Time.utc
when "yesterday"
return Time.utc - 1.day
+ else nil # Continue
end
# String matches format "20 hours ago", "4 months ago"...
@@ -315,7 +320,7 @@ def get_referer(env, fallback = "/", unroll = true)
end
referer = referer.full_path
- referer = "/" + referer.lstrip("\/\\")
+ referer = "/" + referer.gsub(/[^\/?@&%=\-_.0-9a-zA-Z]/, "").lstrip("/\\")
if referer == env.request.path
referer = fallback
@@ -324,47 +329,10 @@ def get_referer(env, fallback = "/", unroll = true)
return referer
end
-struct VarInt
- def self.from_io(io : IO, format = IO::ByteFormat::NetworkEndian) : Int32
- result = 0_u32
- num_read = 0
-
- loop do
- byte = io.read_byte
- raise "Invalid VarInt" if !byte
- value = byte & 0x7f
-
- result |= value.to_u32 << (7 * num_read)
- num_read += 1
-
- break if byte & 0x80 == 0
- raise "Invalid VarInt" if num_read > 5
- end
-
- result.to_i32
- end
-
- def self.to_io(io : IO, value : Int32)
- io.write_byte 0x00 if value == 0x00
- value = value.to_u32
-
- while value != 0
- byte = (value & 0x7f).to_u8
- value >>= 7
-
- if value != 0
- byte |= 0x80
- end
-
- io.write_byte byte
- end
- end
-end
-
def sha256(text)
digest = OpenSSL::Digest.new("SHA256")
digest << text
- return digest.hexdigest
+ return digest.final.hexstring
end
def subscribe_pubsub(topic, key, config)
@@ -383,10 +351,8 @@ def subscribe_pubsub(topic, key, config)
nonce = Random::Secure.hex(4)
signature = "#{time}:#{nonce}"
- host_url = make_host_url(config, Kemal.config)
-
body = {
- "hub.callback" => "#{host_url}/feed/webhook/v1:#{time}:#{nonce}:#{OpenSSL::HMAC.hexdigest(:sha1, key, signature)}",
+ "hub.callback" => "#{HOST_URL}/feed/webhook/v1:#{time}:#{nonce}:#{OpenSSL::HMAC.hexdigest(:sha1, key, signature)}",
"hub.topic" => "https://www.youtube.com/xml/feeds/videos.xml?#{topic}",
"hub.verify" => "async",
"hub.mode" => "subscribe",
diff --git a/src/invidious/jobs.cr b/src/invidious/jobs.cr
new file mode 100644
index 00000000..ec0cad64
--- /dev/null
+++ b/src/invidious/jobs.cr
@@ -0,0 +1,13 @@
+module Invidious::Jobs
+ JOBS = [] of BaseJob
+
+ def self.register(job : BaseJob)
+ JOBS << job
+ end
+
+ def self.start_all
+ JOBS.each do |job|
+ spawn { job.begin }
+ end
+ end
+end
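
(Sketch, not part of the patch: each spawn-loop deleted from invidious.cr becomes one of these job objects, so startup wiring reduces to registration. Constructor arguments are illustrative.)

    Invidious::Jobs.register Invidious::Jobs::PullPopularVideosJob.new(PG_DB)
    Invidious::Jobs.register Invidious::Jobs::UpdateDecryptFunctionJob.new
    Invidious::Jobs.start_all # one fiber per registered job
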
diff --git a/src/invidious/jobs/base_job.cr b/src/invidious/jobs/base_job.cr
new file mode 100644
index 00000000..47e75864
--- /dev/null
+++ b/src/invidious/jobs/base_job.cr
@@ -0,0 +1,3 @@
+abstract class Invidious::Jobs::BaseJob
+ abstract def begin
+end
diff --git a/src/invidious/jobs/bypass_captcha_job.cr b/src/invidious/jobs/bypass_captcha_job.cr
new file mode 100644
index 00000000..8b69e01a
--- /dev/null
+++ b/src/invidious/jobs/bypass_captcha_job.cr
@@ -0,0 +1,131 @@
+class Invidious::Jobs::BypassCaptchaJob < Invidious::Jobs::BaseJob
+ private getter logger : Invidious::LogHandler
+ private getter config : Config
+
+ def initialize(@logger, @config)
+ end
+
+ def begin
+ loop do
+ begin
+ {"/watch?v=jNQXAC9IVRw&gl=US&hl=en&has_verified=1&bpctr=9999999999", produce_channel_videos_url(ucid: "UC4QobU6STFB0P71PMvOGN5A")}.each do |path|
+ response = YT_POOL.client &.get(path)
+ if response.body.includes?("To continue with your YouTube experience, please fill out the form below.")
+ html = XML.parse_html(response.body)
+ form = html.xpath_node(%(//form[@action="/das_captcha"])).not_nil!
+ site_key = form.xpath_node(%(.//div[@id="recaptcha"])).try &.["data-sitekey"]
+ s_value = form.xpath_node(%(.//div[@id="recaptcha"])).try &.["data-s"]
+
+ inputs = {} of String => String
+ form.xpath_nodes(%(.//input[@name])).map do |node|
+ inputs[node["name"]] = node["value"]
+ end
+
+ headers = response.cookies.add_request_headers(HTTP::Headers.new)
+
+ response = JSON.parse(HTTP::Client.post("https://api.anti-captcha.com/createTask", body: {
+ "clientKey" => config.captcha_key,
+ "task" => {
+ "type" => "NoCaptchaTaskProxyless",
+ "websiteURL" => "https://www.youtube.com#{path}",
+ "websiteKey" => site_key,
+ "recaptchaDataSValue" => s_value,
+ },
+ }.to_json).body)
+
+ raise response["error"].as_s if response["error"]?
+ task_id = response["taskId"].as_i
+
+ loop do
+ sleep 10.seconds
+
+ response = JSON.parse(HTTP::Client.post("https://api.anti-captcha.com/getTaskResult", body: {
+ "clientKey" => config.captcha_key,
+ "taskId" => task_id,
+ }.to_json).body)
+
+ if response["status"]?.try &.== "ready"
+ break
+ elsif response["errorId"]?.try &.as_i != 0
+ raise response["errorDescription"].as_s
+ end
+ end
+
+ inputs["g-recaptcha-response"] = response["solution"]["gRecaptchaResponse"].as_s
+ headers["Cookies"] = response["solution"]["cookies"].as_h?.try &.map { |k, v| "#{k}=#{v}" }.join("; ") || ""
+ response = YT_POOL.client &.post("/das_captcha", headers, form: inputs)
+
+ response.cookies
+ .select { |cookie| cookie.name != "PREF" }
+ .each { |cookie| config.cookies << cookie }
+
+ # Persist cookies between runs
+ File.write("config/config.yml", config.to_yaml)
+ elsif response.headers["Location"]?.try &.includes?("/sorry/index")
+ location = response.headers["Location"].try { |u| URI.parse(u) }
+ headers = HTTP::Headers{":authority" => location.host.not_nil!}
+ response = YT_POOL.client &.get(location.full_path, headers)
+
+ html = XML.parse_html(response.body)
+ form = html.xpath_node(%(//form[@action="index"])).not_nil!
+ site_key = form.xpath_node(%(.//div[@id="recaptcha"])).try &.["data-sitekey"]
+ s_value = form.xpath_node(%(.//div[@id="recaptcha"])).try &.["data-s"]
+
+ inputs = {} of String => String
+ form.xpath_nodes(%(.//input[@name])).map do |node|
+ inputs[node["name"]] = node["value"]
+ end
+
+ captcha_client = HTTPClient.new(URI.parse("https://api.anti-captcha.com"))
+ captcha_client.family = config.force_resolve || Socket::Family::INET
+ response = JSON.parse(captcha_client.post("/createTask", body: {
+ "clientKey" => config.captcha_key,
+ "task" => {
+ "type" => "NoCaptchaTaskProxyless",
+ "websiteURL" => location.to_s,
+ "websiteKey" => site_key,
+ "recaptchaDataSValue" => s_value,
+ },
+ }.to_json).body)
+
+ raise response["error"].as_s if response["error"]?
+ task_id = response["taskId"].as_i
+
+ loop do
+ sleep 10.seconds
+
+ response = JSON.parse(captcha_client.post("/getTaskResult", body: {
+ "clientKey" => config.captcha_key,
+ "taskId" => task_id,
+ }.to_json).body)
+
+ if response["status"]?.try &.== "ready"
+ break
+ elsif response["errorId"]?.try &.as_i != 0
+ raise response["errorDescription"].as_s
+ end
+ end
+
+ inputs["g-recaptcha-response"] = response["solution"]["gRecaptchaResponse"].as_s
+ headers["Cookies"] = response["solution"]["cookies"].as_h?.try &.map { |k, v| "#{k}=#{v}" }.join("; ") || ""
+ response = YT_POOL.client &.post("/sorry/index", headers: headers, form: inputs)
+ headers = HTTP::Headers{
+ "Cookie" => URI.parse(response.headers["location"]).query_params["google_abuse"].split(";")[0],
+ }
+ cookies = HTTP::Cookies.from_headers(headers)
+
+ cookies.each { |cookie| config.cookies << cookie }
+
+ # Persist cookies between runs
+ File.write("config/config.yml", config.to_yaml)
+ end
+ end
+ rescue ex
+ logger.puts("Exception: #{ex.message}")
+ ensure
+ sleep 1.minute
+ Fiber.yield
+ end
+ end
+ end
+end
diff --git a/src/invidious/jobs/notification_job.cr b/src/invidious/jobs/notification_job.cr
new file mode 100644
index 00000000..2f525e08
--- /dev/null
+++ b/src/invidious/jobs/notification_job.cr
@@ -0,0 +1,24 @@
+class Invidious::Jobs::NotificationJob < Invidious::Jobs::BaseJob
+ private getter connection_channel : Channel({Bool, Channel(PQ::Notification)})
+ private getter pg_url : URI
+
+ def initialize(@connection_channel, @pg_url)
+ end
+
+ def begin
+ connections = [] of Channel(PQ::Notification)
+
+ PG.connect_listen(pg_url, "notifications") { |event| connections.each(&.send(event)) }
+
+ loop do
+ action, connection = connection_channel.receive
+
+ case action
+ when true
+ connections << connection
+ when false
+ connections.delete(connection)
+ end
+ end
+ end
+end
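
(Hypothetical sketch, not part of the patch: a consumer registers a channel by sending {true, channel} and unregisters with {false, channel}; connection_channel here is the channel handed to NotificationJob at startup.)

    events = Channel(PQ::Notification).new(32)
    connection_channel.send({true, events}) # register

    spawn do
      loop do
        notification = events.receive
        puts notification.payload
      end
    end

    # later, on disconnect:
    connection_channel.send({false, events}) # unregister
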
diff --git a/src/invidious/jobs/pull_popular_videos_job.cr b/src/invidious/jobs/pull_popular_videos_job.cr
new file mode 100644
index 00000000..7a8ab84e
--- /dev/null
+++ b/src/invidious/jobs/pull_popular_videos_job.cr
@@ -0,0 +1,27 @@
+class Invidious::Jobs::PullPopularVideosJob < Invidious::Jobs::BaseJob
+ QUERY = <<-SQL
+ SELECT DISTINCT ON (ucid) *
+ FROM channel_videos
+ WHERE ucid IN (SELECT channel FROM (SELECT UNNEST(subscriptions) AS channel FROM users) AS d
+ GROUP BY channel ORDER BY COUNT(channel) DESC LIMIT 40)
+ ORDER BY ucid, published DESC
+ SQL
+ POPULAR_VIDEOS = Atomic.new([] of ChannelVideo)
+ private getter db : DB::Database
+
+ def initialize(@db)
+ end
+
+ def begin
+ loop do
+ videos = db.query_all(QUERY, as: ChannelVideo)
+ .sort_by(&.published)
+ .reverse
+
+ POPULAR_VIDEOS.set(videos)
+
+ sleep 1.minute
+ Fiber.yield
+ end
+ end
+end
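
(Sketch, not part of the patch: publishing through an Atomic lets request handlers take a consistent snapshot of the list without locks.)

    videos = Invidious::Jobs::PullPopularVideosJob::POPULAR_VIDEOS.get
    puts "#{videos.size} popular videos cached"
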
diff --git a/src/invidious/jobs/refresh_channels_job.cr b/src/invidious/jobs/refresh_channels_job.cr
new file mode 100644
index 00000000..75fc474d
--- /dev/null
+++ b/src/invidious/jobs/refresh_channels_job.cr
@@ -0,0 +1,59 @@
+class Invidious::Jobs::RefreshChannelsJob < Invidious::Jobs::BaseJob
+ private getter db : DB::Database
+ private getter logger : Invidious::LogHandler
+ private getter config : Config
+
+ def initialize(@db, @logger, @config)
+ end
+
+ def begin
+ max_threads = config.channel_threads
+ lim_threads = max_threads
+ active_threads = 0
+ active_channel = Channel(Bool).new
+ backoff = 1.seconds
+
+ loop do
+ db.query("SELECT id FROM channels ORDER BY updated") do |rs|
+ rs.each do
+ id = rs.read(String)
+
+ if active_threads >= lim_threads
+ if active_channel.receive
+ active_threads -= 1
+ end
+ end
+
+ active_threads += 1
+ spawn do
+ begin
+ channel = fetch_channel(id, db, config.full_refresh)
+
+ lim_threads = max_threads
+ db.exec("UPDATE channels SET updated = $1, author = $2, deleted = false WHERE id = $3", Time.utc, channel.author, id)
+ rescue ex
+ logger.puts("#{id} : #{ex.message}")
+ if ex.message == "Deleted or invalid channel"
+ db.exec("UPDATE channels SET updated = $1, deleted = true WHERE id = $2", Time.utc, id)
+ else
+ lim_threads = 1
+ logger.puts("#{id} : backing off for #{backoff}s")
+ sleep backoff
+ if backoff < 1.days
+ backoff += backoff
+ else
+ backoff = 1.days
+ end
+ end
+ end
+
+ active_channel.send(true)
+ end
+ end
+ end
+
+ sleep 1.minute
+ Fiber.yield
+ end
+ end
+end
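
(Sketch, not part of the patch: on repeated failures the job throttles to a single worker and doubles its sleep, capped at one day. The schedule the loop above produces:)

    backoff = 1.seconds
    schedule = [] of Time::Span
    7.times do
      schedule << backoff
      backoff = backoff < 1.days ? backoff + backoff : 1.days
    end
    schedule # => 1s, 2s, 4s, 8s, 16s, 32s, 64s ... capped at 1 day
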
diff --git a/src/invidious/jobs/refresh_feeds_job.cr b/src/invidious/jobs/refresh_feeds_job.cr
new file mode 100644
index 00000000..eebdf0f3
--- /dev/null
+++ b/src/invidious/jobs/refresh_feeds_job.cr
@@ -0,0 +1,77 @@
+class Invidious::Jobs::RefreshFeedsJob < Invidious::Jobs::BaseJob
+ private getter db : DB::Database
+ private getter logger : Invidious::LogHandler
+ private getter config : Config
+
+ def initialize(@db, @logger, @config)
+ end
+
+ def begin
+ max_threads = config.feed_threads
+ active_threads = 0
+ active_channel = Channel(Bool).new
+
+ loop do
+ db.query("SELECT email FROM users WHERE feed_needs_update = true OR feed_needs_update IS NULL") do |rs|
+ rs.each do
+ email = rs.read(String)
+ view_name = "subscriptions_#{sha256(email)}"
+
+ if active_threads >= max_threads
+ if active_channel.receive
+ active_threads -= 1
+ end
+ end
+
+ active_threads += 1
+ spawn do
+ begin
+ # Drop outdated views
+ column_array = get_column_array(db, view_name)
+ ChannelVideo.type_array.each_with_index do |name, i|
+ if name != column_array[i]?
+ logger.puts("DROP MATERIALIZED VIEW #{view_name}")
+ db.exec("DROP MATERIALIZED VIEW #{view_name}")
+ raise "view does not exist"
+ end
+ end
+
+ if !db.query_one("SELECT pg_get_viewdef('#{view_name}')", as: String).includes? "WHERE ((cv.ucid = ANY (u.subscriptions))"
+ logger.puts("Materialized view #{view_name} is out-of-date, recreating...")
+ db.exec("DROP MATERIALIZED VIEW #{view_name}")
+ end
+
+ db.exec("REFRESH MATERIALIZED VIEW #{view_name}")
+ db.exec("UPDATE users SET feed_needs_update = false WHERE email = $1", email)
+ rescue ex
+ # Rename old views
+ begin
+ legacy_view_name = "subscriptions_#{sha256(email)[0..7]}"
+
+ db.exec("SELECT * FROM #{legacy_view_name} LIMIT 0")
+ logger.puts("RENAME MATERIALIZED VIEW #{legacy_view_name}")
+ db.exec("ALTER MATERIALIZED VIEW #{legacy_view_name} RENAME TO #{view_name}")
+ rescue ex
+ begin
+ # While iterating through, we may have an email stored from a deleted account
+ if db.query_one?("SELECT true FROM users WHERE email = $1", email, as: Bool)
+ logger.puts("CREATE #{view_name}")
+ db.exec("CREATE MATERIALIZED VIEW #{view_name} AS #{MATERIALIZED_VIEW_SQL.call(email)}")
+ db.exec("UPDATE users SET feed_needs_update = false WHERE email = $1", email)
+ end
+ rescue ex
+ logger.puts("REFRESH #{email} : #{ex.message}")
+ end
+ end
+ end
+
+ active_channel.send(true)
+ end
+ end
+ end
+
+ sleep 5.seconds
+ Fiber.yield
+ end
+ end
+end
diff --git a/src/invidious/jobs/statistics_refresh_job.cr b/src/invidious/jobs/statistics_refresh_job.cr
new file mode 100644
index 00000000..021671be
--- /dev/null
+++ b/src/invidious/jobs/statistics_refresh_job.cr
@@ -0,0 +1,59 @@
+class Invidious::Jobs::StatisticsRefreshJob < Invidious::Jobs::BaseJob
+ STATISTICS = {
+ "version" => "2.0",
+ "software" => {
+ "name" => "invidious",
+ "version" => "",
+ "branch" => "",
+ },
+ "openRegistrations" => true,
+ "usage" => {
+ "users" => {
+ "total" => 0_i64,
+ "activeHalfyear" => 0_i64,
+ "activeMonth" => 0_i64,
+ },
+ },
+ "metadata" => {
+ "updatedAt" => Time.utc.to_unix,
+ "lastChannelRefreshedAt" => 0_i64,
+ },
+ }
+
+ private getter db : DB::Database
+ private getter config : Config
+
+ def initialize(@db, @config, @software_config : Hash(String, String))
+ end
+
+ def begin
+ load_initial_stats
+
+ loop do
+ refresh_stats
+ sleep 1.minute
+ Fiber.yield
+ end
+ end
+
+ # should only be called once at the very beginning
+ private def load_initial_stats
+ STATISTICS["software"] = {
+ "name" => @software_config["name"],
+ "version" => @software_config["version"],
+ "branch" => @software_config["branch"],
+ }
+ STATISTICS["openRegistration"] = config.registration_enabled
+ end
+
+ private def refresh_stats
+ users = STATISTICS.dig("usage", "users").as(Hash(String, Int64))
+ users["total"] = db.query_one("SELECT count(*) FROM users", as: Int64)
+ users["activeHalfyear"] = db.query_one("SELECT count(*) FROM users WHERE CURRENT_TIMESTAMP - updated < '6 months'", as: Int64)
+ users["activeMonth"] = db.query_one("SELECT count(*) FROM users WHERE CURRENT_TIMESTAMP - updated < '1 month'", as: Int64)
+ STATISTICS["metadata"] = {
+ "updatedAt" => Time.utc.to_unix,
+ "lastChannelRefreshedAt" => db.query_one?("SELECT updated FROM channels ORDER BY updated DESC LIMIT 1", as: Time).try &.to_unix || 0_i64,
+ }
+ end
+end
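
(Hypothetical sketch, not part of the patch: with the stats cached in a constant, a stats endpoint only needs to serialize it. The route body below is an assumption for illustration, not code from this commit.)

    get "/api/v1/stats" do |env|
      env.response.content_type = "application/json"
      Invidious::Jobs::StatisticsRefreshJob::STATISTICS.to_json
    end
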
diff --git a/src/invidious/jobs/subscribe_to_feeds_job.cr b/src/invidious/jobs/subscribe_to_feeds_job.cr
new file mode 100644
index 00000000..3d3b2218
--- /dev/null
+++ b/src/invidious/jobs/subscribe_to_feeds_job.cr
@@ -0,0 +1,52 @@
+class Invidious::Jobs::SubscribeToFeedsJob < Invidious::Jobs::BaseJob
+ private getter db : DB::Database
+ private getter logger : Invidious::LogHandler
+ private getter hmac_key : String
+ private getter config : Config
+
+ def initialize(@db, @logger, @config, @hmac_key)
+ end
+
+ def begin
+ max_threads = 1
+ if config.use_pubsub_feeds.is_a?(Int32)
+ max_threads = config.use_pubsub_feeds.as(Int32)
+ end
+
+ active_threads = 0
+ active_channel = Channel(Bool).new
+
+ loop do
+ db.query_all("SELECT id FROM channels WHERE CURRENT_TIMESTAMP - subscribed > interval '4 days' OR subscribed IS NULL") do |rs|
+ rs.each do
+ ucid = rs.read(String)
+
+ if active_threads >= max_threads.as(Int32)
+ if active_channel.receive
+ active_threads -= 1
+ end
+ end
+
+ active_threads += 1
+
+ spawn do
+ begin
+ response = subscribe_pubsub(ucid, hmac_key, config)
+
+ if response.status_code >= 400
+ logger.puts("#{ucid} : #{response.body}")
+ end
+ rescue ex
+ logger.puts("#{ucid} : #{ex.message}")
+ end
+
+ active_channel.send(true)
+ end
+ end
+ end
+
+ sleep 1.minute
+ Fiber.yield
+ end
+ end
+end
diff --git a/src/invidious/jobs/update_decrypt_function_job.cr b/src/invidious/jobs/update_decrypt_function_job.cr
new file mode 100644
index 00000000..5332c672
--- /dev/null
+++ b/src/invidious/jobs/update_decrypt_function_job.cr
@@ -0,0 +1,19 @@
+class Invidious::Jobs::UpdateDecryptFunctionJob < Invidious::Jobs::BaseJob
+ DECRYPT_FUNCTION = [] of {SigProc, Int32}
+
+ def begin
+ loop do
+ begin
+ decrypt_function = fetch_decrypt_function
+ DECRYPT_FUNCTION.clear
+ decrypt_function.each { |df| DECRYPT_FUNCTION << df }
+ rescue ex
+ # TODO: Log error
+ next
+ ensure
+ sleep 1.minute
+ Fiber.yield
+ end
+ end
+ end
+end
diff --git a/src/invidious/mixes.cr b/src/invidious/mixes.cr
index 04a37b87..c69eb0c4 100644
--- a/src/invidious/mixes.cr
+++ b/src/invidious/mixes.cr
@@ -1,32 +1,32 @@
struct MixVideo
- db_mapping({
- title: String,
- id: String,
- author: String,
- ucid: String,
- length_seconds: Int32,
- index: Int32,
- rdid: String,
- })
+ include DB::Serializable
+
+ property title : String
+ property id : String
+ property author : String
+ property ucid : String
+ property length_seconds : Int32
+ property index : Int32
+ property rdid : String
end
struct Mix
- db_mapping({
- title: String,
- id: String,
- videos: Array(MixVideo),
- })
+ include DB::Serializable
+
+ property title : String
+ property id : String
+ property videos : Array(MixVideo)
end
def fetch_mix(rdid, video_id, cookies = nil, locale = nil)
headers = HTTP::Headers.new
- headers["User-Agent"] = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"
if cookies
headers = cookies.add_request_headers(headers)
end
- response = YT_POOL.client &.get("/watch?v=#{video_id}&list=#{rdid}&gl=US&hl=en&has_verified=1&bpctr=9999999999", headers)
+ video_id = "CvFH_6DNRCY" if rdid.starts_with? "OLAK5uy_"
+ response = YT_POOL.client &.get("/watch?v=#{video_id}&list=#{rdid}&gl=US&hl=en", headers)
initial_data = extract_initial_data(response.body)
if !initial_data["contents"]["twoColumnWatchNextResults"]["playlist"]?
@@ -49,23 +49,22 @@ def fetch_mix(rdid, video_id, cookies = nil, locale = nil)
id = item["videoId"].as_s
title = item["title"]?.try &.["simpleText"].as_s
- if !title
- next
- end
+ next if !title
+
author = item["longBylineText"]["runs"][0]["text"].as_s
ucid = item["longBylineText"]["runs"][0]["navigationEndpoint"]["browseEndpoint"]["browseId"].as_s
length_seconds = decode_length_seconds(item["lengthText"]["simpleText"].as_s)
index = item["navigationEndpoint"]["watchEndpoint"]["index"].as_i
- videos << MixVideo.new(
- title,
- id,
- author,
- ucid,
- length_seconds,
- index,
- rdid
- )
+ videos << MixVideo.new({
+ title: title,
+ id: id,
+ author: author,
+ ucid: ucid,
+ length_seconds: length_seconds,
+ index: index,
+ rdid: rdid,
+ })
end
if !cookies
@@ -75,7 +74,11 @@ def fetch_mix(rdid, video_id, cookies = nil, locale = nil)
videos.uniq! { |video| video.id }
videos = videos.first(50)
- return Mix.new(mix_title, rdid, videos)
+ return Mix.new({
+ title: mix_title,
+ id: rdid,
+ videos: videos,
+ })
end
def template_mix(mix)
diff --git a/src/invidious/playlists.cr b/src/invidious/playlists.cr
index 9c8afd3c..c984a12a 100644
--- a/src/invidious/playlists.cr
+++ b/src/invidious/playlists.cr
@@ -1,26 +1,38 @@
struct PlaylistVideo
- def to_xml(host_url, auto_generated, xml : XML::Builder)
+ include DB::Serializable
+
+ property title : String
+ property id : String
+ property author : String
+ property ucid : String
+ property length_seconds : Int32
+ property published : Time
+ property plid : String
+ property index : Int64
+ property live_now : Bool
+
+ def to_xml(auto_generated, xml : XML::Builder)
xml.element("entry") do
xml.element("id") { xml.text "yt:video:#{self.id}" }
xml.element("yt:videoId") { xml.text self.id }
xml.element("yt:channelId") { xml.text self.ucid }
xml.element("title") { xml.text self.title }
- xml.element("link", rel: "alternate", href: "#{host_url}/watch?v=#{self.id}")
+ xml.element("link", rel: "alternate", href: "#{HOST_URL}/watch?v=#{self.id}")
xml.element("author") do
if auto_generated
xml.element("name") { xml.text self.author }
- xml.element("uri") { xml.text "#{host_url}/channel/#{self.ucid}" }
+ xml.element("uri") { xml.text "#{HOST_URL}/channel/#{self.ucid}" }
else
xml.element("name") { xml.text author }
- xml.element("uri") { xml.text "#{host_url}/channel/#{ucid}" }
+ xml.element("uri") { xml.text "#{HOST_URL}/channel/#{ucid}" }
end
end
xml.element("content", type: "xhtml") do
xml.element("div", xmlns: "http://www.w3.org/1999/xhtml") do
- xml.element("a", href: "#{host_url}/watch?v=#{self.id}") do
- xml.element("img", src: "#{host_url}/vi/#{self.id}/mqdefault.jpg")
+ xml.element("a", href: "#{HOST_URL}/watch?v=#{self.id}") do
+ xml.element("img", src: "#{HOST_URL}/vi/#{self.id}/mqdefault.jpg")
end
end
end
@@ -29,23 +41,23 @@ struct PlaylistVideo
xml.element("media:group") do
xml.element("media:title") { xml.text self.title }
- xml.element("media:thumbnail", url: "#{host_url}/vi/#{self.id}/mqdefault.jpg",
+ xml.element("media:thumbnail", url: "#{HOST_URL}/vi/#{self.id}/mqdefault.jpg",
width: "320", height: "180")
end
end
end
- def to_xml(host_url, auto_generated, xml : XML::Builder? = nil)
+ def to_xml(auto_generated, xml : XML::Builder? = nil)
if xml
- to_xml(host_url, auto_generated, xml)
+ to_xml(auto_generated, xml)
else
XML.build do |json|
- to_xml(host_url, auto_generated, json)
+ to_xml(auto_generated, json)
end
end
end
- def to_json(locale, config, kemal_config, json : JSON::Builder, index : Int32?)
+ def to_json(locale, json : JSON::Builder, index : Int32?)
json.object do
json.field "title", self.title
json.field "videoId", self.id
@@ -55,7 +67,7 @@ struct PlaylistVideo
json.field "authorUrl", "/channel/#{self.ucid}"
json.field "videoThumbnails" do
- generate_thumbnails(json, self.id, config, kemal_config)
+ generate_thumbnails(json, self.id)
end
if index
@@ -69,31 +81,32 @@ struct PlaylistVideo
end
end
- def to_json(locale, config, kemal_config, json : JSON::Builder? = nil, index : Int32? = nil)
+ def to_json(locale, json : JSON::Builder? = nil, index : Int32? = nil)
if json
- to_json(locale, config, kemal_config, json, index: index)
+ to_json(locale, json, index: index)
else
JSON.build do |json|
- to_json(locale, config, kemal_config, json, index: index)
+ to_json(locale, json, index: index)
end
end
end
-
- db_mapping({
- title: String,
- id: String,
- author: String,
- ucid: String,
- length_seconds: Int32,
- published: Time,
- plid: String,
- index: Int64,
- live_now: Bool,
- })
end
struct Playlist
- def to_json(offset, locale, config, kemal_config, json : JSON::Builder, continuation : String? = nil)
+ include DB::Serializable
+
+ property title : String
+ property id : String
+ property author : String
+ property author_thumbnail : String
+ property ucid : String
+ property description : String
+ property video_count : Int32
+ property views : Int64
+ property updated : Time
+ property thumbnail : String?
+
+ def to_json(offset, locale, json : JSON::Builder, continuation : String? = nil)
json.object do
json.field "type", "playlist"
json.field "title", self.title
@@ -118,7 +131,7 @@ struct Playlist
end
end
- json.field "description", html_to_content(self.description_html)
+ json.field "description", self.description
json.field "descriptionHtml", self.description_html
json.field "videoCount", self.video_count
@@ -130,39 +143,30 @@ struct Playlist
json.array do
videos = get_playlist_videos(PG_DB, self, offset: offset, locale: locale, continuation: continuation)
videos.each_with_index do |video, index|
- video.to_json(locale, config, Kemal.config, json)
+ video.to_json(locale, json)
end
end
end
end
end
- def to_json(offset, locale, config, kemal_config, json : JSON::Builder? = nil, continuation : String? = nil)
+ def to_json(offset, locale, json : JSON::Builder? = nil, continuation : String? = nil)
if json
- to_json(offset, locale, config, kemal_config, json, continuation: continuation)
+ to_json(offset, locale, json, continuation: continuation)
else
JSON.build do |json|
- to_json(offset, locale, config, kemal_config, json, continuation: continuation)
+ to_json(offset, locale, json, continuation: continuation)
end
end
end
- db_mapping({
- title: String,
- id: String,
- author: String,
- author_thumbnail: String,
- ucid: String,
- description_html: String,
- video_count: Int32,
- views: Int64,
- updated: Time,
- thumbnail: String?,
- })
-
def privacy
PlaylistPrivacy::Public
end
+
+ def description_html
+ HTML.escape(self.description).gsub("\n", "<br>")
+ end
end
enum PlaylistPrivacy
@@ -172,7 +176,30 @@ enum PlaylistPrivacy
end
struct InvidiousPlaylist
- def to_json(offset, locale, config, kemal_config, json : JSON::Builder, continuation : String? = nil)
+ include DB::Serializable
+
+ property title : String
+ property id : String
+ property author : String
+ property description : String = ""
+ property video_count : Int32
+ property created : Time
+ property updated : Time
+
+ @[DB::Field(converter: InvidiousPlaylist::PlaylistPrivacyConverter)]
+ property privacy : PlaylistPrivacy = PlaylistPrivacy::Private
+ property index : Array(Int64)
+
+ @[DB::Field(ignore: true)]
+ property thumbnail_id : String?
+
+ module PlaylistPrivacyConverter
+ def self.from_rs(rs)
+ return PlaylistPrivacy.parse(String.new(rs.read(Slice(UInt8))))
+ end
+ end
+
+ def to_json(offset, locale, json : JSON::Builder, continuation : String? = nil)
json.object do
json.field "type", "invidiousPlaylist"
json.field "title", self.title
@@ -195,43 +222,23 @@ struct InvidiousPlaylist
json.array do
videos = get_playlist_videos(PG_DB, self, offset: offset, locale: locale, continuation: continuation)
videos.each_with_index do |video, index|
- video.to_json(locale, config, Kemal.config, json, offset + index)
+ video.to_json(locale, json, offset + index)
end
end
end
end
end
- def to_json(offset, locale, config, kemal_config, json : JSON::Builder? = nil, continuation : String? = nil)
+ def to_json(offset, locale, json : JSON::Builder? = nil, continuation : String? = nil)
if json
- to_json(offset, locale, config, kemal_config, json, continuation: continuation)
+ to_json(offset, locale, json, continuation: continuation)
else
JSON.build do |json|
- to_json(offset, locale, config, kemal_config, json, continuation: continuation)
+ to_json(offset, locale, json, continuation: continuation)
end
end
end
- property thumbnail_id
-
- module PlaylistPrivacyConverter
- def self.from_rs(rs)
- return PlaylistPrivacy.parse(String.new(rs.read(Slice(UInt8))))
- end
- end
-
- db_mapping({
- title: String,
- id: String,
- author: String,
- description: {type: String, default: ""},
- video_count: Int32,
- created: Time,
- updated: Time,
- privacy: {type: PlaylistPrivacy, default: PlaylistPrivacy::Private, converter: PlaylistPrivacyConverter},
- index: Array(Int64),
- })
-
def thumbnail
@thumbnail_id ||= PG_DB.query_one?("SELECT id FROM playlist_videos WHERE plid = $1 ORDER BY array_position($2, index) LIMIT 1", self.id, self.index, as: String) || "-----------"
"/vi/#{@thumbnail_id}/mqdefault.jpg"
@@ -257,17 +264,17 @@ end
def create_playlist(db, title, privacy, user)
plid = "IVPL#{Random::Secure.urlsafe_base64(24)[0, 31]}"
- playlist = InvidiousPlaylist.new(
- title: title.byte_slice(0, 150),
- id: plid,
- author: user.email,
+ playlist = InvidiousPlaylist.new({
+ title: title.byte_slice(0, 150),
+ id: plid,
+ author: user.email,
description: "", # Max 5000 characters
video_count: 0,
- created: Time.utc,
- updated: Time.utc,
- privacy: privacy,
- index: [] of Int64,
- )
+ created: Time.utc,
+ updated: Time.utc,
+ privacy: privacy,
+ index: [] of Int64,
+ })
playlist_array = playlist.to_a
args = arg_array(playlist_array)
@@ -277,50 +284,25 @@ def create_playlist(db, title, privacy, user)
return playlist
end
-def extract_playlist(plid, nodeset, index)
- videos = [] of PlaylistVideo
+def subscribe_playlist(db, user, playlist)
+ playlist = InvidiousPlaylist.new({
+ title: playlist.title.byte_slice(0, 150),
+ id: playlist.id,
+ author: user.email,
+ description: "", # Max 5000 characters
+ video_count: playlist.video_count,
+ created: Time.utc,
+ updated: playlist.updated,
+ privacy: PlaylistPrivacy::Private,
+ index: [] of Int64,
+ })
- nodeset.each_with_index do |video, offset|
- anchor = video.xpath_node(%q(.//td[@class="pl-video-title"]))
- if !anchor
- next
- end
+ playlist_array = playlist.to_a
+ args = arg_array(playlist_array)
- title = anchor.xpath_node(%q(.//a)).not_nil!.content.strip(" \n")
- id = anchor.xpath_node(%q(.//a)).not_nil!["href"].lchop("/watch?v=")[0, 11]
+ db.exec("INSERT INTO playlists VALUES (#{args})", args: playlist_array)
- anchor = anchor.xpath_node(%q(.//div[@class="pl-video-owner"]/a))
- if anchor
- author = anchor.content
- ucid = anchor["href"].split("/")[2]
- else
- author = ""
- ucid = ""
- end
-
- anchor = video.xpath_node(%q(.//td[@class="pl-video-time"]/div/div[1]))
- if anchor && !anchor.content.empty?
- length_seconds = decode_length_seconds(anchor.content)
- live_now = false
- else
- length_seconds = 0
- live_now = true
- end
-
- videos << PlaylistVideo.new(
- title: title,
- id: id,
- author: author,
- ucid: ucid,
- length_seconds: length_seconds,
- published: Time.utc,
- plid: plid,
- index: (index + offset).to_i64,
- live_now: live_now
- )
- end
-
- return videos
+ return playlist
end
def produce_playlist_url(id, index)
@@ -368,58 +350,64 @@ def fetch_playlist(plid, locale)
plid = "UU#{plid.lchop("UC")}"
end
- response = YT_POOL.client &.get("/playlist?list=#{plid}&hl=en&disable_polymer=1")
+ response = YT_POOL.client &.get("/playlist?list=#{plid}&hl=en")
if response.status_code != 200
- raise translate(locale, "Not a playlist.")
+ if response.headers["location"]?.try &.includes? "/sorry/index"
+ raise "Could not extract playlist info. Instance is likely blocked."
+ else
+ raise translate(locale, "Not a playlist.")
+ end
end
- body = response.body.gsub(/