From f8f998cc35c0385159189441980acb5eff26526f Mon Sep 17 00:00:00 2001
From: Dave Jansen
Date: Mon, 14 Aug 2023 01:31:07 +0000
Subject: [PATCH] Add basic mimetype check and allow files <8mb to be proxied through.

---
 read.php | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 80 insertions(+), 1 deletion(-)

diff --git a/read.php b/read.php
index e2634bf..3bb19e9 100644
--- a/read.php
+++ b/read.php
@@ -6,6 +6,19 @@ $article_html = "";
 $error_text = "";
 $loc = "US";
 
+// List of Content-Types that we know we can (try to) parse.
+// Compared against the lower-cased media type, without parameters such as
+// "; charset=UTF-8". Anything else will get piped through directly, if possible.
+$compatible_content_types = [
+    "text/html",
+    "text/plain"
+];
+
+// The maximum allowed filesize (in bytes) for proxy download passthroughs.
+// Any file larger than this will instead show an error message, with
+// a direct link to the file.
+$proxy_download_max_filesize = 8000000; // ~8 MB
+
 if( isset( $_GET['loc'] ) ) {
     $loc = strtoupper($_GET["loc"]);
 }
@@ -22,7 +35,73 @@ if (substr( $article_url, 0, 4 ) != "http") {
     die();
 }
 
-$host = parse_url($article_url, PHP_URL_HOST);
+// parse_url() returns false for seriously malformed URLs and leaves out any
+// component the URL does not contain, so never index the result blindly.
+$url = parse_url($article_url);
+$host = (is_array($url) && isset($url['host'])) ? $url['host'] : null;
+
+// Attempt to figure out what the requested URL content-type may be.
+$context = stream_context_create(['http' => array('method' => 'HEAD')]);
+$headers = get_headers($article_url, true, $context);
+
+// get_headers() returns false on failure, and header names are
+// case-insensitive, so normalise to an array with lower-case keys.
+$headers = is_array($headers) ? array_change_key_case($headers, CASE_LOWER) : [];
+
+// When redirects were involved each header holds one value per response;
+// only the last one describes the document that is finally served.
+$content_type = isset($headers['content-type']) ? $headers['content-type'] : null;
+if (is_array($content_type)) {
+    $content_type = end($content_type);
+}
+$filesize = isset($headers['content-length']) ? $headers['content-length'] : null;
+if (is_array($filesize)) {
+    $filesize = end($filesize);
+}
+// Only accept a plain run of digits as a size; anything else counts as unknown.
+$filesize = preg_match('/^\d+$/', trim((string) $filesize)) ? (int) trim($filesize) : null;
+
+// Compare on the bare media type: "text/html; charset=UTF-8" => "text/html".
+$media_type = strtolower(trim(explode(';', (string) $content_type)[0]));
+
+if ($media_type === '') {
+    $error_text .= "Failed to get the article, its server did not return expected details :(<br>";
+}
+elseif (!in_array($media_type, $compatible_content_types, true)) {
+    // Attempt to handle downloads or other mime-types by proxying them through.
+    if ($filesize === null) {
+        // Without a usable Content-Length the size limit cannot be enforced.
+        $error_text .= "Failed to get the article, its server did not return expected details :(<br>";
+    }
+    elseif ($filesize > $proxy_download_max_filesize) {
+        // The linked file is too large for us to proxy.
+        echo 'Failed to proxy file download, it\'s too large. :(<br>';
+        // The URL is user input, so escape it before echoing it into the page.
+        echo 'You can try downloading the file directly: ' . htmlspecialchars($article_url, ENT_QUOTES, 'UTF-8');
+        die();
+    }
+    else {
+        $filename = basename(isset($url['path']) ? $url['path'] : '');
+        // Drop quotes, backslashes and control characters so the name cannot
+        // break out of the quoted filename parameter below.
+        $filename = preg_replace('/[\x00-\x1F\x7F"\\\\]/', '', $filename);
+
+        // If no filename can be deduced from the URL, set a placeholder filename.
+        if ($filename === null || $filename === '') {
+            $filename = "download";
+        }
+
+        // Set the content headers based on the file we're proxying through.
+        header('Content-Type: ' . $content_type);
+        header('Content-Length: ' . $filesize);
+        // Set the content-disposition to encourage the browser to download the file.
+        header('Content-Disposition: attachment; filename="'. $filename . '"');
+
+        // Stream the remote file straight through to the client.
+        readfile($article_url);
+        die();
+    }
+}
 
 use fivefilters\Readability\Readability;
 use fivefilters\Readability\Configuration;