renamed services into crawer

This commit is contained in:
Christian Schabesberger 2016-01-28 21:34:35 +01:00
parent 54d318bf04
commit f8ed96bb25
21 changed files with 50 additions and 48 deletions

View file

@ -0,0 +1,16 @@
package org.schabi.newpipe.crawler;
import android.graphics.Bitmap;
/**Common properties between VideoInfo and VideoPreviewInfo.*/
public abstract class AbstractVideoInfo {
public String id = "";
public String title = "";
public String uploader = "";
//public int duration = -1;
public String thumbnail_url = "";
public Bitmap thumbnail = null;
public String webpage_url = "";
public String upload_date = "";
public long view_count = -1;
}

View file

@ -0,0 +1,37 @@
package org.schabi.newpipe.crawler;
/**
* Created by Christian Schabesberger on 28.01.16.
*
* Copyright (C) Christian Schabesberger 2015 <chris.schabesberger@mailbox.org>
* Downloader.java is part of NewPipe.
*
* NewPipe is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NewPipe is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
public interface Downloader {
/**Download the text file at the supplied URL as in download(String),
* but set the HTTP header field "Accept-Language" to the supplied string.
* @param siteUrl the URL of the text file to return the contents of
* @param language the language (usually a 2-character code) to set as the preferred language
* @return the contents of the specified text file*/
String download(String siteUrl, String language);
/**Download (via HTTP) the text file located at the supplied URL, and return its contents.
* Primarily intended for downloading web pages.
* @param siteUrl the URL of the text file to download
* @return the contents of the specified text file*/
String download(String siteUrl);
}

View file

@ -0,0 +1,81 @@
package org.schabi.newpipe.crawler;
/**
* Created by Adam Howard on 08/11/15.
*
* Copyright (c) Christian Schabesberger <chris.schabesberger@mailbox.org>
* and Adam Howard <achdisposable1@gmail.com> 2015
*
* VideoListAdapter.java is part of NewPipe.
*
* NewPipe is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NewPipe is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
/**Static data about various media formats support by Newpipe, eg mime type, extension*/
public enum MediaFormat {
// id name suffix mime type
MPEG_4 (0x0, "MPEG-4", "mp4", "video/mp4"),
v3GPP (0x1, "3GPP", "3gp", "video/3gpp"),
WEBM (0x2, "WebM", "webm", "video/webm"),
M4A (0x3, "m4a", "m4a", "audio/mp4"),
WEBMA (0x4, "WebM", "webm", "audio/webm");
public final int id;
@SuppressWarnings("WeakerAccess")
public final String name;
@SuppressWarnings("WeakerAccess")
public final String suffix;
public final String mimeType;
MediaFormat(int id, String name, String suffix, String mimeType) {
this.id = id;
this.name = name;
this.suffix = suffix;
this.mimeType = mimeType;
}
/**Return the friendly name of the media format with the supplied id
* @param ident the id of the media format. Currently an arbitrary, NewPipe-specific number.
* @return the friendly name of the MediaFormat associated with this ids,
* or an empty String if none match it.*/
public static String getNameById(int ident) {
for (MediaFormat vf : MediaFormat.values()) {
if(vf.id == ident) return vf.name;
}
return "";
}
/**Return the file extension of the media format with the supplied id
* @param ident the id of the media format. Currently an arbitrary, NewPipe-specific number.
* @return the file extension of the MediaFormat associated with this ids,
* or an empty String if none match it.*/
public static String getSuffixById(int ident) {
for (MediaFormat vf : MediaFormat.values()) {
if(vf.id == ident) return vf.suffix;
}
return "";
}
/**Return the MIME type of the media format with the supplied id
* @param ident the id of the media format. Currently an arbitrary, NewPipe-specific number.
* @return the MIME type of the MediaFormat associated with this ids,
* or an empty String if none match it.*/
public static String getMimeById(int ident) {
for (MediaFormat vf : MediaFormat.values()) {
if(vf.id == ident) return vf.mimeType;
}
return "";
}
}

View file

@ -0,0 +1,38 @@
package org.schabi.newpipe.crawler;
import java.util.ArrayList;
import java.util.Vector;
/**
* Created by Christian Schabesberger on 10.08.15.
*
* Copyright (C) Christian Schabesberger 2015 <chris.schabesberger@mailbox.org>
* SearchEngine.java is part of NewPipe.
*
* NewPipe is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NewPipe is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
@SuppressWarnings("ALL")
public interface SearchEngine {
class Result {
public String errorMessage = "";
public String suggestion = "";
public final Vector<VideoPreviewInfo> resultList = new Vector<>();
}
ArrayList<String> suggestionList(String query, Downloader dl);
//Result search(String query, int page);
Result search(String query, int page, String contentCountry, Downloader dl);
}

View file

@ -0,0 +1,54 @@
package org.schabi.newpipe.crawler;
import android.util.Log;
import org.schabi.newpipe.crawler.services.youtube.YoutubeService;
/**
* Created by Christian Schabesberger on 23.08.15.
*
* Copyright (C) Christian Schabesberger 2015 <chris.schabesberger@mailbox.org>
* ServiceList.java is part of NewPipe.
*
* NewPipe is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NewPipe is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
/**Provides access to the video streaming services supported by NewPipe.
* Currently only Youtube until the API becomes more stable.*/
@SuppressWarnings("ALL")
public class ServiceList {
private static final String TAG = ServiceList.class.toString();
private static final StreamingService[] services = {
new YoutubeService()
};
public static StreamingService[] getServices() {
return services;
}
public static StreamingService getService(int serviceId) {
return services[serviceId];
}
public static StreamingService getService(String serviceName) {
return services[getIdOfService(serviceName)];
}
public static int getIdOfService(String serviceName) {
for(int i = 0; i < services.length; i++) {
if(services[i].getServiceInfo().name.equals(serviceName)) {
return i;
}
}
Log.e(TAG, "Error: Service " + serviceName + " not known.");
return -1;
}
}

View file

@ -0,0 +1,35 @@
package org.schabi.newpipe.crawler;
/**
* Created by Christian Schabesberger on 23.08.15.
*
* Copyright (C) Christian Schabesberger 2015 <chris.schabesberger@mailbox.org>
* StreamingService.java is part of NewPipe.
*
* NewPipe is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NewPipe is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
public interface StreamingService {
class ServiceInfo {
public String name = "";
}
ServiceInfo getServiceInfo();
VideoExtractor getExtractorInstance(String url, Downloader downloader);
SearchEngine getSearchEngineInstance();
/**When a VIEW_ACTION is caught this function will test if the url delivered within the calling
Intent was meant to be watched with this Service.
Return false if this service shall not allow to be called through ACTIONs.*/
boolean acceptUrl(String videoUrl);
}

View file

@ -0,0 +1,128 @@
package org.schabi.newpipe.crawler;
/**
* Created by Christian Schabesberger on 10.08.15.
*
* Copyright (C) Christian Schabesberger 2015 <chris.schabesberger@mailbox.org>
* VideoExtractor.java is part of NewPipe.
*
* NewPipe is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NewPipe is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
/**Scrapes information from a video streaming service (eg, YouTube).*/
@SuppressWarnings("ALL")
public abstract class VideoExtractor {
protected final String pageUrl;
protected VideoInfo videoInfo;
@SuppressWarnings("WeakerAccess")
public VideoExtractor(String url, Downloader dl) {
this.pageUrl = url;
}
/**Fills out the video info fields which are common to all services.
* Probably needs to be overridden by subclasses*/
public VideoInfo getVideoInfo()
{
if(videoInfo == null) {
videoInfo = new VideoInfo();
}
if(videoInfo.webpage_url.isEmpty()) {
videoInfo.webpage_url = pageUrl;
}
if(getErrorCode() == VideoInfo.NO_ERROR) {
if (videoInfo.title.isEmpty()) {
videoInfo.title = getTitle();
}
if (videoInfo.duration < 1) {
videoInfo.duration = getLength();
}
if (videoInfo.uploader.isEmpty()) {
videoInfo.uploader = getUploader();
}
if (videoInfo.description.isEmpty()) {
videoInfo.description = getDescription();
}
if (videoInfo.view_count == -1) {
videoInfo.view_count = getViews();
}
if (videoInfo.upload_date.isEmpty()) {
videoInfo.upload_date = getUploadDate();
}
if (videoInfo.thumbnail_url.isEmpty()) {
videoInfo.thumbnail_url = getThumbnailUrl();
}
if (videoInfo.id.isEmpty()) {
videoInfo.id = getVideoId(pageUrl);
}
/** Load and extract audio*/
if (videoInfo.audioStreams == null) {
videoInfo.audioStreams = getAudioStreams();
}
/** Extract video stream url*/
if (videoInfo.videoStreams == null) {
videoInfo.videoStreams = getVideoStreams();
}
if (videoInfo.uploader_thumbnail_url.isEmpty()) {
videoInfo.uploader_thumbnail_url = getUploaderThumbnailUrl();
}
if (videoInfo.startPosition < 0) {
videoInfo.startPosition = getTimeStamp();
}
} else {
videoInfo.errorCode = getErrorCode();
videoInfo.errorMessage = getErrorMessage();
}
//Bitmap thumbnail = null;
//Bitmap uploader_thumbnail = null;
//int videoAvailableStatus = VIDEO_AVAILABLE;
return videoInfo;
}
//todo: add licence field
public abstract int getErrorCode();
public abstract String getErrorMessage();
//todo: remove these functions, or make them static, otherwise its useles, to have them here
public abstract String getVideoUrl(String videoId);
public abstract String getVideoId(String siteUrl);
///////////////////////////////////////////////////////////////////////////////////////////
public abstract int getTimeStamp();
public abstract String getTitle();
public abstract String getDescription();
public abstract String getUploader();
public abstract int getLength();
public abstract long getViews();
public abstract String getUploadDate();
public abstract String getThumbnailUrl();
public abstract String getUploaderThumbnailUrl();
public abstract VideoInfo.AudioStream[] getAudioStreams();
public abstract VideoInfo.VideoStream[] getVideoStreams();
}

View file

@ -0,0 +1,105 @@
package org.schabi.newpipe.crawler;
import java.util.List;
/**
* Created by Christian Schabesberger on 26.08.15.
*
* Copyright (C) Christian Schabesberger 2015 <chris.schabesberger@mailbox.org>
* VideoInfo.java is part of NewPipe.
*
* NewPipe is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NewPipe is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
/**Info object for opened videos, ie the video ready to play.*/
@SuppressWarnings("ALL")
public class VideoInfo extends AbstractVideoInfo {
// If a video could not be parsed, this predefined error codes
// will be returned AND can be parsed by the frontend of the app.
// Error codes:
public final static int NO_ERROR = 0x0;
public final static int ERROR_NO_SPECIFIED_ERROR = 0x1;
// GEMA a german music colecting society.
public final static int ERROR_BLOCKED_BY_GEMA = 0x2;
public String uploader_thumbnail_url = "";
public String description = "";
public VideoStream[] videoStreams = null;
public AudioStream[] audioStreams = null;
public int errorCode = NO_ERROR;
public String errorMessage = "";
public int duration = -1;
/*YouTube-specific fields
todo: move these to a subclass*/
public int age_limit = 0;
public int like_count = -1;
public int dislike_count = -1;
public String average_rating = "";
public VideoPreviewInfo nextVideo = null;
public List<VideoPreviewInfo> relatedVideos = null;
public int startPosition = -1;//in seconds. some metadata is not passed using a VideoInfo object!
public VideoInfo() {}
/**Creates a new VideoInfo object from an existing AbstractVideoInfo.
* All the shared properties are copied to the new VideoInfo.*/
@SuppressWarnings("WeakerAccess")
public VideoInfo(AbstractVideoInfo avi) {
this.id = avi.id;
this.title = avi.title;
this.uploader = avi.uploader;
this.thumbnail_url = avi.thumbnail_url;
this.thumbnail = avi.thumbnail;
this.webpage_url = avi.webpage_url;
this.upload_date = avi.upload_date;
this.upload_date = avi.upload_date;
this.view_count = avi.view_count;
//todo: better than this
if(avi instanceof VideoPreviewInfo) {
//shitty String to convert code
String dur = ((VideoPreviewInfo)avi).duration;
int minutes = Integer.parseInt(dur.substring(0, dur.indexOf(":")));
int seconds = Integer.parseInt(dur.substring(dur.indexOf(":")+1, dur.length()));
this.duration = (minutes*60)+seconds;
}
}
public static class VideoStream {
//url of the stream
public String url = "";
public int format = -1;
public String resolution = "";
public VideoStream(String url, int format, String res) {
this.url = url; this.format = format; resolution = res;
}
}
@SuppressWarnings("unused")
public static class AudioStream {
public String url = "";
public int format = -1;
public int bandwidth = -1;
public int samplingRate = -1;
public AudioStream(String url, int format, int bandwidth, int samplingRate) {
this.url = url; this.format = format;
this.bandwidth = bandwidth; this.samplingRate = samplingRate;
}
}
}

View file

@ -0,0 +1,79 @@
package org.schabi.newpipe.crawler;
import android.graphics.Bitmap;
import android.os.Parcel;
import android.os.Parcelable;
import org.schabi.newpipe.crawler.AbstractVideoInfo;
/**
* Created by Christian Schabesberger on 26.08.15.
*
* Copyright (C) Christian Schabesberger 2015 <chris.schabesberger@mailbox.org>
* VideoPreviewInfo.java is part of NewPipe.
*
* NewPipe is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NewPipe is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
/**Info object for previews of unopened videos, eg search results, related videos*/
public class VideoPreviewInfo extends AbstractVideoInfo implements Parcelable {
public String duration = "";
@SuppressWarnings("WeakerAccess")
protected VideoPreviewInfo(Parcel in) {
id = in.readString();
title = in.readString();
uploader = in.readString();
duration = in.readString();
thumbnail_url = in.readString();
thumbnail = (Bitmap) in.readValue(Bitmap.class.getClassLoader());
webpage_url = in.readString();
upload_date = in.readString();
view_count = in.readLong();
}
public VideoPreviewInfo() {
}
@Override
public int describeContents() {
return 0;
}
@Override
public void writeToParcel(Parcel dest, int flags) {
dest.writeString(id);
dest.writeString(title);
dest.writeString(uploader);
dest.writeString(duration);
dest.writeString(thumbnail_url);
dest.writeValue(thumbnail);
dest.writeString(webpage_url);
dest.writeString(upload_date);
dest.writeLong(view_count);
}
@SuppressWarnings("unused")
public static final Parcelable.Creator<VideoPreviewInfo> CREATOR = new Parcelable.Creator<VideoPreviewInfo>() {
@Override
public VideoPreviewInfo createFromParcel(Parcel in) {
return new VideoPreviewInfo(in);
}
@Override
public VideoPreviewInfo[] newArray(int size) {
return new VideoPreviewInfo[size];
}
};
}

View file

@ -0,0 +1,190 @@
package org.schabi.newpipe.crawler.services.youtube;
import android.net.Uri;
import android.util.Log;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.schabi.newpipe.crawler.Downloader;
import org.schabi.newpipe.crawler.SearchEngine;
import org.schabi.newpipe.crawler.VideoPreviewInfo;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
/**
* Created by Christian Schabesberger on 09.08.15.
*
* Copyright (C) Christian Schabesberger 2015 <chris.schabesberger@mailbox.org>
* YoutubeSearchEngine.java is part of NewPipe.
*
* NewPipe is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NewPipe is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
public class YoutubeSearchEngine implements SearchEngine {
private static final String TAG = YoutubeSearchEngine.class.toString();
@Override
public Result search(String query, int page, String languageCode, Downloader downloader) {
//String contentCountry = PreferenceManager.getDefaultSharedPreferences(this).getString(getString(R.string., "");
Uri.Builder builder = new Uri.Builder();
builder.scheme("https")
.authority("www.youtube.com")
.appendPath("results")
.appendQueryParameter("search_query", query)
.appendQueryParameter("page", Integer.toString(page))
.appendQueryParameter("filters", "video");
String site;
String url = builder.build().toString();
//if we've been passed a valid language code, append it to the URL
if(!languageCode.isEmpty()) {
//assert Pattern.matches("[a-z]{2}(-([A-Z]{2}|[0-9]{1,3}))?", languageCode);
site = downloader.download(url, languageCode);
}
else {
site = downloader.download(url);
}
Document doc = Jsoup.parse(site, url);
Result result = new Result();
Element list = doc.select("ol[class=\"item-section\"]").first();
int i = 0;
for(Element item : list.children()) {
i++;
/* First we need to determine which kind of item we are working with.
Youtube depicts five different kinds of items on its search result page. These are
regular videos, playlists, channels, two types of video suggestions, and a "no video
found" item. Since we only want videos, we need to filter out all the others.
An example for this can be seen here:
https://www.youtube.com/results?search_query=asdf&page=1
We already applied a filter to the url, so we don't need to care about channels and
playlists now.
*/
Element el;
// both types of spell correction item
if(!((el = item.select("div[class*=\"spell-correction\"]").first()) == null)) {
result.suggestion = el.select("a").first().text();
// search message item
} else if(!((el = item.select("div[class*=\"search-message\"]").first()) == null)) {
result.errorMessage = el.text();
// video item type
} else if(!((el = item.select("div[class*=\"yt-lockup-video\"").first()) == null)) {
VideoPreviewInfo resultItem = new VideoPreviewInfo();
Element dl = el.select("h3").first().select("a").first();
resultItem.webpage_url = dl.attr("abs:href");
try {
Pattern p = Pattern.compile("v=([0-9a-zA-Z-]*)");
Matcher m = p.matcher(resultItem.webpage_url);
resultItem.id=m.group(1);
} catch (Exception e) {
//e.printStackTrace();
}
resultItem.title = dl.text();
resultItem.duration = item.select("span[class=\"video-time\"]").first().text();
resultItem.uploader = item.select("div[class=\"yt-lockup-byline\"]").first()
.select("a").first()
.text();
resultItem.upload_date = item.select("div[class=\"yt-lockup-meta\"]").first()
.select("li").first()
.text();
Element te = item.select("div[class=\"yt-thumb video-thumb\"]").first()
.select("img").first();
resultItem.thumbnail_url = te.attr("abs:src");
// Sometimes youtube sends links to gif files which somehow seem to not exist
// anymore. Items with such gif also offer a secondary image source. So we are going
// to use that if we've caught such an item.
if(resultItem.thumbnail_url.contains(".gif")) {
resultItem.thumbnail_url = te.attr("abs:data-thumb");
}
result.resultList.add(resultItem);
} else {
//noinspection ConstantConditions
Log.e(TAG, "unexpected element found:\""+el+"\"");
}
}
return result;
}
@Override
public ArrayList<String> suggestionList(String query, Downloader dl) {
ArrayList<String> suggestions = new ArrayList<>();
Uri.Builder builder = new Uri.Builder();
builder.scheme("https")
.authority("suggestqueries.google.com")
.appendPath("complete")
.appendPath("search")
.appendQueryParameter("client", "")
.appendQueryParameter("output", "toolbar")
.appendQueryParameter("ds", "yt")
.appendQueryParameter("q", query);
String url = builder.build().toString();
String response = dl.download(url);
//TODO: Parse xml data using Jsoup not done
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder dBuilder;
org.w3c.dom.Document doc = null;
try {
dBuilder = dbFactory.newDocumentBuilder();
doc = dBuilder.parse(new InputSource(new ByteArrayInputStream(response.getBytes("utf-8"))));
doc.getDocumentElement().normalize();
}catch (ParserConfigurationException | SAXException | IOException e) {
e.printStackTrace();
}
if(doc!=null){
NodeList nList = doc.getElementsByTagName("CompleteSuggestion");
for (int temp = 0; temp < nList.getLength(); temp++) {
NodeList nList1 = doc.getElementsByTagName("suggestion");
Node nNode1 = nList1.item(temp);
if (nNode1.getNodeType() == Node.ELEMENT_NODE) {
org.w3c.dom.Element eElement = (org.w3c.dom.Element) nNode1;
suggestions.add(eElement.getAttribute("data"));
}
}
}else {
Log.e(TAG, "GREAT FUCKING ERROR");
}
return suggestions;
}
}

View file

@ -0,0 +1,54 @@
package org.schabi.newpipe.crawler.services.youtube;
import org.schabi.newpipe.crawler.Downloader;
import org.schabi.newpipe.crawler.StreamingService;
import org.schabi.newpipe.crawler.VideoExtractor;
import org.schabi.newpipe.crawler.SearchEngine;
/**
* Created by Christian Schabesberger on 23.08.15.
*
* Copyright (C) Christian Schabesberger 2015 <chris.schabesberger@mailbox.org>
* YoutubeService.java is part of NewPipe.
*
* NewPipe is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NewPipe is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
public class YoutubeService implements StreamingService {
@Override
public ServiceInfo getServiceInfo() {
ServiceInfo serviceInfo = new ServiceInfo();
serviceInfo.name = "Youtube";
return serviceInfo;
}
@Override
public VideoExtractor getExtractorInstance(String url, Downloader downloader) {
if(acceptUrl(url)) {
return new YoutubeVideoExtractor(url, downloader);
}
else {
throw new IllegalArgumentException("supplied String is not a valid Youtube URL");
}
}
@Override
public SearchEngine getSearchEngineInstance() {
return new YoutubeSearchEngine();
}
@Override
public boolean acceptUrl(String videoUrl) {
return videoUrl.contains("youtube") ||
videoUrl.contains("youtu.be");
}
}

View file

@ -0,0 +1,651 @@
package org.schabi.newpipe.crawler.services.youtube;
import android.util.Log;
import android.util.Xml;
import org.json.JSONException;
import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.parser.Parser;
import org.mozilla.javascript.Context;
import org.mozilla.javascript.Function;
import org.mozilla.javascript.ScriptableObject;
import org.schabi.newpipe.crawler.Downloader;
import org.schabi.newpipe.crawler.VideoExtractor;
import org.schabi.newpipe.crawler.MediaFormat;
import org.schabi.newpipe.crawler.VideoInfo;
import org.schabi.newpipe.crawler.VideoPreviewInfo;
import org.xmlpull.v1.XmlPullParser;
import java.io.StringReader;
import java.net.URLDecoder;
import java.util.HashMap;
import java.util.Map;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Created by Christian Schabesberger on 06.08.15.
*
* Copyright (C) Christian Schabesberger 2015 <chris.schabesberger@mailbox.org>
* YoutubeVideoExtractor.java is part of NewPipe.
*
* NewPipe is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NewPipe is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
public class YoutubeVideoExtractor extends VideoExtractor {
private static final String TAG = YoutubeVideoExtractor.class.toString();
private final Document doc;
private JSONObject jsonObj;
private JSONObject playerArgs;
private int errorCode = VideoInfo.NO_ERROR;
private String errorMessage = "";
// static values
private static final String DECRYPTION_FUNC_NAME="decrypt";
// cached values
private static volatile String decryptionCode = "";
private Downloader downloader;
public YoutubeVideoExtractor(String pageUrl, Downloader dl) {
//most common videoInfo fields are now set in our superclass, for all services
super(pageUrl, dl);
downloader = dl;
String pageContent = downloader.download(cleanUrl(pageUrl));
doc = Jsoup.parse(pageContent, pageUrl);
//attempt to load the youtube js player JSON arguments
try {
String jsonString = matchGroup1("ytplayer.config\\s*=\\s*(\\{.*?\\});", pageContent);
//todo: implement this by try and catch. TESTING THE STRING AGAINST EMPTY IS CONSIDERED POOR STYLE !!!
if(jsonString.isEmpty()) {
errorCode = findErrorReason(doc);
return;
}
jsonObj = new JSONObject(jsonString);
playerArgs = jsonObj.getJSONObject("args");
} catch (Exception e) {//if this fails, the video is most likely not available.
// Determining why is done later.
videoInfo.errorCode = VideoInfo.ERROR_NO_SPECIFIED_ERROR;
Log.e(TAG, "Could not load JSON data for Youtube video \""+pageUrl+"\". This most likely means the video is unavailable");
}
//----------------------------------
// load and parse description code, if it isn't already initialised
//----------------------------------
if (decryptionCode.isEmpty()) {
try {
// The Youtube service needs to be initialized by downloading the
// js-Youtube-player. This is done in order to get the algorithm
// for decrypting cryptic signatures inside certain stream urls.
JSONObject ytAssets = jsonObj.getJSONObject("assets");
String playerUrl = ytAssets.getString("js");
if (playerUrl.startsWith("//")) {
playerUrl = "https:" + playerUrl;
}
decryptionCode = loadDecryptionCode(playerUrl);
} catch (Exception e){
Log.e(TAG, "Could not load decryption code for the Youtube service.");
e.printStackTrace();
}
}
}
@Override
public String getTitle() {
try {//json player args method
return playerArgs.getString("title");
} catch(JSONException je) {//html <meta> method
je.printStackTrace();
Log.w(TAG, "failed to load title from JSON args; trying to extract it from HTML");
} try { // fall through to fall-back
return doc.select("meta[name=title]").attr("content");
} catch (Exception e) {
Log.e(TAG, "failed permanently to load title.");
e.printStackTrace();
return "";
}
}
@Override
public String getDescription() {
try {
return doc.select("p[id=\"eow-description\"]").first().html();
} catch (Exception e) {//todo: add fallback method
Log.e(TAG, "failed to load description.");
e.printStackTrace();
return "";
}
}
@Override
public String getUploader() {
try {//json player args method
return playerArgs.getString("author");
} catch(JSONException je) {
je.printStackTrace();
Log.w(TAG, "failed to load uploader name from JSON args; trying to extract it from HTML");
} try {//fall through to fallback HTML method
return doc.select("div.yt-user-info").first().text();
} catch (Exception e) {
e.printStackTrace();
Log.e(TAG, "failed permanently to load uploader name.");
return "";
}
}
@Override
public int getLength() {
try {
return playerArgs.getInt("length_seconds");
} catch (JSONException je) {//todo: find fallback method
Log.e(TAG, "failed to load video duration from JSON args");
je.printStackTrace();
return -1;
}
}
@Override
public long getViews() {
try {
String viewCountString = doc.select("meta[itemprop=interactionCount]").attr("content");
return Long.parseLong(viewCountString);
} catch (Exception e) {//todo: find fallback method
Log.e(TAG, "failed to number of views");
e.printStackTrace();
return -1;
}
}
@Override
public String getUploadDate() {
try {
return doc.select("meta[itemprop=datePublished]").attr("content");
} catch (Exception e) {//todo: add fallback method
Log.e(TAG, "failed to get upload date.");
e.printStackTrace();
return "";
}
}
@Override
public String getThumbnailUrl() {
//first attempt getting a small image version
//in the html extracting part we try to get a thumbnail with a higher resolution
// Try to get high resolution thumbnail if it fails use low res from the player instead
try {
return doc.select("link[itemprop=\"thumbnailUrl\"]").first().attr("abs:href");
} catch(Exception e) {
Log.w(TAG, "Could not find high res Thumbnail. Using low res instead");
//fall through to fallback
} try {
return playerArgs.getString("thumbnail_url");
} catch (JSONException je) {
je.printStackTrace();
Log.w(TAG, "failed to extract thumbnail URL from JSON args; trying to extract it from HTML");
return "";
}
}
@Override
public String getUploaderThumbnailUrl() {
try {
return doc.select("a[class*=\"yt-user-photo\"]").first()
.select("img").first()
.attr("abs:data-thumb");
} catch (Exception e) {//todo: add fallback method
Log.e(TAG, "failed to get uploader thumbnail URL.");
e.printStackTrace();
return "";
}
}
@Override
public VideoInfo.AudioStream[] getAudioStreams() {
try {
String dashManifest = playerArgs.getString("dashmpd");
return parseDashManifest(dashManifest, decryptionCode);
} catch (NullPointerException e) {
Log.e(TAG, "Could not find \"dashmpd\" upon the player args (maybe no dash manifest available).");
} catch (Exception e) {
e.printStackTrace();
}
return new VideoInfo.AudioStream[0];
}
@Override
public VideoInfo.VideoStream[] getVideoStreams() {
try{
//------------------------------------
// extract video stream url
//------------------------------------
String encoded_url_map = playerArgs.getString("url_encoded_fmt_stream_map");
Vector<VideoInfo.VideoStream> videoStreams = new Vector<>();
for(String url_data_str : encoded_url_map.split(",")) {
Map<String, String> tags = new HashMap<>();
for(String raw_tag : Parser.unescapeEntities(url_data_str, true).split("&")) {
String[] split_tag = raw_tag.split("=");
tags.put(split_tag[0], split_tag[1]);
}
int itag = Integer.parseInt(tags.get("itag"));
String streamUrl = URLDecoder.decode(tags.get("url"), "UTF-8");
// if video has a signature: decrypt it and add it to the url
if(tags.get("s") != null) {
streamUrl = streamUrl + "&signature=" + decryptSignature(tags.get("s"), decryptionCode);
}
if(resolveFormat(itag) != -1) {
videoStreams.add(new VideoInfo.VideoStream(
streamUrl,
resolveFormat(itag),
resolveResolutionString(itag)));
}
}
return videoStreams.toArray(new VideoInfo.VideoStream[videoStreams.size()]);
} catch (Exception e) {
Log.e(TAG, "Failed to get video stream");
e.printStackTrace();
return new VideoInfo.VideoStream[0];
}
}
/**These lists only contain itag formats that are supported by the common Android Video player.
However if you are looking for a list showing all itag formats, look at
https://github.com/rg3/youtube-dl/issues/1687 */
@SuppressWarnings("WeakerAccess")
public static int resolveFormat(int itag) {
switch(itag) {
// video
case 17: return MediaFormat.v3GPP.id;
case 18: return MediaFormat.MPEG_4.id;
case 22: return MediaFormat.MPEG_4.id;
case 36: return MediaFormat.v3GPP.id;
case 37: return MediaFormat.MPEG_4.id;
case 38: return MediaFormat.MPEG_4.id;
case 43: return MediaFormat.WEBM.id;
case 44: return MediaFormat.WEBM.id;
case 45: return MediaFormat.WEBM.id;
case 46: return MediaFormat.WEBM.id;
default:
//Log.i(TAG, "Itag " + Integer.toString(itag) + " not known or not supported.");
return -1;
}
}
@SuppressWarnings("WeakerAccess")
public static String resolveResolutionString(int itag) {
switch(itag) {
case 17: return "144p";
case 18: return "360p";
case 22: return "720p";
case 36: return "240p";
case 37: return "1080p";
case 38: return "1080p";
case 43: return "360p";
case 44: return "480p";
case 45: return "720p";
case 46: return "1080p";
default:
//Log.i(TAG, "Itag " + Integer.toString(itag) + " not known or not supported.");
return null;
}
}
@SuppressWarnings("WeakerAccess")
@Override
public String getVideoId(String url) {
String id;
String pat;
if(url.contains("youtube")) {
pat = "youtube\\.com/watch\\?v=([\\-a-zA-Z0-9_]{11})";
}
else if(url.contains("youtu.be")) {
pat = "youtu\\.be/([a-zA-Z0-9_-]{11})";
}
else {
Log.e(TAG, "Error could not parse url: " + url);
return "";
}
id = matchGroup1(pat, url);
if(!id.isEmpty()){
//Log.i(TAG, "string \""+url+"\" matches!");
return id;
}
//Log.i(TAG, "string \""+url+"\" does not match.");
return "";
}
@SuppressWarnings("WeakerAccess")
@Override
public String getVideoUrl(String videoId) {
return "https://www.youtube.com/watch?v=" + videoId;
}
/**Attempts to parse (and return) the offset to start playing the video from.
* @return the offset (in seconds), or 0 if no timestamp is found.*/
@Override
public int getTimeStamp(){
String timeStamp = matchGroup1("((#|&)t=\\d{0,3}h?\\d{0,3}m?\\d{1,3}s?)", pageUrl);
//TODO: test this
if(!timeStamp.isEmpty()) {
String secondsString = matchGroup1("(\\d{1,3})s", timeStamp);
String minutesString = matchGroup1("(\\d{1,3})m", timeStamp);
String hoursString = matchGroup1("(\\d{1,3})h", timeStamp);
if(secondsString.isEmpty()//if nothing was got,
&& minutesString.isEmpty()//treat as unlabelled seconds
&& hoursString.isEmpty())
secondsString = matchGroup1("t=(\\d{1,3})", timeStamp);
int seconds = (secondsString.isEmpty() ? 0 : Integer.parseInt(secondsString));
int minutes = (minutesString.isEmpty() ? 0 : Integer.parseInt(minutesString));
int hours = (hoursString.isEmpty() ? 0 : Integer.parseInt(hoursString));
int ret = seconds + (60*minutes) + (3600*hours);//don't trust BODMAS!
//Log.d(TAG, "derived timestamp value:"+ret);
return ret;
//the ordering varies internationally
}//else, return default 0
return 0;
}
@Override
public VideoInfo getVideoInfo() {
//todo: @medovax i like your work, but what the fuck:
videoInfo = super.getVideoInfo();
if(errorCode == VideoInfo.NO_ERROR) {
//todo: replace this with a call to getVideoId, if possible
videoInfo.id = matchGroup1("v=([0-9a-zA-Z_-]{11})", pageUrl);
if (videoInfo.audioStreams == null
|| videoInfo.audioStreams.length == 0) {
Log.e(TAG, "uninitialised audio streams!");
}
if (videoInfo.videoStreams == null
|| videoInfo.videoStreams.length == 0) {
Log.e(TAG, "uninitialised video streams!");
}
videoInfo.age_limit = 0;
//average rating
try {
videoInfo.average_rating = playerArgs.getString("avg_rating");
} catch (JSONException e) {
e.printStackTrace();
}
//---------------------------------------
// extracting information from html page
//---------------------------------------
/* Code does not work here anymore.
// Determine what went wrong when the Video is not available
if(videoInfo.errorCode == VideoInfo.ERROR_NO_SPECIFIED_ERROR) {
if(doc.select("h1[id=\"unavailable-message\"]").first().text().contains("GEMA")) {
videoInfo.videoAvailableStatus = VideoInfo.VIDEO_UNAVAILABLE_GEMA;
}
}
*/
String likesString = "";
String dislikesString = "";
try {
// likes
likesString = doc.select("button.like-button-renderer-like-button").first()
.select("span.yt-uix-button-content").first().text();
videoInfo.like_count = Integer.parseInt(likesString.replaceAll("[^\\d]", ""));
// dislikes
dislikesString = doc.select("button.like-button-renderer-dislike-button").first()
.select("span.yt-uix-button-content").first().text();
videoInfo.dislike_count = Integer.parseInt(dislikesString.replaceAll("[^\\d]", ""));
} catch (NumberFormatException nfe) {
Log.e(TAG, "failed to parse likesString \"" + likesString + "\" and dislikesString \"" +
dislikesString + "\" as integers");
} catch (Exception e) {
// if it fails we know that the video does not offer dislikes.
e.printStackTrace();
videoInfo.like_count = 0;
videoInfo.dislike_count = 0;
}
// next video
videoInfo.nextVideo = extractVideoPreviewInfo(doc.select("div[class=\"watch-sidebar-section\"]").first()
.select("li").first());
// related videos
Vector<VideoPreviewInfo> relatedVideos = new Vector<>();
for (Element li : doc.select("ul[id=\"watch-related\"]").first().children()) {
// first check if we have a playlist. If so leave them out
if (li.select("a[class*=\"content-link\"]").first() != null) {
relatedVideos.add(extractVideoPreviewInfo(li));
}
}
//todo: replace conversion
videoInfo.relatedVideos = relatedVideos;
//videoInfo.relatedVideos = relatedVideos.toArray(new VideoPreviewInfo[relatedVideos.size()]);
}
return videoInfo;
}
@Override
public int getErrorCode() {
return errorCode;
}
@Override
public String getErrorMessage() {
return errorMessage;
}
private VideoInfo.AudioStream[] parseDashManifest(String dashManifest, String decryptoinCode) {
if(!dashManifest.contains("/signature/")) {
String encryptedSig = matchGroup1("/s/([a-fA-F0-9\\.]+)", dashManifest);
String decryptedSig;
decryptedSig = decryptSignature(encryptedSig, decryptoinCode);
dashManifest = dashManifest.replace("/s/" + encryptedSig, "/signature/" + decryptedSig);
}
String dashDoc = downloader.download(dashManifest);
Vector<VideoInfo.AudioStream> audioStreams = new Vector<>();
try {
XmlPullParser parser = Xml.newPullParser();
parser.setInput(new StringReader(dashDoc));
String tagName = "";
String currentMimeType = "";
int currentBandwidth = -1;
int currentSamplingRate = -1;
boolean currentTagIsBaseUrl = false;
for(int eventType = parser.getEventType();
eventType != XmlPullParser.END_DOCUMENT;
eventType = parser.next() ) {
switch(eventType) {
case XmlPullParser.START_TAG:
tagName = parser.getName();
if(tagName.equals("AdaptationSet")) {
currentMimeType = parser.getAttributeValue(XmlPullParser.NO_NAMESPACE, "mimeType");
} else if(tagName.equals("Representation") && currentMimeType.contains("audio")) {
currentBandwidth = Integer.parseInt(
parser.getAttributeValue(XmlPullParser.NO_NAMESPACE, "bandwidth"));
currentSamplingRate = Integer.parseInt(
parser.getAttributeValue(XmlPullParser.NO_NAMESPACE, "audioSamplingRate"));
} else if(tagName.equals("BaseURL")) {
currentTagIsBaseUrl = true;
}
break;
case XmlPullParser.TEXT:
if(currentTagIsBaseUrl &&
(currentMimeType.contains("audio"))) {
int format = -1;
if(currentMimeType.equals(MediaFormat.WEBMA.mimeType)) {
format = MediaFormat.WEBMA.id;
} else if(currentMimeType.equals(MediaFormat.M4A.mimeType)) {
format = MediaFormat.M4A.id;
}
audioStreams.add(new VideoInfo.AudioStream(parser.getText(),
format, currentBandwidth, currentSamplingRate));
}
//missing break here?
case XmlPullParser.END_TAG:
if(tagName.equals("AdaptationSet")) {
currentMimeType = "";
} else if(tagName.equals("BaseURL")) {
currentTagIsBaseUrl = false;
}//no break needed here
}
}
} catch(Exception e) {
e.printStackTrace();
}
return audioStreams.toArray(new VideoInfo.AudioStream[audioStreams.size()]);
}
/**Provides information about links to other videos on the video page, such as related videos.
* This is encapsulated in a VideoPreviewInfo object,
* which is a subset of the fields in a full VideoInfo.*/
private VideoPreviewInfo extractVideoPreviewInfo(Element li) {
VideoPreviewInfo info = new VideoPreviewInfo();
info.webpage_url = li.select("a.content-link").first()
.attr("abs:href");
try {
info.id = matchGroup1("v=([0-9a-zA-Z-]*)", info.webpage_url);
} catch (Exception e) {
e.printStackTrace();
}
//todo: check NullPointerException causing
info.title = li.select("span.title").first().text();
//this page causes the NullPointerException, after finding it by searching for "tjvg":
//https://www.youtube.com/watch?v=Uqg0aEhLFAg
//this line is unused
//String views = li.select("span.view-count").first().text();
//Log.i(TAG, "title:"+info.title);
//Log.i(TAG, "view count:"+views);
try {
info.view_count = Long.parseLong(li.select("span.view-count")
.first().text().replaceAll("[^\\d]", ""));
} catch (NullPointerException e) {//related videos sometimes have no view count
info.view_count = 0;
}
info.uploader = li.select("span.g-hovercard").first().text();
info.duration = li.select("span.video-time").first().text();
Element img = li.select("img").first();
info.thumbnail_url = img.attr("abs:src");
// Sometimes youtube sends links to gif files which somehow seem to not exist
// anymore. Items with such gif also offer a secondary image source. So we are going
// to use that if we caught such an item.
if(info.thumbnail_url.contains(".gif")) {
info.thumbnail_url = img.attr("data-thumb");
}
if(info.thumbnail_url.startsWith("//")) {
info.thumbnail_url = "https:" + info.thumbnail_url;
}
return info;
}
private String loadDecryptionCode(String playerUrl) {
String playerCode = downloader.download(playerUrl);
String decryptionFuncName = "";
String decryptionFunc = "";
String helperObjectName;
String helperObject = "";
String callerFunc = "function " + DECRYPTION_FUNC_NAME + "(a){return %%(a);}";
String decryptionCode;
try {
decryptionFuncName = matchGroup1("\\.sig\\|\\|([a-zA-Z0-9$]+)\\(", playerCode);
String functionPattern = "(" + decryptionFuncName.replace("$", "\\$") +"=function\\([a-zA-Z0-9_]*\\)\\{.+?\\})";
decryptionFunc = "var " + matchGroup1(functionPattern, playerCode) + ";";
helperObjectName = matchGroup1(";([A-Za-z0-9_\\$]{2})\\...\\(", decryptionFunc);
String helperPattern = "(var " + helperObjectName.replace("$", "\\$") + "=\\{.+?\\}\\};)";
helperObject = matchGroup1(helperPattern, playerCode);
} catch (Exception e) {
e.printStackTrace();
}
callerFunc = callerFunc.replace("%%", decryptionFuncName);
decryptionCode = helperObject + decryptionFunc + callerFunc;
return decryptionCode;
}
private String decryptSignature(String encryptedSig, String decryptionCode) {
Context context = Context.enter();
context.setOptimizationLevel(-1);
Object result = null;
try {
ScriptableObject scope = context.initStandardObjects();
context.evaluateString(scope, decryptionCode, "decryptionCode", 1, null);
Function decryptionFunc = (Function) scope.get("decrypt", scope);
result = decryptionFunc.call(context, scope, scope, new Object[]{encryptedSig});
} catch (Exception e) {
e.printStackTrace();
}
Context.exit();
return (result == null ? "" : result.toString());
}
private String cleanUrl(String complexUrl) {
return getVideoUrl(getVideoId(complexUrl));
}
private String matchGroup1(String pattern, String input) {
Pattern pat = Pattern.compile(pattern);
Matcher mat = pat.matcher(input);
boolean foundMatch = mat.find();
if (foundMatch) {
return mat.group(1);
}
else {
Log.e(TAG, "failed to find pattern \""+pattern+"\" inside of \""+input+"\"");
new Exception("failed to find pattern \""+pattern+"\"").printStackTrace();
return "";
}
}
private int findErrorReason(Document doc) {
errorMessage = doc.select("h1[id=\"unavailable-message\"]").first().text();
if(errorMessage.contains("GEMA")) {
return VideoInfo.ERROR_BLOCKED_BY_GEMA;
}
return VideoInfo.ERROR_NO_SPECIFIED_ERROR;
}
}