YouTube-Kommentare auslesen…

Dieses Posting im AppleScript-Forum hat mich auf die Idee gebracht, ein Skript zu schreiben, welches die Kommentare zu einem YouTube-Film ausliest…

Keine Ahnung, wofür das am Ende gut sein mag, aber der Rechner surft nun ohne mein Zutun im Netz und schreibt etwas in TextEdit – sieht richtig nach Arbeit aus ;-)
Code zum Markieren einmal anklicken

(**

16.08.2009

YoutubeCommentCopy v0.1 by hubionmac.com

Open a YouTube page in Safari with the text comments expanded (or "view all comments" open).

The script will copy them to a TextEdit document (author & comment).

**)

-- Seconds to wait after the "Next" link has been triggered before the freshly
-- loaded comments are read. Raise it if pages load slowly, lower it if they
-- load quickly.
property reload_delay : 10

-- Link text of the pagination link that loads the next page of comments.
-- Change this if YouTube is displayed in a language other than English.
property nextString : "Next"

-- Build the document header: timestamp plus movie name/URL.
-- A single `date` call is used so date and time cannot straddle a minute/day
-- boundary; the resulting format is "YYYY-MM-DD @ HH:MM".
set timestamp to do shell script "date '+%Y-%m-%d @ %H:%M'"
set headline to timestamp & return & get_info_headline()

tell application "TextEdit"
	set newdoc to make new document
end tell

-- The first page of comments goes in together with the headline.
set current_comments to getComment_list()
tell application "TextEdit"
	set text of newdoc to text of newdoc & headline & current_comments
end tell
scroll_down_text_edit()

-- check_if_next_exists() triggers the "Next" link and returns 1 when it found
-- one; keep appending pages until there is no such link any more.
repeat while check_if_next_exists() = 1
	-- give Safari time to load the next page of comments
	delay reload_delay
	set current_comments to getComment_list()
	tell application "TextEdit"
		set text of newdoc to text of newdoc & current_comments
	end tell
	scroll_down_text_edit()
end repeat

-- Returns the header text for the TextEdit document: the movie name and URL,
-- framed by separator lines and followed by a "Comments:" caption.
-- Both values are read in Safari's front document from the first link inside
-- the first <span> of the element with id "comment-video-info".
on get_info_headline()
	tell application "Safari"
		tell document 1
			-- The JavaScript is one AppleScript string: JS string literals use
			-- single quotes, and "\\n" / "\\t" reach JavaScript as \n / \t.
			-- The value of the last expression is what do JavaScript returns.
			return do JavaScript "
var link2Movie = document.getElementById('comment-video-info').getElementsByTagName('span')[0].getElementsByTagName('a')[0];
'—————————————————-\\nMovieName:\\t' + link2Movie.innerHTML + '\\nURL:\\t' + link2Movie.href + '\\n—————————————————-\\nComments:\\n—————————————————-\\n';
"
		end tell
	end tell
end get_info_headline

-- Brings TextEdit to the front and sends Command-Down Arrow to it, so the
-- view follows the text that was just appended.
on scroll_down_text_edit()
	tell application "TextEdit" to activate
	tell application "System Events"
		-- key code 125 is the Down Arrow key; this replaces the deprecated
		-- `ASCII character 31` keystroke.
		key code 125 using command down
	end tell
end scroll_down_text_edit

-- Looks in Safari's front document for a pagination link whose text equals the
-- property nextString and triggers its onclick handler.
-- Returns 1 when such a link was found (even if running its handler raised an
-- error), 0 when there is no such link.
-- NOTE: nextString is pasted into the JavaScript source unescaped, so it must
-- not contain a single quote or a backslash.
on check_if_next_exists()
	tell application "Safari"
		tell document 1
			return do JavaScript "
//link text to look for, injected from the AppleScript property nextString
var nextString = '" & nextString & "';
var clickedit = 0;
var comment_area = document.getElementById('recent_comments');
//no comment area at all means there is nothing to click
var divs = comment_area ? comment_area.getElementsByTagName('div') : [];
//stop as soon as one link was triggered, so a second pagination block
//on the page cannot cause a second click
for (var i = 0; i < divs.length && clickedit == 0; i++)
{
    if (divs[i].className != 'watch-comment-pagination') continue;
    var links = divs[i].getElementsByTagName('a');
    for (var k = 0; k < links.length; k++)
    {
        if (links[k].innerHTML == nextString)
        {
            //the onclick code sometimes raises an error when called this way,
            //so it is attempted and any error is deliberately ignored.
            //NOTE(review): this evals code taken from the page itself.
            try {
                eval(links[k].getAttribute('onclick'));
            } catch(e) {
            }
            clickedit = 1;
            break;
        }
    }
}
//result of the script: was there something to click or not
clickedit;
"
		end tell
	end tell
end check_if_next_exists

-- Collects the comments currently shown in Safari's front document and returns
-- them as text, one line per comment in the form "<author> <comment>".
-- Comments are the <div>s inside the element with id "recent_comments" whose
-- id contains "comment_body_<id>"; the author is read from the link with class
-- "watch-comment-auth" inside the element with id "<id>".
on getComment_list()
	tell application "Safari"
		tell document 1
			-- The JavaScript is one AppleScript string: JS string literals use
			-- single quotes, and "\\n" / "\\t" reach JavaScript as \n / \t.
			set h to do JavaScript "
//id prefix of the divs that hold one comment each
var comment_body_id_name = 'comment_body_';
var comment_area = document.getElementById('recent_comments');
var myoutput = new Array();
//without a comment area there is nothing to collect
var divs = comment_area ? comment_area.getElementsByTagName('div') : [];
//go through all divs to find the comment divs
for (var i = 0; i < divs.length; i++)
{
    if (divs[i].id.search(/comment_body_.+/) > -1)
    {
        var comment_id = divs[i].id.slice(comment_body_id_name.length);
        //reset for every comment so a missing author link cannot reuse
        //the name found for the previous comment
        var current_author = '';
        var comment_node = document.getElementById(comment_id);
        var links = comment_node ? comment_node.getElementsByTagName('a') : [];
        //go through the comment element to find the author link
        for (var j = 0; j < links.length; j++)
        {
            if (links[j].className == 'watch-comment-auth')
            {
                current_author = links[j].innerHTML;
                break;
            }
        }
        //now look for the comment text inside the comment div
        var inner = divs[i].getElementsByTagName('div');
        for (var k = 0; k < inner.length; k++)
        {
            if (inner[k].className == 'watch-comment-body')
            {
                //strip tabs, line breaks and <br> tags, then store author + comment
                myoutput.push(current_author + ' ' + inner[k].getElementsByTagName('div')[0].innerHTML.replace(/\\t/g, '').replace(/\\n/g, '').replace(/<br>/g, '') + '\\n');
                break;
            }
        }
    }
}
//just output the array as text
myoutput.join('');
"
			return h as text
		end tell
	end tell
end getComment_list