Hi all, finally MyMovies.it scraper seems to be finished.
I used the common tmdb.xml include for the fanart part.
Tested by me and KoTiX with very good results.
In the settings you can disable trailers and fanart (fanart is slow because of TMDb).
It can get thumbs, fanart (tmdb) and trailers.....all.
It can't get much more complete, considering also that mymovies.it is not
too "stable", because the site is still changing.
Code:
<?xml version="1.0" encoding="utf-8"?>
<scraper name="MyMovies.it" date="2009-09-09" content="movies" framework="1.0" thumb="MyMovies.png" language="it">
<include>common/tmdb.xml</include>
<GetSettings dest="3">
  <!--
    Builds the scraper settings document.
    The two inner RegExp elements each append one boolean <setting> to
    buffer 5; the outer RegExp then wraps buffer 5 in <settings> and stores
    the result in buffer 3, the buffer named by this function's dest.
    Markup inside output="..." is entity-escaped so that this file is
    well-formed XML; the framework sees the unescaped text after parsing.
  -->
  <RegExp input="$$5" output="&lt;settings&gt;\1&lt;/settings&gt;" dest="3">
    <!-- id="backdrops": referenced by conditional="backdrops" in GetDetails. -->
    <RegExp input="$$1" output="&lt;setting label=&quot;Get TMDB Backdrops (Very slow)&quot; type=&quot;bool&quot; id=&quot;backdrops&quot; default=&quot;true&quot;&gt;&lt;/setting&gt;" dest="5+">
      <expression></expression>
    </RegExp>
    <!-- id="trailer": referenced by conditional="trailer" in GetMovieTrailer. -->
    <RegExp input="$$1" output="&lt;setting label=&quot;Get Trailer&quot; type=&quot;bool&quot; id=&quot;trailer&quot; default=&quot;true&quot;&gt;&lt;/setting&gt;" dest="5+">
      <expression></expression>
    </RegExp>
    <expression noclean="1"></expression>
  </RegExp>
</GetSettings>
<NfoUrl dest="3">
  <!--
    Looks in the input buffer ($$1) for a MyMovies.it review URL of the form
    .../dizionario/recensione.asp?id=<digits> and copies the matched URL
    unchanged into buffer 3.
  -->
  <RegExp dest="3" input="$$1" output="\1">
    <expression noclean="1">(http://www\.mymovies\.it/dizionario/recensione\.asp\?id=[0-9]+)</expression>
  </RegExp>
</NfoUrl>
<CreateSearchUrl dest="3">
  <!--
    Produces the MyMovies.it search URL: the whole input buffer ($$1) is
    captured by the empty expression and substituted for \1 in the q= query
    parameter. The finished URL is stored in buffer 3.
  -->
  <RegExp dest="3" input="$$1" output="http://www.mymovies.it/database/ricerca/default.asp?q=\1">
    <expression noclean="1"></expression>
  </RegExp>
</CreateSearchUrl>
<GetSearchResults dest="8">
  <!--
    Converts the search result page into a <results> document.
    Inner RegExp: for every hit on the page (repeat="yes") emits one <entity>
    into buffer 5. Capture groups of the expression:
      \1 = numeric film id from the recensione.asp link
      \2 = link text (film title)
      \3 = numeric id from the biografia link (captured but not used)
      \4 = text of the biografia link (shown after the year in the title)
      \5 = value following "anno=" (shown as the year in the title)
    Outer RegExp: wraps buffer 5 in an XML declaration plus <results> and
    stores it in buffer 8.
    All markup in output attributes and expressions is entity-escaped so the
    scraper file itself is well-formed XML.
  -->
  <RegExp input="$$5" output="&lt;?xml version=&quot;1.0&quot; encoding=&quot;iso-8859-1&quot; standalone=&quot;yes&quot;?&gt;&lt;results&gt;\1&lt;/results&gt;" dest="8">
    <RegExp input="$$1" output="&lt;entity&gt;&lt;title&gt;\2 (\5, \4)&lt;/title&gt;&lt;url&gt;http://www.mymovies.it/dizionario/recensione.asp?id=\1&lt;/url&gt;&lt;id&gt;\1&lt;/id&gt;&lt;/entity&gt;" dest="5">
      <expression repeat="yes" noclean="1,3">&lt;h3 style="margin:0px;"&gt;[^&lt;]*&lt;a href="http://www\.mymovies\.it/dizionario/recensione\.asp\?id=([0-9]+)" title="[^"]+"&gt;([^&lt;]+)&lt;/a&gt;.+?&lt;div class="linkblu2" style="padding-right:7px; text-align:justify;"&gt;[\s]+Un film di &lt;b&gt;[^&lt;]*&lt;a href="http://www\.mymovies\.it/biografia/\?r=([0-9]+)"&gt;([^&lt;]+)&lt;/[ab]&gt;[^;]+anno=([^"]+)</expression>
    </RegExp>
    <expression noclean="1"></expression>
  </RegExp>
</GetSearchResults>
<GetDetails clearbuffers="no" dest="3">
  <!--
    Extracts the movie details from the film page held in $$1.
    Each inner RegExp writes one piece of metadata to buffer 5 (the first one
    overwrites with dest="5", the rest append with dest="5+"); the outer
    RegExp finally wraps buffer 5 in <details> and stores it in buffer 3.
    Changes compared with the posted version:
      * markup and quotes in output attributes, and "<" and "&" in expression
        text, are entity-escaped so the file is well-formed XML;
      * the rating output now keeps a decimal point between the two captured
        digit groups (it previously concatenated them, so "3,25" became "325").
  -->
  <RegExp input="$$5" output="&lt;details&gt;\1&lt;/details&gt;" dest="3">

    <!-- Title and four-digit year, both taken from the page <title>. -->
    <RegExp input="$$1" output="&lt;title&gt;\1&lt;/title&gt;&lt;year&gt;\2&lt;/year&gt;" dest="5">
      <expression noclean="1,2">&lt;title&gt;([^\(]+) \(([0-9]{4})</expression>
    </RegExp>

    <!-- Genre: text of the "Film ..." link; \1 inside the expression is a
         regex back-reference to the genre name captured from the title attribute. -->
    <RegExp input="$$1" output="&lt;genre&gt;\2&lt;/genre&gt;" dest="5+">
      <expression noclean="1">&lt;a title="Film ([^"]+)" href="http://www.mymovies.it/film/\1/"&gt;([^&lt;]+)</expression>
    </RegExp>

    <!-- Runtime in minutes, from "durata NN min." -->
    <RegExp input="$$1" output="&lt;runtime&gt;\1&lt;/runtime&gt;" dest="5+">
      <expression noclean="1">durata ([0-9]+) min\.</expression>
    </RegExp>

    <!-- Director: everything between "Un film di " and " con". -->
    <RegExp input="$$1" output="&lt;director&gt;\1&lt;/director&gt;" dest="5+">
      <expression noclean="1" trim="1">Un film di (.+?) con</expression>
    </RegExp>

    <!-- Tagline: content of the strong element with class rec_lancio. -->
    <RegExp input="$$1" output="&lt;tagline&gt;\1&lt;/tagline&gt;" dest="5+">
      <expression noclean="1" trim="1"> &lt;strong class="rec_lancio" &gt;([^&lt;]+)&lt;/strong&gt;</expression>
    </RegExp>

    <!-- Plot: paragraph text that follows the leading linked image in the review cell. -->
    <RegExp input="$$1" output="&lt;plot&gt;\1&lt;/plot&gt;" dest="5+">
      <expression repeat="yes" trim="1">&lt;td rowspan="2" valign="top"&gt;[\s]+&lt;p&gt;[\s]+[^&gt;]+&gt;[\s]+[^&gt;]+/&gt;[\s]+&lt;/a&gt;[\s]+(.+) &lt;/p&gt;</expression>
    </RegExp>

    <!-- Thumbnail: src of the floated poster image on the film page. -->
    <RegExp input="$$1" output="&lt;thumb&gt;\1&lt;/thumb&gt;" dest="5+">
      <expression noclean="1">&lt;img style="float:left; border:solid 1px gray; padding:3px; margin:5px;" src="([^"]+)" width="[0-9]+px" height="[0-9]+px" alt="[^"]+" /&gt;</expression>
    </RegExp>

    <!-- Follow the "Poster" tab link with the GetPosters function. -->
    <RegExp input="$$1" output="&lt;url function=&quot;GetPosters&quot;&gt;\1&lt;/url&gt;" dest="5+">
      <expression noclean="1">&lt;td class="rec_link_disattivo"&gt;&lt;a title="[^"]+" href="([^"]+)"&gt;Poster&lt;/a&gt;&lt;/td&gt;</expression>
    </RegExp>

    <!-- Follow the "Trailer" tab link with the GetMovieTrailer function. -->
    <RegExp input="$$1" output="&lt;url function=&quot;GetMovieTrailer&quot;&gt;\1&lt;/url&gt;" dest="5+">
      <expression>&lt;td class="rec_link_disattivo"&gt;&lt;a title="[^"]+" href="([^"]+)"&gt;Trailer&lt;/a&gt;&lt;/td&gt;</expression>
    </RegExp>

    <!--
      Fanart (only when the "backdrops" setting is enabled).
      The nested steps build an IMDb search query in buffer 7:
        buffer 4 = title taken from the page <title>
        buffer 4 = "title (year)"
        buffer 6 = buffer 4 with each matched space replaced by "+"
        buffer 7 = buffer 6 re-assembled from the runs that contain no "&"
      The resulting find URL is handed to GetTMDBFanartByIMDBId, which is not
      defined in this file (presumably provided by the common/tmdb.xml include).
    -->
    <RegExp conditional="backdrops" input="$$7" output="&lt;url function=&quot;GetTMDBFanartByIMDBId&quot;&gt;http://www.imdb.it/find?s=all&amp;q=\1&lt;/url&gt;" dest="5+">
      <RegExp input="$$1" output="\1" dest="4">
        <expression noclean="1" trim="1">&lt;title&gt;([^\(]+) \(</expression>
      </RegExp>
      <RegExp input="$$1" output="$$4 (\1)" dest="4">
        <expression noclean="1">&lt;title&gt;[^\(]+ \(([0-9]{4})</expression>
      </RegExp>
      <RegExp input="$$4" output="\1+\2" dest="6">
        <expression repeat="yes" noclean="1,2" trim="1">(.*?) ([^ ]*)</expression>
      </RegExp>
      <RegExp input="$$6" output="\1\2" dest="7">
        <expression repeat="yes" noclean="1,2" trim="1">(.*?)([^&amp;]*)</expression>
      </RegExp>
      <expression noclean="1"></expression>
    </RegExp>

    <!-- Rating: the page prints it with a decimal comma ("N,NN/max"); emit it
         with a decimal point so it reads as a number. The scale (\3) is not used. -->
    <RegExp input="$$1" output="&lt;rating&gt;\1.\2&lt;/rating&gt;" dest="5+">
      <expression>&lt;div style="text-align:center; font-size:23px; font-weight:bold; letter-spacing:1px; margin:0px 11px 7px 11px"&gt;([0-9]+)\,([0-9]+)&lt;span style="font-size:11px"&gt;/([^&lt;]+)&lt;/span</expression>
    </RegExp>

    <!-- Follow the "Cast" tab link with the GetMovieCast function. -->
    <RegExp input="$$1" output="&lt;url function=&quot;GetMovieCast&quot;&gt;\1&lt;/url&gt;" dest="5+">
      <expression>&lt;td class="rec_link_disattivo"&gt;&lt;a title="[^"]+" href="([^"]+)"&gt;Cast&lt;/a&gt;&lt;/td&gt;</expression>
    </RegExp>

    <!-- Studio: text after the first "-" that follows a numeric link, up to the next <strong>. -->
    <RegExp input="$$1" output="&lt;studio&gt;\1&lt;/studio&gt;" dest="5+">
      <expression noclean="1" trim="1">&gt;[0-9]+&lt;/a&gt;&lt;/strong&gt;.[^-]+-([^&lt;]+)&lt;strong&gt;</expression>
    </RegExp>

    <!-- MPAA/certification: link text found after the "ratings: " label. -->
    <RegExp input="$$1" output="&lt;mpaa&gt;\1&lt;/mpaa&gt;" dest="5+">
      <expression noclean="1">ratings: [^=]+=[^=]+=[^&gt;]+&gt;([^&lt;]+)&lt;/a&gt;&lt;/strong&gt;</expression>
    </RegExp>

    <expression noclean="1"></expression>
  </RegExp>
</GetDetails>
<GetMovieTrailer dest="5">
  <!--
    Runs only when the "trailer" setting is enabled. Captures the value that
    follows "file= (up to the next "&") on the trailer page and returns it as
    <details><trailer>...</trailer></details> in buffer 5.
    The output markup and the "&" inside the character class are
    entity-escaped so the file is well-formed XML.
  -->
  <RegExp conditional="trailer" input="$$1" output="&lt;details&gt;&lt;trailer&gt;\1&lt;/trailer&gt;&lt;/details&gt;" dest="5+">
    <expression noclean="1">"file=([^&amp;]+)</expression>
  </RegExp>
</GetMovieTrailer>
<GetPosters dest="5">
  <!--
    Collects poster images from the poster page held in $$1.
    Innermost RegExp: images that appear directly inside a grey table cell are
    emitted as <thumb> using their src as-is; dest="6" overwrites buffer 6.
    Middle RegExp: images that are wrapped in a link are emitted with a
    rebuilt URL, where the three numeric path segments (\2/\3/\4) are kept and
    the "imm" file-name prefix is replaced by "locandina" (\5 is the remainder
    of the file name); dest="6+" appends to buffer 6.
    Outer RegExp: wraps buffer 6 in <details> and appends it to buffer 5.
    All markup in output attributes and expressions is entity-escaped so the
    file is well-formed XML.
  -->
  <RegExp input="$$6" output="&lt;details&gt;\1&lt;/details&gt;" dest="5+">
    <RegExp input="$$1" output="&lt;thumb&gt;http://www.mymovies.it/filmclub/\2/\3/\4/locandina\5&lt;/thumb&gt;" dest="6+">
      <RegExp input="$$1" output="&lt;thumb&gt;\1&lt;/thumb&gt;" dest="6">
        <expression repeat="yes" noclean="1">&lt;td align="center" valign="middle" style="background-color:#eeeeee; border:solid 1px #AEAEAE;"&gt;[\s]+&lt;img width="[0-9]+" style="margin-top:[0-9]+px; margin-bottom:[0-9]+px;" title="[^"]+" alt="[^"]+" src="([^"]+)" /&gt;</expression>
      </RegExp>
      <expression repeat="yes" noclean="1">&lt;td align="center" valign="middle" style="background-color:#eeeeee; border:solid 1px #AEAEAE;"&gt;[\s]+&lt;a href="([^"]+)"&gt;&lt;[^"]+"[^"]+"[^"]+"[^"]+"[^"]+"[^"]+"[^"]+"[^"]+"[\s]+src="http://www.mymovies.it/filmclub/([0-9]+)/([0-9]+)/([0-9]+)/imm([^"]+)"</expression>
    </RegExp>
    <expression noclean="1"></expression>
  </RegExp>
</GetPosters>
<GetMovieCast dest="5">
  <!--
    Builds the cast list from the cast page held in $$1.
    Inner RegExp: for every match (repeat="yes") appends one <actor> to
    buffer 2, using
      \1 = image src   (actor thumb)
      \2 = image alt   (actor name)
      \3 = text of the second <div> after the name link (role)
    Outer RegExp: wraps buffer 2 in <details> and appends it to buffer 5.
    All markup in output attributes and the expression is entity-escaped so
    the file is well-formed XML.
  -->
  <RegExp input="$$2" output="&lt;details&gt;\1&lt;/details&gt;" dest="5+">
    <RegExp input="$$1" output="&lt;actor&gt;&lt;name&gt;\2&lt;/name&gt;&lt;role&gt;\3&lt;/role&gt;&lt;thumb&gt;\1&lt;/thumb&gt;&lt;/actor&gt;" dest="2+">
      <expression repeat="yes" noclean="1">src="([^"]+)"[\s]+alt="([^"]+)" /&gt;[\s]+&lt;/a&gt;[\s]+&lt;div style=[^&gt;]+&gt;[\s]+&lt;a href="[^&gt;]+&gt;[^&lt;]+&lt;/a&gt;[\s]+&lt;div style="[^&gt;]+&gt;([^&lt;]+)&lt;/div&gt;</expression>
    </RegExp>
    <expression noclean="1"></expression>
  </RegExp>
</GetMovieCast>
</scraper>
vdrfan Wrote:Once there's a updated and working scraper please create a new trac ticket and attach the scraper so we can push it to SVN. Thanks.
Please push it to SVN yourself if you want to... I'm not very familiar with Trac.
Bye all