Thread Rating:
  • 0 Vote(s) - 0 Average
New Adult Movie Scraper - AEBN
#1
See Ticket #9367

Code:
<?xml version="1.0" encoding="utf-8"?><scraper framework="1.1" date="2010-06-09" name="AEBN.net" content="movies" thumb="aebn.jpg" language="en">
    <GetSettings dest="3">
        <RegExp input="$$5" output="&lt;settings&gt;\1&lt;/settings&gt;" dest="3">
            <RegExp input="$$1" output="&lt;setting label=&quot;Use Series Name As Movie Set&quot; type=&quot;bool&quot; default=&quot;false&quot; id=&quot;movieset&quot;&gt;&lt;/setting&gt;" dest="5">
                <expression />
            </RegExp>
            <RegExp input="$$1" output="&lt;setting label=&quot;Get Actor/Actress Images?&quot; type=&quot;bool&quot; default=&quot;false&quot; id=&quot;actorthumbs&quot;&gt;&lt;/setting&gt;" dest="5+">
                <expression />
            </RegExp>
            <RegExp input="" output="&lt;setting label=&quot;Use Alternative URL&quot; type=&quot;labelenum&quot; default=&quot;theater.aebn.net&quot; values=&quot;theater.aebn.net|www.freeonesondemand.com|vod.adultemart.com|dvd.sexyshare.net&quot; id=&quot;url&quot;&gt;&lt;/setting&gt;" dest="5+">
                <expression />
            </RegExp>
            <expression noclean="1" />
        </RegExp>
    </GetSettings>
    <CreateSearchUrl dest="3">
        <RegExp input="$$5" output="&lt;url&gt;\1&lt;/url&gt;" dest="3">
            <RegExp input="$$6" output="http://$INFO[url]/dispatcher/fts?userQuery=\1&amp;theaterId=822&amp;targetSearchMode=basic&amp;isSearchCriteriaReset=true&amp;searchType=movie&amp;count=120&amp;imageType=Large&amp;sortType=Relevance" dest="5">
                <RegExp input="$$1" output="\1+\2" dest="6">
                    <expression repeat="yes">(.*?)\%20([^(?:\%20)]*)</expression>
                </RegExp>
                <expression />
            </RegExp>
            <expression noclean="1" />
        </RegExp>
    </CreateSearchUrl>
    <GetSearchResults dest="3">
        <RegExp input="$$5" output="&lt;results&gt;\1&lt;/results&gt;" dest="3">
            <RegExp input="$$1" output="&lt;entity&gt;&lt;title&gt;\2&lt;/title&gt;&lt;id&gt;\1&lt;/id&gt;&lt;url&gt;http://$INFO[url]/dispatcher/movieDetail?movieId=\1&amp;tab=Description&amp;theaterId=822&lt;/url&gt;&lt;thumb&gt;\3&lt;/thumb&gt;&lt;/entity&gt;" dest="5">
                <expression repeat="yes" trim="1,2,3,4" noclean="1,4">&lt;a id="FTSMovieSearch_link_image_detail_[0-9]+" href="/dispatcher/movieDetail\?movieId=([0-9+]*)?&amp;amp;theaterId=822" title="([^\"]+)"&gt;&lt;img src="(http://pic.aebn.net/Stream/Movie/Boxcovers/[^\"]+)" alt="Box Cover" /&gt;&lt;/a&gt;</expression>
            </RegExp>
            <expression noclean="1" />
        </RegExp>
    </GetSearchResults>
    <!-- GetDetails: parses the movie detail page held in buffer 1.
         Every child RegExp contributes one fragment (title, id/thumbs, runtime,
         dates, studio, director, set, genres, actors, plot) to buffer 5; the
         outer RegExp wraps buffer 5 in <details>...</details> and writes the
         result to buffer 12. Buffers 7 and 8 are scratch space for the genre
         and actor blocks. The "+" suffix on dest presumably appends rather
         than overwrites; confirm against the scraper engine documentation. -->
    <GetDetails dest="12">
        <RegExp input="$$5" output="&lt;details&gt;\1&lt;/details&gt;" dest="12">
            <!-- Title: text of the movieDetailTitle div. -->
            <RegExp input="$$1" output="&lt;title&gt;\1&lt;/title&gt;" dest="5">
                <expression trim="1">&lt;div class="movieDetailTitle"&gt;(.*?)&lt;/div&gt;</expression>
            </RegExp>
            <!-- Id plus front (_xlf) and back (_xlb) box covers, all built from
                 buffer 2 (presumably the movie id passed in from the search
                 result; confirm).
                 NOTE(review): the first thumb uses spoof="http://$INFO[url]" but
                 the second uses spoof="$INFO[url]" without the scheme; confirm
                 which form is intended. -->
            <RegExp input="$$2" output="&lt;id&gt;\1&lt;/id&gt;&lt;thumb spoof=&quot;http://$INFO[url]&quot;&gt;http://pic.aebn.net/Stream/Movie/Boxcovers/a\1_xlf.jpg&lt;/thumb&gt;&lt;thumb spoof=&quot;$INFO[url]&quot;&gt;http://pic.aebn.net/Stream/Movie/Boxcovers/a\1_xlb.jpg&lt;/thumb&gt;" dest="5+">
                <expression />
            </RegExp>
            <!-- Runtime: content of the runTime div following the "Running Time:" label. -->
            <RegExp input="$$1" output="&lt;runtime&gt;\1&lt;/runtime&gt;" dest="5+">
                <expression>&lt;div class="detailsLabel"&gt;Running Time:&lt;/div&gt;\n[\s]*&lt;div class="runTime"&gt;([^&lt;]*)&lt;/div&gt;</expression>
            </RegExp>
            <!-- Release date: group 1 is the whole date text (premiered). The year is
                 group 2 (four digits after the last slash) or group 3 (a bare run
                 of up to four digits); only one of the two alternatives can match. -->
            <RegExp input="$$1" output="&lt;premiered&gt;\1&lt;/premiered&gt;&lt;year&gt;\2\3&lt;/year&gt;" dest="5+">
                <expression>&lt;div class="detailsLabel"&gt;Released:&lt;/div&gt;[\s]*&lt;div class="detailsLink"&gt;([0-9]{0,2}/*[0-9]{1,2}/([0-9]{4})|([0-9]{0,4}))&lt;/div&gt;</expression>
            </RegExp>
            <!-- Studio: link text of the studioDetail anchor after the "Studio:" label. -->
            <RegExp input="$$1" output="&lt;studio&gt;\1&lt;/studio&gt;" dest="5+">
                <expression trim="1">&lt;div class="detailsLabel"&gt;Studio:&lt;/div&gt;[\s]*?&lt;div class="detailsLink"&gt;[\s]*?&lt;a href="/dispatcher/studioDetail\?theaterId=822&amp;amp;studioId=[0-9]*"&gt;([^&lt;]*)&lt;/a&gt;</expression>
            </RegExp>
            <!-- Director: link text of the directorId search anchor; the pattern also
                 requires a literal "&" immediately after the closing </a>. -->
            <RegExp input="$$1" output="&lt;director&gt;\1&lt;/director&gt;" dest="5+">
                <expression>&lt;div class="detailsLabel"&gt;Director:&lt;/div&gt;[\s]*&lt;div class="detailsLink"&gt;\n[\s]*&lt;a href="/dispatcher/movieSearch\?directorId=[0-9]*&amp;amp;theaterId=822"&gt;([^&lt;]*)&lt;/a&gt;&amp;</expression>
            </RegExp>
            <!-- Movie set: series name, emitted only when the "movieset" setting is on. -->
            <RegExp conditional="movieset" input="$$1" output="&lt;set&gt;\1&lt;/set&gt;" dest="5+">
                <expression trim="1">&lt;div class="detailsLabel"&gt;Series:&lt;/div&gt;[\s]*&lt;div class="detailsLink"&gt;&lt;a href="/dispatcher/movieSearch\?theaterId=822&amp;amp;seriesId=[0-9]*" class="series"&gt;([^&lt;]*)&lt;/a&gt;</expression>
            </RegExp>
            <!-- Genres: one <genre> per categoryDetail link found in buffer 7. -->
            <RegExp input="$$7" output="&lt;genre&gt;\2&lt;/genre&gt;" dest="5+">
                <!-- Copies the "Categories:" block of the page into buffer 7. -->
                <RegExp input="$$1" output="\1" dest="7">
                    <expression noclean="1">&lt;div class="detailsLabel"&gt;Categories:&lt;/div&gt;(.*?)&lt;br class="clear" /&gt;</expression>
                </RegExp>
                <expression repeat="yes" trim="2">&lt;a href="/dispatcher/categoryDetail\?categoryId=([0-9]*).*?\1', \1, event\)" onmouseout="killPopUp\(\)"&gt;([^&lt;]*)&lt;/a&gt;</expression>
            </RegExp>
            <!-- Actors, following the "Get Multiple Actor Thumbs" approach: buffer 7 is
                 rebuilt with the actor entries and then copied into buffer 5. -->
            <RegExp input="$$7" output="\1" dest="5+">
                <!-- Copies the actors block (the long "starsFull" form or the short
                     "Stars:" form, whichever matches) into buffer 8. -->
                <RegExp input="$$1" output="\1\2" dest="8">
                    <expression clear="yes" noclean="1,2">(&lt;div class="starsFull detailsLink longDescription"&gt;.*?&lt;/div&gt;)|(&lt;div class="detailsLabel"&gt;Stars:&lt;/div&gt;[^&lt;]+&lt;div class="detailsLink shortDescription"&gt;.*?&lt;/div&gt;)</expression>
                </RegExp>
                <!-- Actors without thumbnails: plain starDetail links (no class
                     attribute) become name-only <actor> entries in buffer 7. -->
                <RegExp input="$$8" output="&lt;actor&gt;&lt;name&gt;\2&lt;/name&gt;&lt;/actor&gt;" dest="7">
                    <expression repeat="yes" clear="yes" trim="2">&lt;a href="/dispatcher/starDetail\?theaterId=822&amp;amp;starId=([0-9]*)" &gt;([^&lt;]*)&lt;/a&gt; </expression>
                </RegExp>
                <!-- "linkWithPopup" actors with the "actorthumbs" setting on: emit a
                     chained GetActorInfo URL for the star's detail page. -->
                <RegExp conditional="actorthumbs" input="$$8" output="&lt;url function=&quot;GetActorInfo&quot;&gt;http://$INFO[url]/dispatcher/starDetail?theaterId=822&amp;amp;starId=\1&lt;/url&gt;" dest="7+">
                    <expression repeat="yes">&lt;a href="/dispatcher/starDetail\?theaterId=822&amp;amp;starId=([0-9]*)"  class="linkWithPopup".*?&gt;([^&lt;]*)&lt;/a&gt; </expression>
                </RegExp>
                <!-- Same links with "actorthumbs" off: name-only <actor> entries. -->
                <RegExp conditional="!actorthumbs" input="$$8" output="&lt;actor&gt;&lt;name&gt;\2&lt;/name&gt;&lt;/actor&gt;" dest="7+">
                    <expression repeat="yes">&lt;a href="/dispatcher/starDetail\?theaterId=822&amp;amp;starId=([0-9]*)"  class="linkWithPopup".*?&gt;([^&lt;]*)&lt;/a&gt; </expression>
                </RegExp>
                <expression repeat="yes" noclean="1" />
            </RegExp>
            <!-- Plot: group 2 (full description, up to the nowrap_link span) or group 4
                 (description-only div); only one alternative can match.
                 NOTE(review): the second alternative starts with a literal space
                 before the div; confirm that this is intentional. -->
            <RegExp input="$$1" output="&lt;plot&gt;\2\4&lt;/plot&gt;" dest="5+">
                <expression trim="2,4">(&lt;div class="movieDetailDescriptionFull longDescription"&gt;(.*?)&lt;span class="nowrap_link"&gt;)|( &lt;div class="movieDetailDescriptionOnly"&gt;(.*?)&lt;/div&gt;)</expression>
            </RegExp>
            <expression noclean="1" />
        </RegExp>
    </GetDetails>
    <GetActorInfo dest="3">
        <RegExp input="$$5" output="&lt;details&gt;\1&lt;/details&gt;" dest="3">
            <RegExp input="$$1" output="&lt;actor&gt;&lt;name&gt;\1&lt;/name&gt;&lt;/actor&gt;" dest="5">
                <expression trim="1">&lt;div class="searchDetailStarName"&gt;(.*?) (?:Movies|Scenes)&lt;/div&gt;</expression>
            </RegExp>
            <RegExp input="$$6" output="&lt;actor&gt;\1&lt;/actor&gt;" dest="5">
                <RegExp input="$$1" output="&lt;name&gt;\1&lt;/name&gt;" dest="6">
                    <expression trim="1">&lt;div class="name"&gt;([^&lt;]*)&lt;/div&gt;\n</expression>
                </RegExp>
                <RegExp input="$$1" output="&lt;thumb spoof=&quot;$INFO[url]&quot;&gt;\1&lt;/thumb&gt;" dest="6+">
                    <expression trim="1">&lt;a href="(http://pic.aebn.net/stream/movie/stars/s[0-9]*_i[0-9]*_l.jpg)" target="_blank" title=".*?"&gt;</expression>
                </RegExp>
                <expression trim="1" noclean="1" />
            </RegExp>
            <expression noclean="1" />
        </RegExp>
    </GetActorInfo>
</scraper>
ScraperXML Open Source Web Scraper Library compatible with XBMC XML Scrapers


I suck, and if you act now by sending only $19.95 and a self-addressed stamped envelope, so can you!

Image
Reply
#2
Nice work.

You're going to have to set up your own repository for this one - XBMC won't be hosting any adult addons in our official repo, primarily as these are hosted by our donated mirrors around the world. The good news is that this is easy to do - all you need is an http link to an addons.xml that is just the concatenation of the individual addon.xml files.

Cheers,
Jonathan
Always read the XBMC online-manual, FAQ and search the forum before posting.
Do not e-mail XBMC-Team members directly asking for support. Read/follow the forum rules.
For troubleshooting and bug reporting please make sure you read this first.


Image
Reply
#3
Ideally someone starts one or two 3rd-party repositories: one for adult stuff and one for scrapers that, e.g., override user-agent strings, since that makes it impossible for us to host them officially.
Always read the online manual (wiki), FAQ (wiki) and search the forum before posting.
Do not PM or e-mail Team-Kodi members directly asking for support. Read/follow the forum rules (wiki).
Please read the pages on troubleshooting (wiki) and bug reporting (wiki) before reporting issues.
Reply
#4
Ok didn't know the details/specifics of new info, I'm sorta in the dark as far as the add-ons stuff goes so far.

if someone can point me in the direction of some info on the add-on system, that would be nice Smile
ScraperXML Open Source Web Scraper Library compatible with XBMC XML Scrapers


I suck, and if you act now by sending only $19.95 and a self-addressed stamped envelope, so can you!

Image
Reply
#5
The easiest thing to do is take a nosy through the addons folder in SVN - in particular, repository.xbmc.org is the official repository addon, which contains this extension:
Code:
<!-- Repository extension point: <info> links to the add-on index (addons.xml),
     <checksum> to its md5, and <datadir> to the directory holding the add-on data. -->
<extension name="Official XBMC.org Add-on Repository" point="xbmc.addon.repository">
    <info compressed="true">http://mirrors.xbmc.org/addons/dharma-pre/addons.xml</info>
    <checksum>http://mirrors.xbmc.org/addons/dharma-pre/addons.xml.md5</checksum>
    <datadir zip="true">http://mirrors.xbmc.org/addons/dharma-pre</datadir>
</extension>
Feel free to take a nosy through those links - anything set up in a similar manner will work just fine. The <checksum> isn't required - it's just an md5 of the addons.xml file. The attributes compressed="true" and zip="true" just indicate whether addons.xml and the actual data for each addon are zipped - again, not required - XBMC will link just fine to what is basically a direct dump of your addons - one per folder.

We'd be more than happy to host the repository addons (i.e. the addon that points to your repository) so that users can easily install your repository, thus can easily install your addons.

Cheers,
Jonathan
Always read the XBMC online-manual, FAQ and search the forum before posting.
Do not e-mail XBMC-Team members directly asking for support. Read/follow the forum rules.
For troubleshooting and bug reporting please make sure you read this first.


Image
Reply
#6
looking at the links this seems so simple to do, i was afraid that it would be a pain in the butt, to adjust the scraper for the addon-system. But it seems here i don't need to change the scraper at all, just supply a list of prerequisites, and other info about it and then point out the link to the xml file.
ScraperXML Open Source Web Scraper Library compatible with XBMC XML Scrapers


I suck, and if you act now by sending only $19.95 and a self-addressed stamped envelope, so can you!

Image
Reply
#7
- also there's no GetSettings function any longer - resources/settings.xml is used.
- strings are translatable - resources/language/<lang>/strings.xml.
- the clean step only removes tags; it doesn't replace HTML chars. A new tag is coming to restore the old behaviour.

and maybe a few other small ones i forgot.
Reply
#8
wow, ok, that will give me a little work to do. but i don't see any trouble implementing that on the fly, unless those other things you are mentioning are biggies Big Grin
ScraperXML Open Source Web Scraper Library compatible with XBMC XML Scrapers


I suck, and if you act now by sending only $19.95 and a self-addressed stamped envelope, so can you!

Image
Reply
#9
is there a repository yet for adult....

even if we just get an addon in a zip file that can be installed and posted on

rapidshare or something that would be nice.

This scraper looks nice.
Reply
#10
Maybe you're looking for something like this:
http://forum.xbmc.org/showpost.php?p=569784&postcount=6
Reply
#11
Thumbs up! Looking to get some official adult scrapers to work with the add-on system. I miss the Excalibur scraper...

NS
[Image: all-thin-banner.jpg]
Reply
#12
the link above works fine. I dropped it in the addons directory.

Excalibur definitely works. Adult Empire works, but fonts are screwed up a little in the plot descriptions.

http://forum.xbmc.org/showpost.php?p=569784&postcount=6
Reply
#13
Quote:Excalibur definately works. Adult empire works but fonts are screwed up a little in the plot decriptions.

How did you do it? I'm on the Dharma branch.
Reply
#14
slux Wrote:How you did it?I'm on Dharma branch

I think he's referring to the method I used in the thread he linked to. Got me excited for a second because I thought he meant he used excalibur in dharma too Sad

NS
[Image: all-thin-banner.jpg]
Reply
#15
NotShorty Wrote:I think he's referring to the method I used in the thread he linked to. Got me excited for a second because I thought he meant he used excalibur in dharma too Sad

NS

i use these scrapers: http://forum.xbmc.org/showpost.php?p=568329&postcount=3
I use them with the new version of EMM (it supports XML scraping these days). The Excalibur scraper didn't work at first, but it is now working fine.
Reply



New Adult Movie Scraper - AEBN00