AppStore::Scraper アップデート - 勤務先と無関係なことを書く日記

Home > Perl > AppStore::Scraper アップデート

AppStore::Scraper アップデート

ちょっと前、 Search API からユニコードエスケープされず生 UTF8 が返ってくるようになった & iTunes Store だけじゃなくて Mac App Store の情報も欲しい

。。。ってことで、仕事としてやる時間をもらって修正したので、こっちのも修正しておく
なんか、ごちゃっとした XML だけで泣きそうだったところに JSON やら HTML やら加わって、カオスだな。この手の (App Store のランキング取得したりする) ライブラリを一時期に比べて見かけなくなったのも納得
カオスになったついでに、レート毎の件数もそのうち再実装するかもしれません

使い方。今までと変わらんけど、ident として 'mac' を指定できるようにした

use strict;
use warnings;

use AppStore::Scraper;
use Data::Dumper;

# Example: fetch Mac App Store information for one app from the Japanese
# and US stores. "wait" is the number of seconds slept after each
# app/store pair has been processed.
my $obj = AppStore::Scraper->new(wait => 5);
my $info = $obj->app_info(
                          app => ['444303913'],
                          store => ['jp','us'],
                          lang => 9,
                          # Two reviews per store, matching the sample
                          # output shown below.
                          review_number => 2,
                          # 4 = most recent first.
                          review_order => 4,
                          # 'mac' selects the Mac App Store charts.
                          ident => 'mac',
                         );

warn Dumper $info;

実行結果

$VAR1 = {
          '444303913' => {
                           'jp' => {
                                     'review_number' => 2,
                                     'total_rank' => undef,
                                     'store_code' => 143462,
                                     'app_name' => 'OS X Lion',
                                     'genre_rank' => undef,
                                     'store_name' => 'Japan',
                                     'artist_id' => 284417353,
                                     'genre_name' => 'Productivity',
                                     'ranks' => {
                                                'total_rank_mac' => 1,
                                                'genre_rank_mac' => 1,
                                                'genre_rank_ipad' => undef,
                                                'total_rank_ipad' => undef,
                                                'total_rank_iphone' => undef,
                                                'genre_rank_iphone' => undef
                                              },
                                     'lang' => 9,
                                     'reviews' => [
                                                  {
                                                    'date' => '10-Mar-2012',
                                                    'message' => " \x{3084}\x{3081}\x{3068}\x{3051}\x{3070}\x{3088}\x{304b}\x{3063}\x{305f}\x{3002}\x{305b}\x{3063}\x{304b}\x{304f}\x{30b5}\x{30af}\x{30b5}\x{30af}\x{52d5}\x{3044}\x{3066}\x{3044}\x{305f}\x{3082}\x{306e}\x{3092}icloud\x{4f7f}\x{3048}\x{308b}\x{304b}\x{3082}\x{3068}\x{601d}\x{3063}\x{3066}lion\x{306b}\x{3057}\x{305f}\x{306e}\x{304c}\x{9593}\x{9055}\x{3044}\x{3002}m2tv\x{306f}\x{3060}\x{3081}\x{306b}\x{306a}\x{308b}\x{3057}\x{3001}printmusic\x{3082}\x{6483}\x{6c88}\x{3002}\x{6642}\x{9593}\x{304b}\x{3051}\x{308c}\x{3070}\x{76f4}\x{308b}\x{306e}\x{304b}\x{306a}\x{30fc}\x{3001}\x{3084}\x{3063}\x{3066}\x{3089}\x{308c}\x{306a}\x{3044}\x{3002}iphot\x{307e}\x{3067}\x{3060}\x{3081}\x{3063}\x{3066}\x{3001}\x{306a}\x{3093}\x{3067}\x{ff1f}\x{30b5}\x{30dd}\x{30fc}\x{30c8}\x{306e}\x{304a}\x{59c9}\x{3055}\x{3093}\x{983c}\x{308a}\x{306b}\x{306a}\x{3089}\x{305a}\x{3002}\x{3068}\x{307b}\x{307b}\x{3001}\x{81ea}\x{696d}\x{81ea}\x{5f97}\x{304b}\x{3002} "
                                                  },
                                                  {
                                                    'date' => '07-Mar-2012',
                                                    'message' => " MacBook 2008Early(4GB)\x{306b}\x{30a4}\x{30f3}\x{30b9}\x{30c8}\x{30fc}\x{30eb}\x{3057}\x{305f}\x{5834}\x{5408}\x{306e}\x{611f}\x{60f3}\x{3067}\x{3059}\x{3002}\x{30b9}\x{30bf}\x{30c3}\x{30af}\x{3084}\x{30cd}\x{30c3}\x{30c8}\x{30b5}\x{30fc}\x{30d5}\x{30a3}\x{30f3}\x{3001}\x{30c1}\x{30e3}\x{30c3}\x{30c8}\x{7b49}\x{3001}\x{4f55}\x{6545}\x{304b}\x{30ec}\x{30b9}\x{30dd}\x{30f3}\x{30b9}\x{306b}\x{4e00}\x{547c}\x{5438}\x{9593}\x{304c}\x{5165}\x{308a}SnowLeopard\x{306b}\x{6bd4}\x{3079}\x{82e5}\x{5e72}\x{306e}\x{30b9}\x{30c8}\x{30ec}\x{30b9}\x{304c}\x{3042}\x{308a}\x{307e}\x{3057}\x{305f}\x{3002}\x{3059}\x{3067}\x{306b}10.7.3\x{3067}\x{3059}\x{304c}\x{6539}\x{5584}\x{3055}\x{308c}\x{3066}\x{3044}\x{306a}\x{3044}\x{3068}\x{3044}\x{3046}\x{306e}\x{306f}\x{30cf}\x{30fc}\x{30c9}\x{9762}\x{3067}\x{9650}\x{754c}\x{304c}\x{6765}\x{3066}\x{3044}\x{308b}\x{3068}\x{3044}\x{3046}\x{3053}\x{3068}\x{306a}\x{306e}\x{3067}\x{3057}\x{3087}\x{3046}\x{3002}\x{307e}\x{305f}\x{3001}MBP\x{4ee5}\x{4e0a}\x{306e}PC\x{3067}\x{306f}\x{306a}\x{3044}\x{305f}\x{3081}\x{30d5}\x{30ea}\x{30c3}\x{30af}\x{3067}\x{306e}\x{64cd}\x{4f5c}\x{306f}SnowLeopard\x{304b}\x{3089}\x{5909}\x{5316}\x{306f}\x{3042}\x{308a}\x{307e}\x{305b}\x{3093}\x{3067}\x{3057}\x{305f}\x{3002}\x{305d}\x{3057}\x{3066}\x{554f}\x{984c}\x{306f}\x{3001}Lion\x{306f}\x{6a19}\x{6e96}\x{3067}64BitOS\x{306b}\x{5bfe}\x{5fdc}\x{3057}\x{3066}\x{304a}\x{308a}Core2Duo\x{4ee5}\x{4e0a}\x{306e}PC\x{3067}\x{306a}\x{3044}\x{3068}\x{4f7f}\x{7528}\x{3067}\x{304d}\x{306a}\x{3044}\x{3068}\x{306a}\x{3063}\x{3066}\x{3044}\x{308b}\x{304c}\x{3001}MacBook 
2008Early\x{306f}EFI\x{60c5}\x{5831}\x{7684}\x{306b}\x{3082}64bit\x{3067}\x{52d5}\x{4f5c}\x{3059}\x{308b}\x{306f}\x{305a}\x{306a}\x{306e}\x{306b}\x{30ed}\x{30c3}\x{30af}\x{304c}\x{639b}\x{3051}\x{3089}\x{308c}\x{3066}\x{304a}\x{308a}32bit\x{304b}\x{3089}\x{5909}\x{66f4}\x{4e0d}\x{53ef}\x{3067}\x{3042}\x{308b}\x{3053}\x{3068}\x{3002}\x{307e}\x{305f}\x{3001}\x{8d77}\x{52d5}\x{30ab}\x{30fc}\x{30cd}\x{30eb}\x{3092}\x{5909}\x{66f4}\x{3059}\x{308b}App\x{3067}\x{3082}\x{5909}\x{66f4}\x{304c}\x{4e0d}\x{53ef}\x{80fd}\x{3067}\x{3057}\x{305f}\x{3002}\x{ff08}Air\x{306f}Lion\x{5c0e}\x{5165}\x{6642}\x{306b}\x{30ab}\x{30fc}\x{30cd}\x{30eb}\x{304c}64bit\x{306b}\x{5909}\x{66f4}\x{3055}\x{308c}\x{3066}\x{3044}\x{305f}\x{3068}\x{3044}\x{3046}\x{4eba}\x{304c}\x{304a}\x{3089}\x{308c}\x{307e}\x{3057}\x{305f}\x{306e}\x{3067}\x{671f}\x{5f85}\x{3057}\x{3066}\x{3044}\x{305f}\x{306e}\x{3067}\x{3059}\x{304c}\x{30fb}\x{30fb}\x{30fb}\x{ff09}\x{73fe}\x{72b6}\x{3001}\x{30d6}\x{30e9}\x{30c3}\x{30af}\x{30e2}\x{30c7}\x{30eb}\x{6700}\x{5f8c}\x{306e}\x{6a5f}\x{7a2e}\x{306a}\x{306e}\x{3067}\x{5927}\x{4e8b}\x{306b}\x{4f7f}\x{3063}\x{3066}\x{3044}\x{3053}\x{3046}\x{3068}\x{601d}\x{3044}\x{307e}\x{3057}\x{305f}\x{304c}\x{305d}\x{308d}\x{305d}\x{308d}\x{6f6e}\x{6642}\x{306e}\x{3088}\x{3046}\x{3067}\x{3059}\x{3002} "
                                                  }
                                                ],
                                     'ident' => 'mac',
                                     'genre_id' => 12014,
                                     'review_order' => 4,
                                     'ratings' => {
                                                  'userRatingCount' => 1697,
                                                  'averageUserRatingForCurrentVersion' => '3.5',
                                                  'averageUserRating' => '4',
                                                  'userRatingCountForCurrentVersion' => 39
                                                },
                                     'price' => '2600'
                                   },
                           'us' => {
                                     'review_number' => 2,
                                     'total_rank' => undef,
                                     'store_code' => 143441,
                                     'app_name' => 'OS X Lion',
                                     'genre_rank' => undef,
                                     'store_name' => 'United States',
                                     'artist_id' => 284417353,
                                     'genre_name' => 'Productivity',
                                     'ranks' => {
                                                'total_rank_mac' => 1,
                                                'genre_rank_mac' => 1,
                                                'genre_rank_ipad' => undef,
                                                'total_rank_ipad' => undef,
                                                'total_rank_iphone' => undef,
                                                'genre_rank_iphone' => undef
                                              },
                                     'lang' => 9,
                                     'reviews' => [
                                                  {
                                                    'date' => 'Mar 12, 2012',
                                                    'message' => ' The current version (10.7.3) causes my computer to crash sometimes 3 or more times a day while doing simple tasks, E.G. using Safari to watch a movie preview on the Apple Trailer site. Please Fix!iCould synchronizing and new track pad gestures are great. Using a 2.66 GHz Intel Core i7 2010 MacBook Pro with 8 Gigs of ram. '
                                                  },
                                                  {
                                                    'date' => 'Mar 12, 2012',
                                                    'message' => ' Not ready for prime time yet. Not sure it is worth upgrading. 1. Pages and Nubmers (Apple Products) do NOT work after the upgrade. The error message is "You can't open the application Pages.app because PowerPC applications are no longer supported? Huh?? This is an Aapple product I purchased.2. My Word and Excel not longer work. They are the Apple version of Work and Excel. Will Apple reimburse me for taking away my purchased programs?If I could undo this I would. Apple is really sticking it to users. You can't move to iCould without upgrading to Lion. And you are forced to change to iCould because MobileMe is being terminated. We're kind of stuck between a rock and a hard place now. :-( And iCloud does not support iWeb. I was such a fan of Apple until now. '
                                                  }
                                                ],
                                     'ident' => 'mac',
                                     'genre_id' => 12014,
                                     'review_order' => 4,
                                     'ratings' => {
                                                  'userRatingCount' => 24648,
                                                  'averageUserRatingForCurrentVersion' => '4',
                                                  'averageUserRating' => '4',
                                                  'userRatingCountForCurrentVersion' => 382
                                                },
                                     'price' => '29.99'
                                   }
                         }
        };

ソースはこちら

package AppStore::Scraper;

use strict;
use utf8;
use warnings;
use Data::Dumper;

use LWP::UserAgent;
use XML::Simple;
use JSON;
use Web::Scraper;

#
# Constructor.
#
# Accepts its options either as a hash reference or as a flat key/value
# list. The only option read here is "wait": the number of seconds
# app_info() sleeps after each app/store pair (a false value becomes 1).
#
sub new {
    my ($class, @args) = @_;

    my $opts = ref $args[0] eq 'HASH' ? $args[0] : {@args};

    # Works both as a class method and as an instance method.
    my $self = bless {}, ref $class || $class;

    # Endpoint prefixes: scraping pages and the Search API share one host.
    # (Alternative base once tried:
    #  'http://itunes.apple.com/WebObjects/MZStore.woa/wa/')
    my $url_pref = 'http://ax.itunes.apple.com/WebObjects/';

    # User-Agent sent with every request.
    # (Alternative once tried:
    #  'MacAppStore/1.0.2 (Macintosh; Intel Mac OS X 10.6.8')
    my $agent = 'iTunes/9.1.1 (Macintosh; Intel Mac OS X 10.6.3';

    my $ua = LWP::UserAgent->new();
    $ua->timeout(30);
    $ua->env_proxy;
    $ua->agent($agent);

    %$self = (
              __STORE_CODES        => scalar _init_countries(),
              __URL_PREF           => $url_pref,
              __SCRAPING_URL_PREF  => $url_pref . 'MZStore.woa/wa/',
              __SEARCHAPI_URL_PREF => $url_pref . 'MZStoreServices.woa/wa/',
              __UA                 => $agent,
              ua                   => $ua,
              __WAIT               => $opts->{wait} || '1',
              # Parser handed to XML::Simple in _get_xml. Other candidates:
              # XML::SAX::PurePerl, XML::SAX::Expat, XML::LibXML::SAX.
              __XML_PREFERRED_PARSER => 'XML::Parser',
             );

    return $self;
}

#
# Collect everything known about the requested apps.
#
# Takes the same arguments as app_base_info(). For every app/store pair
# that has a genre id it adds chart ranks, reviews and the store name to
# the base information, then sleeps for the configured wait time.
#
# Returns a hash reference: { app id => { store => { ... } } }.
# "genre_rank" / "total_rank" at the top level always carry the iPhone
# ranks; per-device ranks live under "ranks".
#
sub app_info {
    my ($self, @args) = @_;

    # get info from app page
    my $base = $self->app_base_info(@args);

    my %result;
    for my $app ( keys %$base ) {
        for my $store ( keys %{ $base->{$app} } ) {
            my $info = $base->{$app}->{$store};
            next unless $info->{genre_id};

            # Which charts to query: 'mac' alone, or iPhone and/or iPad
            # ('both' -- anything that is neither -- queries the two).
            my @devices;
            if ( $info->{ident} eq 'mac' ) {
                @devices = ('mac');
            }
            else {
                push @devices, 'iphone' unless $info->{ident} eq 'ipad';
                push @devices, 'ipad'   unless $info->{ident} eq 'iphone';
            }

            # Every rank slot exists in the result, undef when not queried.
            my %rank = map { ( "genre_rank_$_" => undef, "total_rank_$_" => undef ) }
                       qw(iphone ipad mac);

            for my $device (@devices) {
                # The rank lookup reads ident from the info hash, so hand
                # it a copy with the concrete device filled in.
                my $device_info = { %$info, ident => $device };
                $rank{"genre_rank_$device"} = $self->genre_rank(
                                                                app => $app,
                                                                info => $device_info
                                                               );
                $rank{"total_rank_$device"} = $self->total_rank(
                                                                app => $app,
                                                                info => $device_info
                                                               );
            }

            my $reviews = $self->app_reviews(
                                             app => $app,
                                             info => $info
                                            );

            $result{$app}->{$store} = {
                                       %$info,
                                       genre_rank => $rank{genre_rank_iphone},
                                       total_rank => $rank{total_rank_iphone},
                                       ranks => {%rank},
                                       reviews => $reviews,
                                       store_name => $self->{__STORE_CODES}->{$store}->{name},
                                      };

            sleep $self->{__WAIT};
        }
    }

    return \%result;
}

#
# Fetch the basic facts about each requested app from the Search API
# (wsLookup), once per app and store.
#
# Arguments are validated by _validate_args (app, store, lang, ident,
# review_number, review_order).
#
# Returns a hash reference: { app id => { store => { ... } } } with the
# keys store_code, lang, ident, review_number, review_order, genre_id,
# artist_id, app_name, genre_name, price and ratings. A store whose HTTP
# request fails is warned about and left out. If the response cannot be
# decoded as JSON a warning is issued and the looked-up fields stay undef.
#
sub app_base_info {
    my $self = shift;
    my @args = @_;

    my $args = $self->_validate_args(@args);

    # One decoder serves every lookup; ->utf8 makes it expect UTF-8 bytes.
    my $json = JSON->new->utf8;

    my $ret = {};
    for my $app ( @{$args->{apps}} ) {
        for my $store ( keys %{$args->{stores}} ) {
            my $tmp = {
                       store_code    => $args->{stores}->{$store}->{code},
                       lang          => $args->{lang},
                       ident         => $args->{ident},
                       review_number => $args->{review_number},
                       review_order  => $args->{review_order},
                      };

            # The store table uses 'uk'; the lookup is sent with 'gb'.
            my $store_tmp = $store eq 'uk' ? 'gb' : $store;
            my $uri = $self->{__SEARCHAPI_URL_PREF} . 'wsLookup?id='.$app.'&entity=software&country='.$store_tmp;
            my $res = $self->{ua}->get( $uri );
            # Error Check
            unless ( $res->is_success ) {
                warn 'request failed: ', $uri, ': ', $res->status_line, ': ', $store, '-', $args->{lang};
                next;
            }

            # The decoder wants bytes, so encode if we were handed characters.
            my $jsondata = $res->content;
            if ( utf8::is_utf8($jsondata) ) {
                utf8::encode($jsondata);
            }

            my $hash;
            eval { $hash = $json->decode($jsondata) };
            if ( $@ ) {
                # Previously swallowed silently; report it but keep going so
                # the entry is still returned (with undefined fields).
                warn 'json decode failed: ', $uri, ': ', $@;
            }

            # Only the first result is used; tolerate an empty result list.
            my $result = {};
            if ( ref $hash eq 'HASH' and ref $hash->{results} eq 'ARRAY' and ref $hash->{results}->[0] eq 'HASH' ) {
                $result = $hash->{results}->[0];
            }

            #
            # genre_id, artist_id, app_name, genre_name
            #

            $tmp->{genre_id}   = $result->{primaryGenreId};
            $tmp->{artist_id}  = $result->{artistId};
            $tmp->{app_name}   = $result->{trackName};
            if ( $tmp->{app_name} ) {
                # Trim each end independently. The old single pattern needed
                # whitespace on both ends and left trailing blanks behind.
                $tmp->{app_name} =~ s/^\s+//;
                $tmp->{app_name} =~ s/\s+$//;
            }
            $tmp->{genre_name} = $result->{primaryGenreName};

            #
            # price
            #

            $tmp->{price} = $result->{price};

            #
            # star
            #

            $tmp->{ratings} = {
                              averageUserRating => $result->{averageUserRating},
                              userRatingCount => $result->{userRatingCount},
                              averageUserRatingForCurrentVersion => $result->{averageUserRatingForCurrentVersion},
                              userRatingCountForCurrentVersion => $result->{userRatingCountForCurrentVersion},
                             };

            $ret->{$app}->{$store} = $tmp;
        }
    }

    return $ret;
}


#
# for rank
#

#
# Rank of an app within its own genre chart.
#
# Arguments are forwarded untouched to _get_rank. This must remain an
# ordinary method call: _get_rank inspects the name of its calling sub to
# decide whether to restrict the chart to the app's genre.
#
sub genre_rank {
    my ($self, @params) = @_;

    return $self->_get_rank(@params);
}

#
# Rank of an app in the overall chart.
#
# Arguments are forwarded untouched to _get_rank, which behaves
# differently depending on the name of the sub that called it, so this
# stays a separate, ordinary method call.
#
sub total_rank {
    my ($self, @params) = @_;

    return $self->_get_rank(@params);
}

#
# Build the "viewTop" chart URI for a price and a device identifier.
#
#   $price  true selects popId 30, false selects popId 27
#   $ident  'ipad' adds 17 to the popId (47 / 44);
#           'mac' switches the chart id from 25209 to 29562
#
# Returns the URI as a string (without any genre restriction).
#
sub _rank_uri {
    my ($self, $price, $ident) = @_;

    # iphone 30:27, ipad 47:44, mac 30:27
    my $pop_id   = ( $price ? 30 : 27 ) + ( $ident eq 'ipad' ? 17 : 0 );
    my $chart_id = $ident eq 'mac' ? 29562 : 25209;

    return join '', $self->{__SCRAPING_URL_PREF}, 'viewTop?id=', $chart_id, '&popId=', $pop_id;
}

#
# Shared implementation behind genre_rank() and total_rank().
#
# Arguments (hash reference or key/value list):
#   app    numeric app id to look for
#   info   optional entry as produced by app_base_info(); when missing it
#          is fetched, which then also requires "store"
#
# The chart is restricted to the app's genre when the calling sub's name
# ends in "genre_rank". Non-Mac charts arrive as XML, Mac charts as HTML.
# In both cases the content is cut into pieces and the pieces containing
# "salableAdamId=<digits>" are counted in order.
#
# Returns the 1-based position of the app, or undef when it is not found
# or the chart could not be fetched.
#
sub _get_rank {
    my $self = shift;
    my @args = @_;

    my $args_ref = ref $args[0] eq 'HASH' ? $args[0] : {@args};

    # Name of the sub that called us. It is undefined when there is no
    # enclosing sub, so normalise it to avoid an "uninitialized" warning.
    my $caller = (caller(1))[3];
    $caller = '' unless defined $caller;

    my $info;
    if ( $args_ref->{info} ) {
        $info = $args_ref->{info};
    }
    else {
        my $base_info = $self->app_base_info($args_ref);
        $info = $base_info->{ $args_ref->{app} }->{ $args_ref->{store} };
    }

    my $uri = $self->_rank_uri( $info->{price}, $info->{ident} );
    $uri .= '&genreId=' . $info->{genre_id} if $caller =~ /genre_rank$/;

    my @arrays;
    if ( $info->{ident} ne 'mac' ) {
        # _get_xml returns nothing (after warning) when the request fails.
        my $xmlobj = $self->_get_xml($uri, $info->{store_code}, $info->{lang});
        if ( $xmlobj ) {
            # Dump the relevant subtree and scan it line by line.
            @arrays = split /\n+/, Dumper($xmlobj->{View}->{ScrollView}->{VBoxView}->{View});
        }
    }
    else {
        # _get_html returns nothing (after warning) when the request fails;
        # there is nothing to scrape in that case.
        my $html = $self->_get_html($uri, $info->{store_code}, $info->{lang});
        if ( defined $html ) {
            my $rule = scraper {
                process '//div[@class="lockup small detailed option application mac-application"]','apps[]' => 'HTML';
            };
            my $apps;
            eval { $apps = $rule->scrape($html); };
            if ( $@ ) {
                warn $info->{store_code},',',$info->{lang},',',$@;
            }
            else {
                # The result may lack the "apps" key when nothing matched;
                # dereferencing undef here would die under strict refs.
                @arrays = @{ ( $apps && $apps->{apps} ) || [] };
            }
        }
    }

    my $ret;
    my $i = 0;
    for ( @arrays ) {
        next unless /salableAdamId=(\d+)/;
        $i++;
        next unless $1 == $args_ref->{app};
        $ret = $i;
        last;
    }

    return $ret;
}

#
# for reviews
#

#
# Fetch customer reviews for one app.
#
# Arguments (hash reference or key/value list):
#   app    numeric app id
#   info   optional entry as produced by app_base_info(); when missing it
#          is fetched, which then also requires "store"
#
# Review pages are requested one after another, starting at page 1, until
# at least info->{review_number} reviews are collected or a page comes
# back empty. Returns an array reference of at most review_number
# entries, each { message => ..., date => ... }.
#
sub app_reviews {
    my $self = shift;
    my @args = @_;

    my $args_ref = ref $args[0] eq 'HASH' ? $args[0] : {@args};

    my $info;
    if ( $args_ref->{info} ) {
        $info = $args_ref->{info};
    }
    else {
        my $base_info = $self->app_base_info($args_ref);
        $info = $base_info->{ $args_ref->{app} }->{ $args_ref->{store} };
    }

    my $order = $info->{review_order};
    my $limit = $info->{review_number} || 0;

    my $uri_head = $self->{__SCRAPING_URL_PREF} . 'customerReviews?page=';
    my $uri_tail = '&id='.$args_ref->{app}.'&displayable-kind=30&appVersion=current&sort='.$order;

    # pagination
    my $ret = [];
    my $page = 1;
    # Strictly "less than": once the limit is reached no further page is
    # needed ("<=" used to trigger one superfluous request).
    while ( scalar(@$ret) < $limit ) {
        my $tmp = $self->_app_reviews($uri_head . $page . $uri_tail, $info->{store_code}, $info->{lang});
        last unless scalar(@$tmp);
        push @$ret, @$tmp;
        $page++;
    }

    # The last page may have pushed us past the limit.
    splice @$ret, $limit if scalar(@$ret) > $limit;

    return $ret;
}

#
# Scrape a single customer-review page.
#
#   $uri         review page to fetch
#   $store_code  numeric storefront code
#   $lang        numeric language code
#
# Returns an array reference of { message => ..., date => ... }. The date
# is whatever follows the last " - " in the reviewer info block. If
# scraping throws, a warning is issued and an empty list is returned.
#
sub _app_reviews {
    my ($self, $uri, $store_code, $lang) = @_;

    my $html = $self->_get_html($uri, $store_code, $lang);

    # Rule for one review block: reviewer info (raw HTML) and review text.
    my $review_rule = scraper {
        process '//span[@class="user-info"]', date => 'HTML';
        process '//p[@class="content"]', message => 'TEXT';
    };
    # Rule for the page: every review block of the current version.
    my $page_rule = scraper {
        process '//div[@class="paginate current-reviews"]//div[@class="da-review customer-review"]','reviews[]' => $review_rule;
    };

    my $scraped;
    eval { $scraped = $page_rule->scrape($html); };
    if ( $@ ) {
        warn $store_code,',',$lang,',',$@;
        return [];
    }

    my @found;
    for my $entry ( @{ $scraped->{reviews} } ) {
        my @pieces = split / - /, $entry->{date};
        my $date = pop @pieces;
        push @found, {
                      message => $entry->{message},
                      date => $date,
                     };
    }

    return \@found;
}

#
# common
#

#
# Normalise and validate the public arguments.
#
# Input (hash reference or key/value list):
#   app            app id or array reference of ids; required, digits only
#   store          store key or array reference of keys (case-insensitive);
#                  all known stores when omitted
#   lang           digits, default 1
#   ident          'ipad', 'iphone' or 'mac'; anything else means 'both'
#   review_number  digits, default 25
#   review_order   digits, default 1
#                    1..Most Helpful
#                    2..Most Favourable
#                    3..Most Critical
#                    4..Most Recent
#
# Dies on a missing or non-numeric app id and on an unknown store.
# Returns a hash reference with the keys apps, stores, lang, ident,
# review_number and review_order.
#
sub _validate_args {
    my ($self, @args) = @_;

    my $opts = ref $args[0] eq 'HASH' ? $args[0] : {@args};

    # Target apps.
    die 'app code MUST be needed' unless $opts->{app};

    my @app_ids = ref $opts->{app} eq 'ARRAY' ? @{ $opts->{app} } : ( $opts->{app} );
    for my $id (@app_ids) {
        die 'app code MUST be numerical: ' . $id unless $id =~ m|^\d+$|;
    }

    # Target stores.
    my $stores;
    if ( $opts->{store} ) {
        my @wanted = ref $opts->{store} eq 'ARRAY' ? @{ $opts->{store} } : ( $opts->{store} );
        for my $name ( map { lc $_ } @wanted ) {
            die 'cannot found appstore on "' . $name . '"'
                unless exists $self->{__STORE_CODES}->{$name};
            $stores->{$name} = $self->{__STORE_CODES}->{$name};
        }
    }
    else {
        $stores = $self->{__STORE_CODES};
    }

    # Options that must consist of digits, each with its own fallback.
    my $digits_or = sub {
        my ($key, $fallback) = @_;
        return $fallback unless exists $opts->{$key};
        return $opts->{$key} =~ /^\d+$/ ? $opts->{$key} : $fallback;
    };

    # Device identifier: only the three known values are taken over.
    my $ident = 'both';
    if ( exists $opts->{ident} ) {
        my ($known) = grep { $opts->{ident} eq $_ } qw(ipad iphone mac);
        $ident = $known if defined $known;
    }

    return {
            apps => [@app_ids],
            stores => $stores,
            lang => $digits_or->( lang => 1 ),
            ident => $ident,
            review_number => $digits_or->( review_number => 25 ),
            review_order => $digits_or->( review_order => 1 ),
           };
}

#
# GET a URI as a given storefront and parse the body as XML.
#
#   $uri    address to fetch
#   $store  numeric storefront code
#   $lang   numeric language code
#
# The X-Apple-Store-Front header is set to "<store>-<lang>" as a default
# header on the shared user agent. Returns the XML::Simple structure, or
# nothing (after a warning) when the request fails or the response's
# Content-Type does not contain "/xml".
#
sub _get_xml {
    my ($self, $uri, $store, $lang) = @_;

    my $ua = $self->{ua};
    $ua->default_header('X-Apple-Store-Front' => $store . '-' . $lang);
    my $res = $ua->get($uri);

    # Error Check
    if ( !$res->is_success ) {
        warn 'request failed: ', $uri, ': ', $res->status_line, ': ', $store, '-', $lang;
        return;
    }
    if ( $res->headers->header('Content-Type') !~ m|/xml| ) {
        warn 'content is not xml: ', $uri, ': ', $res->headers->header('Content-Type'), ': ', $store, '-', $lang;
        return;
    }

    # Parser choice is scoped to this call only.
    local $XML::Simple::PREFERRED_PARSER = $self->{__XML_PREFERRED_PARSER};
    my $tree = XMLin( $res->content );

    return $tree;
}

#
# GET a URI as a given storefront and return the decoded HTML body.
#
#   $uri    address to fetch
#   $store  numeric storefront code
#   $lang   numeric language code
#
# The X-Apple-Store-Front header is set to "<store>-<lang>,13" as a
# default header on the shared user agent. Returns the decoded content,
# or nothing (after a warning) when the request fails or the response's
# Content-Type does not contain "/html".
#
sub _get_html {
    my ($self, $uri, $store, $lang) = @_;

    my $ua = $self->{ua};
    $ua->default_header('X-Apple-Store-Front' => $store . '-' . $lang . ',13');
    my $res = $ua->get($uri);

    # Error Check
    if ( !$res->is_success ) {
        warn 'request failed: ', $uri, ': ', $res->status_line, ': ', $store, '-', $lang;
        return;
    }
    if ( $res->headers->header('Content-Type') !~ m|/html| ) {
        warn 'content is not html: ', $uri, ': ', $res->headers->header('Content-Type'), ': ', $store, '-', $lang;
        return;
    }

    return $res->decoded_content;
}

#
# Table of supported stores.
#
# Returns a hash reference keyed by a lower-case two-letter store key;
# each value holds the display name ("name") and the numeric storefront
# code ("code") sent in the X-Apple-Store-Front header. The key is also
# sent as the "country" parameter of the lookup request (with 'uk'
# translated to 'gb' by the caller).
#
# New Zealand is keyed 'nz'; it was previously listed under 'nu' by
# mistake.
#
sub _init_countries {
    my $c = {
             'jp' => { name => 'Japan',                code => 143462 },
             'us' => { name => 'United States',        code => 143441 },
             'ar' => { name => 'Argentine',            code => 143505 },
             'au' => { name => 'Australia',            code => 143460 },
             'be' => { name => 'Belgium',              code => 143446 },
             'br' => { name => 'Brazil',               code => 143503 },
             'ca' => { name => 'Canada',               code => 143455 },
             'cl' => { name => 'Chile',                code => 143483 },
             'cn' => { name => 'China',                code => 143465 },
             'co' => { name => 'Colombia',             code => 143501 },
             'cr' => { name => 'Costa Rica',           code => 143495 },
             'hr' => { name => 'Croatia',              code => 143494 },
             'cz' => { name => 'Czech Republic',       code => 143489 },
             'dk' => { name => 'Denmark',              code => 143458 },
             'de' => { name => 'Germany',              code => 143443 },
             'sv' => { name => 'El Salvador',          code => 143506 },
             'es' => { name => 'Spain',                code => 143454 },
             'fi' => { name => 'Finland',              code => 143447 },
             'fr' => { name => 'France',               code => 143442 },
             'gr' => { name => 'Greece',               code => 143448 },
             'gt' => { name => 'Guatemala',            code => 143504 },
             'hk' => { name => 'Hong Kong',            code => 143463 },
             'hu' => { name => 'Hungary',              code => 143482 },
             'in' => { name => 'India',                code => 143467 },
             'id' => { name => 'Indonesia',            code => 143476 },
             'ie' => { name => 'Ireland',              code => 143449 },
             'il' => { name => 'Israel',               code => 143491 },
             'it' => { name => 'Italia',               code => 143450 },
             'kr' => { name => 'Korea',                code => 143466 },
             'kw' => { name => 'Kuwait',               code => 143493 },
             'lb' => { name => 'Lebanon',              code => 143497 },
             'lu' => { name => 'Luxembourg',           code => 143451 },
             'my' => { name => 'Malaysia',             code => 143473 },
             'mx' => { name => 'Mexico',               code => 143468 },
             'nl' => { name => 'Nederland',            code => 143452 },
             'nz' => { name => 'New Zealand',          code => 143461 },
             'no' => { name => 'Norway',               code => 143457 },
             'at' => { name => 'Osterreich',           code => 143445 },
             'pk' => { name => 'Pakistan',             code => 143477 },
             'pa' => { name => 'Panama',               code => 143485 },
             'pe' => { name => 'Peru',                 code => 143507 },
             'ph' => { name => 'Philippines',          code => 143474 },
             'pl' => { name => 'Poland',               code => 143478 },
             'pt' => { name => 'Portugal',             code => 143453 },
             'qa' => { name => 'Qatar',                code => 143498 },
             'ro' => { name => 'Romania',              code => 143487 },
             'ru' => { name => 'Russia',               code => 143469 },
             'sa' => { name => 'Saudi Arabia',         code => 143479 },
             'ch' => { name => 'Switzerland',          code => 143459 },
             'sg' => { name => 'Singapore',            code => 143464 },
             'sk' => { name => 'Slovakia',             code => 143496 },
             'si' => { name => 'Slovenia',             code => 143499 },
             'za' => { name => 'South Africa',         code => 143472 },
             'lk' => { name => 'Sri Lanka',            code => 143486 },
             'se' => { name => 'Sweden',               code => 143456 },
             'tw' => { name => 'Taiwan',               code => 143470 },
             'th' => { name => 'Thailand',             code => 143475 },
             'tr' => { name => 'Turkey',               code => 143480 },
             'ae' => { name => 'United Arab Emirates', code => 143481 },
             'uk' => { name => 'United Kingdom',       code => 143444 },
             've' => { name => 'Venezuela',            code => 143502 },
             'vn' => { name => 'Vietnam',              code => 143471 },
            };

    return $c;
}

1;

Home > Perl > AppStore::Scraper アップデート

Feeds

Return to page top