AppStore のレビューとか順位とか - 勤務先と無関係なことを書く日記

Home > Perl > AppStore のレビューとか順位とか

AppStore のレビューとか順位とか

仕事でちょっと作ったスクリプトが、一年を経過しエライことになってきて、メンテ不能になってきた。XML からデタラメに引っこ抜いてるので頻繁に色々修正せなあかんしね
で、せめてこんな感じで使えるようにしたいなぁ、と

use AppStore::Scraper;
use Data::Dumper;

# Build a scraper; "wait" becomes the number of seconds slept after each
# app/store pair processed by app_info.
my $scraper = AppStore::Scraper->new( wait => 5 );

# Query parameters, collected up front so they are easy to tweak.
my %query = (
    app           => [ '322894440', '331069023', '331225704' ],    # numeric app ids
    store         => [ 'jp', 'us' ],                               # store keys
    lang          => 9,         # language digit sent in X-Apple-Store-Front
    ident         => 'ipad',    # 'ipad'; anything else means iphone
    review_number => 100,       # maximum number of reviews to return
    review_order  => 4,         # 4..Most Recent
);

my $info = $scraper->app_info(%query);

warn Dumper($info);

仕事中にこんなことやってるのも時間もったいないので、ひとまず家でガワだけ作った。中身は整理しきれてないけど、動いているので、ベタッと貼っとく。長いけど
動いているだけなので、誰かキレイに修正して ><

2010/06/18: bug fix しましたよ
2010/06/23: wait 秒数指定できるようにしたよ
2011/01/04: iPhone / iPad を指定できるようにしたよ
2011/02/03: レビューの取得件数とオーダーを指定できるようにしたよ
2011/02/17: その国のストアで公開されてない App の場合はスルーするようにしたよ

package AppStore::Scraper;

use strict;
use utf8;
use warnings;
use Data::Dumper;

use LWP::UserAgent;
use XML::Simple;

# Constructor.
#
# Accepts either a hash reference or a flat key/value list.  The only
# option read here is:
#   wait - seconds stored in __WAIT (a false or missing value becomes '1')
#
# Returns the blessed object, with an LWP::UserAgent in {ua} configured
# with a 30 second timeout, proxy settings from the environment, and the
# iTunes user-agent string.
sub new {
    my $class = shift;
    my @args = @_;
    my $args_ref = ref $args[0] eq 'HASH' ? $args[0] : {@args};

    my $self = bless {}, ref $class || $class;

    $self->{__STORE_CODES} = _init_countries();
    $self->{__URL_PREF} = 'http://ax.itunes.apple.com/WebObjects/MZStore.woa/wa/';
    # NOTE(review): the closing ")" is missing from this user-agent string.
    # It is left untouched because the store is known to answer to this
    # exact value -- confirm before "fixing" it.
    # This statement used to end with a comma, which chained it to the next
    # assignment through the comma operator; it is now a proper statement.
    $self->{__UA} = 'iTunes/9.1.1 (Macintosh; Intel Mac OS X 10.6.3';

    $self->{ua} = LWP::UserAgent->new();
    $self->{ua}->timeout(30);
    $self->{ua}->env_proxy;
    $self->{ua}->agent( $self->{__UA} );
    $self->{__WAIT} = $args_ref->{wait} || '1';

#    $self->{__XML_PREFERRED_PARSER} = 'XML::SAX::PurePerl';
    $self->{__XML_PREFERRED_PARSER} = 'XML::Parser';
#    $self->{__XML_PREFERRED_PARSER} = 'XML::SAX::Expat';
#    $self->{__XML_PREFERRED_PARSER} = 'XML::LibXML::SAX';

    return $self;
}

# Collect everything known about each requested app in each store.
#
# Arguments are passed straight to app_base_info.  For every app/store
# pair found there, the genre rank, overall rank and reviews are looked
# up and merged with the base info.
#
# Returns a hash reference shaped as { app id => { store key => {...} } }.
sub app_info {
    my ( $self, @args ) = @_;

    # get info from app page
    my $base = $self->app_base_info(@args);

    my %result;
    for my $app ( keys %$base ) {
        my $per_store = $base->{$app};
        for my $store ( keys %$per_store ) {
            my $info  = $per_store->{$store};
            my @query = ( app => $app, info => $info );

            my $genre_rank = $self->genre_rank(@query);
            my $total_rank = $self->total_rank(@query);
            my $reviews    = $self->app_reviews(@query);

            # Start from the base info, then add the derived fields.
            my %entry = %$info;
            $entry{genre_rank} = $genre_rank;
            $entry{total_rank} = $total_rank;
            $entry{reviews}    = $reviews;
            $entry{store_name} = $self->{__STORE_CODES}->{$store}->{name};

            $result{$app}->{$store} = \%entry;

            # Pause between app/store pairs.
            sleep $self->{__WAIT};
        }
    }

    return \%result;
}

# Fetch the app page for every requested app/store pair and pull the
# basic facts out of the parsed XML.
#
# Arguments are validated by _validate_args (app, store, lang, ident,
# review_number, review_order).
#
# Returns { app id => { store key => { store_code, lang, ident,
# review_number, review_order, genre_id, artist_id, app_name,
# genre_name, price, review_url, stars } } }.  Keys whose value could not
# be located in the page are simply absent.  App/store pairs whose page has
# no "iTunes" element are skipped entirely.
sub app_base_info {
    my $self = shift;
    my @args = @_;

    my $args = $self->_validate_args(@args);

    my $ret = {};
    for my $app ( @{$args->{apps}} ) {
        for my $store ( keys %{$args->{stores}} ) {
            my $tmp;

            $tmp->{store_code} = $args->{stores}->{$store}->{code};
            $tmp->{lang} = $args->{lang};
            $tmp->{ident} = $args->{ident};
            $tmp->{review_number} = $args->{review_number};
            $tmp->{review_order} = $args->{review_order};

            # The first response only carries a pointer to the real page:
            # take the first http URL that appears anywhere in it.
            my $uri = $self->{__URL_PREF} . 'viewSoftware?id=' . $app . '&mt=8';
            my $xmlobj = $self->_get_xml($uri, $tmp->{store_code}, $tmp->{lang});
            for ( split /\n/, Dumper( $xmlobj ) ) {
                if ( /'(http:\/\/[^']+)'/ ) {
                    $uri = $1;
                    last;
                }
            }
            next unless $uri =~ m|^http://(?:ax\.)?itunes\.apple\.com|;
            $xmlobj = $self->_get_xml($uri, $tmp->{store_code}, $tmp->{lang});

            #
            # genre_id, artist_id, app_name, genre_name
            #

            next unless exists $xmlobj->{iTunes};
            $tmp->{genre_id}  = $xmlobj->{genreId};
            $tmp->{artist_id} = $xmlobj->{artistId};

            # Trim both ends independently.  The previous single regex
            # only worked when whitespace was present on both sides and
            # could leave trailing blanks behind.
            $tmp->{app_name} = $xmlobj->{iTunes};
            if ( defined $tmp->{app_name} and not ref $tmp->{app_name} ) {
                $tmp->{app_name} =~ s/\A\s+//;
                $tmp->{app_name} =~ s/\s+\z//;
            }

            # XML::Simple yields a hash instead of an array when there is
            # only one PathElement, so accept both shapes.
            my $path = $xmlobj->{Path}->{PathElement};
            my @path_elements = ref $path eq 'ARRAY' ? @$path
                              : ref $path eq 'HASH'  ? ($path)
                              :                        ();
            for my $c ( @path_elements ) {
                next unless ref $c eq 'HASH' and defined $c->{content};
                if ( $c->{content} =~ m|/genre/(?:.*/)?id$tmp->{genre_id}| ) {
                    $tmp->{genre_name} = $c->{displayName};
                    last;
                }
#                elsif ( $c->{content} =~ m|/app/(?:.*/)?id$app|  ) {
#                    $tmp->{app_name} = $c->{displayName};
#                }
            }

            #
            # price
            #

            for ( split /\n/, Dumper( $xmlobj->{View} ) ) {
                if ( /buyParams.*price=(\d+)/ ) {
                    $tmp->{price} = $1;
                    last;
                }
            }

            # The page layout changes often; a shape mismatch anywhere in
            # this chain must not abort the whole run, so it is trapped
            # and treated as "nothing found".
            my $treetmp = eval {
                $xmlobj->{View}->{ScrollView}->{VBoxView}->{View}->{MatrixView}->{VBoxView}->[0]->{View}->{MatrixView}->{VBoxView}->[0]->{VBoxView}->[1];
            };
            $treetmp = {} unless ref $treetmp eq 'HASH';

            #
            # review
            #

            eval {
                if ( $treetmp->{VBoxView}->{HBoxView} ) {
                    $tmp->{review_url} = $treetmp->{VBoxView}->{HBoxView}->[0]->{VBoxView}->{HBoxView}->[0]->{VBoxView}->[0]->{GotoURL}->{url};
                }
            };

            #
            # star
            #

            $tmp->{stars} = [];
            if ( ref $treetmp->{View} eq 'HASH' ) {
                # Same node for every rating row, so look it up once.
                my $s = eval { $treetmp->{View}->{View}->{View}->{VBoxView}->{Test} };
                for my $i ( 0 .. 4 ) {
                    # Three known page layouts (p1..p3) are tried; the
                    # value from TextView row $i is stored at index 4-$i.
                    if ( ref $s eq 'ARRAY' ) {
                        eval{$tmp->{stars}->[4-$i] = $s->[1]->{VBoxView}->[0]->{MatrixView}->{VBoxView}->[1]->{TextView}->[$i]->{SetFontStyle}->{content};}; # p1 obsolete ?
                    }
                    elsif ( ref $s eq 'HASH' ) {
                        eval{$tmp->{stars}->[4-$i] = $s->{VBoxView}->[1]->{MatrixView}->{VBoxView}->[1]->{TextView}->[$i]->{SetFontStyle}->{content};}; # p2
                        unless ( $tmp->{stars}->[4-$i] ) {
                            for ( keys %$s ) {
                                next unless ( ref $s->{$_} eq 'HASH' );
                                eval{$tmp->{stars}->[4-$i] = $s->{$_}->{VBoxView}->[0]->{MatrixView}->{VBoxView}->[1]->{TextView}->[$i]->{SetFontStyle}->{content};}; #p3
                                last if $tmp->{stars}->[4-$i];
                            }
                        }
                    }
                }
            }

            # Drop the key when no slot holds a true value (this also
            # covers the empty array).
            delete $tmp->{stars} unless grep { $_ } @{ $tmp->{stars} };

            $ret->{$app}->{$store} = $tmp;
        }
    }

    return $ret;
}


#
# for rank
#

# Rank of the app within its genre chart.
#
# Takes app => ID plus either info => HASHREF or store => KEY.
# This must remain a real subroutine frame: _get_rank inspects the name
# of its caller and adds the genre filter only when that name ends in
# "genre_rank".
sub genre_rank {
    my ( $self, @args ) = @_;

    return $self->_get_rank(@args);
}

# Rank of the app in the chart without a genre filter.
#
# Takes the same arguments as genre_rank.  _get_rank appends a genreId
# parameter only when its caller's name ends in "genre_rank", so calling
# it from here queries the unfiltered chart.
sub total_rank {
    my ( $self, @args ) = @_;

    return $self->_get_rank(@args);
}

# Build the chart URL for a price/device combination.
#
#   $price - a true value selects popId 30, a false one popId 27
#            (presumably paid vs. free charts -- confirm)
#   $ident - 'ipad' shifts the popId by 17 (47 / 44)
#
# Returns the URL as a string.
sub _rank_uri {
    my ( $self, $price, $ident ) = @_;

    # iphone 30:27, ipad 47:44
    my $pop_id = 27;
    $pop_id = 30 if $price;
    if ( $ident eq 'ipad' ) {
        $pop_id += 17;
    }

    return $self->{__URL_PREF} . 'viewTop?id=25209&popId=' . $pop_id;
}

# Shared implementation behind genre_rank and total_rank.
#
# Arguments (hash reference or flat list):
#   app   - app id to look for
#   info  - base info hash for that app; when absent it is fetched via
#           app_base_info and looked up by app and store
#   store - store key, only used when info is absent
#
# Returns the 1-based position of the app among the "salableAdamId"
# matches in the chart page, or undef when it is not listed.
sub _get_rank {
    my ( $self, @params ) = @_;

    my $opt = ref $params[0] eq 'HASH' ? $params[0] : {@params};

    # Name of the sub that called us; decides whether to filter by genre.
    my $invoker = ( caller(1) )[3];

    my $info = $opt->{info};
    unless ($info) {
        my $looked_up = $self->app_base_info($opt);
        $info = $looked_up->{ $opt->{app} }->{ $opt->{store} };
    }

    my $uri = $self->_rank_uri( $info->{price}, $info->{ident} );
    if ( $invoker =~ /genre_rank$/ ) {
        $uri .= '&genreId=' . $info->{genre_id};
    }

    my $xml  = $self->_get_xml( $uri, $info->{store_code}, $info->{lang} );
    my $dump = Dumper( $xml->{View}->{ScrollView}->{VBoxView}->{View} );

    # Walk the dumped structure line by line, counting app ids in order
    # of appearance until ours shows up.
    my $rank;
    my $seen = 0;
    for my $line ( split /\n+/, $dump ) {
        next unless $line =~ /salableAdamId=(\d+)/;
        $seen++;
        if ( $1 == $opt->{app} ) {
            $rank = $seen;
            last;
        }
    }

    return $rank;
}

#
# for reviews
#

# Fetch user reviews for one app in one store.
#
# Arguments (hash reference or flat list):
#   app   - app id
#   info  - base info hash for that app; when absent it is fetched via
#           app_base_info and looked up by app and store
#   store - store key, only used when info is absent
#
# The info hash supplies review_number (maximum number of reviews),
# review_order (written into the sortOrdering parameter), and optionally
# review_url.
#
# Returns an array reference of { message => ..., date => ... } hashes,
# at most review_number long.
sub app_reviews {
    my $self = shift;
    my @args = @_;

    my $args_ref = ref $args[0] eq 'HASH' ? $args[0] : {@args};
    my $ret = [];

    my $info;
    if ( $args_ref->{info} ) {
        $info = $args_ref->{info};
    }
    else {
        my $base_info = $self->app_base_info($args_ref);
        $info = $base_info->{ $args_ref->{app} }->{ $args_ref->{store} };
    }

    my $order = $info->{review_order};
    my $limit = $info->{review_number} || 0;
    my $uri = $info->{review_url} || $self->{__URL_PREF} . 'viewContentsUserReviews?pageNumber=0&type=Purple+Software&id='.$args_ref->{app}.'&sortOrdering='.$order;

    $uri =~ s|sortOrdering=\d+|sortOrdering=$order|x;

    # pagination
    if ( $uri =~ /(?:\?|&)pageNumber=\d+/ ) {
        # Every non-empty page adds at least one review, so $limit pages
        # are always enough.  Iterating over a fixed range also means the
        # loop terminates even if a page fetch bails out early, where the
        # old "while" could retry the same page forever.
        for my $page ( 0 .. $limit - 1 ) {
            # Stop as soon as enough reviews are collected.  The old test
            # used "<=" and fetched one page more than needed.
            last if scalar(@$ret) >= $limit;

            $uri =~ s|pageNumber=\d+|pageNumber=$page|;
            my $tmp = $self->_app_reviews($uri, $info->{store_code}, $info->{lang});
            last unless scalar(@$tmp);
            push @$ret, @$tmp;
        }
    }
    else {
        $ret = $self->_app_reviews($uri, $info->{store_code}, $info->{lang});
    }

    # Trim any overshoot from the last page.
    @$ret = splice @$ret, 0, $limit;

    return $ret;
}

# Fetch one page of reviews and convert it to a list of records.
#
#   $uri        - review page URL
#   $store_code - store front code, forwarded to _get_xml
#   $lang       - language digit, forwarded to _get_xml
#
# Returns an array reference of { message => ..., date => ... } hashes;
# empty when the expected node is neither a hash nor an array.
sub _app_reviews {
    my ( $self, $uri, $store_code, $lang ) = @_;

    my $xml   = $self->_get_xml( $uri, $store_code, $lang );
    my $nodes = $xml->{View}->{ScrollView}->{VBoxView}->{View}->{MatrixView}->{VBoxView}->[0]->{VBoxView}->{VBoxView};

    # A single review arrives as a hash, several as an array of hashes.
    my $shape = ref $nodes;
    my @items = $shape eq 'HASH'  ? ($nodes)
              : $shape eq 'ARRAY' ? @$nodes
              :                     ();

    my @reviews;
    for my $item (@items) {
        my ( $date, $message ) = $self->_get_review_message($item);
        push @reviews, { message => $message, date => $date };
    }

    return \@reviews;
}

# Extract the date and text of a single review node.
#
#   $args - hash reference for one review as produced by the XML parser
#
# Returns the list ($date, $message).  $date is undef when no date text
# is present.  A message split over several parts is joined with
# newlines.
sub _get_review_message {
    my ( $self, $args ) = @_;

    # Review text: either one string or a list of fragments.
    my $message = $args->{TextView}->{SetFontStyle}->{content};
    $message = join "\n", @$message if ref $message eq 'ARRAY';

    # The header line may be a string or a list; the last entry is used.
    my $header = $args->{HBoxView}->[1]->{TextView}->{SetFontStyle}->{content} || '';
    my $raw = ref $header eq 'ARRAY' ? $header->[-1] : $header;

    # The date is the final newline-plus-indent separated field.
    my $date;
    if ($raw) {
        chomp $raw;
        $date = ( split /\n\s+/, $raw )[-1];
    }

    return ( $date, $message );
}

#
# common
#

# Normalise and check the public arguments.
#
# Accepts a hash reference or a flat key/value list with:
#   app           - one numeric app id or an array reference of them (required)
#   store         - one store key or an array reference; all stores when omitted
#   lang          - digits, default 1
#   ident         - 'ipad', anything else becomes 'iphone'
#   review_number - digits, default 25
#   review_order  - digits, default 1
#
# Dies when app is missing or non-numeric, or when a store key is unknown.
#
# Returns a hash reference with the keys apps, stores, lang, ident,
# review_number and review_order.
sub _validate_args {
    my ( $self, @params ) = @_;

    my $opt = ref $params[0] eq 'HASH' ? $params[0] : {@params};

    # Use the supplied value when the key exists and holds only digits,
    # otherwise fall back to the default.
    my $digits_or = sub {
        my ( $key, $fallback ) = @_;
        return ( exists $opt->{$key} and $opt->{$key} =~ /^\d+$/ )
            ? $opt->{$key}
            : $fallback;
    };

    #
    # target apps
    #

    die 'app code MUST be needed' unless $opt->{app};

    my @app_ids = ref $opt->{app} eq 'ARRAY' ? @{ $opt->{app} } : ( $opt->{app} );
    for my $id (@app_ids) {
        die 'app code MUST be numerical: ', $id unless $id =~ m|^\d+$|;
    }

    #
    # target countries
    #

    my $stores;
    if ( $opt->{store} ) {
        my @requested = ref $opt->{store} eq 'ARRAY' ? @{ $opt->{store} } : ( $opt->{store} );
        for my $name (@requested) {
            my $key = lc $name;
            die 'cannot found appstore on "', $key, '"'
                unless exists $self->{__STORE_CODES}->{$key};
            $stores->{$key} = $self->{__STORE_CODES}->{$key};
        }
    }
    else {
        $stores = $self->{__STORE_CODES};
    }

    #
    # device identifier
    #

    my $ident = 'iphone';
    $ident = 'ipad' if exists $opt->{ident} and $opt->{ident} eq 'ipad';

    #
    # reviews order
    #     1..Most Helpful
    #     2..Most Favourable
    #     3..Most Critical
    #     4..Most Recent
    #

    return {
        apps          => [@app_ids],
        stores        => $stores,
        lang          => $digits_or->( 'lang',          1 ),
        ident         => $ident,
        review_number => $digits_or->( 'review_number', 25 ),
        review_order  => $digits_or->( 'review_order',  1 ),
    };
}

# Fetch a store URL and parse the XML response.
#
#   $uri   - URL to request
#   $store - store front code
#   $lang  - language digit; sent together with $store as the
#            "X-Apple-Store-Front" header ("<store>-<lang>")
#
# Returns the structure produced by XMLin, or undef (after a warning) when
# the request fails, the response is not XML, or the XML cannot be parsed.
#
# Failures used to be signalled with "next", which jumps out of this sub
# into whatever loop the caller happens to be running (and dies when there
# is none).  Returning undef lets each caller decide what to do.
sub _get_xml {
    my $self = shift;
    my ($uri,$store,$lang) = @_;

    $self->{ua}->default_header('X-Apple-Store-Front' => $store . '-' . $lang);
    my $res = $self->{ua}->get( $uri );

    # Error Check
    unless ( $res->is_success ) {
        warn 'request failed: ', $uri, ': ', $res->status_line, ': ', $store, '-', $lang;
        return;
    }

    # A response without a Content-Type header is treated as "not xml".
    my $content_type = $res->headers->header('Content-Type');
    $content_type = '' unless defined $content_type;
    unless ( $content_type =~ m|/xml| ) {
        warn 'content is not xml: ', $uri, ': ', $content_type, ': ', $store, '-', $lang;
        return;
    }

    local $XML::Simple::PREFERRED_PARSER = $self->{__XML_PREFERRED_PARSER};

    # XMLin dies on malformed input; report it like the other failures.
    my $xmlobj = eval { XMLin( $res->content ) };
    if ( $@ ) {
        warn 'cannot parse xml: ', $uri, ': ', $@;
        return;
    }

    return $xmlobj;
}

# Build the table of known stores.
#
# Returns a hash reference keyed by lower-case two-letter store key; each
# value is { name => display name, code => store front code }.
#
# Changes from the previous table:
#   * New Zealand is keyed "nz" (it was "nu", which is Niue's code).
#   * Spelling of "Australia" and "Philippines" corrected.
sub _init_countries {

    # [ store key, display name, store front code ]
    my @stores = (
        [ 'jp', 'Japan',                143462 ],
        [ 'us', 'United States',        143441 ],
        [ 'ar', 'Argentine',            143505 ],
        [ 'au', 'Australia',            143460 ],
        [ 'be', 'Belgium',              143446 ],
        [ 'br', 'Brazil',               143503 ],
        [ 'ca', 'Canada',               143455 ],
        [ 'cl', 'Chile',                143483 ],
        [ 'cn', 'China',                143465 ],
        [ 'co', 'Colombia',             143501 ],
        [ 'cr', 'Costa Rica',           143495 ],
        [ 'hr', 'Croatia',              143494 ],
        [ 'cz', 'Czech Republic',       143489 ],
        [ 'dk', 'Denmark',              143458 ],
        [ 'de', 'Germany',              143443 ],
        [ 'sv', 'El Salvador',          143506 ],
        [ 'es', 'Spain',                143454 ],
        [ 'fi', 'Finland',              143447 ],
        [ 'fr', 'France',               143442 ],
        [ 'gr', 'Greece',               143448 ],
        [ 'gt', 'Guatemala',            143504 ],
        [ 'hk', 'Hong Kong',            143463 ],
        [ 'hu', 'Hungary',              143482 ],
        [ 'in', 'India',                143467 ],
        [ 'id', 'Indonesia',            143476 ],
        [ 'ie', 'Ireland',              143449 ],
        [ 'il', 'Israel',               143491 ],
        [ 'it', 'Italia',               143450 ],
        [ 'kr', 'Korea',                143466 ],
        [ 'kw', 'Kuwait',               143493 ],
        [ 'lb', 'Lebanon',              143497 ],
        [ 'lu', 'Luxembourg',           143451 ],
        [ 'my', 'Malaysia',             143473 ],
        [ 'mx', 'Mexico',               143468 ],
        [ 'nl', 'Nederland',            143452 ],
        [ 'nz', 'New Zealand',          143461 ],
        [ 'no', 'Norway',               143457 ],
        [ 'at', 'Osterreich',           143445 ],
        [ 'pk', 'Pakistan',             143477 ],
        [ 'pa', 'Panama',               143485 ],
        [ 'pe', 'Peru',                 143507 ],
        [ 'ph', 'Philippines',          143474 ],
        [ 'pl', 'Poland',               143478 ],
        [ 'pt', 'Portugal',             143453 ],
        [ 'qa', 'Qatar',                143498 ],
        [ 'ro', 'Romania',              143487 ],
        [ 'ru', 'Russia',               143469 ],
        [ 'sa', 'Saudi Arabia',         143479 ],
        [ 'ch', 'Switzerland',          143459 ],
        [ 'sg', 'Singapore',            143464 ],
        [ 'sk', 'Slovakia',             143496 ],
        [ 'si', 'Slovenia',             143499 ],
        [ 'za', 'South Africa',         143472 ],
        [ 'lk', 'Sri Lanka',            143486 ],
        [ 'se', 'Sweden',               143456 ],
        [ 'tw', 'Taiwan',               143470 ],
        [ 'th', 'Thailand',             143475 ],
        [ 'tr', 'Turkey',               143480 ],
        [ 'ae', 'United Arab Emirates', 143481 ],
        [ 'uk', 'United Kingdom',       143444 ],
        [ 've', 'Venezuela',            143502 ],
        [ 'vn', 'Vietnam',              143471 ],
    );

    my %store_for;
    for my $row (@stores) {
        my ( $key, $name, $code ) = @$row;
        $store_for{$key} = { name => $name, code => $code };
    }

    return \%store_for;
}

1;

Comments:5

うめゆき 2010年6月19日 23:37

本題とそれますが、縦長のキャプチャってどうやってとってるんですの?

ひげまる Author Profile Page 2010年6月20日 01:48

ん? 縦長キャプチャって、どれのことですの?

うめゆき 2010年6月20日 10:00

609行ものプログラムの画面をどうやってとっているのかしらと思いまして。
コマンド+シフト+4 space なんですねー8-D

ひげまる Author Profile Page 2010年6月20日 11:14

あー、そういうことか
画像じゃないですよ? つ http://code.google.com/p/syntaxhighlighter/

うめゆき 2010年6月20日 16:13

お!参考になります。

Home > Perl > AppStore のレビューとか順位とか

Feeds

Return to page top