Perl AliExpress 产品信息、SKU、销量信息提取,导出EXCEL

There's more than one way to do it!
https://metacpan.org http://perlmonks.org
回复
头像
523066680
Administrator
Administrator
帖子: 573
注册时间: 2016年07月19日 12:14
联系:

Perl AliExpress 产品信息、SKU、销量信息提取,导出EXCEL

帖子 523066680 »

用途:线上数百个产品,底价、海外仓库存设置各不相同,在线编辑非常繁琐。
同时平台营销活动的折扣率也需要针对性设置。因此需要一份完整的产品数据表,包含每个产品的ID、主图、最新的SKU、销量信息。

Login.pm 模块需要自行实现,使用 Mojo::UserAgent 登录。
=info
2019-07 平台新版本 在线SKU数据、销量数据整合 生成EXCEL表
523066680/vicyang
V2 预先收集所有数据,排序后导出EXCEL,含产品主图
=cut

use Encode;
use Modern::Perl;
use File::Slurp;
use Mojo::UserAgent;
use Mojo::JSON qw/encode_json decode_json/;
use Data::Dump qw/dd/;
use File::Basename;
use Date::Format;
use Spreadsheet::WriteExcel;
use Imager;
use FindBin;
use lib "$FindBin::Bin/../lib";
use Login;
use SkuDetail;
STDOUT->autoflush(1);

mkdir "img" unless -e "img";

my $ua = Mojo::UserAgent->new();
Login::init($ua);

our %sales = (
"22881" => "nancy",
"22988" => "alice",
"18586" => "bob",
"22876" => "lucy",
"nancy" => "22881",
"alice" => "22988",
"bob" => "18586",
"lucy" => "22876",
);

my $seller = "nancy";
my $sid = $sales{$seller};
my $url = "https://gsp-gw.aliexpress.com/openapi/param2/1/gateway.seller/api.product.manager.render.list";
my $template = '{"filter":{"queryGroup":null,"queryCategory":null,"queryOwner":{"text":"seller_name","value":"seller_id"},"queryRegionalPricing":null,"queryStock":null,"queryShippingTemplate":null,"querySelectInput":{"key":1,"value":""}},"pagination":{"current":current_page,"pageSize":50},"table":{"sort":{}},"tab":"online_product"}';
my %args = ( 'jsonBody' => undef );
$template =~s/seller_name/$seller/;
$template =~s/seller_id/$sid/;

my $today = time2str("%Y-%m-%d", time());
my $excel = "${today} ${seller}.xls";

mkdir $today unless -e $today;

my $pgcode = 1;
my $total = 1;
my $list = [];
my $res;

while ( $pgcode <= $total )
{
say "Current Page: $pgcode";
$args{jsonBody} = $template;
$args{jsonBody} =~s/current_page/$pgcode/;
$res = $ua->post( $url, form => \%args )->result;
say "false" unless $res->is_success();

# 返回的 JSON 中的数据节点经过镶嵌封装,需要再次解码
my $data = decode_json( utf8($res->json->{data}) );
my $node = $data->{table}{dataSource};
get_list( $node, $list );

$total = $data->{pagination}{pageShowCount};
$pgcode++;
}

# 写入 Excel
write_excel( $list, $excel );

sub get_list
{
my ($node, $ref) = @_;
say "Abstract Data from JSON ... ";
for my $e ( @$node )
{
printf "%s\n", $e->{productId};
my $info = {
'id' => $e->{productId},
'subject' => match( $e->{itemDesc}{desc}, "uiType", "link", "text" ),
'img' => $e->{itemDesc}{img},
'group' => $e->{group}{desc}[0]{text},
};
get_detail($info);
push @$ref, $info;
}
}

sub get_detail
{
my ($ref) = @_;

my $html;
my $file = $today ."/". $ref->{id} .".html";
if (-e $file) {
$html = read_file( $file );
} else {
$html = SkuDetail::get_html( $ua, $ref->{id} );
write_file( $file, {binmode=>':raw'}, $html );
}

my $data = SkuDetail::extract_data( $html );

my $orders = SkuDetail::get_orders( $data );
my $wish = SkuDetail::get_wish_count( $data );
my ($sku, $slen) = SkuDetail::get_sku( $data );

$ref->{'orders'} = $orders;
$ref->{'wish'} = $wish;
$ref->{'sku'} = $sku;
# sku = { 'country' => [[color, qty, price], [...] ] }
}


sub init_sheet
{
my ($book, $group, $fmt) = @_;
my $sheet = $book->add_worksheet($group);

$sheet->set_column(0, 0, 4);
$sheet->set_column(1, 1, 22);
$sheet->set_column(2, 2, 22);
$sheet->set_column(3, 3, 50, undef, 1); #url
$sheet->set_column(4, 4, 18); # image
$sheet->set_column(5, 5, 7.5); #orders
$sheet->set_column(6, 6, 22); #colors
$sheet->set_column(9, 9, 22); #colors
$sheet->set_column(12, 12, 22); #colors
grep { $sheet->set_column($_, $_, 8); } (7,8,10,11,13,14); # SKU
#grep { $sheet->set_row($_, 30) } ( 1 .. 200);

$sheet->write( 0, 1, "Group", $fmt->{center});
$sheet->write( 0, 2, "ProductID", $fmt->{center});
$sheet->write( 0, 3, "Pictures", $fmt->{center});
$sheet->write( 0, 5, "Orders", $fmt->{center});
$sheet->write( 0, 6, "CN", $fmt->{center});
$sheet->write( 0, 9, "ES", $fmt->{center});
$sheet->write( 0, 12, "RU", $fmt->{center});

#$sheet->autofilter("A1:D200");
#$sheet->autofilter(0, 0, 0, 8);
return $sheet;
}

sub write_excel
{
our (%group, @groups, %groups_key);
my ($list, $excel) = @_;
my $book = Spreadsheet::WriteExcel->new($excel);
my $sheet = {};

say "Export to Excel ...";

my %font = ( font => 'Arial', size => 12 );
my %bold = ( font => 'Arial', size => 12, bold => 1 );
my %fmt;
$fmt{merge} = $book->add_format( %font, valign=>'vcenter', align=>'center' );
$fmt{url} = $book->add_format( %font, valign => 'vcenter', align => 'left', underline => 1, color => "blue");
$fmt{mg_url} = $book->add_format( %font, valign => 'vcenter', align => 'center', underline => 1, color => "blue");
$fmt{left} = $book->add_format( %font, valign => 'vcenter' );
$fmt{center} = $book->add_format( %font, valign => 'vcenter', align => 'center' );
$fmt{left}->set_text_wrap();
$fmt{sku} = $book->add_format( font=>'Arial', size=>12, valign=>'vcenter', align=>'left', text_wrap=>1);

#format
my $row = {};
my $iter = {};
for my $g ( @groups ) {
$sheet->{$g} = init_sheet( $book, $g, \%fmt );
$row->{$g} = 1;
$iter->{$g} = 1;
}

my $image;
my $link;
my $shref;
my $row_add;
for my $e ( sort { $b->{orders} <=> $a->{orders} } @$list )
{
say $e->{id};
my $g = $e->{group};
next if (not exists $groups_key{$g}); # 只处理指定分组

$shref = $sheet->{ $g };

# 根据颜色数量判断
my $colors = scalar( @{$e->{sku}{CN}} );
my $height = int(100/$colors);
$height = 20 if ($height < 20);
my $ofst = 0;
for my $s ( sort { $a->[0] cmp $b->[0] } @{$e->{sku}{CN}} ) {
$shref->set_row($row->{$g} + $ofst, $height );
$ofst++;
}

$link = "https://aliexpress.com/item//". $e->{id} .".html";
if ( $colors > 1 ) {
$shref->merge_range( $row->{$g}, 0, $row->{$g}+$colors-1, 0, $iter->{$g}, $fmt{merge} );
$shref->merge_range( $row->{$g}, 1, $row->{$g}+$colors-1, 1, $g, $fmt{merge} );
$shref->merge_range( $row->{$g}, 2, $row->{$g}+$colors-1, 2, $e->{id}, $fmt{merge} );
$shref->merge_range( $row->{$g}, 3, $row->{$g}+$colors-1, 3, $link, $fmt{mg_url} );
$shref->merge_range( $row->{$g}, 4, $row->{$g}+$colors-1, 4, '', $fmt{merge} );
$shref->merge_range( $row->{$g}, 5, $row->{$g}+$colors-1, 5, $e->{orders}, $fmt{merge} );
} else {
$shref->write( $row->{$g}, 0, $iter->{$g}, $fmt{center});
$shref->write( $row->{$g}, 1, $g, $fmt{center});
$shref->write( $row->{$g}, 2, $e->{id}, $fmt{center});
$shref->write_url( $row->{$g}, 3, $link, $e->{id}, $fmt{url});
$shref->write( $row->{$g}, 5, $e->{orders}, $fmt{center});
}

#$shref->write_url( $row->{$g}, 2, $link, $e->{id}, $fmt{url});
$image = get_image( $e->{img}, 200 ); # file
$shref->insert_image($row->{$g}, 4, $image, 5, 1, 0.6, 0.6 );
#$shref->write( $row->{$g}, 4, $e->{subject}, $fmt{left});

my %pos = ( 'CN'=>6, 'ES'=>9, "RU"=>12 );
for my $ct ( keys %{$e->{sku}} )
{
$ofst = 0;
for my $s ( sort { $a->[0] cmp $b->[0] } @{$e->{sku}{$ct}} )
{
$shref->write( $row->{$g} + $ofst, $pos{$ct}+0, $s->[0], $fmt{center});
$shref->write( $row->{$g} + $ofst, $pos{$ct}+1, $s->[1], $fmt{center});
$shref->write( $row->{$g} + $ofst, $pos{$ct}+2, $s->[2], $fmt{center});
$ofst++;
}
}

$row->{$g} += $colors;
$iter->{$g} ++;
}
$book->close();
}

sub get_image
{
my ($url, $pixel) = @_;

my $file = "./img/". basename($url);
my $res;

unless ( -e $file )
{
$res = $ua->get($url)->result;
say "get image false" unless ( $res->is_success() );
write_file( $file, {binmode=>'raw'}, $res->body );
}

my $read_image = Imager->new;
my $img = $read_image->read( file => $file );
my ($h, $w) = ( $img->getheight(), $img->getwidth() );
return $file if ( $w <= $pixel and $h <= $pixel );

say $file;
my $scale = $h > $w ? "ypixels" : "xpixels";
my $modify = $img->scale( $scale => $pixel );
$modify->write( file => $file );
return $file;
}

sub match
{
my ( $arr, $key, $value, $item ) = @_;
for my $e ( @$arr ) {
return $e->{$item} if ( exists $e->{$key} and $e->{$key} =~ /$value/ );
}
return "NULL";
}

sub gbk { encode('gbk', $_[0]); }
sub utf8 { encode('utf8', $_[0]); }
sub u2gbk { encode('gbk', decode('utf8', $_[0])); }

BEGIN
{
use Storable qw/retrieve/;
our %group = (
"509608" => "model1",
"509420" => "model2",
"515657" => "model3",
"515546" => "model4",
"508982" => "model5",
"509310" => "model6",
"510063" => "model7",
);

our @groups = (
"model1", "model2", "model3", "model4", "model5", "model6", "model7",
);

our %groups_key = ( map { $_ => 1 } @groups );
our $COUNTRY = retrieve("CountryName_EN2CN.perldb");
$COUNTRY->{'China'} = ['中国', 'CN'];
}
回复

在线用户

正浏览此版面之用户: Ahrefs [Bot] 和 0 访客