#!/bin/perl
#
# forward_scopus.pl: A script which follows forward references, i.e. it
# collects all the papers citing a given paper. A paper is identified by an
# exact match of its title (this title is obtained from PubMed).
#
# vamsik@engr.uconn.edu , Apr 15, 2008.
#

use LWP::UserAgent;
use HTTP::Cookies;
#$#ARGV == 1 or die("Usage: perl forward_scopus.pl <search term's file>");

#
# Create the SCOPUS request.
#
# A single user agent is shared by every request in this script. Passing an
# empty hash ref as the cookie jar asks LWP for a default, in-memory
# HTTP::Cookies object.
#
$ua = LWP::UserAgent->new('cookie_jar' => {});

# Disabled alternative: a cookie jar persisted to disk.
#   HTTP::Cookies->new(
#       'file' => 'scopus_cookies.txt',
#       'autosave' => 1,
#   );

# Browser-like headers sent along with the requests.
@ns_headers = (
	'User-Agent', 'Mozilla/4.76 [en] (Win98; U)',
	'Accept', 'image/gif, image/x-xbitmap, image/jpeg,image/pjpeg, image/png, */*',
	'Accept-Charset', 'iso-8859-1,*,utf-8',
	'Accept-Language', 'en-US',
);

# Allow redirects for POST in addition to the methods LWP already follows.
$ua->requests_redirectable([ @{ $ua->requests_redirectable }, 'POST' ]);


# The title to search for is mandatory: stop before touching the network
# rather than submitting a blank search.
unless(defined $ARGV[0] and length $ARGV[0]){
	die("Usage: perl forward_scopus.pl <title of the paper>\n");
}

#
# Fetch the SCOPUS search form. The fields it carries are sent back
# together with the query.
#
$search_form = "http://www.scopus.com/scopus/search/form.url";
my $res = $ua->get($search_form,@ns_headers);
$form_name = "BasicValidatedSearchForm";
$res->is_success or die("query_scopus: FAILED:".$res->status_line."\n");
#
# Get the post location
#
$base_url = "http://www.scopus.com";

# Keep a copy of the page in out.html and parse that copy line by line.
open(my $form_out, '>', "out.html") or die("Error:$!");
print {$form_out} $res->content;
close($form_out) or die("Error:$!");
# The conversion is best effort: on failure the page is parsed as saved.
system("dos2unix -l out.html") == 0
	or warn("dos2unix -l out.html did not succeed (status $?)\n");
open(my $form_in, '<', "out.html") or die("Error:$!");

# Collect every <input> of the page as a name => value pair.
%param_hash = ();
while(my $line = <$form_in>){
	# The action of the form called $form_name is where the search is posted.
	if($line =~ /<form name="([\w]+)" method="[\w]+" action="([^"]*)"/){
		my ($name,$action) = ($1,$2);
		if($name eq $form_name){
			$post_url = $base_url.$action;
		}
	}
	# An input is keyed by its id when it has one, otherwise by its name.
	# Both patterns expect the value attribute to follow on the same line,
	# and the value is taken verbatim (HTML entities are not decoded).
	if($line =~ /<input[^>]*? id="([^"]*)"[^>]*? value="([^"]*)"[^>]*?>/){
		$param_hash{$1} = $2;
	}elsif($line =~ /<input[^>]*? name="([^"]*?)"[^>]*? value="([^"]*?)"[^>]*?>/){ 
		$param_hash{$1} = $2;
	}
}
close($form_in);

# Without the form there is no URL to post the search to.
defined $post_url
	or die("query_scopus: FAILED: form $form_name not found in $search_form\n");

# Fill in the query itself: a title search with fixed subject and date limits.
$param_hash{"searchterm1"}= $ARGV[0];
$param_hash{"subjects"}="LFSC";
$param_hash{"dateType"}="Publication_Date_Type";
$param_hash{"yearFrom"}="1960";
$param_hash{"yearTo"}="Present";
$param_hash{"field1"}="TITLE";

#
# Submit the search form. The body is built by HTTP::Request::Common,
# which percent-encodes every key and value. Joining "key=value" pairs by
# hand would corrupt any title containing spaces, '&', '=' or '+'.
#
require HTTP::Request::Common;

print "URL:$post_url\n";
# A hash ref as second argument yields an
# application/x-www-form-urlencoded body; @ns_headers become request headers.
$post_req = HTTP::Request::Common::POST($post_url,\%param_hash,@ns_headers);
# Keep the encoded body in $content_string so what is sent can be inspected.
$content_string = $post_req->content;
print "POSTING THE CONTENT\n";
print "$content_string\n";

$res = $ua->request($post_req);

$res->is_success or die("Query to scopus failed:".$res->status_line."\n");

#
# Report where the search ended up. A failed response has already aborted
# the script, and LWP follows redirects on its own, so $res is the final
# page here. The redirects taken on the way are the responses chained
# behind it, so checking $res itself for a 302 or an error can never fire.
#
print "LOCATION:".$res->base();

foreach my $hop ($res->redirects){
	# The target of a redirect is carried by its Location header.
	my $loc = $hop->header("Location");
	$loc = "" unless defined $loc;
	print "\nRedirecting To URI location: $loc";
}
# Release OUT_FILE in case an earlier step left it open (no-op otherwise).
close(OUT_FILE);

# Keep a copy of the first result page and parse that copy line by line.
open(my $search_out, '>', "search_result.html") or die("Error:$!");
print {$search_out} $res->content;
close($search_out) or die("Error:$!");
# The conversion is best effort: on failure the page is parsed as saved.
system("dos2unix -l search_result.html") == 0
	or warn("dos2unix -l search_result.html did not succeed (status $?)\n");

open(my $search_in, '<', "search_result.html") or die("Error:$!");
print "\n";

# Start from an empty parameter set. Assigning {} here would instead store
# a stray "HASH(0x...)" key, which would then be posted with the form.
%param_hash = ();
#Create a New Content String...by reading search_results.html
while(my $line = <$search_in>){
	# An input is keyed by its id when it has one, otherwise by its name.
	# The value is taken verbatim (HTML entities are not decoded).
	if($line =~ /<input[^>]*? id="([^"]*)"[^>]*? value="([^"]*)"[^>]*?>/){
		$param_hash{$1} = $2;
	}elsif($line =~ /<input[^>]*? name="([^"]*?)"[^>]*? value="([^"]*?)"[^>]*?>/){ 
		$param_hash{$1} = $2;
	}
}
close($search_in);

# Ask for 200 results per page, with an explicit sort field.
$param_hash{"displayPerPageFlag"}="t";
$param_hash{"resultsPerPage"}="200";
$param_hash{"sortField"}="cp-f";

#initial search will show only 20 results repost to the following URL
$results_post = "http://www.scopus.com/scopus/results/handle.url";

# HTTP::Request::Common percent-encodes every key and value of the form;
# joining "key=value" pairs by hand would corrupt values containing
# spaces, '&', '=' or '+'.
require HTTP::Request::Common;
$post_req = HTTP::Request::Common::POST($results_post,\%param_hash,@ns_headers);
$content_string = $post_req->content;
$res = $ua->request($post_req);

$res->is_success or die("Query to scopus failed:".$res->status_line."\n");

# Save the current result page as new_result.html, the file the paging loop
# reads. A failure to write it is fatal: otherwise a stale or missing file
# would be parsed instead.
open(my $listing_out, '>', "new_result.html") or die("Error:$!");
print {$listing_out} $res->content;
close($listing_out) or die("Error:$!");
# The conversion is best effort: on failure the page is parsed as saved.
system("dos2unix -l new_result.html") == 0
	or warn("dos2unix -l new_result.html did not succeed (status $?)\n");
#
# First collect all the citations at depth 1
# and do a depth first search on all of these
#
# Only the collection is done here: every result page is scanned for
# "citedby.url" links, which are gathered in @first_level_citations.
#
@first_level_citations = ();
$level_1_count=0;
$has_more_citations=1;
$pages=1;
$results_post = "http://www.scopus.com/scopus/results/results.url";

# HTTP::Request::Common percent-encodes every key and value of the form;
# joining "key=value" pairs by hand would corrupt values containing
# spaces, '&', '=' or '+'.
require HTTP::Request::Common;

while($has_more_citations==1){
	$has_more_citations = 0;
	open(my $page_in, '<', "new_result.html") or die("Error:$!");
	# Start from an empty parameter set. Assigning {} here would instead
	# store a stray "HASH(0x...)" key, which would then be posted.
	%param_hash = ();
	while(my $line = <$page_in>){
		# An input is keyed by its id when it has one, otherwise by its name.
		if($line =~ /<input[^>]*? id="([^"]*)"[^>]*? value="([^"]*)"[^>]*?>/){
			$param_hash{$1} = $2;
		}elsif($line =~ /<input[^>]*? name="([^"]*?)"[^>]*? value="([^"]*?)"[^>]*?>/){ 
			$param_hash{$1} = $2;
		}
		# Citation reference: capture the quoted URL without its opening
		# quote, so the stored value is usable as a URL. At most one link
		# per line is recorded.
		if($line =~ /"([^"]*?citedby\.url[^"]*)"/){
			$cite_url = $1;
			$has_more_citations =1;
			$level_1_count++;
			push(@first_level_citations,$cite_url);
		}
	}
	# Close here so the handle is released on every exit path of the loop.
	close($page_in);
#Are we done?
	$total_hits = $param_hash{"count"};
	$read_till_now = $param_hash{"nextPageOffset"};
	print "PROCESSING RESULT OFFSET $read_till_now\n";
	if(!($read_till_now < $total_hits)){
		print "Done..";
		last;
	}
#Or if we dont have any more citations.
	if($has_more_citations == 0){
		last;
	}
#Build the content_string and get the next page
	$param_hash{"offset"}=(200*$pages);
#	$param_hash{"clickedLink"} = "nextPageButton";
	$param_hash{"prevPageOffset"} = $param_hash{"nextPageOffset"};
	$param_hash{"nextPageOffset"} = 200*($pages+1)+1;
#	$param_hash{"searchWithinResultsDefault"}="nextPage";
	$pages++;

	$post_req = HTTP::Request::Common::POST($results_post,\%param_hash,@ns_headers);
	$content_string = $post_req->content;
	$res = $ua->request($post_req);
	
	$res->is_success or die("Query to scopus failed:".$res->status_line."\n");

	# Replace new_result.html with the page just fetched for the next pass.
	open(my $page_out, '>', "new_result.html") or die("Error:$!");
	print {$page_out} $res->content;
	close($page_out) or die("Error:$!");
	# The conversion is best effort: on failure the page is parsed as saved.
	system("dos2unix -l new_result.html") == 0
		or warn("dos2unix -l new_result.html did not succeed (status $?)\n");
}

