#!/usr/bin/perl# massage B.E. ciders XML files# MUST RUN ON SOLARIS TO DEAL WITH UNICODE ISSUESprint "\ncdl-ciders v1.1.0\n\n";print "C F Baum  Apr 2002\n\n";use LWP::Simple;use Text::Wrap;$str = get("http://repositories.cdlib.org/cgi/oai.cgi?verb=ListRecords&metadataPrefix=repec&set=publication:6369646572");$out[0]=">ciders.rdf";$jnl[0]="Center for International and Development Economics Research, Working Paper Series";$han[0]="ciders";open(OUTA,$out[0]) || die "Cannot open $outa for output";$nrtem=0;$str =~ s/\n/ /ig;$str =~ s/\r/ /ig;$str =~ s/&quot;/"/ig;$str =~ s/&amp;/&/ig;$str =~ s/&#8220;/"/ig;$str =~ s/&#8221;/"/ig;$str =~ s/&#8211;/:/ig;$str =~ s/&#8212;/--/ig;$str =~ s/^.+?<record>//;(@str) = split("<record>",$str);$nstr=@str;print "\n $nstr articles...\n\n";$debug=1;foreach $str (@str) {	$str=&utf8_to_iso8859($str);	($id) = ($str =~ /<identifier>oai:cdlib1:(.+?)<\/identifier/);	($credt) = ($str =~ /<datestamp>(.+?)<\/datestamp/);	($pubyr) = ($credt =~ /^(\d\d\d\d)/);	($ti) = ($str =~ /<ti>(.+?)<\/ti/);	(@au) = ($str =~ /<au>(.+?)<\/au/ig);	$nau = @au;	($ab) = ($str =~ /<ab>(.+?)<\/ab/);	($jnl) = ($str =~ /<jrnti>(.+?)<\/jrnti/);	($vol) = ($str =~ /<vol>(.+?)<\/vol/);#	($iss) = ($str =~ /<iss>(.+?)<\/iss/);	($ppf) = ($str =~ /<ppf>(.+?)<\/ppf/);	($ppl) = ($str =~ /<ppl>(.+?)<\/ppl/);	($url) = ($str =~ /<url>(.+?)<\/url/);	$wh = 0;	$jj="A";#	print "$id $jnl \n";	(@kw) = ($str =~ /<kwd>(.+?)<\/kwd/ig);	$nkw = @kw;	($jel) = ($str =~ /<categ>(.+?)<\/categ/);	$oo = "OUT".$jj;	$jt = $jnl[$wh];	$hl = $han[$wh];	print $oo "\nTemplate-Type: ReDIF-Paper 1.0\n";	print $oo "Title: $ti\n";	foreach $i (@au) {		($f) = ($i =~ m/fnm>(.+?)<\/fnm/ig);		($l) = ($i =~ m/snm>(.+?)<\/snm/ig);		($afl) = ($i =~ m/affl>(.+?)<\/affl/ig);	print $oo "Author-Name: $f $l \n";	print $oo "Author-Workplace-Name: $afl \n";	}	if (length $ab > 1) {		print $oo  "Abstract:\n";		print $oo  wrap(" "," ",$ab);		print $oo "\n";	}	if ($nkw > 0) {		print $oo "Keywords: ";		foreach $k (@kw) {			print $oo "$k, ";			}		print $oo "\n";	}	print $oo "Classification-JEL: $jel \n";	print $oo "Series: $jt \n";	print $oo "Number: $vol$ppf \n";    print $oo "Creation-Date: $credt \n";#	print $oo "Volume: $vol \n";#	print $oo "Pages: $ppf-$ppl \n";	print $oo "Note: oai:cdlib1:$id\n";	print $oo "File-URL: $url \n";	print $oo "File-Format: application/pdf \n";	print $oo "Handle: RePEc:cdl:$hl:$vol$ppf \n";	$nrtem++;}print " \n$nrtem templates processed... \n\n";exit;sub utf8_to_iso8859{# Converts UTF-8 as long as it just encodes ISO-8859-1	my ($str) = @_;	my $retval;	my @chars = split(//,$str);	while (@chars) {		my $ch = shift(@chars);		my $val1 = ord($ch);		if($val1 >= 128) {			# XXXX could add error checking to make sure there's a char available.			my $val2 = ord(shift(@chars));			$val1 = ($val1 & 31) << 6;			$val2 &= 63;			my $realch = chr($val1 + $val2);			print STDERR "Hello: converted upper ascii char $realch(", ord($realch), ")\n" if ($debug);			$retval .= $realch;		} else {			$retval .= $ch;		}	}	return $retval;}__END__