משתמש:Ori229/תוכנית פרל

###############################################
# Upload of tanach.us in he.wikisource.org (by ori229)
###############################################
use strict;
use warnings;
use LWP::Simple;
use LWP::UserAgent;
use HTTP::Request;
use HTTP::Response;
use HTTP::Cookies;
use Encode qw(encode);
use utf8;

######################### data ###################
my %books;
$books{'Amos'} = 'עמוס';$books{'Chronicles_1'} = 'דברי הימים א';$books{'Chronicles_2'} = 'דברי הימים ב';
$books{'Daniel'} = 'דניאל';$books{'Deuteronomy'} = 'דברים';$books{'Ecclesiastes'} = 'קהלת';
$books{'Esther'} = 'אסתר';$books{'Exodus'} = 'שמות';$books{'Ezekiel'} = 'יחזקאל';
$books{'Ezra'} = 'עזרא';$books{'Genesis'} = 'בראשית';$books{'Habakkuk'} = 'חבקוק';
$books{'Haggai'} = 'חגי';$books{'Hosea'} = 'הושע';$books{'Isaiah'} = 'ישעיה';
$books{'Jeremiah'}        = 'ירמיהו';$books{'Job'} = 'איוב';$books{'Joel'} = 'יואל';
$books{'Jonah'}        = 'יונה';$books{'Joshua'} = 'יהושע';$books{'Judges'} = 'שופטים';
$books{'Kings_1'} = 'מלכים א';$books{'Kings_2'} = 'מלכים ב';$books{'Lamentations'} = 'איכה';
$books{'Leviticus'} = 'ויקרא';$books{'Malachi'} = 'מלאכי';$books{'Micah'} = 'מיכה';
$books{'Nahum'} = 'נחום';$books{'Nehemiah'} = 'נחמיה';$books{'Numbers'} = 'במדבר';
$books{'Obadiah'} = 'עובדיה';$books{'Proverbs'}        = 'משלי';$books{'Psalms'}        = 'תהילים';
$books{'Ruth'} = 'רות';$books{'Samuel_1'} = 'שמואל א';$books{'Samuel_2'} = 'שמואל ב';
$books{'Song_of_Songs'} = 'שיר השירים';$books{'Zechariah'} = 'זכריה';$books{'Zephaniah'} = 'צפניה';
           
	  my $Etnahta=pack "C2",0xD6,0x91;      my $Segol=pack "C2",0xD6,0x92;  
    my $Shalshelet=pack "C2",0xD6,0x93;    my $Zaqef_Qatan=pack "C2",0xD6,0x94;  
    my $Zaqef_Gadol=pack "C2",0xD6,0x95;      my $Tipeha=pack "C2",0xD6,0x96;  
    my $Revia=pack "C2",0xD6,0x97;      my $Zarqa=pack "C2",0xD6,0x98;  
    my $Pashta=pack "C2",0xD6,0x99;      my $Yetiv=pack "C2",0xD6,0x9A;  
    my $Tevir=pack "C2",0xD6,0x9B;      my $Geresh=pack "C2",0xD6,0x9C;  
    my $Geresh_Muqdam=pack "C2",0xD6,0x9D;      my $Gershayim=pack "C2",0xD6,0x9E;  
    my $Qarney_Para=pack "C2",0xD6,0x9F;      my $Telisha_Gedola=pack "C2",0xD6,0xA0;  
	  my $Paser=pack "C2",0xD6,0xA1;      #my $reserved=pack "C2",0xD6,0xA2; ####### not relevant
    my $Munah=pack "C2",0xD6,0xA3;      my $Makapakh=pack "C2",0xD6,0xA4;  
    my $Merkha=pack "C2",0xD6,0xA5;      my $Merkha_Kefula=pack "C2",0xD6,0xA6;  
    my $Darga=pack "C2",0xD6,0xA7;      my $Qadma=pack "C2",0xD6,0xA8;  
    my $Telisha_Qetana=pack "C2",0xD6,0xA9;     my $Yerah_ben_Yomo=pack "C2",0xD6,0xAA;  
    my $Ole=pack "C2",0xD6,0xAB;      my $Iluy=pack "C2",0xD6,0xAC;  
    my $Dehi=pack "C2",0xD6,0xAD;      my $Zinor=pack "C2",0xD6,0xAE;  
    #my $Masora_Circle=pack "C2",0xD6,0xAF; ####### not relevant
    my $Meteg=pack "C2",0xD6,0xBD;    my $Paseq=pack "C2",0xD7,0x80;

    my $patach=pack "C2",0xd6,0xb7;    my $kamatz=pack "C2",0xd6,0xb8;
    my $chirik=pack "C2",0xd6,0xb4;        my $holam =pack "C2",0xd6,0xb9;
    my $dagesh =pack "C2",0xd6,0xbc;    my $right_shin =pack "C2",0xd7,0x81;
    my $left_shin =pack "C2",0xd7,0x82;
    
    my $aleph=pack "C2",0xd7,0x90; my $bet=pack "C2",0xd7,0x91; my $gimel=pack "C2",0xd7,0x92;
    my $dalet=pack "C2",0xd7,0x93; my $he=pack "C2",0xd7,0x94;  my $vav=pack "C2",0xd7,0x95;
    my $zain=pack "C2",0xd7,0x96; my $het=pack "C2",0xd7,0x97;  my $tet=pack "C2",0xd7,0x98;
    my $yud=pack "C2",0xd7,0x99; my $final_kaf=pack "C2",0xd7,0x9a; my $kaf=pack "C2",0xd7,0x9b;  
    my $lamed=pack "C2",0xd7,0x9c;  my $final_mem=pack "C2",0xd7,0x9d; my $mem=pack "C2",0xd7,0x9e;
    my $final_nun=pack "C2",0xd7,0x9f; my $nun=pack "C2",0xd7,0xa0;  my $samech=pack "C2",0xd7,0xa1;
    my $ayin=pack "C2",0xd7,0xa2; my $final_pe =pack "C2",0xd7,0xa3; my $pe=pack "C2",0xd7,0xa4;
    my $final_tazdik=pack "C2",0xd7,0xa5; my $tzadik=pack "C2",0xd7,0xa6; my $kuf=pack "C2",0xd7,0xa7;
    my $reysh=pack "C2",0xd7,0xa8;  my $shin=pack "C2",0xd7,0xa9;
    my $tav=pack "C2",0xd7,0xaa;
##################################################

my $book; my $perek=''; my $pasuk=''; my $pasuk_number=0; my $add_after_pasuk='nothing';
my @responses; my $page=''; my $in_file =''; my $mahadura; my $type;
# 
my $out_file = "ready4upload.txt";
open( OUT_F, ">$out_file") or die "Cannot open $out_file as output\n" ;
#make_a_book('Amos');
#make_a_book('Chronicles_1');
#make_a_book('Chronicles_2');
#make_a_book('Daniel');
#make_a_book('Deuteronomy');
#make_a_book('Ecclesiastes');
#make_a_book('Esther');
#make_a_book('Exodus');
#make_a_book('Exodus.DH');
#make_a_book('Ezekiel');
#make_a_book('Ezra');
#make_a_book('Genesis');
#make_a_book('Habakkuk');
#make_a_book('Haggai');
#make_a_book('Hosea');
#make_a_book('Isaiah');
#make_a_book('Jeremiah');
#make_a_book('Job');
#make_a_book('Joel');
#make_a_book('Jonah');
make_a_book('Joshua');
#make_a_book('Judges');
#make_a_book('Kings_1');
#make_a_book('Kings_2');
#make_a_book('Lamentations');
#make_a_book('Leviticus');
#make_a_book('Malachi');
#make_a_book('Micah');
#make_a_book('Nahum');
#make_a_book('Nehemiah');
#make_a_book('Numbers');
#make_a_book('Obadiah');
#make_a_book('Proverbs');
#make_a_book('Psalms');
#make_a_book('Ruth');
#make_a_book('Samuel_1');
#make_a_book('Samuel_2');
#make_a_book('Song_of_Songs');
#make_a_book('Tanach');
#make_a_book('Zechariah');
#make_a_book('Zephaniah');
close (OUT_F);

exit;
##################################################################################



################################################################
sub make_a_book {  
  my $file_name = shift;
  $book=$books{$file_name};
  $in_file = 'us/books/'.$file_name.'.xml';
  $mahadura='ניקוד'; $type='main'; make_a_page();
  $mahadura='ניקוד'; $type='normal'; make_a_page();
  $mahadura='ניקוד'; $type='lines'; make_a_page();
  $mahadura='טעמים'; $type='main'; make_a_page();
  $mahadura='טעמים'; $type='normal'; make_a_page();
  $mahadura='טעמים'; $type='lines'; make_a_page();
  #$mahadura='כתיב'; $type='normal'; make_a_page();
  #$mahadura='כתיב'; $type='lines'; make_a_page();
}

################################################################
sub make_a_page {  
  $type = 'main' if $mahadura eq 'ביאור';
  $page='';

  print_func ( "#####"."$book $mahadura");
  print_func ( " מסומן") if $type eq 'normal';
  print_func ( " מסומן בשורות") if $type eq 'lines'; 
  print_func ( "\n");

  print_func ( "{{כותרת עליונה תנך עם טעמים|$book}}\n") if $mahadura eq 'טעמים';
  print_func ( "{{כותרת עליונה תנך עם ניקוד|$book}}\n") if $mahadura eq 'ניקוד';
  print_func ( "{{כותרת עליונה תנך ללא ניקוד|$book}}\n") if $mahadura eq 'כתיב';
  
  open( IN_F, $in_file) or die "Cannot open $in_file as input\n" ;
  while (<IN_F>) {
    chomp;
    my $line = $_;
    process_line($line);
  }
  close (IN_F);
  
  print_func ( "\n{{כותרת תחתונה תנך עם טעמים|$book}}\n") if $mahadura eq 'טעמים';
  print_func ( "\n{{כותרת תחתונה תנך עם ניקוד|$book}}\n") if $mahadura eq 'ניקוד';
  print_func ( "\n{{כותרת תחתונה תנך ללא ניקוד|$book}}\n") if $mahadura eq 'כתיב';
  print_func ( "ENDOFFILE\n");
}

################################################################
sub print_func {
  my $line = shift;
  print OUT_F $line;
}


################################################################
sub process_line {
  my $line = shift;
  #################### פרק
  if ($line =~ /<c n="(\d+)">/) {
    $perek = number2hebrew($1);
    if ($type eq 'main') {
        print_func ( "{{אות-פרק-שקוף|" . $book .'|' . $perek . "}}");
    } else {
        print_func ( "{{אות-פרק|" . $book .'|' . $perek . "}}");
    }
    print_func ( "<BR/>") if $type eq 'lines';
    print_func ( "<section begin=פרק $perek/>\n");
  }
  if ($line =~ /<\/c>/) {
    print_func ( "<section end=פרק $perek/>");
    #print_func ( "\n");
  }

  ################################## פסוק
  if ($line =~ /<v n="(\d+)">/) {
    $pasuk_number = $1;
    $pasuk = number2hebrew($pasuk_number);
    if ($type eq 'main') {
        print_func ( "{{אות-פסוק-שקוף|" . $book .'|' . $perek .'|'. $pasuk . "}}\n");
    } else {
        print_func ( "{{אות-פסוק|" . $book .'|' . $perek .'|'. $pasuk . "}}\n");
    }
    print_func ( "<section begin=$perek$pasuk_number/>") if $type eq 'main' && $mahadura eq 'ניקוד';
    print_func ( "<section begin=$perek $pasuk/>") if $type eq 'main';
    print_func ( "{{#section:$book $mahadura|$perek $pasuk}}\n") if $type ne 'main';
  }  
  if ($line =~ /<\/v>/) {
    print_func ( "<section end=$perek$pasuk_number/>") if $type eq 'main' && $mahadura eq 'ניקוד';
    print_func ( "<section end=$perek $pasuk/>\n") if $type eq 'main';
    if ($add_after_pasuk eq 'samekh') {
      print_func ( "{{ס}} ")  if $type eq 'main';
      print_func ( "{{ס}} ")  if $type eq 'normal';
      print_func ( "{ס} ")  if $type eq 'lines';
    }
    if ($add_after_pasuk eq 'pe') {
      print_func ( "{{פ}} ")  if $type eq 'main';
      print_func ( "{{פ}} ")  if $type eq 'normal';
      print_func ( "{פ} ")  if $type eq 'lines';
    }
    $add_after_pasuk = 'nothing';
    print_func ( "<BR/>") if $type eq 'lines';
  }
    
  ########################################### מילה
  if ($line =~ /<([wkq])>(.*?)<\/[wkq]>/) {  # w=word  k=ktiv  q=kri
    my $kind = $1;
    my $word = $2;
    $word =~ s|<x>.</x>||g;
    $word =~ s|<s t='small'>|<small>|g;$word =~ s|</s>|<end_small>|g;   #<s t='small'>ן</s>
    $word=~s|/||g;  $word=~s|end_|/|g;
    $word=~s| ||g; 
    
    if ($mahadura eq 'ביאור') {    
      $word = "$word," if $word=~/$Etnahta/;
      $word = "<small>($word)</small> "     if $kind eq 'k';
    } else {
      $word = "[$word] "                  if $kind eq 'q';
      $word = "<small>$word</small> "     if $kind eq 'k';
    }
    $word = "$word "                    if $kind eq 'w';
     
    my $makaf=pack "C2",0xd6,0xbe;     # instead of $word=~s|־| |g;
    $word=~s|$makaf |$makaf| if $mahadura eq 'טעמים';
    $word=~s|$makaf | |      if $mahadura ne 'טעמים';$word=~s|$makaf|| if $mahadura ne 'טעמים';
    
    my $sof_pasuk=pack "C2",0xd7,0x83; # instead of $word=~s|׃||g;
    $word=~s|$sof_pasuk |$sof_pasuk|  if $mahadura eq 'טעמים';
    $word=~s|$sof_pasuk\]|]$sof_pasuk| if $mahadura eq 'טעמים'; #in case of ktiv in last word
    $word=~s|$sof_pasuk ?|.|           if $mahadura ne 'טעמים'; $word=~s|\.\]|].| if $mahadura ne 'טעמים';
    
    $word = remove_teamim($word) if $mahadura ne 'טעמים';
    
    #$word=~s|([$patach$kamatz])$chirik|$1\{\{ZWJ\}\}$chirik|;  # Jerusalem    
    $word=~s|([$patach$kamatz])$chirik|$1\{\{HIRIK WITHOUT LETTER\}\}|;  # Jerusalem    
    #$word=~s|([^$dagesh$right_shin$left_shin$aleph$bet$gimel$dalet$he$vav$zain$het$tet$yud$kaf$lamed$mem$nun$samech$ayin$pe$tzadik$kuf$reysh$shin$tav$Etnahta$Segol$Shalshelet$Zaqef_Qatan$Zaqef_Gadol$Tipeha$Revia$Zarqa$Pashta$Yetiv$Tevir$Geresh$Geresh_Muqdam$Gershayim$Qarney_Para$Telisha_Gedola$Paser$Munah$Makapakh$Merkha$Merkha_Kefula$Darga$Qadma$Telisha_Qetana$Yerah_ben_Yomo$Ole$Iluy$Dehi$Zinor$Meteg$Paseq])$vav$holam|$1$vav\{\{ZWJ\}\}$holam|g;  # holam haser on vav
    $word=~s|([^$dagesh$right_shin$left_shin$aleph$bet$gimel$dalet$he$vav$zain$het$tet$yud$kaf$lamed$mem$nun$samech$ayin$pe$tzadik$kuf$reysh$shin$tav$Etnahta$Segol$Shalshelet$Zaqef_Qatan$Zaqef_Gadol$Tipeha$Revia$Zarqa$Pashta$Yetiv$Tevir$Geresh$Geresh_Muqdam$Gershayim$Qarney_Para$Telisha_Gedola$Paser$Munah$Makapakh$Merkha$Merkha_Kefula$Darga$Qadma$Telisha_Qetana$Yerah_ben_Yomo$Ole$Iluy$Dehi$Zinor$Meteg$Paseq])$vav$holam|$1\{\{VAV WITH HOLAM HASER\}\}|g;  # holam haser on vav
    
    print_func ( $word) if $type eq 'main';
  }
  
  ########################################### פרשה פתוחה וסגורה
  if ($line =~ /<samekh\/>/) {
    if ($mahadura eq 'ביאור' ) {
      print_func ( "{{ביאור:פרשה סגורה}}");
    } else {
      $add_after_pasuk = 'samekh';
    }
  }
  if ($line =~ /<pe\/>/) {
    if ($mahadura eq 'ביאור' ) {
      print_func ( "{{ביאור:פרשה פתוחה}}");
    } else {
      $add_after_pasuk = 'pe';
    }
  }
  
}

################################################################
################################################################
sub remove_teamim {
	my $word = shift;   
    $word =~ s/$Etnahta//g;$word =~ s/$Segol//g;$word =~ s/$Shalshelet//g;$word =~ s/$Zaqef_Qatan//g;
    $word =~ s/$Zaqef_Gadol//g;$word =~ s/$Tipeha//g;$word =~ s/$Revia//g;$word =~ s/$Zarqa//g;
    $word =~ s/$Pashta//g;$word =~ s/$Yetiv//g;$word =~ s/$Tevir//g;$word =~ s/$Geresh//g;
    $word =~ s/$Geresh_Muqdam//g;$word =~ s/$Gershayim//g;$word =~ s/$Qarney_Para//g;$word =~ s/$Telisha_Gedola//g;
    $word =~ s/$Paser//g;$word =~ s/$Munah//g;$word =~ s/$Makapakh//g;$word =~ s/$Merkha//g;$word =~ s/$Merkha_Kefula//g;
    $word =~ s/$Darga//g;$word =~ s/$Qadma//g; $word =~ s/$Telisha_Qetana//g;$word =~ s/$Yerah_ben_Yomo//g;
    $word =~ s/$Ole//g;$word =~ s/$Iluy//g;$word =~ s/$Dehi//g;$word =~ s/$Zinor//g; $word =~ s/$Paseq//g;
    $word =~ s/$Meteg//g; # in last word in pasuk
	return $word;
}
	
################################################################
# גמטריה ממספרים לאותיות - נכתב על ידי אראל סגל
sub number2hebrew {
	my $num = shift;   
	my $heb = "";
	my @letters1 = split(//,'אבגדהוזחטי');
  my @letters2 = split(//,'יכלמנסעפצק');
  my @letters3 = split(//,'קרשת');
	while ($num > 400) {
		$heb .= "ת";
		$num -= 400;
	}
	if ($num >= 100) {
		$heb .= $letters3[ ($num / 100) - 1 ];
		$num %= 100;
	}
	if ($num >= 10) {
		if ($num == 15) {
			$heb .= "טו";
			$num = 0;
		}
		elsif ($num == 16) {
			$heb .= "טז";
			$num = 0;
		}
		else {
			$heb .= $letters2[ ($num / 10) - 1 ];
			$num %= 10;
		}
	}
	if ($num >= 1) {
		$heb .= $letters1[ $num - 1 ];
	}
	return $heb;
}