<?php
if (!defined("INC_FB2_PARSER_H")){
   define("INC_FB2_PARSER_H", TRUE);

// Node type codes stored in the 'type' field of parsed nodes.
// Text run between tags (created by fb2p_new_string_tag).
define("FB2_TT_STRING", 0x00000000);
// Opening tag; the default type assigned by fb2p_parse_tag_match.
define("FB2_TT_OPEN", 0x00000001);
// Closing tag (a slash right after the opening angle bracket).
define("FB2_TT_CLOSE", 0x00000002);
// Self-closing tag (a slash right before the closing angle bracket).
define("FB2_TT_SINGLE", 0x00000003);
// Declaration-style tag introduced with a question mark, e.g. the xml header.
define("FB2_TT_XML", 0x00000004);

// Tag names treated as "known". The list is turned into the lookup table
// $GLOBALS['FB2_Hash'] below, which fb2p_data2struct consults for its
// skip_unknown_tags / cut_unknown_tags options.
$GLOBALS['FB2_Elements']=array('xml', 'FictionBook','a','annotation','author','binary','body','book-name',
'book-title','cite','city','code','coverpage','custom-info','date','description','document-info','email',
'emphasis','empty-line','epigraph','first-name','genre','history','home-page','id','isbn','image','keywords',
'lang','last-name','middle-name','nickname','output-document-class','output','p','part','poem','program-used',
'publish-info','publisher','section','sequence','src-lang','src-ocr','src-title-info','src-url','stanza',
'strikethrough','strong','style','stylesheet','sub','subtitle','sup','table','td','text-author','th','title',
'title-info','tr','translator','v','version','year');

/**
 * Build a lookup table (lower-cased name => true) from a list of element names.
 *
 * The list is taken from the $els argument; previously the argument was
 * ignored and $GLOBALS['FB2_Elements'] was always read instead.
 *
 * @param array $els list of element names
 * @return array map of lower-cased names to true; empty when $els is not an array
 */
function fb2p_els2hash($els) {
  $hash=array();
  if (!is_array($els)) return $hash;
  foreach ($els as $el) $hash[strtolower($el)]=true;
  return $hash;
}

/**
 * Return the PCRE pattern that matches one tag.
 *
 * Capture groups: 1 - optional leading question mark, 2 - optional leading
 * slash (closing tag), 3 - tag name, 4 - optional attribute text (must start
 * with whitespace), 5 - optional trailing slash (self-closing tag),
 * 6 - back-reference to group 1, so a declaration must end the way it began.
 * Both the name and the attribute text are unbounded in length.
 *
 * @return string case-insensitive, dot-matches-newline pattern
 */
function fb2p_get_tag_pattern() {
   $name='[a-z\.\:\-\_\d]+';
   $content='[\s]+[^\<\>]*?';
   $groups=array(
      '([\?]?)',
      '([\/]?)',
      '('.$name.')',
      '('.$content.')?',
      '([\/]?)',
      '(\1)',
   );
   return '/\<'.implode('', $groups).'\>/is';
}

/**
 * Return the PCRE pattern that matches one name=value attribute.
 *
 * Capture groups: 1 - attribute name, 2 - opening quote, 3 - quoted value,
 * 4 - closing quote (same character as group 2), 5 - unquoted value.
 * Group 5 only takes part in the match for unquoted values.
 *
 * The name may not contain '='. Without that restriction the greedy name
 * swallowed everything up to the last '=' of a quoted value (for example a
 * URL with a query string), producing a wrong name and a wrong value.
 *
 * @return string case-insensitive, dot-matches-newline pattern
 */
function fb2p_get_tag_attr_pattern() {
   return '/([^\s=]+)\=(?:([\\\'\"]{1})(.*?)(\2)|([^\s]+))/is';
}

// Values computed once at include time and shared through $GLOBALS:
// the known-tag lookup table and the two regular expressions used by the
// tag and attribute parsers.
$GLOBALS['FB2_Hash']=fb2p_els2hash($GLOBALS['FB2_Elements']);
$GLOBALS['FB2_TagPattern']=fb2p_get_tag_pattern();
$GLOBALS['FB2_TagAttrPattern']=fb2p_get_tag_attr_pattern();

/**
 * Write a diagnostic message to STDERR and flush it immediately.
 *
 * The message is written verbatim. It used to be passed to fprintf as the
 * format string, so any '%' coming from tag names or the caller's suffix was
 * interpreted as a conversion specification.
 *
 * @param string $str message text, including any trailing newline
 */
function fb2p_pr_err($str) {
   fwrite(STDERR, (string)$str);
   fflush(STDERR);
}

/**
 * Decode HTML entities (cp1251 charset), trim the text and strip one pair of
 * matching single or double quotes that encloses the whole value.
 *
 * @param string $name raw text
 * @return string cleaned text; trimmed again after the quotes are removed
 */
function fb2p_unquot_name($name) {
  $decoded=html_entity_decode($name, ENT_QUOTES, 'cp1251');
  $text=trim($decoded);
  $quoted=preg_match('/^([\\\'\"]{1})(.*)(\1)$/is', $text, $parts);
  if (!$quoted) return $text;
  return trim($parts[2]);
}

/**
 * Convert a UTF-8 string to windows-1251 and clean it with fb2p_unquot_name.
 *
 * iconv warnings are suppressed; whatever iconv returns (including its
 * failure value) is handed to fb2p_unquot_name unchanged.
 *
 * @param string $str UTF-8 text
 * @return string cleaned windows-1251 text
 */
function fb2p_un_utf($str) {
  $converted=@iconv('UTF-8', 'windows-1251', $str);
  return fb2p_unquot_name($converted);
}

/**
 * Convert text from $src_enc to windows-1251 and clean it with
 * fb2p_unquot_name.
 *
 * The source encoding name is lower-cased first; when it already equals
 * 'windows-1251' the text is not passed through iconv at all. iconv warnings
 * are suppressed.
 *
 * @param string $str     text in the source encoding
 * @param string $src_enc source encoding name
 * @return string cleaned windows-1251 text
 */
function fb2p_to_1251($str, $src_enc='UTF-8') {
  $target='windows-1251';
  $source=strtolower($src_enc);
  if ($source==$target) {
    $converted=$str;
  } else {
    $converted=@iconv($source, $target, $str);
  }
  return fb2p_unquot_name($converted);
}

/**
 * Extract name => value pairs from the attribute part of a tag, using
 * $GLOBALS['FB2_TagAttrPattern'].
 *
 * A match set with six entries means the unquoted-value group (5) took part
 * in the match; otherwise the quoted value from group 3 is used. A repeated
 * attribute name keeps the last value.
 *
 * @param string $tag_text attribute text of a tag (by reference, not modified)
 * @return array|false map of attribute names to string values; false when
 *                     nothing matched
 */
function fb2p_get_tag_attributes(&$tag_text) {
  $found=preg_match_all($GLOBALS['FB2_TagAttrPattern'], $tag_text, $sets, PREG_OFFSET_CAPTURE | PREG_SET_ORDER, 0);
  if ($found==false || $found<=0) return false;

  $attr=array();
  foreach ($sets as $set) {
     $value=(count($set)==6) ? $set[5][0] : $set[3][0];
     $attr[(string)$set[1][0]]=(string)$value;
  }
  return (count($attr)==0) ? false : $attr;
}

/**
 * Create a text node that refers to a slice of the source data.
 *
 * @param int $offset start of the text within the data
 * @param int $length length of the text
 * @param int $key    optional node index; stored as 'ind' only when >= 0
 * @return array node with 'offset', 'length', 'end_offset', optional 'ind'
 *               and 'type' set to FB2_TT_STRING
 */
function fb2p_new_string_tag($offset, $length, $key=-1) {
   $node=array(
      'offset'=>$offset,
      'length'=>$length,
      'end_offset'=>$offset+$length,
   );
   if ($key>=0) $node['ind']=$key;

   $node['type']=FB2_TT_STRING;
   return $node;
}

/**
 * Turn one offset-capturing match of the tag pattern into a tag node.
 *
 * The node type is decided with this precedence: a leading question mark
 * gives FB2_TT_XML, otherwise a leading slash gives FB2_TT_CLOSE, otherwise
 * a trailing slash gives FB2_TT_SINGLE, otherwise FB2_TT_OPEN.
 *
 * @param array $match match groups, each as array(text, offset)
 * @param int   $key   optional node index; stored as 'ind' only when >= 0
 * @return array node with 'offset', 'length', 'end_offset', optional 'ind',
 *               'name', 'lname', 'type' and, when any were found, 'attributes'
 */
function fb2p_parse_tag_match($match, $key=-1) {
   $start=$match[0][1];
   $size=strlen($match[0][0]);
   $tag=array(
      'offset'=>$start,
      'length'=>$size,
      'end_offset'=>$start+$size,
   );
   if ($key>=0) $tag['ind']=$key;
   $tag['name']=$match[3][0];
   $tag['lname']=strtolower($match[3][0]);

   if ($match[1][0]=='?') {
      $tag['type']=FB2_TT_XML;
   } else if ($match[2][0]=='/') {
      $tag['type']=FB2_TT_CLOSE;
   } else if ($match[5][0]=='/') {
      $tag['type']=FB2_TT_SINGLE;
   } else {
      $tag['type']=FB2_TT_OPEN;
   }

   // The 'attributes' key exists only when at least one attribute was parsed.
   $attributes=fb2p_get_tag_attributes($match[4][0]);
   if ($attributes) $tag['attributes']=$attributes;
   return $tag;
}

/**
 * Find every tag in $data starting at $offset and return them as a flat list
 * of tag nodes (see fb2p_parse_tag_match); each node's 'ind' is its position
 * in the list.
 *
 * The per-match conversion is a plain loop: create_function(), used here
 * before, is deprecated since PHP 7.2 and no longer exists in PHP 8.
 *
 * @param string $data   markup to scan (by reference, not modified)
 * @param int    $offset byte offset at which the search starts
 * @return array list of tag nodes; the untouched preg_match_all output
 *               (normally an empty array) when nothing matched
 */
function fb2p_get_tags(&$data, $offset=0) {
   $r=preg_match_all($GLOBALS['FB2_TagPattern'], $data, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER, $offset);
   if ($r && $r>0) {
      foreach ($matches as $key=>$item) $matches[$key]=fb2p_parse_tag_match($item, $key);
   }
   return $matches;
}

/**
 * Find the first tag in $data at or after $offset.
 *
 * @param string $data   markup to scan (by reference, not modified)
 * @param int    $offset byte offset at which the search starts
 * @return array|false tag node built by fb2p_parse_tag_match (without 'ind');
 *                     false when no tag matches
 */
function fb2p_get_tag(&$data, $offset=0) {
   $hit=preg_match($GLOBALS['FB2_TagPattern'], $data, $groups, PREG_OFFSET_CAPTURE, $offset);
   if ($hit) return fb2p_parse_tag_match($groups);
   return false;
}

/**
 * Return $count bytes of $data starting at $ind, with HTML entities decoded
 * (ENT_QUOTES, cp1251 charset).
 *
 * With $as_substr the slice is taken with substr(); otherwise it is copied
 * byte by byte. The byte copy uses square-bracket offsets: the curly-brace
 * form used before is a parse error on PHP 8. The copy stops at the end of
 * the data instead of reading undefined offsets.
 *
 * @param string $data      source data (by reference, not modified)
 * @param int    $ind       start offset
 * @param int    $count     number of bytes
 * @param bool   $as_substr take the slice with substr() instead of copying
 * @return string decoded text
 */
function fb2p_substr(&$data, $ind, $count, $as_substr=false) {
  if ($as_substr) {
     $s=substr($data, $ind, $count);
  } else {
     $end=min($ind+$count, strlen($data));
     $s='';
     for ($i=$ind;$i<$end;$i++) $s.=$data[$i];
  }

  return html_entity_decode($s, ENT_QUOTES, 'cp1251');
}

/**
 * Parse FB2/XML-like markup into a nested tree of tag and text nodes.
 *
 * Every tag matched by $GLOBALS['FB2_TagPattern'] becomes a node (see
 * fb2p_parse_tag_match). Text found in front of a processed tag becomes an
 * FB2_TT_STRING node that only records offset/length into $data; text after
 * the last tag is not recorded. Opening tags receive a 'content' array that
 * holds their children.
 *
 * Recognised $config keys (all default to false):
 *  - err_suffix: when not false, diagnostics are written through
 *    fb2p_pr_err and the summary lines carry this suffix;
 *  - skip_unknown_tags: tags whose lower-cased name is not in
 *    $GLOBALS['FB2_Hash'] are ignored completely, so their markup stays
 *    inside the surrounding text run;
 *  - cut_unknown_tags: such tags are left out of the tree and their markup
 *    is excluded from the text runs as well;
 *  - fail_on_errors: return false when unclosed tags or unmatched closing
 *    tags were seen.
 *
 * @param string      $data   markup to parse (by reference, not modified)
 * @param array|false $config options described above
 * @return array|false list of top-level nodes; false when no tag was found
 *                     or when fail_on_errors is set and errors occurred
 */
function fb2p_data2struct(&$data, $config=false) {
  if (!$config || !is_array($config)) $config=array();
  $config['err_suffix']=fb2p_get_prop($config, 'err_suffix', false);
  $config['skip_unknown_tags']=fb2p_get_prop($config, 'skip_unknown_tags', false);
  $config['cut_unknown_tags']=fb2p_get_prop($config, 'cut_unknown_tags', false);
  $config['fail_on_errors']=fb2p_get_prop($config, 'fail_on_errors', false);

  $tags=array();
  $matches=array();
  $r=preg_match_all($GLOBALS['FB2_TagPattern'], $data, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER, 0);
  if ($r==false || $r<=0) {
     return false;
  }

  $pr_ind=-1;              // previous processed tag on the current level, -1 right after an opening tag
  $p_ind=-1;               // index of the currently open parent tag, -1 at top level
  $res=array();
  $arr=&$res;              // container that receives the next node
  $tlen=$r;
  $parent_stack=array();   // parent indexes saved while descending
  $psti=-1;                // top of $parent_stack
  $close_tags_counter=0;   // closing tags that do not match the open parent
  for ($i=0;$i<$tlen;$i++) {
    $tags[$i] = fb2p_parse_tag_match($matches[$i], $i);
    if ($config['skip_unknown_tags'] && !fb2p_get_prop($GLOBALS['FB2_Hash'], $tags[$i]['lname'], false)) continue;

    // Record the text between the previous tag (or the parent's opening tag,
    // or the start of the data) and this tag.
    if ($pr_ind>=0) {
      $gstart=$tags[$pr_ind]['end_offset'];
      $glen=$tags[$i]['offset']-$tags[$pr_ind]['end_offset'];
      if ($glen>0) $arr[]=fb2p_new_string_tag($gstart, $glen);
    } else if (count($arr)==0) {
      $gstart=($p_ind>=0)?$tags[$p_ind]['end_offset']:0;
      $glen=($p_ind>=0)?$tags[$i]['offset']-$tags[$p_ind]['end_offset']:$tags[$i]['offset'];
      if ($glen>0) $arr[]=fb2p_new_string_tag($gstart, $glen);
    }

    if (!$config['cut_unknown_tags'] || fb2p_get_prop($GLOBALS['FB2_Hash'], $tags[$i]['lname'], false)) switch ($tags[$i]['type']) {
      case FB2_TT_XML:
      case FB2_TT_SINGLE:
        $arr[]=&$tags[$i];
        break;
      case FB2_TT_OPEN:
        // Attach the tag, then make its 'content' the current container.
        $arr[]=&$tags[$i];
        $tags[$i]['content']=array();
        $arr=&$tags[$i]['content'];
        $psti++;
        $parent_stack[$psti]=$p_ind;
        $p_ind=$i;
        break;
      case FB2_TT_CLOSE:
        if ($p_ind<0 || $tags[$p_ind]['lname']!=$tags[$i]['lname']) {
            if ($config['err_suffix']!==false) fb2p_pr_err("Unknown closing tag: ".$tags[$i]['name']."\n");
            $close_tags_counter++;
            break;
        }
        if ($psti>=0) {
           $p_ind=$parent_stack[$psti];
           unset($parent_stack[$psti]);
           $psti--;
        } else {
           $p_ind=-1;
        }
        // A restored parent index of -1 means the closed element was
        // top-level: following nodes belong to $res again. Binding to
        // $tags[-1]['content'] instead would silently discard them.
        if ($p_ind>=0) {
           $arr=&$tags[$p_ind]['content'];
        } else {
           $arr=&$res;
        }
        break;
    }
    $pr_ind=$i; if ($p_ind==$pr_ind) $pr_ind=-1;
  }
  // $psti is the stack top index, so after the increment it is the number of
  // tags that are still open.
  $psti++;
  if ($config['err_suffix']!==false) {
    if ($psti>0) fb2p_pr_err("FB2 parser error. Unclosed tags: ".$psti." [".$config['err_suffix']."]\n");
    if ($close_tags_counter>0) fb2p_pr_err("FB2 parser error. Unknown closing tags: ".$close_tags_counter." [".$config['err_suffix']."]\n");
  }
  if ($config['fail_on_errors'] && ($psti>0 || $close_tags_counter>0)) {
    unset($res);
    return false;
  }

  return $res;
}

// Collect every tag reachable from $struct by following the path of
// lower-cased tag names in $karr: $karr[0] is matched against the nodes of
// $struct, $karr[1] against the 'content' of each match, and so on.
// The traversal is iterative; $stack holds one frame per depth with the node
// list ('arr'), the next index to inspect ('ind') and the last index ('max').
// Returns an array of references to the matching tag nodes, or false when
// the arguments are not arrays, the path is empty or nothing matches.
function fb2p_get_tags_path(&$struct, $karr) {
  if (!is_array($struct)) return false;
  if (!is_array($karr)) return false;
  $kmax=count($karr)-1;
  if ($kmax<0) return false;

  $result=array();
  $stack=array();
  $sind=0;
  $stack[$sind]=array();
  $stack[$sind]['arr']=&$struct;
  $stack[$sind]['ind']=0;
  $stack[$sind]['max']=count($stack[$sind]['arr'])-1;
  $tag=false;

  while (true) {
     if ($sind<0) break; // Stack exhausted: the traversal is finished.
     if ($stack[$sind]['ind']>$stack[$sind]['max']) { // This level is fully scanned: go back to the parent level.
        $sind--;
        continue;
     }
     if ($sind>$kmax) { // Deeper than the path; levels are only pushed below while $sind differs from $kmax.
        $sind--;
        continue;
     }
     $tag=&$stack[$sind]['arr'][$stack[$sind]['ind']];

     if (!is_array($tag)) { // Not a node array: skip it.
        $stack[$sind]['ind']++;
        continue;
     }
     if ($tag['type']==FB2_TT_STRING) { // Text node: skip it.
        $stack[$sind]['ind']++;
        continue;
     }

     if ($karr[$sind]!=$tag['lname']) { // Name differs from the path component for this depth: skip it.
        $stack[$sind]['ind']++;
        continue;
     }

     // The tag matches the path component for this depth.

     $stack[$sind]['ind']++;
     if ($sind==$kmax) { // Last path component: collect the tag.
        $result[]=&$tag;
     } else if (array_key_exists('content',$tag) && is_array($tag['content'])) { // Intermediate component: descend into its children.
        $sind++;
        $stack[$sind]=array();
        $stack[$sind]['arr']=&$tag['content'];
        $stack[$sind]['ind']=0;
        $stack[$sind]['max']=count($stack[$sind]['arr'])-1;
     }
  }
  if (count($result)==0) return false;
  return $result;
}


/**
 * Escape text for output as markup: special characters and both quote kinds
 * are converted to entities (ISO-8859-1 charset), while entities already
 * present in the text are not encoded a second time.
 *
 * @param mixed $str value to escape; cast to string first
 * @return string escaped text
 */
function fb2p_enc_str($str) {
  $text=(string)$str;
  $double_encode=false;
  return htmlspecialchars($text, ENT_QUOTES, 'ISO-8859-1', $double_encode);
}

/**
 * Serialize one tag node and append the pieces to $arr.
 *
 * Attribute values are escaped with fb2p_enc_str and always written in
 * double quotes. For FB2_TT_OPEN nodes the children are serialized through
 * fb2p_compile_struct and a closing tag is appended afterwards.
 *
 * @param array  $tag  tag node (by reference)
 * @param array  $arr  output list of string pieces, appended to
 * @param string $data source data that the text nodes refer to
 */
function fb2p_compile_tag(&$tag, &$arr, &$data) {
   $is_decl=($tag['type']==FB2_TT_XML);

   $markup='<';
   if ($is_decl) $markup.='?';
   $markup.=$tag['name'];
   if (array_key_exists('attributes',$tag)) {
      foreach ($tag['attributes'] as $attr_name=>$attr_val) {
         $markup.=' '.$attr_name.'="'.fb2p_enc_str($attr_val).'"';
      }
   }
   if ($is_decl) $markup.='?';
   if ($tag['type']==FB2_TT_SINGLE) $markup.='/';
   $arr[]=$markup.'>';

   if ($tag['type']!=FB2_TT_OPEN) return;
   if (array_key_exists('content',$tag)) fb2p_compile_struct($tag['content'], $arr, $data);
   $arr[]='</'.$tag['name'].'>';
}

/**
 * Return the entity-decoded text that a text node refers to.
 *
 * @param array  $tag  node carrying 'offset' and 'length' (by reference)
 * @param string $data source data the node points into
 * @return string text obtained through fb2p_substr; '' when the node's
 *                length is missing or not positive
 */
function fb2p_get_string_tag(&$tag, &$data) {
   $length=fb2p_get_prop($tag, 'length');
   if (!($length>0)) return '';
   $start=fb2p_get_prop($tag, 'offset');
   return fb2p_substr($data, $start, $length);
}

/**
 * Serialize a list of nodes and append the pieces to $arr.
 *
 * Text nodes are read from $data and escaped with fb2p_enc_str; all other
 * nodes go through fb2p_compile_tag. Entries that are empty or not arrays
 * are skipped.
 *
 * @param array  $struct list of nodes indexed from 0 (by reference)
 * @param array  $arr    output list of string pieces, appended to
 * @param string $data   source data that the text nodes refer to
 * @return bool false when $struct is empty or not an array, true otherwise
 */
function fb2p_compile_struct(&$struct, &$arr, &$data) {
   if (!$struct || !is_array($struct)) return false;

   for ($pos=0, $total=count($struct); $pos<$total; $pos++) {
      if (!$struct[$pos] || !is_array($struct[$pos])) continue;
      if ($struct[$pos]['type']!=FB2_TT_STRING) {
         fb2p_compile_tag($struct[$pos], $arr, $data);
         continue;
      }
      $arr[]=fb2p_enc_str(fb2p_get_string_tag($struct[$pos],$data));
   }
   return true;
}

/**
 * Serialize a node tree back into markup.
 *
 * @param array  $struct list of nodes (by reference)
 * @param string $data   source data that the text nodes refer to
 * @return string|false markup text; false when fb2p_compile_struct rejects
 *                      $struct (empty or not an array)
 */
function fb2p_struct2data(&$struct, &$data) {
  $pieces=array();
  $ok=fb2p_compile_struct($struct, $pieces, $data);
  return $ok ? implode('',$pieces) : false;
}

/**
 * Concatenate the text of all text nodes below a tag, in document order.
 *
 * Nested tags are processed recursively. The recursive call passes $data
 * along; it used to omit this required argument, which made any tag with a
 * nested tag fail with "too few arguments".
 *
 * @param array  $tag  tag node (by reference)
 * @param string $data source data that the text nodes refer to
 * @return string entity-decoded text; '' when the tag has no 'content' list
 */
function fb2p_get_inner_text(&$tag, &$data) {
   if (!is_array($tag) || !array_key_exists('content', $tag) || !is_array($tag['content'])) return '';
   $slen=count($tag['content']);
   $result='';
   for ($i=0;$i<$slen;$i++) {
      if ($tag['content'][$i]['type']==FB2_TT_STRING) {
        $result.=fb2p_get_string_tag($tag['content'][$i], $data);
      } else {
        $result.=fb2p_get_inner_text($tag['content'][$i], $data);
      }
   }
   return $result;
}

/**
 * Return the inner text of the first tag found at the given path.
 *
 * @param array  $struct  list of nodes to search (by reference)
 * @param array  $karr    path of lower-cased tag names, see fb2p_get_tags_path
 * @param string $data    source data that the text nodes refer to
 * @param string $default value returned when no tag matches or its text is ''
 * @return string inner text or $default
 */
function fb2p_find_1tag_val(&$struct, $karr, &$data, $default='') {
   $found=fb2p_get_tags_path($struct, $karr);
   if ($found) {
      $text=fb2p_get_inner_text($found[0], $data);
      if ($text!='') return $text;
   }
   return $default;
}

/**
 * Read a key from an array, falling back to a default.
 *
 * The key only has to exist; a stored null is returned as is. When $tag is
 * not an array (for example false or null from a failed lookup) the default
 * is returned instead of handing a non-array to array_key_exists.
 *
 * @param array  $tag       array to read from (by reference, not modified)
 * @param string $prop_name key to look up
 * @param mixed  $default   value returned when the key is absent
 * @return mixed stored value or $default
 */
function fb2p_get_prop(&$tag, $prop_name, $default=false) {
   if (!is_array($tag) || !array_key_exists($prop_name, $tag)) return $default;
   return $tag[$prop_name];
}

/**
 * Extract book metadata from a tree built by fb2p_data2struct.
 *
 * Header data comes from the xml declaration when one is present. All other
 * values are read from fictionbook/description and converted to windows-1251
 * with fb2p_to_1251, using the declared encoding. Genres and sequences are
 * gathered from title-info, document-info, publish-info and src-title-info
 * (in that order); authors from title-info and src-title-info.
 *
 * @param array  $struct node tree (by reference)
 * @param string $data   source data that the text nodes refer to
 * @return array|false map with the keys xml_header, xml_version, encoding,
 *                     id, isbn, title, lang, src_lang, genres, authors and
 *                     sequences; false when there is no description element
 *                     or it has no content
 */
function fb2p_struct2book(&$struct, &$data) {
  $book=array(
    'xml_header'=>false,
    'xml_version'=>1.0,
    'encoding'=>'UTF-8',
  );

  $decl=fb2p_get_tags_path($struct, array('xml'));
  if ($decl) {
     $book['xml_header']=true;
     $attr=fb2p_get_prop($decl[0], 'attributes');
     if ($attr) {
       $book['xml_version']=(float)fb2p_get_prop($attr, 'version', 0);
       $book['encoding']=(string)fb2p_get_prop($attr, 'encoding', 'UTF-8');
     }
  }

  $desc_hits=fb2p_get_tags_path($struct, array('fictionbook','description'));
  if (!$desc_hits) return false;
  $desc=fb2p_get_prop($desc_hits[0],'content');
  if (!$desc) return false;

  // Children of each info section; the value stays false for a missing
  // section or one without content.
  $all_sections=array('title-info','document-info','publish-info','src-title-info');
  $sections=array();
  foreach ($all_sections as $sec) {
     $hits=fb2p_get_tags_path($desc, array($sec));
     $sections[$sec]=$hits ? fb2p_get_prop($hits[0],'content') : $hits;
  }

  $enc=$book['encoding'];
  $book['id']=fb2p_to_1251(fb2p_find_1tag_val($sections['document-info'], array('id'), $data), $enc);
  $book['isbn']=fb2p_to_1251(fb2p_find_1tag_val($sections['publish-info'], array('isbn'), $data), $enc);

  $book['title']=fb2p_to_1251(fb2p_find_1tag_val($sections['title-info'], array('book-title'), $data), $enc);
  $book['lang']=fb2p_to_1251(fb2p_find_1tag_val($sections['title-info'], array('lang'), $data), $enc);
  $book['src_lang']=fb2p_to_1251(fb2p_find_1tag_val($sections['title-info'], array('src-lang'), $data), $enc);

  $book['genres']=array();
  foreach ($all_sections as $sec) fb2p_parse_genres($sections[$sec], $book, $data);

  $book['authors']=array();
  foreach (array('title-info','src-title-info') as $sec) fb2p_parse_autors($sections[$sec], $book, $data);

  $book['sequences']=array();
  foreach ($all_sections as $sec) fb2p_parse_sequences($sections[$sec], $book, $data);

  return $book;
}

/**
 * Append the genres found among $i_tags to $book['genres'].
 *
 * The text of each genre tag is converted to windows-1251 and split on
 * commas; every non-empty trimmed piece becomes one entry.
 *
 * @param array|false $i_tags child nodes of an info section (by reference)
 * @param array       $book   book record; reads 'encoding', appends to 'genres'
 * @param string      $data   source data that the text nodes refer to
 * @return bool false when $i_tags is empty or holds no genre tag, else true
 */
function fb2p_parse_genres(&$i_tags, &$book, &$data) {
  if (!$i_tags) return false;
  $found=fb2p_get_tags_path($i_tags, array('genre'));
  if (!$found) return false;

  for ($n=0, $total=count($found); $n<$total; $n++) {
     $raw=fb2p_get_inner_text($found[$n], $data);
     $list=fb2p_to_1251($raw, $book['encoding']);
     foreach (explode(',', $list) as $piece) {
       $piece=trim($piece);
       if ($piece=='') continue;
       $book['genres'][]=$piece;
     }
  }
  return true;
}

/**
 * Append the authors found among $i_tags to $book['authors'].
 *
 * Each author becomes a six-element list: last-name, first-name,
 * middle-name, nickname, home-page, email, all converted to windows-1251;
 * missing parts are ''. Every author tag is read through its own index; the
 * loop used to read index 0 on each pass, repeating the first author.
 *
 * @param array|false $i_tags child nodes of an info section (by reference)
 * @param array       $book   book record; reads 'encoding', appends to 'authors'
 * @param string      $data   source data that the text nodes refer to
 * @return bool false when $i_tags is empty or holds no author tag, else true
 */
function fb2p_parse_autors(&$i_tags, &$book, &$data) {
  if (!$i_tags) return false;
  $atags=fb2p_get_tags_path($i_tags, array('author'));
  if (!$atags) return false;
  $parts=array('last-name','first-name','middle-name','nickname','home-page','email');
  $len=count($atags);
  for ($i=0;$i<$len;$i++) {
     $a=fb2p_get_prop($atags[$i],'content');
     $author=array();
     foreach ($parts as $part) {
        $author[]=fb2p_to_1251(fb2p_find_1tag_val($a, array($part), $data), $book['encoding']);
     }
     $book['authors'][]=$author;
  }
  return true;
}

/**
 * Append the sequences found among $i_tags to $book['sequences'].
 *
 * Each sequence tag that has attributes yields array(name, number): the
 * name as a windows-1251 string, the number cast to int. Tags without
 * attributes are skipped.
 *
 * @param array|false $i_tags child nodes of an info section (by reference)
 * @param array       $book   book record; reads 'encoding', appends to 'sequences'
 * @param string      $data   source data (not used by this function)
 * @return bool false when $i_tags is empty or holds no sequence tag, else true
 */
function fb2p_parse_sequences(&$i_tags, &$book, &$data) {
  if (!$i_tags) return false;
  $found=fb2p_get_tags_path($i_tags, array('sequence'));
  if (!$found) return false;

  $total=count($found);
  for ($n=0;$n<$total;$n++) {
     $attr=fb2p_get_prop($found[$n], 'attributes');
     if (!$attr) continue;
     $title=(string)fb2p_to_1251(fb2p_get_prop($attr, 'name', ''), $book['encoding']);
     $number=(int)fb2p_to_1251(fb2p_get_prop($attr, 'number', ''), $book['encoding']);
     $book['sequences'][]=array($title, $number);
  }
  return true;
}

/**
 * Extract book metadata from FB2 text with SimpleXML.
 *
 * All values are converted from UTF-8 to windows-1251 through fb2p_un_utf.
 * title-info is now guarded like document-info and publish-info: when it is
 * missing, title, lang and src_lang are '' without reading properties of a
 * null value.
 *
 * @param string $data FB2 document text
 * @return array|false map with the keys id, isbn, title, lang, src_lang,
 *                     genres, authors and sequences; false when the text
 *                     cannot be loaded or has no description element
 */
function fb2p_parse($data) {
  $book=array();
  // The {0} quantifier makes the group match zero times, so this escapes
  // every ampersand, including those that already start an entity. The extra
  // entity layer is removed again by html_entity_decode in fb2p_unquot_name
  // (reached through fb2p_un_utf).
  // NOTE(review): the pattern looks like it was meant to spare existing
  // entities; confirm the intent before changing it.
  $local_data=preg_replace("/\&([\dA-Zaz]{0,10}\;){0}/", '&amp;', $data);

  $xml = simplexml_load_string($local_data);
  if ($xml && $xml[0]->{'description'}) {
    $ti=$xml[0]->{'description'}[0]->{'title-info'}[0];
    $di=$xml[0]->{'description'}[0]->{'document-info'}[0];
    $pi=$xml[0]->{'description'}[0]->{'publish-info'}[0];
    $sti=$xml[0]->{'description'}[0]->{'src-title-info'}[0];

    $book['id']=''; if ($di) $book['id']=fb2p_un_utf((string)$di->{'id'}[0]);
    $book['isbn']=''; if ($pi) $book['isbn']=fb2p_un_utf((string)$pi->{'isbn'}[0]);

    $book['title']='';
    $book['lang']='';
    $book['src_lang']='';
    if ($ti) {
      $book['title']=fb2p_un_utf((string)$ti->{'book-title'}[0]);
      $book['lang']=fb2p_un_utf((string)$ti->{'lang'}[0]);
      $book['src_lang']=fb2p_un_utf((string)$ti->{'src-lang'}[0]);
    }

    $book['genres']=array();
    if ($ti && $ti->{'genre'}) fb2p_parse_genres1($book, $ti->{'genre'});
    if ($di && $di->{'genre'}) fb2p_parse_genres1($book, $di->{'genre'});
    if ($pi && $pi->{'genre'}) fb2p_parse_genres1($book, $pi->{'genre'});
    if ($sti && $sti->{'genre'}) fb2p_parse_genres1($book, $sti->{'genre'});

    $book['authors']=array();
    if ($ti && $ti->{'author'}) fb2p_parse_autors1($book, $ti->{'author'});
    if ($sti && $sti->{'author'}) fb2p_parse_autors1($book, $sti->{'author'});

    $book['sequences']=array();
    if ($ti && $ti->{'sequence'}) fb2p_parse_sequences1($book, $ti->{'sequence'});
    if ($di && $di->{'sequence'}) fb2p_parse_sequences1($book, $di->{'sequence'});
    if ($pi && $pi->{'sequence'}) fb2p_parse_sequences1($book, $pi->{'sequence'});
    if ($sti && $sti->{'sequence'}) fb2p_parse_sequences1($book, $sti->{'sequence'});

    unset($ti);
    unset($di);
    unset($pi);
    unset($sti);
  } else $book=false;
  unset($xml);
  unset($local_data);

  return $book;
}

/**
 * Append array(name, number) to $book['sequences'] for every element in
 * $s_obj. Both values come from the element's attributes and pass through
 * fb2p_un_utf; the number is cast to int.
 *
 * @param array            $book  book record, 'sequences' is appended to
 * @param SimpleXMLElement $s_obj sequence elements of one info section
 */
function fb2p_parse_sequences1(&$book, $s_obj) {
  foreach ($s_obj as $node) {
     $attrs=$node->attributes();
     $title=fb2p_un_utf((string)$attrs['name']);
     $number=(int)fb2p_un_utf((string)$attrs['number']);
     $book['sequences'][]=array($title, $number);
  }
}

/**
 * Append genres to $book['genres']: the text of every element in $g_obj is
 * passed through fb2p_un_utf and split on commas, and each non-empty trimmed
 * piece becomes one entry.
 *
 * @param array            $book  book record, 'genres' is appended to
 * @param SimpleXMLElement $g_obj genre elements of one info section
 */
function fb2p_parse_genres1(&$book, $g_obj) {
  foreach ($g_obj as $node) {
     $pieces=explode(',', fb2p_un_utf((string)$node));
     foreach ($pieces as $piece) {
       $piece=trim($piece);
       if ($piece=='') continue;
       $book['genres'][]=$piece;
     }
  }
}

/**
 * Append one six-element list per element in $a_obj to $book['authors']:
 * last-name, first-name, middle-name, nickname, home-page, email, each
 * passed through fb2p_un_utf. A missing child yields ''.
 *
 * @param array            $book  book record, 'authors' is appended to
 * @param SimpleXMLElement $a_obj author elements of one info section
 */
function fb2p_parse_autors1(&$book, $a_obj) {
  $parts=array('last-name','first-name','middle-name','nickname','home-page','email');
  foreach ($a_obj as $author) {
     $row=array();
     foreach ($parts as $part) {
        $row[]=fb2p_un_utf((string)$author->{$part}[0]);
     }
     $book['authors'][]=$row;
  }
}


} // end incl_h
?>
