<?php
/*
 * bibtex.php - line-oriented BiBTeX reader with HTML formatting.
 *
 * Recovered from the patch "RAW BiBTeX - works"
 * (commit 550e0062508de14939dd518c5b7d6efefaf5b655, Igor, 2010-10-31).
 *
 * NOTE(review): this text was recovered from an HTML rendering of the patch.
 * The opening of the file (PHP tag, class header, $STRINGS declaration) and
 * HTML tags inside some string literals appear to have been stripped.
 * Every place where that is suspected carries a NOTE(review) comment;
 * compare with the repository copy before relying on the emitted markup.
 *
 * The closing PHP tag is omitted on purpose (PHP-only file).
 */


/*
 * Base parser: collects @STRING macros and bib-entries from BiBTeX text
 * that has exactly one statement per line, e. g.
 *
 *   @STRING(jetp = "JETP")
 *   @article{pashev_2010_axiom,
 *     year = 2010,
 *   }
 *
 */
class BibtexParser
{
    // NOTE(review): this declaration was lost in extraction; it is assumed
    // to be protected like $STRINGS_o - confirm against the repository.
    protected $STRINGS = array();   // 'matveev' => "В. И. Матвеев"
    protected $STRINGS_o = array(); // 'jetp' => 1 - to sort by journal importance
    public $ENTRIES = array();
    public $SELECTION = array();

    // Key (in $ENTRIES) of the entry whose fields are being read,
    // or null while no entry header has been seen yet.
    protected $current_id = null;

    // Number of @STRING commands seen so far by THIS parser.
    protected $string_no = 0;


    /*
     * Expand strings like 'gusarevich # " and " # matveev',
     * substituting 'gusarevich' and 'matveev'
     * from $STRINGS
     *
     * A chunk containing a "quoted" part is replaced by the quoted text,
     * a chunk naming a known @STRING by its value,
     * anything else is kept as is. Chunks are glued without separator.
     *
     */
    public function expand_string($str)
    {
        // Split on '#' only when it is followed by an even number of
        // double quotes, i. e. when it is outside a quoted literal.
        // Otherwise "C# in physics" would be cut in two.
        $chunks = preg_split(
            '/\s*#\s*(?=(?:[^"]*"[^"]*")*[^"]*$)/',
            trim((string) $str)
        );
        if ($chunks === false)
        {
            return (string) $str;
        }

        foreach ($chunks as $i => $chunk)
        {
            if (preg_match('/"(.*?)"/', $chunk, $matches))
            {
                $chunks[$i] = $matches[1];
            }
            elseif (isset($this->STRINGS[$chunk])) // not !empty(), but isset() !
            {
                $chunks[$i] = $this->STRINGS[$chunk];
            }
        }

        return implode('', $chunks);
    }


    /*
     * Parse a line of BiBTeX data
     * and collect strings and bib-entries
     *
     * Parser state (current entry, string counter) lives in the object,
     * so several parsers can be used side by side.
     *
     */
    protected function parse_string($line)
    {
        // fgets() keeps the line terminator; with "\r\n" the field pattern
        // below would otherwise keep the trailing comma in the value.
        $line = rtrim((string) $line, "\r\n");

        if (preg_match('/@STRING\s*[({]\s*([^\s=]+)\s*=\s*"(.*?)"\s*[)}]/ui', $line, $matches))
        {
            // Both @STRING(name = "value") and @STRING{name = "value"}
            $this->STRINGS[$matches[1]] = $matches[2];
            $this->STRINGS_o[$matches[1]] = $this->string_no++;
        }
        elseif (preg_match('/@(\w+?)\s*\{\s*(\w+?)\s*,/u', $line, $matches))
        {   // TODO: Ignore wrong fields
            $this->current_id = $matches[2];
            $this->ENTRIES[$this->current_id]['entry'] = strtolower($matches[1]);
            $this->ENTRIES[$this->current_id]['id'] = strtolower($matches[2]);
            // e. g. $ENTRIES['pashev_2010_axiom']['entry'] = 'book'
        }
        elseif ($this->current_id !== null
            && preg_match('/(\w+?)\s*=\s*(.*?)\s*,?$/u', $line, $matches))
        {
            // A "name = value" line before any entry header belongs
            // to nothing and is skipped.
            $this->ENTRIES[$this->current_id][strtolower($matches[1])] = $matches[2];
            /* e. g.
             * $ENTRIES['pashev_2010_axiom']['year'] = 2010
             * $ENTRIES['pashev_2010_axiom']['numpages'] = 68
             * and so on...
             * Values are stored raw (with quotes or braces).
             */
        }
    }


    /*
     * Read file line by line
     * and pass every line to parse_string()
     *
     * Returns true if the file was opened, false otherwise.
     *
     */
    public function read_file($filename)
    {
        $handle = fopen($filename, 'rb');
        if (!$handle)
        {
            return false;
        }

        // fgets() returns false at end of file: stop there
        // instead of feeding false to the parser.
        while (($line = fgets($handle)) !== false)
        {
            $this->parse_string($line);
        }
        fclose($handle);

        return true;
    }

    /*
     * Read raw bibtex data (text or array of lines)
     * and pass every line to parse_string()
     *
     */
    public function read_text($text)
    {
        if (!is_array($text))
        {
            $text = preg_split('/\r?\n/u', (string) $text);
            if ($text === false) // e. g. invalid UTF-8
            {
                return;
            }
        }
        foreach ($text as $line)
        {
            $this->parse_string($line);
        }
    }

    /*
     * Append to $SELECTION a copy of every entry of $ENTRIES
     * matching $search: array('field' => '/regexp/', ...)
     *
     * An entry is rejected only if it HAS the field
     * and the field does not match; entries lacking the field pass.
     *
     * If entry is selected for the first time
     * a new field is added - formatted HTML ('html') -
     * which should be ready to display.
     * It is stored in $ENTRIES too, so it is computed once.
     *
     * Calling select() again appends to the previous selection.
     *
     */
    public function select($search = array())
    {
        foreach ($this->ENTRIES as &$entry)
        {
            $select = true;
            foreach ($search as $key => $value)
            {
                $key = strtolower($key);
                if (!empty($entry[$key]) && !preg_match($value, $entry[$key]))
                {
                    $select = false;
                    break;
                }
            }
            if ($select)
            {
                if (empty($entry['html']))
                {
                    $entry['html'] = $this->format($entry);
                }
                $this->SELECTION[] = $entry;
            }
        }
        unset($entry); // do not leave a reference to the last entry behind
    }

    /*
     * Convert a few LaTeX constructs to their text equivalents:
     * '~', <<quotes>>, dashes, ^{...}, _{...}, $...$ and {groups}
     *
     */
    public function latex2html($text)
    {
        // Unescaped '~' (also at the very beginning of the text).
        // NOTE(review): the replacement is a plain space here; presumably
        // it was a non-breaking space or entity originally - confirm.
        $text = preg_replace('/(?<!\\\\)~/u', ' ', $text);
        $text = preg_replace('/<<(.*?)>>/u', '«\1»', $text);
        // Number ranges first, then the remaining dashes
        $text = preg_replace('/(\d+)\s*-{1,3}\s*(\d+)/u', '\1–\2', $text);
        $text = preg_replace('/---/u', '—', $text);
        $text = preg_replace('/--/u', '–', $text);
        // NOTE(review): the next three replacements are a bare '\1',
        // i. e. they only drop the LaTeX wrapper. Presumably they wrapped
        // the text in markup that was stripped in extraction - confirm.
        $text = preg_replace('/\^\{(.+?)\}/u', '\1', $text);
        $text = preg_replace('/_\{(.+?)\}/u', '\1', $text);
        $text = preg_replace('/\$(.+?)\$/u', '\1', $text);

        // Remaining {groups}: drop the braces
        $text = preg_replace('/\{(.*?)\}/u', '\1', $text);

        return $text;
    }


    /*
     * Format one BiBTeX entry in HTML
     *
     * Placeholder, to be overridden in a subclass.
     *
     */
    public function format($entry)
    {
        $res = 'This is an abstract method';
        return $res;
    }
}


/*
 * Example class for very special purpose
 *
 */
class BibtexParserGoga extends BibtexParser
{
    // Working copy of the entry being formatted
    protected $entry;

    protected $I18N = array(
        'p.' => array('russian' => 'с.'),
        'pp.' => array('russian' => 'с.'),
        'P.' => array('russian' => 'С.'),
        'Pp.' => array('russian' => 'С.'),
        'Vol.' => array('russian' => 'Т.'),
        'no.' => array('russian' => '№'),
        'et al.' => array('russian' => 'и др.'),
        'Ed. by' => array('russian' => 'Под ред.'),
    );

    // Fields having their own format_<field>() method.
    // Field names come from the bib file, so the method name
    // is never built from a name that is not listed here.
    protected $FIELD_FORMATTERS = array(
        'author', 'editor', 'number', 'numpages', 'pages', 'url', 'volume',
    );

    // Entry types having their own format_<type>() method.
    protected $ENTRY_FORMATTERS = array(
        'article', 'book', 'booklet', 'inproceedings', 'misc',
    );


    /*
     * Translate $str into the language of the current entry.
     * Returns $str itself if there is no language or no translation.
     *
     */
    protected function _($str)
    {
        if (empty($this->entry['language']))
        {
            return $str;
        }
        $language = $this->entry['language'];

        return empty($this->I18N[$str][$language])
            ? $str
            : $this->I18N[$str][$language];
    }


    /*
     * Compare entries for sorting
     *
     * Newest first, then by entry type, then by journal importance.
     * Arguments are taken by value: usort() passes values.
     *
     */
    protected function cmp_entries($a, $b)
    {
        // by year (if range - by last year); no year counts as 0
        $x = (isset($a['year']) && preg_match('/.*([0-9]{4})/ui', $a['year'], $matches))
            ? (int) $matches[1] : 0;
        $y = (isset($b['year']) && preg_match('/.*([0-9]{4})/ui', $b['year'], $matches))
            ? (int) $matches[1] : 0;
        if ($x > $y) {return -1;}
        if ($x < $y) {return 1;}

        // by entry type
        $type = array (
            'article'       => 10,
            'book'          => 20,
            'inbook'        => 30,
            'booklet'       => 40,
            'inproceedings' => 50,
            'grant'         => 1000,   // for grants, not for publications ;-)
            'misc'          => 999999, // We use misc for articles in non-reviewed journals
        );
        // FIXME : other entry type if needed.
        // Unknown types get 0 and so go before all known ones.
        $x = (isset($a['entry']) && isset($type[$a['entry']])) ? $type[$a['entry']] : 0;
        $y = (isset($b['entry']) && isset($type[$b['entry']])) ? $type[$b['entry']] : 0;
        if ($x < $y) {return -1;}
        if ($x > $y) {return 1;}

        // by journal importance,
        // which is defined by order of @STRING commands for BiBTeX
        // (order is stored in $this->STRINGS_o)
        $x = empty($a['journal']) ? 'NONE' : $a['journal'];
        $y = empty($b['journal']) ? 'NONE' : $b['journal'];
        // Not a journal. Maybe grant?
        if (($x === 'NONE') && ($y === 'NONE'))
        {   // 'organization' is my (Igor's) extension
            $x = empty($a['organization']) ? 'NONE' : $a['organization'];
            $y = empty($b['organization']) ? 'NONE' : $b['organization'];
        }
        // isset(), not empty(): the very first @STRING has order 0
        // and must be the MOST important, not an unknown one.
        $x = isset($this->STRINGS_o[$x]) ? $this->STRINGS_o[$x] : 999999;
        $y = isset($this->STRINGS_o[$y]) ? $this->STRINGS_o[$y] : 999999;
        if ($x < $y) {return -1;}
        if ($x > $y) {return 1;}

        return 0;
    }

    /*
     * Sort $SELECTION with cmp_entries()
     *
     */
    public function sort()
    {
        usort($this->SELECTION, array($this, 'cmp_entries'));
    }

    /*
     * Default field preparation:
     * expand @STRINGs, then convert LaTeX
     *
     */
    protected function format_field_default($field)
    {
        $this->entry[$field] = $this->latex2html(
            $this->expand_string($this->entry[$field])
        );
    }

    protected function format_pages()
    {
        $this->format_field_default('pages');
        // Two numbers separated by something - a range of pages
        $pp = preg_match('/\d+\D+\d+/', $this->entry['pages']) ? $this->_('Pp.') : $this->_('P.');
        $this->entry['pages'] = $pp . ' ' . $this->entry['pages'];
    }

    protected function format_numpages()
    {
        $this->format_field_default('numpages');
        $this->entry['numpages'] = $this->entry['numpages'] . ' '
            . $this->_(((int) $this->entry['numpages'] > 1) ? 'pp.' : 'p.');
    }

    protected function format_editor()
    {
        $this->format_field_default('editor');
        $this->entry['editor'] = $this->_('Ed. by') . ' ' . $this->entry['editor'];
    }

    protected function format_volume()
    {
        $this->format_field_default('volume');
        $this->entry['volume'] = $this->_('Vol.') . ' ' . $this->entry['volume'];
    }

    protected function format_url()
    {
        // No latex2html() here: it would turn '~' (as in /~user/)
        // into a space and rewrite '--' inside the address.
        $url = $this->expand_string($this->entry['url']);
        $url = preg_replace('/^\{(.*)\}$/u', '\1', $url); // {url} -> url

        // NOTE(review): the source concatenated ' ' before and '' after
        // the escaped URL; presumably a link tag around it was stripped
        // in extraction - confirm.
        $this->entry['url'] = ' ' . htmlentities(urldecode($url), ENT_QUOTES, 'UTF-8');
    }

    protected function format_number()
    {
        $this->format_field_default('number');
        $this->entry['number'] = $this->_('no.') . ' ' . $this->entry['number'];
    }

    /*
     * Format one author
     *
     */
    protected function format_author1($author)
    {
        return $this->latex2html($author);
    }

    /*
     * Format the list of authors: at most three are shown,
     * then 'et al.'. The real number of authors
     * is kept in $this->entry['count_authors'].
     *
     */
    protected function format_author()
    {
        $authors_array = preg_split('/\s+and\s+/',
            $this->expand_string($this->entry['author']));

        $this->entry['count_authors'] = count($authors_array);
        array_splice($authors_array, 3);

        $res = implode(', ', array_map(array($this, 'format_author1'), $authors_array));
        if ($this->entry['count_authors'] > 3)
        {
            $res .= ' ' . $this->_('et al.');
        }

        $this->entry['author'] = $res;
    }

    /*
     * Format one BiBTeX entry in HTML
     *
     * $entry is taken by value (as in the parent class) and is not changed;
     * all work is done on the copy in $this->entry.
     *
     */
    public function format($entry)
    {
        $this->entry = $entry;

        // Language goes first: _() needs it unquoted
        // while the other fields are being prepared.
        if (isset($this->entry['language']))
        {
            $this->format_field_default('language');
        }

        foreach (array_keys($entry) as $field)
        {
            if ($field === 'language')
            {
                continue; // done above
            }
            if (in_array($field, $this->FIELD_FORMATTERS, true))
            {
                $method = 'format_' . $field;
                $this->$method(); // prepare a field for final HTML output
            }
            else
            {
                $this->format_field_default($field);
            }
        }

        $type = isset($this->entry['entry']) ? $this->entry['entry'] : '';
        if (in_array($type, $this->ENTRY_FORMATTERS, true))
        {
            $method = 'format_' . $type;
            $res = $this->$method();
        }
        else
        {
            $res = 'Not implemented for ' . $type;
        }

        $res .= '.';
        if (!empty($this->entry['url']))
        {
            $res .= $this->entry['url'];
        }
        // Several periods in a row (maybe with tags between) - leave one
        $res = preg_replace('/\.(<[^>]+>)*?\.+/', '.\1', $res);
        return $res;
    }

    /*
     * Prepared value of a field, or '' if the field is empty
     *
     */
    protected function field_or_empty($field)
    {
        return empty($this->entry[$field]) ? '' : $this->entry[$field];
    }

    /*
     * First part of a reference: 'Author. Title<separator><tail field>'
     *
     */
    protected function lead_part($tail_field, $tail_separator)
    {
        // FIXME : If $this->entry['count_authors'] > 3, place them after title?
        // NOTE(review): the source wrapped the author (and the journal)
        // in '' . ... . ''; presumably emphasis tags were stripped
        // in extraction - confirm.
        $part = $this->field_or_empty('author');

        if (!empty($this->entry['title']))
        {
            $part .= ($part === '' ? '' : '. ') . $this->entry['title'];
        }
        if (!empty($this->entry[$tail_field]))
        {
            $part .= $tail_separator . $this->entry[$tail_field];
        }
        return $part;
    }

    /*
     * Connect parts with '. — ', skipping empty ones,
     * so that a missing part leaves no dangling separator
     *
     */
    protected function join_parts(array $parts)
    {
        $kept = array();
        foreach ($parts as $part)
        {
            if ($part !== '' && $part !== null)
            {
                $kept[] = $part;
            }
        }
        return implode('. — ', $kept);
    }

    protected function format_book()
    {
        // 'Address: Publisher, Year'. We are ignoring month
        $place = $this->field_or_empty('address');
        if (!empty($this->entry['publisher']))
        {
            $place .= ($place === '' ? '' : ': ') . $this->entry['publisher'];
        }
        if (!empty($this->entry['year']))
        {
            $place .= ($place === '' ? '' : ', ') . $this->entry['year'];
        }

        // Total number of pages is preferred to a range of pages
        $pages = $this->field_or_empty('numpages');
        if ($pages === '')
        {
            $pages = $this->field_or_empty('pages');
        }

        return $this->join_parts(array(
            $this->lead_part('editor', ' / '),
            $this->field_or_empty('edition'),
            $this->field_or_empty('volume'),
            $place,
            $pages,
        ));
    }

    protected function format_article()
    {
        // 'Vol. 1, no. 2'
        $issue = $this->field_or_empty('volume');
        if (!empty($this->entry['number']))
        {
            $issue .= ($issue === '' ? '' : ', ') . $this->entry['number'];
        }

        return $this->join_parts(array(
            $this->lead_part('journal', ' // '),
            $this->field_or_empty('year'),
            $this->field_or_empty('month'), // was tested as 'manth' and never shown
            $issue,
            $this->field_or_empty('pages'),
        ));
    }


    protected function format_inproceedings()
    {
        // 'Address, Year'. We are ignoring month
        $place = $this->field_or_empty('address');
        if (!empty($this->entry['year']))
        {
            $place .= ($place === '' ? '' : ', ') . $this->entry['year'];
        }

        return $this->join_parts(array(
            $this->lead_part('booktitle', ' // '),
            $this->field_or_empty('volume'),
            $place,
            $this->field_or_empty('pages'),
        ));
    }


    protected function format_booklet()
    {
        return $this->format_book();
    }


    protected function format_misc()
    {
        return $this->format_article(); // Use @misc for articles in non-reviewed journals
    }
}