aboutsummaryrefslogtreecommitdiff
path: root/bibtex.php
diff options
context:
space:
mode:
Diffstat (limited to 'bibtex.php')
-rw-r--r--bibtex.php545
1 files changed, 545 insertions, 0 deletions
diff --git a/bibtex.php b/bibtex.php
new file mode 100644
index 0000000..ef81614
--- /dev/null
+++ b/bibtex.php
@@ -0,0 +1,545 @@
+<?php
+
+/**
+ * Line-oriented reader for BibTeX data.
+ *
+ * Collects @STRING macros and bibliography entries, selects entries by
+ * regular expressions and converts a small subset of LaTeX markup to HTML.
+ * format() is a placeholder that subclasses override to render an entry.
+ */
+class BibtexParser
+{
+
+    // Macro name => text: @STRING(matveev="В. И. Матвеев") -> 'matveev' => "В. И. Матвеев"
+    public $STRINGS = array();
+    // Macro name => ordinal of its @STRING line (0 for the first one);
+    // kept so that entries can be sorted by journal importance
+    protected $STRINGS_o = array();
+    // Entry key => array of fields; see parse_string() for the layout
+    public $ENTRIES = array();
+    // Entries chosen by select(), in the order they were selected
+    public $SELECTION = array();
+
+    // Key of the entry whose fields are being read; '' until an entry header is seen
+    protected $current_entry = '';
+    // Number of @STRING lines read by this parser so far
+    protected $string_count = 0;
+
+
+    /**
+     * Expand strings like 'gusarevich # " and " # matveev',
+     * substituting 'gusarevich' and 'matveev' from $STRINGS.
+     *
+     * The value is split on '#' (with surrounding whitespace); a '#' that
+     * sits inside a double-quoted literal is not a separator. A chunk that
+     * contains a quoted literal is replaced by the literal's content, a chunk
+     * that names a known macro is replaced by the macro text, and any other
+     * chunk is kept unchanged.
+     *
+     * @param string $str raw BibTeX field value
+     * @return string the chunks concatenated without a separator
+     */
+    public function expand_string($str)
+    {
+        // The lookahead accepts a '#' only when an even number of double
+        // quotes follows it, i.e. when the '#' is outside a quoted literal.
+        $chunks = preg_split('/\s*#\s*(?=(?:[^"]*"[^"]*")*[^"]*$)/', $str);
+        foreach ($chunks as $i => $chunk)
+        {
+            if (preg_match('/"(.*?)"/', $chunk, $matches))
+            {
+                $chunks[$i] = $matches[1];
+            }
+            elseif (isset($this->STRINGS[$chunk])) // isset(), not !empty(): a macro may be '' or '0'
+            {
+                $chunks[$i] = $this->STRINGS[$chunk];
+            }
+        }
+
+        return implode('', $chunks);
+    }
+
+
+    /**
+     * Parse one line of BibTeX data and collect strings and bib-entries.
+     *
+     * Three kinds of lines are recognised, in this order:
+     *  - @STRING(name = "text")  -> stored in $STRINGS and $STRINGS_o;
+     *  - @type{key,              -> opens entry 'key' and stores the
+     *                               lower-cased type in ['entry'] and the
+     *                               lower-cased key in ['id'];
+     *  - name = value[,]         -> stored as field strtolower(name) of the
+     *                               entry opened last.
+     * Anything else is ignored. A field line met before the first entry
+     * header is ignored as well, because there is no entry to attach it to.
+     *
+     * @param string $line one line of input, with or without the line break
+     * @return void
+     */
+    protected function parse_string($line)
+    {
+        if (preg_match('/@STRING\s*\((.+?)\s*=\s*"(.*?)"\)/u', $line, $matches))
+        {
+            $this->STRINGS[$matches[1]] = $matches[2];
+            $this->STRINGS_o[$matches[1]] = $this->string_count++;
+        }
+        elseif (preg_match('/@(\w+?)\s*\{\s*(\w+?)\s*,/u', $line, $matches))
+        { // TODO: Ignore wrong fields
+            $this->current_entry = $matches[2];
+            $this->ENTRIES[$this->current_entry]['entry'] = strtolower($matches[1]);
+            $this->ENTRIES[$this->current_entry]['id'] = strtolower($matches[2]);
+            // e. g. $ENTRIES['pashev_2010_axiom']['entry'] = 'book'
+        }
+        elseif ($this->current_entry !== ''
+            && preg_match('/(\w+?)\s*=\s*(.*?)\s*,?$/u', $line, $matches))
+        {
+            $this->ENTRIES[$this->current_entry][strtolower($matches[1])] = $matches[2];
+            /* e. g.
+             * $ENTRIES['pashev_2010_axiom']['year'] = 2010
+             * $ENTRIES['pashev_2010_axiom']['numpages'] = 68
+             * and so on...
+             */
+        }
+    }
+
+
+    /**
+     * Read a file line by line and pass every line to parse_string().
+     *
+     * Nothing is parsed when the file cannot be opened.
+     *
+     * @param string $filename path of the BibTeX file
+     * @return void
+     */
+    public function read_file($filename)
+    {
+        $handle = fopen($filename, 'rb');
+        if (!$handle)
+        {
+            return;
+        }
+
+        // fgets() returns false at end of file; stop there instead of
+        // handing that false to the parser.
+        while (($line = fgets($handle)) !== false)
+        {
+            $this->parse_string($line);
+        }
+        fclose($handle);
+    }
+
+    /**
+     * Read raw BibTeX data and pass every line to parse_string().
+     *
+     * @param string|array $text whole text (split on "\n" here) or an
+     *                           array that already holds one line per item
+     * @return void
+     */
+    public function read_text($text)
+    {
+        $lines = is_array($text) ? $text : explode("\n", $text);
+        foreach ($lines as $line)
+        {
+            $this->parse_string($line);
+        }
+    }
+
+    /**
+     * Append to $SELECTION every entry of $ENTRIES that passes $search.
+     *
+     * $search maps field names (case-insensitive) to PCRE patterns. An entry
+     * is rejected only when it has a non-empty value for a listed field and
+     * that value does not match the pattern; entries lacking the field pass.
+     *
+     * When an entry is selected for the first time its formatted HTML is
+     * stored in the entry's 'html' field inside $ENTRIES, so that it is
+     * computed once. $SELECTION receives a copy of the entry array, and it
+     * is not cleared between calls.
+     *
+     * @param array $search field name => regular expression
+     * @return void
+     */
+    public function select($search = array())
+    {
+        foreach ($this->ENTRIES as &$entry) // by reference: 'html' is cached in $ENTRIES
+        {
+            $select = true;
+            foreach ($search as $key => $value)
+            {
+                $key = strtolower($key);
+                if (!empty($entry[$key]) && !preg_match($value, $entry[$key]))
+                {
+                    $select = false;
+                    break;
+                }
+            }
+            if ($select)
+            {
+                if (empty($entry['html']))
+                {
+                    $entry['html'] = $this->format($entry);
+                }
+                $this->SELECTION[] = $entry;
+            }
+        }
+        unset($entry); // drop the reference to the last element
+    }
+
+    /**
+     * Convert a small subset of LaTeX markup to HTML.
+     *
+     * The rules are applied one after another, in the order listed.
+     *
+     * @param string $text text with LaTeX markup
+     * @return string text with HTML markup
+     */
+    public function latex2html($text)
+    {
+        $patterns = array(
+            '/([^\\\\])~/u',                // unescaped tie -> non-breaking space
+            '/<<(.*?)>>/u',                 // << >> -> guillemets
+            '/(\d+)\s*-{1,3}\s*(\d+)/u',    // number range -> en dash
+            '/---/u',
+            '/--/u',
+            '/\^\{(.+?)\}/u',               // superscript
+            '/_\{(.+?)\}/u',                // subscript
+            '/\$(.+?)\$/u',                 // inline math
+            '/\{(.*?)\}/u',                 // remaining grouping braces are dropped
+        );
+        $replacements = array(
+            '\\1&nbsp;',
+            '«\1»',
+            '\1&ndash;\2',
+            '&mdash;',
+            '&ndash;',
+            '<sup>\1</sup>',
+            '<sub>\1</sub>',
+            '<tt>\1</tt>',
+            '\1',
+        );
+
+        return preg_replace($patterns, $replacements, $text);
+    }
+
+
+    /**
+     * Format one BibTeX entry in HTML.
+     *
+     * Placeholder implementation: subclasses are expected to override it.
+     *
+     * @param array $entry one element of $ENTRIES
+     * @return string
+     */
+    public function format($entry)
+    {
+        $res = 'This is an abstract method';
+        return $res;
+    }
+
+
+}
+
+
+/**
+ * Example class for very special purpose.
+ *
+ * Renders an entry as one HTML string whose parts are joined with
+ * '.&nbsp;&mdash; ', translates a few labels according to the entry's
+ * 'language' field and can sort the current selection.
+ */
+class BibtexParserGoga extends BibtexParser
+{
+    // Working copy of the entry that format() is rendering
+    protected $entry;
+
+    // Label => (value of the entry's 'language' field => translated label)
+    protected $I18N = array(
+        'p.' => array('russian' => 'с.'),
+        'pp.' => array('russian' => 'с.'),
+        'P.' => array('russian' => 'С.'),
+        'Pp.' => array('russian' => 'С.'),
+        'Vol.' => array('russian' => 'Т.'),
+        'no.' => array('russian' => '№'),
+        'et&nbsp;al.' => array('russian' => 'и&nbsp;др.'),
+        'Ed.&nbsp;by' => array('russian' => 'Под&nbsp;ред.'),
+    );
+
+    /**
+     * Translate a label into the language of the current entry.
+     *
+     * @param string $str label, a key of $I18N
+     * @return string the translation, or $str itself when the entry has no
+     *                language or no translation is known
+     */
+    protected function _($str)
+    {
+        if (empty($this->entry['language']))
+        {
+            return $str;
+        }
+        $language = $this->entry['language'];
+        if (empty($this->I18N[$str][$language]))
+        {
+            return $str;
+        }
+
+        return $this->I18N[$str][$language];
+    }
+
+
+    /**
+     * Compare two entries for sorting.
+     *
+     * Order: newest year first (for a range the last four-digit year
+     * counts), then by entry type, then by the position of the journal's
+     * (or, when neither entry has a journal, the organization's) @STRING
+     * definition.
+     *
+     * The arguments are taken by value: nothing is modified here, and the
+     * method is invoked by usort(), which passes values to its callback.
+     *
+     * @param array $a first entry
+     * @param array $b second entry
+     * @return int -1, 0 or 1
+     */
+    protected function cmp_entries($a, $b)
+    {
+        // by year (if range - by last year)
+        $x = (isset($a['year']) && preg_match('/.*([0-9]{4})/ui', $a['year'], $matches)) ? $matches[1] : 0;
+        $y = (isset($b['year']) && preg_match('/.*([0-9]{4})/ui', $b['year'], $matches)) ? $matches[1] : 0;
+        if ($x > $y) {return -1;}
+        if ($x < $y) {return 1;}
+
+        // by entry type
+        $type = array (
+            'article' => 10,
+            'book' => 20,
+            'inbook' => 30,
+            'booklet' => 40,
+            'inproceedings' => 50,
+            'grant' => 1000, // for grants, not for publications ;-)
+            'misc' => 999999, // We use misc for articles in non-reviewed journals
+        );
+        // Types missing from the table rank before all listed ones
+        // FIXME : other entry type if needed
+        $x = isset($a['entry'], $type[$a['entry']]) ? $type[$a['entry']] : 0;
+        $y = isset($b['entry'], $type[$b['entry']]) ? $type[$b['entry']] : 0;
+        if ($x < $y) {return -1;}
+        if ($x > $y) {return 1;}
+
+        // by journal importance,
+        // which is defined by order of @STRING commands for BiBTeX
+        // (ordinals are stored in $this->STRINGS_o)
+        $x = empty($a['journal']) ? 'NONE' : $a['journal'];
+        $y = empty($b['journal']) ? 'NONE' : $b['journal'];
+        // Not a journal. Maybe grant?
+        if (($x === 'NONE') && ($y === 'NONE'))
+        { // 'organization' is my (Igor's) extension
+            $x = empty($a['organization']) ? 'NONE' : $a['organization'];
+            $y = empty($b['organization']) ? 'NONE' : $b['organization'];
+        }
+        // isset(), not empty(): the first @STRING has ordinal 0 and must
+        // rank highest instead of being treated as unknown
+        $x = isset($this->STRINGS_o[$x]) ? $this->STRINGS_o[$x] : 999999;
+        $y = isset($this->STRINGS_o[$y]) ? $this->STRINGS_o[$y] : 999999;
+        if ($x < $y) {return -1;}
+        if ($x > $y) {return 1;}
+
+        return 0;
+    }
+
+    /**
+     * Sort $SELECTION with cmp_entries().
+     *
+     * @return void
+     */
+    public function sort()
+    {
+        usort($this->SELECTION, array($this, 'cmp_entries'));
+    }
+
+    /**
+     * Default field preparation: expand macros, then convert LaTeX to HTML.
+     *
+     * @param string $field name of a field of $this->entry
+     * @return void
+     */
+    protected function format_field_default($field)
+    {
+        $expanded = $this->expand_string($this->entry[$field]);
+        $this->entry[$field] = $this->latex2html($expanded);
+    }
+
+    /**
+     * Prepare one field for output with format_<field>() when such a method
+     * exists, otherwise with format_field_default().
+     *
+     * NOTE(review): a field whose name coincides with another format_*
+     * method (an entry type such as 'book', for instance) would be
+     * dispatched to that method as well.
+     *
+     * @param string $field name of a field of $this->entry
+     * @return void
+     */
+    protected function prepare_field($field)
+    {
+        $method = "format_$field";
+        if (method_exists($this, $method))
+        {
+            $this->$method();
+        }
+        else
+        {
+            $this->format_field_default($field);
+        }
+    }
+
+    /**
+     * Join the non-empty parts of a reference with '.&nbsp;&mdash; '.
+     *
+     * Empty parts are skipped so that a missing group of fields does not
+     * leave two separators in a row.
+     *
+     * @param array $parts list of strings
+     * @return string
+     */
+    protected function join_parts($parts)
+    {
+        $kept = array();
+        foreach ($parts as $part)
+        {
+            if ($part !== '')
+            {
+                $kept[] = $part;
+            }
+        }
+        return implode('.&nbsp;&mdash; ', $kept);
+    }
+
+    /**
+     * Build the '<em>authors</em>. title' opening shared by all layouts.
+     *
+     * @return string '' when the entry has neither author nor title
+     */
+    protected function author_title()
+    {
+        $part = '';
+        // FIXME : If $this->entry['count_authors'] > 3, place them after title?
+        if (!empty($this->entry['author']))
+        {
+            $part = '<em>' . $this->entry['author'] . '</em>';
+        }
+        if (!empty($this->entry['title']))
+        {
+            $part .= (empty($part) ? '' : '. ') . $this->entry['title'];
+        }
+        return $part;
+    }
+
+    /**
+     * 'pages' -> 'Pp.&nbsp;<range>' or 'P.&nbsp;<page>' (label translated).
+     */
+    protected function format_pages()
+    {
+        $this->format_field_default('pages');
+        // two numbers separated by something else are taken as a range
+        $is_range = preg_match('/\d+\D+\d+/', $this->entry['pages']);
+        $label = $is_range ? $this->_('Pp.') : $this->_('P.');
+        $this->entry['pages'] = $label . '&nbsp;' . $this->entry['pages'];
+    }
+
+    /**
+     * 'numpages' -> '<n>&nbsp;pp.' or '<n>&nbsp;p.' (label translated).
+     */
+    protected function format_numpages()
+    {
+        $this->format_field_default('numpages');
+        $label = ($this->entry['numpages'] > 1) ? 'pp.' : 'p.';
+        $this->entry['numpages'] = $this->entry['numpages'] . '&nbsp;' . $this->_($label);
+    }
+
+    /**
+     * 'editor' -> 'Ed.&nbsp;by&nbsp;<editor>' (label translated).
+     */
+    protected function format_editor()
+    {
+        $this->format_field_default('editor');
+        $this->entry['editor'] = $this->_('Ed.&nbsp;by') . '&nbsp;' . $this->entry['editor'];
+    }
+
+    /**
+     * 'volume' -> 'Vol.&nbsp;<volume>' (label translated).
+     */
+    protected function format_volume()
+    {
+        $this->format_field_default('volume');
+        $this->entry['volume'] = $this->_('Vol.') . '&nbsp;' . $this->entry['volume'];
+    }
+
+    /**
+     * 'url' -> ' <a href="...">...</a>'.
+     *
+     * The URL is not passed through latex2html(): that conversion rewrites
+     * '~', '--', digit-dash-digit and similar sequences, all of which are
+     * ordinary URL characters. The href is HTML-escaped; the link text is
+     * the URL-decoded address, also escaped.
+     */
+    protected function format_url()
+    {
+        $raw = $this->entry['url'];
+        if (preg_match('/^\{([^{}]*)\}$/', $raw, $matches))
+        {
+            // {http://...}: a brace-delimited literal, taken verbatim
+            $url = $matches[1];
+        }
+        else
+        {
+            // quoted literal or macro; drop grouping braces as latex2html() would
+            $url = preg_replace('/\{(.*?)\}/u', '\1', $this->expand_string($raw));
+        }
+
+        $this->entry['url'] = ' <a href="' . htmlspecialchars($url, ENT_QUOTES, 'UTF-8')
+            . '" >' . htmlentities(urldecode($url), ENT_QUOTES, 'UTF-8') . '</a>';
+    }
+
+    /**
+     * 'number' -> 'no.&nbsp;<number>' (label translated).
+     */
+    protected function format_number()
+    {
+        $this->format_field_default('number');
+        $this->entry['number'] = $this->_('no.') . '&nbsp;' . $this->entry['number'];
+    }
+
+    /**
+     * Format a single author name.
+     *
+     * @param string $author one name, macros already expanded
+     * @return string
+     */
+    protected function format_author1($author)
+    {
+        return $this->latex2html($author);
+    }
+
+    /**
+     * 'author' -> at most three names separated by ', ', followed by
+     * 'et&nbsp;al.' (translated) when there are more. The total number of
+     * authors is stored in the 'count_authors' field.
+     */
+    protected function format_author()
+    {
+        $authors = preg_split('/\s+and\s+/',
+            $this->expand_string($this->entry['author']));
+
+        $this->entry['count_authors'] = count($authors);
+
+        $shown = array();
+        foreach (array_slice($authors, 0, 3) as $author)
+        {
+            $shown[] = $this->format_author1($author);
+        }
+
+        $res = implode(', ', $shown);
+        if ($this->entry['count_authors'] > 3)
+        {
+            $res .= ' ' . $this->_('et&nbsp;al.');
+        }
+
+        $this->entry['author'] = $res;
+    }
+
+    /**
+     * Format one BibTeX entry in HTML.
+     *
+     * Every field of a working copy of $entry is prepared first, then the
+     * layout method format_<entry type>() assembles the result. A full stop
+     * and the link built from 'url' (if any) are appended, and repeated
+     * full stops are collapsed.
+     *
+     * The parameter is taken by value, as in BibtexParser::format(): the
+     * entry is not modified, and an overriding method must keep the
+     * parent's signature.
+     *
+     * @param array $entry one element of $ENTRIES
+     * @return string HTML
+     */
+    public function format($entry)
+    {
+        $this->entry = $entry;
+
+        // _() reads 'language' while other fields are being prepared, so
+        // that field is normalised before any of them.
+        if (isset($this->entry['language']))
+        {
+            $this->prepare_field('language');
+        }
+
+        foreach (array_keys($entry) as $field)
+        {
+            // 'entry', 'id' and 'html' are bookkeeping added by the parser,
+            // not BibTeX values; 'language' has been handled above.
+            if ($field === 'entry' || $field === 'id' || $field === 'html' || $field === 'language')
+            {
+                continue;
+            }
+            $this->prepare_field($field); // prepare a field for final HTML output
+        }
+
+        $method = 'format_' . $this->entry['entry'];
+        if (method_exists($this, $method))
+        {
+            $res = $this->$method();
+        }
+        else
+        {
+            $res = 'Not implemented for ' . $this->entry['entry'];
+        }
+
+        $res .= '.';
+        if (!empty($this->entry['url']))
+        {
+            $res .= $this->entry['url'];
+        }
+        // '..' -> '.', also when closing tags stand between the two stops
+        $res = preg_replace('/\.(<[^>]+>)*?\.+/', '.\1', $res);
+        return $res;
+    }
+
+    /**
+     * Layout for @book:
+     * authors. title / editor - edition - volume - address: publisher, year
+     * - number of pages (or pages).
+     *
+     * @return string
+     */
+    protected function format_book()
+    {
+        $parts = array(); // All parts are connected with '.&nbsp;&mdash; '
+
+        $part = $this->author_title();
+        if (!empty($this->entry['editor']))
+        {
+            $part .= '&nbsp;/ ' . $this->entry['editor'];
+        }
+        $parts[] = $part;
+
+        if (!empty($this->entry['edition']))
+        {
+            $parts[] = $this->entry['edition'];
+        }
+
+        if (!empty($this->entry['volume']))
+        {
+            $parts[] = $this->entry['volume'];
+        }
+
+        $part = '';
+        if (!empty($this->entry['address']))
+        {
+            $part .= $this->entry['address'];
+        }
+        if (!empty($this->entry['publisher']))
+        {
+            $part .= (empty($part) ? '' : ': ') . $this->entry['publisher'];
+        }
+        if (!empty($this->entry['year'])) // We are ignoring month
+        {
+            $part .= (empty($part) ? '' : ', ') . $this->entry['year'];
+        }
+        $parts[] = $part;
+
+        if (!empty($this->entry['numpages']))
+        {
+            $parts[] = $this->entry['numpages'];
+        }
+        elseif (!empty($this->entry['pages']))
+        {
+            $parts[] = $this->entry['pages'];
+        }
+
+        return $this->join_parts($parts);
+    }
+
+    /**
+     * Layout for @article:
+     * authors. title // journal - year - month - volume, number - pages.
+     *
+     * @return string
+     */
+    protected function format_article()
+    {
+        $parts = array(); // All parts are connected with '.&nbsp;&mdash; '
+
+        $part = $this->author_title();
+        if (!empty($this->entry['journal']))
+        {
+            $part .= '&nbsp;// <em>' . $this->entry['journal'] . '</em>';
+        }
+        $parts[] = $part;
+
+        if (!empty($this->entry['year']))
+        {
+            $parts[] = $this->entry['year'];
+        }
+
+        if (!empty($this->entry['month']))
+        {
+            $parts[] = $this->entry['month'];
+        }
+
+        $part = '';
+        if (!empty($this->entry['volume']))
+        {
+            $part .= $this->entry['volume'];
+        }
+        if (!empty($this->entry['number']))
+        {
+            $part .= (empty($part) ? '' : ', ') . $this->entry['number'];
+        }
+        $parts[] = $part;
+
+        if (!empty($this->entry['pages']))
+        {
+            $parts[] = $this->entry['pages'];
+        }
+
+        return $this->join_parts($parts);
+    }
+
+
+    /**
+     * Layout for @inproceedings:
+     * authors. title // booktitle - volume - address, year - pages.
+     *
+     * @return string
+     */
+    protected function format_inproceedings()
+    {
+        $parts = array(); // All parts are connected with '.&nbsp;&mdash; '
+
+        $part = $this->author_title();
+        if (!empty($this->entry['booktitle']))
+        {
+            $part .= '&nbsp;// ' . $this->entry['booktitle'];
+        }
+        $parts[] = $part;
+
+        if (!empty($this->entry['volume']))
+        {
+            $parts[] = $this->entry['volume'];
+        }
+
+        $part = '';
+        if (!empty($this->entry['address']))
+        {
+            $part .= $this->entry['address'];
+        }
+        if (!empty($this->entry['year'])) // We are ignoring month
+        {
+            $part .= (empty($part) ? '' : ', ') . $this->entry['year'];
+        }
+        $parts[] = $part;
+
+        if (!empty($this->entry['pages']))
+        {
+            $parts[] = $this->entry['pages'];
+        }
+
+        return $this->join_parts($parts);
+    }
+
+
+    /**
+     * Layout for @booklet: same as @book.
+     *
+     * @return string
+     */
+    protected function format_booklet()
+    {
+        return $this->format_book();
+    }
+
+
+    /**
+     * Layout for @misc: same as @article.
+     *
+     * @return string
+     */
+    protected function format_misc()
+    {
+        return $this->format_article(); // Use @misc for articles in non-reviewed journals
+    }
+}
+
+?>
+