aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIgor Pashev <pashev.igor@gmail.com>2022-08-28 15:40:21 +0200
committerIgor Pashev <pashev.igor@gmail.com>2022-09-25 20:23:05 +0200
commitf0881305402f3dff56f99fcc4cbc8b615581b780 (patch)
tree5d475e2e95138f63ad249f4d44a453999c1c14f6
downloadmendeleev-f0881305402f3dff56f99fcc4cbc8b615581b780.tar.gz
C, Haskell, Python, Fortran
-rw-r--r--LICENSE13
-rw-r--r--Makefile108
-rw-r--r--README38
-rw-r--r--README_ru36
-rw-r--r--mendeleev.c196
-rw-r--r--mendeleev.f90200
-rw-r--r--mendeleev.hs71
-rw-r--r--mendeleev.py102
-rw-r--r--test.txt36
9 files changed, 800 insertions, 0 deletions
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..c6c7def
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,13 @@
+ DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
+ Version 2, December 2004
+
+ Copyright (C) 2004 Sam Hocevar <sam@hocevar.net>
+
+ Everyone is permitted to copy and distribute verbatim or modified
+ copies of this license document, and changing it is allowed as long
+ as the name is changed.
+
+ DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. You just DO WHAT THE FUCK YOU WANT TO.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..c2abbd8
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,108 @@
+BINARIES := \
+ mendeleev-c \
+ mendeleev-c-cpp \
+ mendeleev-f \
+ mendeleev-hs
+
+SCRIPTS := \
+ python2:mendeleev.py \
+ python3:mendeleev.py \
+ pypy:mendeleev.py
+
+.PHONY: build
+build: $(BINARIES)
+
+RM := rm -f -v
+MV := mv -f -v
+.PHONY: clean
+clean:
+ $(RM) $(BINARIES)
+ $(RM) prof-*
+
+
+# Testing
+TEST_FILE := test.txt
+WORDS := $(shell awk -F: '/:/ {print "\""$$1"\""}' $(TEST_FILE))
+EMPTY :=
+SPACE := $(EMPTY) $(EMPTY)
+.PHONY: test
+define mktest
+test: test-$(subst $(SPACE),-,$(1))
+.PHONY: test-$(subst $(SPACE),-,$(1))
+test-$(subst $(SPACE),-,$(1)): $(lastword $(1))
+ $(wordlist 2, $(words $(1)), _ $(1)) ./$(lastword $(1)) $(WORDS) | diff -u $(TEST_FILE) -
+endef
+$(foreach t,$(SCRIPTS),$(eval $(call mktest,$(subst :, ,$(t)))))
+$(foreach t,$(BINARIES),$(eval $(call mktest,$(t))))
+
+
+# Profiling
+PROF_TEST := hehehehehehehehehehehehehehehehe
+.PHONY: prof
+prof: \
+ prof-mendeleev-c.txt \
+ prof-mendeleev-f.txt \
+ prof-mendeleev-hs.txt \
+ prof-mendeleev-py.txt
+
+%.gmon: %
+ $(RM) $<-gmon.*
+ GMON_OUT_PREFIX=$<-gmon ./$< $(PROF_TEST) > /dev/null
+ $(MV) $<-gmon.* $@
+
+prof-mendeleev-py.dat: mendeleev.py
+ python3 -m cProfile -o $@ $< $(PROF_TEST) > /dev/null
+
+prof-mendeleev-py.txt: prof-mendeleev-py.dat
+ python3 -c 'import pstats; pstats.Stats("$<").sort_stats("tottime").print_stats()' > $@
+
+
+# C
+CC = gcc
+CFLAGS = -std=c99 -Wall -Wextra -O2
+mendeleev-c: mendeleev.c
+ $(CC) $(CFLAGS) $< -o $@
+
+CFLAGS_PROF = -O0 -g -pg
+prof-mendeleev-c: mendeleev.c
+ $(CC) $(CFLAGS_PROF) $< -o $@
+
+prof-mendeleev-c.txt: prof-mendeleev-c.gmon
+ gprof --brief prof-mendeleev-c $< > $@
+
+
+# C++
+CXX = g++
+CXXFLAGS = -std=c++98 -Wall -Wextra -O2
+mendeleev-c-cpp: mendeleev.c
+ $(CXX) $(CXXFLAGS) $< -o $@
+
+
+# Fortran
+FC = gfortran
+FFLAGS = -std=f2003 -Wall -Wextra -O2
+mendeleev-f: mendeleev.f90
+ $(FC) $(FFLAGS) $< -o $@
+
+FFLAGS_PROF = -O0 -g -pg -fcheck=all
+prof-mendeleev-f: mendeleev.f90
+ $(FC) $(FFLAGS_PROF) $< -o $@
+
+prof-mendeleev-f.txt: prof-mendeleev-f.gmon
+ gprof --brief prof-mendeleev-f $< > $@
+
+
+# Haskell
+HC = ghc
+HCFLAGS = -XHaskell98 -no-keep-hi-files -no-keep-o-files -Wall -O2
+mendeleev-hs: mendeleev.hs
+ $(HC) $(HCFLAGS) $< -o $@
+
+HCFLAGS_PROF = -prof -fprof-auto -rtsopts -no-keep-hi-files -no-keep-o-files
+prof-mendeleev-hs: mendeleev.hs
+ $(HC) $(HCFLAGS_PROF) $< -o $@
+
+prof-mendeleev-hs.txt: prof-mendeleev-hs
+ ./$< +RTS -p -RTS $(PROF_TEST) > /dev/null
+ $(MV) $<.prof $@
+
diff --git a/README b/README
new file mode 100644
index 0000000..2205d7c
--- /dev/null
+++ b/README
@@ -0,0 +1,38 @@
+Mendeleev — find chemical elements in words.
+
+For example, the word "Moscow" can be split in the following ways:
+ Mo S C O W,
+ Mo S Co W,
+ Mo Sc O W,
+where:
+ Mo — Molybdenum,
+ S — Sulfur,
+ Sc — Scandium,
+ C — Carbon,
+ Co — Cobalt,
+ W — Wolfram (Tungsten).
+
+When a character cannot be found among the chemical elements, it's replaced
+with a question mark:
+ language -> La N ? U Ag ?
+
+There are implementations in several programming languages. Each program
+accepts words as arguments and prints all the possible combinations,
+for example:
+
+ $ ./mendeleev-c preprocessor
+ preprocessor:
+ P Re P ? O C Es S O ?
+ P Re P ? O Ce S S O ?
+ P Re Pr O C Es S O ?
+ P Re Pr O Ce S S O ?
+ Pr ? P ? O C Es S O ?
+ Pr ? P ? O Ce S S O ?
+ Pr ? Pr O C Es S O ?
+ Pr ? Pr O Ce S S O ?
+
+This software is Public Domain. See the LICENSE file for details.
+
+Dmitri Ivanovich Mendeleev (1834–1907) was a Russian chemist and inventor
+known for formulating the Periodic Law.
+
diff --git a/README_ru b/README_ru
new file mode 100644
index 0000000..3e29715
--- /dev/null
+++ b/README_ru
@@ -0,0 +1,36 @@
+«Менделеев» — поиск химических элементов в словах.
+
+Например, в английском слове "Moscow" (Москва) можно обнаружить следующие элементы:
+ Mo S C O W,
+ Mo S Co W,
+ Mo Sc O W,
+где:
+ Mo — молибден,
+ S — сера,
+ Sc — скандий,
+ C — углерод,
+ Co — кобальт,
+ W — вольфрам.
+
+Символы, не подходящие ни для одного элемента, обозначаются вопросительными знаками:
+ language -> La N ? U Ag ?
+
+Алгоритм реализован на нескольких языках программирования. Каждая программа
+принимает список слов в виде аргументов командной строки и выводит все
+все возможные комбинации элементов. Например:
+
+ $ ./mendeleev-c preprocessor
+ preprocessor:
+ P Re P ? O C Es S O ?
+ P Re P ? O Ce S S O ?
+ P Re Pr O C Es S O ?
+ P Re Pr O Ce S S O ?
+ Pr ? P ? O C Es S O ?
+ Pr ? P ? O Ce S S O ?
+ Pr ? Pr O C Es S O ?
+ Pr ? Pr O Ce S S O ?
+
+Разрешается использовать этот код в любых целях без каких бы то ни было условий.
+
+Дмитрий Иванович Менделеев (1834–1907) — русский учёный, открывший периодический
+закон химических элементов.
diff --git a/mendeleev.c b/mendeleev.c
new file mode 100644
index 0000000..d90ee23
--- /dev/null
+++ b/mendeleev.c
@@ -0,0 +1,196 @@
+#include<ctype.h>
+#include<stdio.h>
+#include<stdlib.h>
+#include<string.h>
+
+static const char *const ELEMENTS[] = { "?",
+ "Ac", "Ag", "Al", "Am", "Ar", "As", "At", "Au", "B", "Ba", "Be", "Bh",
+ "Bi", "Bk", "Br", "C", "Ca", "Cd", "Ce", "Cf", "Cl", "Cm", "Cn", "Co",
+ "Cr", "Cs", "Cu", "Db", "Ds", "Dy", "Er", "Es", "Eu", "F", "Fe", "Fl",
+ "Fm", "Fr", "Ga", "Gd", "Ge", "H", "He", "Hf", "Hg", "Ho", "Hs", "I",
+ "In", "Ir", "K", "Kr", "La", "Li", "Lr", "Lu", "Lv", "Mc", "Md", "Mg",
+ "Mn", "Mo", "Mt", "N", "Na", "Nb", "Nd", "Ne", "Nh", "Ni", "No", "Np",
+ "O", "Og", "Os", "P", "Pa", "Pb", "Pd", "Pm", "Po", "Pr", "Pt", "Pu",
+ "Ra", "Rb", "Re", "Rf", "Rg", "Rh", "Rn", "Ru", "S", "Sb", "Sc", "Se",
+ "Sg", "Si", "Sm", "Sn", "Sr", "Ta", "Tb", "Tc", "Te", "Th", "Ti", "Tl",
+ "Tm", "Ts", "U", "V", "W", "Xe", "Y", "Yb", "Zn", "Zr"
+};
+
+static const size_t NELEMENTS = sizeof (ELEMENTS) / sizeof (const char *) - 1;
+
+typedef struct formula
+{
+ const char *tail;
+ size_t n;
+ size_t *els;
+ struct formula *next;
+} formula_t;
+
+
+static void
+search (size_t *start, size_t *len, size_t shift, char c)
+{
+ size_t l, m, u;
+ int c_ = tolower (c);
+
+ u = *start + *len;
+ l = *start;
+ while (l < u)
+ {
+ m = (l + u) / 2;
+ if (tolower (ELEMENTS[m][shift]) < c_)
+ l = m + 1;
+ else
+ u = m;
+ }
+
+ if ((l == *start + *len) || (tolower (ELEMENTS[l][shift]) != c_))
+ {
+ *len = 0;
+ return;
+ }
+
+ u = *start + *len;
+ *start = l;
+ while (l < u)
+ {
+ m = (l + u) / 2;
+ if (c_ < tolower (ELEMENTS[m][shift]))
+ u = m;
+ else
+ l = m + 1;
+ }
+
+ *len = u - *start;
+}
+
+static formula_t *
+new_formula (const char *tail, size_t n, const size_t *els)
+{
+ formula_t *f = (formula_t *) malloc (sizeof (formula_t));
+ if (!f)
+ return NULL;
+
+ f->els = (size_t *) malloc (sizeof (*f->els) * (n + strlen (tail)));
+ if (!f->els)
+ {
+ free (f);
+ return NULL;
+ }
+
+ if (n > 0)
+ (void) memcpy (f->els, els, n * sizeof (*f->els));
+
+ f->n = n;
+ f->tail = tail;
+ f->next = NULL;
+
+ return f;
+}
+
+static void
+free_formula (formula_t * f)
+{
+ while (f)
+ {
+ formula_t *next = f->next;
+ if (f->els)
+ free (f->els);
+ free (f);
+ f = next;
+ }
+}
+
+static void
+advance (formula_t * f)
+{
+ const char *tail = f->tail;
+ size_t n = f->n;
+
+ size_t start = 1;
+ size_t len = NELEMENTS;
+ size_t shift = 0;
+
+ while (tail[shift])
+ {
+ search (&start, &len, shift, tail[shift]);
+ if (!len)
+ break;
+
+ shift++;
+ if (!ELEMENTS[start][shift])
+ {
+ if (n != f->n)
+ {
+ formula_t *g = new_formula (tail, n, f->els);
+ if (!g)
+ break;
+ g->next = f->next;
+ f->next = g;
+ f = g;
+ }
+
+ f->els[f->n++] = start;
+ f->tail = &tail[shift];
+ start++;
+ len--;
+ }
+ }
+
+ if (n == f->n)
+ {
+ f->els[f->n++] = 0;
+ f->tail += 1;
+ }
+}
+
+static formula_t *
+explode (const char *word)
+{
+ formula_t *formula = new_formula (word, 0, NULL);
+ if (!formula)
+ return NULL;
+
+ while (*word)
+ {
+ word = NULL;
+ formula_t *f = formula;
+ while (f)
+ {
+ if (*f->tail)
+ {
+ advance (f);
+ if (!word)
+ word = f->tail;
+ }
+ f = f->next;
+ }
+ }
+
+ return formula;
+}
+
+int
+main (int argc, const char *argv[])
+{
+ for (int w = 1; w < argc; w++)
+ {
+ const char *word = argv[w];
+ printf ("%s:\n", word);
+
+ formula_t *formula = explode (word);
+ if (!formula)
+ return EXIT_FAILURE;
+
+ for (formula_t * f = formula; f; f = f->next)
+ {
+ for (size_t i = 0; i < f->n; i++)
+ printf (" %s", ELEMENTS[f->els[i]]);
+ if (f->n)
+ printf ("\n");
+ }
+ free_formula (formula);
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/mendeleev.f90 b/mendeleev.f90
new file mode 100644
index 0000000..3108b5e
--- /dev/null
+++ b/mendeleev.f90
@@ -0,0 +1,200 @@
+program mendeleev
+
+ implicit none
+
+ character(len=2), dimension(0:118), parameter :: ELEMENTS = (/ "? ", &
+ "Ac", "Ag", "Al", "Am", "Ar", "As", "At", "Au", "B ", "Ba", "Be", "Bh", &
+ "Bi", "Bk", "Br", "C ", "Ca", "Cd", "Ce", "Cf", "Cl", "Cm", "Cn", "Co", &
+ "Cr", "Cs", "Cu", "Db", "Ds", "Dy", "Er", "Es", "Eu", "F ", "Fe", "Fl", &
+ "Fm", "Fr", "Ga", "Gd", "Ge", "H ", "He", "Hf", "Hg", "Ho", "Hs", "I ", &
+ "In", "Ir", "K ", "Kr", "La", "Li", "Lr", "Lu", "Lv", "Mc", "Md", "Mg", &
+ "Mn", "Mo", "Mt", "N ", "Na", "Nb", "Nd", "Ne", "Nh", "Ni", "No", "Np", &
+ "O ", "Og", "Os", "P ", "Pa", "Pb", "Pd", "Pm", "Po", "Pr", "Pt", "Pu", &
+ "Ra", "Rb", "Re", "Rf", "Rg", "Rh", "Rn", "Ru", "S ", "Sb", "Sc", "Se", &
+ "Sg", "Si", "Sm", "Sn", "Sr", "Ta", "Tb", "Tc", "Te", "Th", "Ti", "Tl", &
+ "Tm", "Ts", "U ", "V ", "W ", "Xe", "Y ", "Yb", "Zn", "Zr" /)
+
+ type :: formula_t
+ integer :: tail = 1
+ integer :: n = 0
+ integer, dimension(:), allocatable :: elements
+ type(formula_t), pointer :: next => null()
+ end type formula_t
+
+ type(formula_t), pointer :: formula, f
+ character(len=:), allocatable :: word
+ integer :: length, argc, i, j
+
+ argc = command_argument_count()
+ do i = 1, argc
+ call get_command_argument(i, length=length)
+ allocate(character(len=length) :: word)
+ call get_command_argument(i, value=word)
+ write (*, "(A, ':')") word
+
+ formula => explode(word)
+
+ f => formula
+ do while (associated(f))
+ if (f%n > 0) then
+ do j = 1, f%n
+ write (*, "(' ', A)", advance="no") trim(ELEMENTS(f%elements(j)))
+ end do
+ write (*, "()")
+ end if
+ f => f%next
+ end do
+
+ call free_formula(formula)
+ deallocate(word)
+ end do
+
+
+contains
+
+ pure subroutine free_formula(formula)
+ type(formula_t), pointer, intent(in out) :: formula
+
+ type(formula_t), pointer :: next
+
+ do while (associated(formula))
+ next => formula%next
+ deallocate(formula%elements)
+ deallocate(formula)
+ formula => next
+ end do
+ end subroutine free_formula
+
+
+ pure integer function tolower(c)
+ character(len=1), intent(in) :: c
+
+ integer :: i
+
+ i = iachar(c)
+ if (i >= 65 .and. i < 90) then
+ tolower = i + 32
+ else
+ tolower = i
+ endif
+ end function tolower
+
+
+ pure subroutine search(start, length, sh, c)
+ integer, intent(in out) :: start, length
+ integer, intent(in) :: sh
+ character(len=1), intent(in) :: c
+
+ integer :: l, m, u, c_
+
+ c_ = tolower(c)
+
+ u = start + length
+ l = start
+ do while (l < u)
+ m = (u + l) / 2
+ if (tolower(ELEMENTS(m)(sh:sh)) < c_) then
+ l = m + 1
+ else
+ u = m
+ endif
+ end do
+
+ if (l == start + length) then
+ length = 0
+ return
+ end if
+
+ if (tolower(ELEMENTS(l)(sh:sh)) /= c_) then
+ length = 0
+ return
+ end if
+
+ u = start + length
+ start = l
+ do while (l < u)
+ m = (u + l) / 2
+ if (c_ < tolower(ELEMENTS(m)(sh:sh))) then
+ u = m
+ else
+ l = m + 1
+ endif
+ end do
+
+ length = u - start
+ end subroutine search
+
+
+ pure subroutine advance(word, f)
+ character(len=*), intent(in) :: word
+ type(formula_t), pointer, intent(in out) :: f
+
+ integer :: n, tail
+ integer :: start, length, sh, c
+ type(formula_t), pointer :: g
+
+ tail = f%tail
+ n = f%n
+
+ sh = 0
+ start = 1
+ length = ubound(ELEMENTS, 1)
+ do
+ c = tail + sh
+ if (len(word) < c) exit
+
+ sh = sh + 1
+ call search(start, length, sh, word(c:c))
+ if (length == 0) exit
+
+ if (sh == len_trim(ELEMENTS(start))) then
+ if (n /= f%n) then
+ allocate(g)
+ allocate(g%elements(len(word)))
+ g%n = n
+ g%elements(1:n) = f%elements(1:n)
+ g%next => f%next
+ f%next => g
+ f => g
+ end if
+
+ f%n = f%n + 1
+ f%elements(f%n) = start
+ f%tail = c + 1
+ start = start + 1
+ length = length - 1
+ end if
+ end do
+
+ if (n == f%n) then
+ f%tail = f%tail + 1
+ f%n = f%n + 1
+ f%elements(f%n) = 0
+ end if
+ end subroutine advance
+
+
+ pure function explode(word) result(formula)
+ character(len=*), intent(in) :: word
+ type(formula_t), pointer :: formula
+
+ logical :: has_tail
+ type(formula_t), pointer :: f
+
+ allocate(formula)
+ allocate(formula%elements(len(word)))
+
+ do
+ f => formula
+ has_tail = .false.
+ do while (associated(f))
+ if (f%tail <= len(word)) then
+ call advance(word, f)
+ if (.not. has_tail) has_tail = f%tail <= len(word)
+ end if
+ f => f%next
+ end do
+ if (.not. has_tail) exit
+ end do
+ end function explode
+end program mendeleev
diff --git a/mendeleev.hs b/mendeleev.hs
new file mode 100644
index 0000000..cfd6ac5
--- /dev/null
+++ b/mendeleev.hs
@@ -0,0 +1,71 @@
+module Main
+ ( main
+ ) where
+
+import Data.Char (toLower)
+import System.Environment (getArgs)
+
+elements :: [String]
+elements = [
+ "Ac", "Ag", "Al", "Am", "Ar", "As", "At", "Au", "B", "Ba", "Be", "Bh",
+ "Bi", "Bk", "Br", "C", "Ca", "Cd", "Ce", "Cf", "Cl", "Cm", "Cn", "Co",
+ "Cr", "Cs", "Cu", "Db", "Ds", "Dy", "Er", "Es", "Eu", "F", "Fe", "Fl",
+ "Fm", "Fr", "Ga", "Gd", "Ge", "H", "He", "Hf", "Hg", "Ho", "Hs", "I",
+ "In", "Ir", "K", "Kr", "La", "Li", "Lr", "Lu", "Lv", "Mc", "Md", "Mg",
+ "Mn", "Mo", "Mt", "N", "Na", "Nb", "Nd", "Ne", "Nh", "Ni", "No", "Np",
+ "O", "Og", "Os", "P", "Pa", "Pb", "Pd", "Pm", "Po", "Pr", "Pt", "Pu",
+ "Ra", "Rb", "Re", "Rf", "Rg", "Rh", "Rn", "Ru", "S", "Sb", "Sc", "Se",
+ "Sg", "Si", "Sm", "Sn", "Sr", "Ta", "Tb", "Tc", "Te", "Th", "Ti", "Tl",
+ "Tm", "Ts", "U", "V", "W", "Xe", "Y", "Yb", "Zn", "Zr"
+ ]
+
+search :: Char -> [String] -> [String]
+search c = takeWhile start . dropWhile (not . start)
+ where
+ start [] = False
+ start (x:_) = toLower x == c'
+ c' = toLower c
+
+divide :: String -> [(String, String)]
+divide [] = []
+divide (x:xs) =
+ if null res
+ then [("?", xs)]
+ else res
+ where
+ res = go [] [] xs (search x elements)
+ go r _ _ [] = r
+ go r el rest candidates@(c:_) =
+ let el' = head c : el
+ candidates'@(c':_) = map tail candidates
+ r' =
+ if null c'
+ then (reverse el', rest) : r
+ else r
+ in case rest of
+ (y:ys) -> go r' el' ys (search y candidates')
+ _ -> r'
+
+advance :: ([String], String) -> [([String], String)]
+advance (els, rest) = map collect $ divide rest
+ where
+ collect (el, rest') = (el : els, rest')
+
+explode :: String -> [[String]]
+explode word = reverse . map (reverse . fst) $ go [([], word)]
+ where
+ go :: [([String], String)] -> [([String], String)]
+ go [] = []
+ go (x:xs) =
+ if null (snd x)
+ then x : go xs
+ else go (advance x) ++ go xs
+
+printFormula :: String -> IO ()
+printFormula word = do
+ putStr word
+ putStrLn ":"
+ mapM_ (putStrLn . (:) ' ' . unwords) . filter (not . null) $ explode word
+
+main :: IO ()
+main = getArgs >>= mapM_ printFormula
diff --git a/mendeleev.py b/mendeleev.py
new file mode 100644
index 0000000..b026ab1
--- /dev/null
+++ b/mendeleev.py
@@ -0,0 +1,102 @@
+import sys
+
+
+# fmt: off
+ELEMENTS = [
+ "Ac", "Ag", "Al", "Am", "Ar", "As", "At", "Au", "B", "Ba", "Be", "Bh",
+ "Bi", "Bk", "Br", "C", "Ca", "Cd", "Ce", "Cf", "Cl", "Cm", "Cn", "Co",
+ "Cr", "Cs", "Cu", "Db", "Ds", "Dy", "Er", "Es", "Eu", "F", "Fe", "Fl",
+ "Fm", "Fr", "Ga", "Gd", "Ge", "H", "He", "Hf", "Hg", "Ho", "Hs", "I",
+ "In", "Ir", "K", "Kr", "La", "Li", "Lr", "Lu", "Lv", "Mc", "Md", "Mg",
+ "Mn", "Mo", "Mt", "N", "Na", "Nb", "Nd", "Ne", "Nh", "Ni", "No", "Np",
+ "O", "Og", "Os", "P", "Pa", "Pb", "Pd", "Pm", "Po", "Pr", "Pt", "Pu",
+ "Ra", "Rb", "Re", "Rf", "Rg", "Rh", "Rn", "Ru", "S", "Sb", "Sc", "Se",
+ "Sg", "Si", "Sm", "Sn", "Sr", "Ta", "Tb", "Tc", "Te", "Th", "Ti", "Tl",
+ "Tm", "Ts", "U", "V", "W", "Xe", "Y", "Yb", "Zn", "Zr"
+]
+# fmt: on
+
+elements = [el.lower() for el in ELEMENTS]
+
+
+def search(start, length, shift, char):
+ upper = start + length
+ lower = start
+ while lower < upper:
+ mid = int((lower + upper) / 2)
+ if elements[mid][shift] < char:
+ lower = mid + 1
+ else:
+ upper = mid
+
+ if lower == start + length:
+ return (0, 0)
+
+ if elements[lower][shift] != char:
+ return (0, 0)
+
+ upper = start + length
+ start = lower
+ while lower < upper:
+ mid = int((lower + upper) / 2)
+ if char < elements[mid][shift]:
+ upper = mid
+ else:
+ lower = mid + 1
+
+ length = upper - start
+
+ return (start, length)
+
+
+def divide(tail):
+ result = []
+
+ start = 0
+ length = len(ELEMENTS)
+ shift = 0
+
+ while shift < len(tail):
+ start, length = search(start, length, shift, tail[shift])
+ if length == 0:
+ break
+
+ shift += 1
+ if len(ELEMENTS[start]) == shift:
+ result.append((ELEMENTS[start], tail[shift:]))
+ start += 1
+ length -= 1
+
+ return result or [("?", tail[1:])]
+
+
+def advance(els, tail):
+ return [(els + [e], t) for (e, t) in divide(tail)]
+
+
+def explode(word):
+ result = [([], word.lower())]
+ while True:
+ new = []
+ tail = None
+ for res in result:
+ if res[1]:
+ adv = advance(*res)
+ new.extend(adv)
+ if not tail:
+ tail = adv[0][1]
+ else:
+ new.append(res)
+
+ result = new
+
+ if not tail:
+ break
+
+ return [els for els, _ in result]
+
+
+for w in sys.argv[1:]:
+ print(w + ":")
+ for f in filter(None, explode(w)):
+ print(" " + " ".join(f))
diff --git a/test.txt b/test.txt
new file mode 100644
index 0000000..be3d00d
--- /dev/null
+++ b/test.txt
@@ -0,0 +1,36 @@
+Mendeleev:
+ ? ? N ? ? ? ? ? V
+ ? ? Nd ? ? ? ? V
+Russia:
+ Ru S S I ?
+ Ru S Si ?
+Moscow:
+ Mo S C O W
+ Mo S Co W
+ Mo Sc O W
+English:
+ ? N ? Li S H
+Spanish:
+ S P ? N I S H
+ S P ? Ni S H
+ S Pa N I S H
+ S Pa Ni S H
+language:
+ La N ? U Ag ?
+preprocessor:
+ P Re P ? O C Es S O ?
+ P Re P ? O Ce S S O ?
+ P Re Pr O C Es S O ?
+ P Re Pr O Ce S S O ?
+ Pr ? P ? O C Es S O ?
+ Pr ? P ? O Ce S S O ?
+ Pr ? Pr O C Es S O ?
+ Pr ? Pr O Ce S S O ?
+abcdefghijklmnopqrstuvwxyz:
+ ? B C ? ? F ? H I ? K ? Mn O P ? ? S ? U V W ? Y ?
+ ? B Cd ? F ? H I ? K ? Mn O P ? ? S ? U V W ? Y ?
+xxxxx:
+ ? ? ? ? ?
+{0}:
+ ? ? ?
+: