C, Haskell, Python, Fortran

author: Igor Pashev <pashev.igor@gmail.com> 2022-08-28 15:40:21 +0200
committer: Igor Pashev <pashev.igor@gmail.com> 2022-09-25 20:23:05 +0200
commit: f0881305402f3dff56f99fcc4cbc8b615581b780 (patch)
tree: 5d475e2e95138f63ad249f4d44a453999c1c14f6
download: mendeleev-f0881305402f3dff56f99fcc4cbc8b615581b780.tar.gz
9 files changed, 800 insertions, 0 deletions
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..c6c7def
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,13 @@
+        DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
+                    Version 2, December 2004
+
+ Copyright (C) 2004 Sam Hocevar <sam@hocevar.net>
+
+ Everyone is permitted to copy and distribute verbatim or modified
+ copies of this license document, and changing it is allowed as long
+ as the name is changed.
+
+            DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. You just DO WHAT THE FUCK YOU WANT TO.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..c2abbd8
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,108 @@
+BINARIES := \
+	mendeleev-c \
+	mendeleev-c-cpp \
+	mendeleev-f \
+	mendeleev-hs
+
+SCRIPTS := \
+	python2:mendeleev.py \
+	python3:mendeleev.py \
+	pypy:mendeleev.py
+
+.PHONY: build
+build: $(BINARIES)
+
+RM := rm -f -v
+MV := mv -f -v
+.PHONY: clean
+clean:
+	$(RM) $(BINARIES)
+	$(RM) prof-*
+
+
+# Testing
+TEST_FILE := test.txt
+WORDS := $(shell awk -F: '/:/ {print "\""$$1"\""}' $(TEST_FILE))
+EMPTY :=
+SPACE := $(EMPTY) $(EMPTY)
+.PHONY: test
+define mktest
+test: test-$(subst $(SPACE),-,$(1))
+.PHONY: test-$(subst $(SPACE),-,$(1))
+test-$(subst $(SPACE),-,$(1)): $(lastword $(1))
+	$(wordlist 2, $(words $(1)), _ $(1)) ./$(lastword $(1)) $(WORDS) | diff -u $(TEST_FILE) -
+endef
+$(foreach t,$(SCRIPTS),$(eval $(call mktest,$(subst :, ,$(t)))))
+$(foreach t,$(BINARIES),$(eval $(call mktest,$(t))))
+
+
+# Profiling
+PROF_TEST := hehehehehehehehehehehehehehehehe
+.PHONY: prof
+prof: \
+	prof-mendeleev-c.txt \
+	prof-mendeleev-f.txt \
+	prof-mendeleev-hs.txt \
+	prof-mendeleev-py.txt
+
+%.gmon: %
+	$(RM) $<-gmon.*
+	GMON_OUT_PREFIX=$<-gmon ./$< $(PROF_TEST) > /dev/null
+	$(MV) $<-gmon.* $@
+
+prof-mendeleev-py.dat: mendeleev.py
+	python3 -m cProfile -o $@ $< $(PROF_TEST) > /dev/null
+
+prof-mendeleev-py.txt: prof-mendeleev-py.dat
+	python3 -c 'import pstats; pstats.Stats("$<").sort_stats("tottime").print_stats()' > $@
+
+
+# C
+CC = gcc
+CFLAGS = -std=c99 -Wall -Wextra -O2
+mendeleev-c: mendeleev.c
+	$(CC) $(CFLAGS) $< -o $@
+
+CFLAGS_PROF = -O0 -g -pg
+prof-mendeleev-c: mendeleev.c
+	$(CC) $(CFLAGS_PROF) $< -o $@
+
+prof-mendeleev-c.txt: prof-mendeleev-c.gmon
+	gprof --brief prof-mendeleev-c $< > $@
+
+
+# C++
+CXX = g++
+CXXFLAGS = -std=c++98 -Wall -Wextra -O2
+mendeleev-c-cpp: mendeleev.c
+	$(CXX) $(CXXFLAGS) $< -o $@
+
+
+# Fortran
+FC = gfortran
+FFLAGS = -std=f2003 -Wall -Wextra -O2
+mendeleev-f: mendeleev.f90
+	$(FC) $(FFLAGS) $< -o $@
+
+FFLAGS_PROF = -O0 -g -pg -fcheck=all
+prof-mendeleev-f: mendeleev.f90
+	$(FC) $(FFLAGS_PROF) $< -o $@
+
+prof-mendeleev-f.txt: prof-mendeleev-f.gmon
+	gprof --brief prof-mendeleev-f $< > $@
+
+
+# Haskell
+HC = ghc
+HCFLAGS = -XHaskell98 -no-keep-hi-files -no-keep-o-files -Wall -O2
+mendeleev-hs: mendeleev.hs
+	$(HC) $(HCFLAGS) $< -o $@
+
+HCFLAGS_PROF = -prof -fprof-auto -rtsopts -no-keep-hi-files -no-keep-o-files
+prof-mendeleev-hs: mendeleev.hs
+	$(HC) $(HCFLAGS_PROF) $< -o $@
+
+prof-mendeleev-hs.txt: prof-mendeleev-hs
+	./$< +RTS -p -RTS $(PROF_TEST) > /dev/null
+	$(MV) $<.prof $@
+
diff --git a/README b/README
new file mode 100644
index 0000000..2205d7c
--- /dev/null
+++ b/README
@@ -0,0 +1,38 @@
+Mendeleev — find chemical elements in words.
+
+For example, the word "Moscow" can be split in the following ways:
+    Mo S C O W,
+    Mo S Co W,
+    Mo Sc O W,
+where:
+    Mo — Molybdenum,
+    S  — Sulfur,
+    Sc — Scandium,
+    C  — Carbon,
+    Co — Cobalt,
+    W  — Wolfram (Tungsten).
+
+When a character cannot be found among the chemical elements, it's replaced
+with a question mark:
+    language -> La N ? U Ag ?
+
+There are implementations in several programming languages.  Each program
+accepts words as arguments and prints all the possible combinations,
+for example:
+
+    $ ./mendeleev-c preprocessor
+    preprocessor:
+     P Re P ? O C Es S O ?
+     P Re P ? O Ce S S O ?
+     P Re Pr O C Es S O ?
+     P Re Pr O Ce S S O ?
+     Pr ? P ? O C Es S O ?
+     Pr ? P ? O Ce S S O ?
+     Pr ? Pr O C Es S O ?
+     Pr ? Pr O Ce S S O ?
+
+This software is Public Domain. See the LICENSE file for details.
+
+Dmitri Ivanovich Mendeleev (1834–1907) was a Russian chemist and inventor
+known for formulating the Periodic Law.
+
diff --git a/README_ru b/README_ru
new file mode 100644
index 0000000..3e29715
--- /dev/null
+++ b/README_ru
@@ -0,0 +1,36 @@
+«Менделеев» — поиск химических элементов в словах.
+
+Например, в английском слове "Moscow" (Москва) можно обнаружить следующие элементы:
+    Mo S C O W,
+    Mo S Co W,
+    Mo Sc O W,
+где:
+    Mo — молибден,
+    S  — сера,
+    Sc — скандий,
+    C  — углерод,
+    Co — кобальт,
+    W  — вольфрам.
+
+Символы, не подходящие ни для одного элемента, обозначаются вопросительными знаками:
+    language -> La N ? U Ag ?
+
+Алгоритм реализован на нескольких языках программирования. Каждая программа
+принимает список слов в виде аргументов командной строки и выводит все
+все возможные комбинации элементов. Например:
+
+    $ ./mendeleev-c preprocessor
+    preprocessor:
+     P Re P ? O C Es S O ?
+     P Re P ? O Ce S S O ?
+     P Re Pr O C Es S O ?
+     P Re Pr O Ce S S O ?
+     Pr ? P ? O C Es S O ?
+     Pr ? P ? O Ce S S O ?
+     Pr ? Pr O C Es S O ?
+     Pr ? Pr O Ce S S O ?
+
+Разрешается использовать этот код в любых целях без каких бы то ни было условий.
+
+Дмитрий Иванович Менделеев (1834–1907) — русский учёный, открывший  периодический
+закон химических элементов.
diff --git a/mendeleev.c b/mendeleev.c
new file mode 100644
index 0000000..d90ee23
--- /dev/null
+++ b/mendeleev.c
@@ -0,0 +1,196 @@
+#include<ctype.h>
+#include<stdio.h>
+#include<stdlib.h>
+#include<string.h>
+
+static const char *const ELEMENTS[] = { "?",
+  "Ac", "Ag", "Al", "Am", "Ar", "As", "At", "Au", "B", "Ba", "Be", "Bh",
+  "Bi", "Bk", "Br", "C", "Ca", "Cd", "Ce", "Cf", "Cl", "Cm", "Cn", "Co",
+  "Cr", "Cs", "Cu", "Db", "Ds", "Dy", "Er", "Es", "Eu", "F", "Fe", "Fl",
+  "Fm", "Fr", "Ga", "Gd", "Ge", "H", "He", "Hf", "Hg", "Ho", "Hs", "I",
+  "In", "Ir", "K", "Kr", "La", "Li", "Lr", "Lu", "Lv", "Mc", "Md", "Mg",
+  "Mn", "Mo", "Mt", "N", "Na", "Nb", "Nd", "Ne", "Nh", "Ni", "No", "Np",
+  "O", "Og", "Os", "P", "Pa", "Pb", "Pd", "Pm", "Po", "Pr", "Pt", "Pu",
+  "Ra", "Rb", "Re", "Rf", "Rg", "Rh", "Rn", "Ru", "S", "Sb", "Sc", "Se",
+  "Sg", "Si", "Sm", "Sn", "Sr", "Ta", "Tb", "Tc", "Te", "Th", "Ti", "Tl",
+  "Tm", "Ts", "U", "V", "W", "Xe", "Y", "Yb", "Zn", "Zr"
+};
+
+static const size_t NELEMENTS = sizeof (ELEMENTS) / sizeof (const char *) - 1;
+
+typedef struct formula
+{
+  const char *tail;
+  size_t n;
+  size_t *els;
+  struct formula *next;
+} formula_t;
+
+
+static void
+search (size_t *start, size_t *len, size_t shift, char c)
+{
+  size_t l, m, u;
+  int c_ = tolower (c);
+
+  u = *start + *len;
+  l = *start;
+  while (l < u)
+    {
+      m = (l + u) / 2;
+      if (tolower (ELEMENTS[m][shift]) < c_)
+        l = m + 1;
+      else
+        u = m;
+    }
+
+  if ((l == *start + *len) || (tolower (ELEMENTS[l][shift]) != c_))
+    {
+      *len = 0;
+      return;
+    }
+
+  u = *start + *len;
+  *start = l;
+  while (l < u)
+    {
+      m = (l + u) / 2;
+      if (c_ < tolower (ELEMENTS[m][shift]))
+        u = m;
+      else
+        l = m + 1;
+    }
+
+  *len = u - *start;
+}
+
+static formula_t *
+new_formula (const char *tail, size_t n, const size_t *els)
+{
+  formula_t *f = (formula_t *) malloc (sizeof (formula_t));
+  if (!f)
+    return NULL;
+
+  f->els = (size_t *) malloc (sizeof (*f->els) * (n + strlen (tail)));
+  if (!f->els)
+    {
+      free (f);
+      return NULL;
+    }
+
+  if (n > 0)
+    (void) memcpy (f->els, els, n * sizeof (*f->els));
+
+  f->n = n;
+  f->tail = tail;
+  f->next = NULL;
+
+  return f;
+}
+
+static void
+free_formula (formula_t * f)
+{
+  while (f)
+    {
+      formula_t *next = f->next;
+      if (f->els)
+        free (f->els);
+      free (f);
+      f = next;
+    }
+}
+
+static void
+advance (formula_t * f)
+{
+  const char *tail = f->tail;
+  size_t n = f->n;
+
+  size_t start = 1;
+  size_t len = NELEMENTS;
+  size_t shift = 0;
+
+  while (tail[shift])
+    {
+      search (&start, &len, shift, tail[shift]);
+      if (!len)
+        break;
+
+      shift++;
+      if (!ELEMENTS[start][shift])
+        {
+          if (n != f->n)
+            {
+              formula_t *g = new_formula (tail, n, f->els);
+              if (!g)
+                break;
+              g->next = f->next;
+              f->next = g;
+              f = g;
+            }
+
+          f->els[f->n++] = start;
+          f->tail = &tail[shift];
+          start++;
+          len--;
+        }
+    }
+
+  if (n == f->n)
+    {
+      f->els[f->n++] = 0;
+      f->tail += 1;
+    }
+}
+
+static formula_t *
+explode (const char *word)
+{
+  formula_t *formula = new_formula (word, 0, NULL);
+  if (!formula)
+    return NULL;
+
+  while (*word)
+    {
+      word = NULL;
+      formula_t *f = formula;
+      while (f)
+        {
+          if (*f->tail)
+            {
+              advance (f);
+              if (!word)
+                word = f->tail;
+            }
+          f = f->next;
+        }
+    }
+
+  return formula;
+}
+
+int
+main (int argc, const char *argv[])
+{
+  for (int w = 1; w < argc; w++)
+    {
+      const char *word = argv[w];
+      printf ("%s:\n", word);
+
+      formula_t *formula = explode (word);
+      if (!formula)
+        return EXIT_FAILURE;
+
+      for (formula_t * f = formula; f; f = f->next)
+        {
+          for (size_t i = 0; i < f->n; i++)
+            printf (" %s", ELEMENTS[f->els[i]]);
+          if (f->n)
+            printf ("\n");
+        }
+      free_formula (formula);
+    }
+
+  return EXIT_SUCCESS;
+}
diff --git a/mendeleev.f90 b/mendeleev.f90
new file mode 100644
index 0000000..3108b5e
--- /dev/null
+++ b/mendeleev.f90
@@ -0,0 +1,200 @@
+program mendeleev
+
+   implicit none
+
+   character(len=2), dimension(0:118), parameter :: ELEMENTS = (/ "? ", &
+     "Ac", "Ag", "Al", "Am", "Ar", "As", "At", "Au", "B ", "Ba", "Be", "Bh", &
+     "Bi", "Bk", "Br", "C ", "Ca", "Cd", "Ce", "Cf", "Cl", "Cm", "Cn", "Co", &
+     "Cr", "Cs", "Cu", "Db", "Ds", "Dy", "Er", "Es", "Eu", "F ", "Fe", "Fl", &
+     "Fm", "Fr", "Ga", "Gd", "Ge", "H ", "He", "Hf", "Hg", "Ho", "Hs", "I ", &
+     "In", "Ir", "K ", "Kr", "La", "Li", "Lr", "Lu", "Lv", "Mc", "Md", "Mg", &
+     "Mn", "Mo", "Mt", "N ", "Na", "Nb", "Nd", "Ne", "Nh", "Ni", "No", "Np", &
+     "O ", "Og", "Os", "P ", "Pa", "Pb", "Pd", "Pm", "Po", "Pr", "Pt", "Pu", &
+     "Ra", "Rb", "Re", "Rf", "Rg", "Rh", "Rn", "Ru", "S ", "Sb", "Sc", "Se", &
+     "Sg", "Si", "Sm", "Sn", "Sr", "Ta", "Tb", "Tc", "Te", "Th", "Ti", "Tl", &
+     "Tm", "Ts", "U ", "V ", "W ", "Xe", "Y ", "Yb", "Zn", "Zr" /)
+
+   type :: formula_t
+      integer :: tail = 1
+      integer :: n = 0
+      integer, dimension(:), allocatable :: elements
+      type(formula_t), pointer :: next => null()
+   end type formula_t
+
+   type(formula_t), pointer :: formula, f
+   character(len=:), allocatable :: word
+   integer :: length, argc, i, j
+
+   argc = command_argument_count()
+   do i = 1, argc
+      call get_command_argument(i, length=length)
+      allocate(character(len=length) :: word)
+      call get_command_argument(i, value=word)
+      write (*, "(A, ':')") word
+
+      formula => explode(word)
+
+      f => formula
+      do while (associated(f))
+         if (f%n > 0) then
+            do j = 1, f%n
+               write (*, "(' ', A)", advance="no") trim(ELEMENTS(f%elements(j)))
+            end do
+            write (*, "()")
+         end if
+         f => f%next
+      end do
+
+      call free_formula(formula)
+      deallocate(word)
+   end do
+
+
+contains
+
+   pure subroutine free_formula(formula)
+      type(formula_t), pointer, intent(in out) :: formula
+
+      type(formula_t), pointer :: next
+
+      do while (associated(formula))
+         next => formula%next
+         deallocate(formula%elements)
+         deallocate(formula)
+         formula => next
+      end do
+   end subroutine free_formula
+
+
+   pure integer function tolower(c)
+      character(len=1), intent(in) :: c
+
+      integer :: i
+
+      i = iachar(c)
+      if (i >= 65 .and. i < 90) then
+         tolower = i + 32
+      else
+         tolower = i
+      endif
+   end function tolower
+
+
+   pure subroutine search(start, length, sh, c)
+      integer, intent(in out) :: start, length
+      integer, intent(in) :: sh
+      character(len=1), intent(in) :: c
+
+      integer :: l, m, u, c_
+
+      c_ = tolower(c)
+
+      u = start + length
+      l = start
+      do while (l < u)
+         m = (u + l) / 2
+         if (tolower(ELEMENTS(m)(sh:sh)) < c_) then
+           l = m + 1
+         else
+           u = m
+        endif
+      end do
+
+      if (l == start + length) then
+        length = 0
+        return
+      end if
+
+      if (tolower(ELEMENTS(l)(sh:sh)) /= c_) then
+        length = 0
+        return
+      end if
+
+      u = start + length
+      start = l
+      do while (l < u)
+         m = (u + l) / 2
+         if (c_ < tolower(ELEMENTS(m)(sh:sh))) then
+           u = m
+         else
+           l = m + 1
+        endif
+      end do
+
+      length = u - start
+   end subroutine search
+
+
+   pure subroutine advance(word, f)
+      character(len=*), intent(in) :: word
+      type(formula_t), pointer, intent(in out) :: f
+
+      integer :: n, tail
+      integer :: start, length, sh, c
+      type(formula_t), pointer :: g
+
+      tail = f%tail
+      n = f%n
+
+      sh = 0
+      start = 1
+      length = ubound(ELEMENTS, 1)
+      do
+         c = tail + sh
+         if (len(word) < c) exit
+
+         sh = sh + 1
+         call search(start, length, sh, word(c:c))
+         if (length == 0) exit
+
+         if (sh == len_trim(ELEMENTS(start))) then
+            if (n /= f%n) then
+               allocate(g)
+               allocate(g%elements(len(word)))
+               g%n = n
+               g%elements(1:n) = f%elements(1:n)
+               g%next => f%next
+               f%next => g
+               f => g
+            end if
+
+            f%n = f%n + 1
+            f%elements(f%n) = start
+            f%tail = c + 1
+            start = start + 1
+            length = length - 1
+         end if
+      end do
+
+      if (n == f%n) then
+         f%tail = f%tail + 1
+         f%n = f%n + 1
+         f%elements(f%n) = 0
+      end if
+   end subroutine advance
+
+
+   pure function explode(word) result(formula)
+      character(len=*), intent(in) :: word
+      type(formula_t), pointer :: formula
+
+      logical :: has_tail
+      type(formula_t), pointer :: f
+
+      allocate(formula)
+      allocate(formula%elements(len(word)))
+
+      do
+         f => formula
+         has_tail = .false.
+         do while (associated(f))
+            if (f%tail <= len(word)) then
+               call advance(word, f)
+               if (.not. has_tail) has_tail = f%tail <= len(word)
+            end if
+            f => f%next
+         end do
+         if (.not. has_tail) exit
+      end do
+   end function explode
+end program mendeleev
diff --git a/mendeleev.hs b/mendeleev.hs
new file mode 100644
index 0000000..cfd6ac5
--- /dev/null
+++ b/mendeleev.hs
@@ -0,0 +1,71 @@
+module Main
+  ( main
+  ) where
+
+import Data.Char (toLower)
+import System.Environment (getArgs)
+
+elements :: [String]
+elements = [
+  "Ac", "Ag", "Al", "Am", "Ar", "As", "At", "Au", "B", "Ba", "Be", "Bh",
+  "Bi", "Bk", "Br", "C", "Ca", "Cd", "Ce", "Cf", "Cl", "Cm", "Cn", "Co",
+  "Cr", "Cs", "Cu", "Db", "Ds", "Dy", "Er", "Es", "Eu", "F", "Fe", "Fl",
+  "Fm", "Fr", "Ga", "Gd", "Ge", "H", "He", "Hf", "Hg", "Ho", "Hs", "I",
+  "In", "Ir", "K", "Kr", "La", "Li", "Lr", "Lu", "Lv", "Mc", "Md", "Mg",
+  "Mn", "Mo", "Mt", "N", "Na", "Nb", "Nd", "Ne", "Nh", "Ni", "No", "Np",
+  "O", "Og", "Os", "P", "Pa", "Pb", "Pd", "Pm", "Po", "Pr", "Pt", "Pu",
+  "Ra", "Rb", "Re", "Rf", "Rg", "Rh", "Rn", "Ru", "S", "Sb", "Sc", "Se",
+  "Sg", "Si", "Sm", "Sn", "Sr", "Ta", "Tb", "Tc", "Te", "Th", "Ti", "Tl",
+  "Tm", "Ts", "U", "V", "W", "Xe", "Y", "Yb", "Zn", "Zr"
+  ]
+
+search :: Char -> [String] -> [String]
+search c = takeWhile start . dropWhile (not . start)
+  where
+    start [] = False
+    start (x:_) = toLower x == c'
+    c' = toLower c
+
+divide :: String -> [(String, String)]
+divide [] = []
+divide (x:xs) =
+  if null res
+    then [("?", xs)]
+    else res
+  where
+    res = go [] [] xs (search x elements)
+    go r _ _ [] = r
+    go r el rest candidates@(c:_) =
+      let el' = head c : el
+          candidates'@(c':_) = map tail candidates
+          r' =
+            if null c'
+              then (reverse el', rest) : r
+              else r
+       in case rest of
+            (y:ys) -> go r' el' ys (search y candidates')
+            _ -> r'
+
+advance :: ([String], String) -> [([String], String)]
+advance (els, rest) = map collect $ divide rest
+  where
+    collect (el, rest') = (el : els, rest')
+
+explode :: String -> [[String]]
+explode word = reverse . map (reverse . fst) $ go [([], word)]
+  where
+    go :: [([String], String)] -> [([String], String)]
+    go [] = []
+    go (x:xs) =
+      if null (snd x)
+        then x : go xs
+        else go (advance x) ++ go xs
+
+printFormula :: String -> IO ()
+printFormula word = do
+  putStr word
+  putStrLn ":"
+  mapM_ (putStrLn . (:) ' ' . unwords) . filter (not . null) $ explode word
+
+main :: IO ()
+main = getArgs >>= mapM_ printFormula
diff --git a/mendeleev.py b/mendeleev.py
new file mode 100644
index 0000000..b026ab1
--- /dev/null
+++ b/mendeleev.py
@@ -0,0 +1,102 @@
+import sys
+
+
+# fmt: off
+ELEMENTS = [
+  "Ac", "Ag", "Al", "Am", "Ar", "As", "At", "Au", "B", "Ba", "Be", "Bh",
+  "Bi", "Bk", "Br", "C", "Ca", "Cd", "Ce", "Cf", "Cl", "Cm", "Cn", "Co",
+  "Cr", "Cs", "Cu", "Db", "Ds", "Dy", "Er", "Es", "Eu", "F", "Fe", "Fl",
+  "Fm", "Fr", "Ga", "Gd", "Ge", "H", "He", "Hf", "Hg", "Ho", "Hs", "I",
+  "In", "Ir", "K", "Kr", "La", "Li", "Lr", "Lu", "Lv", "Mc", "Md", "Mg",
+  "Mn", "Mo", "Mt", "N", "Na", "Nb", "Nd", "Ne", "Nh", "Ni", "No", "Np",
+  "O", "Og", "Os", "P", "Pa", "Pb", "Pd", "Pm", "Po", "Pr", "Pt", "Pu",
+  "Ra", "Rb", "Re", "Rf", "Rg", "Rh", "Rn", "Ru", "S", "Sb", "Sc", "Se",
+  "Sg", "Si", "Sm", "Sn", "Sr", "Ta", "Tb", "Tc", "Te", "Th", "Ti", "Tl",
+  "Tm", "Ts", "U", "V", "W", "Xe", "Y", "Yb", "Zn", "Zr"
+]
+# fmt: on
+
+elements = [el.lower() for el in ELEMENTS]
+
+
+def search(start, length, shift, char):
+    upper = start + length
+    lower = start
+    while lower < upper:
+        mid = int((lower + upper) / 2)
+        if elements[mid][shift] < char:
+            lower = mid + 1
+        else:
+            upper = mid
+
+    if lower == start + length:
+        return (0, 0)
+
+    if elements[lower][shift] != char:
+        return (0, 0)
+
+    upper = start + length
+    start = lower
+    while lower < upper:
+        mid = int((lower + upper) / 2)
+        if char < elements[mid][shift]:
+            upper = mid
+        else:
+            lower = mid + 1
+
+    length = upper - start
+
+    return (start, length)
+
+
+def divide(tail):
+    result = []
+
+    start = 0
+    length = len(ELEMENTS)
+    shift = 0
+
+    while shift < len(tail):
+        start, length = search(start, length, shift, tail[shift])
+        if length == 0:
+            break
+
+        shift += 1
+        if len(ELEMENTS[start]) == shift:
+            result.append((ELEMENTS[start], tail[shift:]))
+            start += 1
+            length -= 1
+
+    return result or [("?", tail[1:])]
+
+
+def advance(els, tail):
+    return [(els + [e], t) for (e, t) in divide(tail)]
+
+
+def explode(word):
+    result = [([], word.lower())]
+    while True:
+        new = []
+        tail = None
+        for res in result:
+            if res[1]:
+                adv = advance(*res)
+                new.extend(adv)
+                if not tail:
+                    tail = adv[0][1]
+            else:
+                new.append(res)
+
+        result = new
+
+        if not tail:
+            break
+
+    return [els for els, _ in result]
+
+
+for w in sys.argv[1:]:
+    print(w + ":")
+    for f in filter(None, explode(w)):
+        print(" " + " ".join(f))
diff --git a/test.txt b/test.txt
new file mode 100644
index 0000000..be3d00d
--- /dev/null
+++ b/test.txt
@@ -0,0 +1,36 @@
+Mendeleev:
+ ? ? N ? ? ? ? ? V
+ ? ? Nd ? ? ? ? V
+Russia:
+ Ru S S I ?
+ Ru S Si ?
+Moscow:
+ Mo S C O W
+ Mo S Co W
+ Mo Sc O W
+English:
+ ? N ? Li S H
+Spanish:
+ S P ? N I S H
+ S P ? Ni S H
+ S Pa N I S H
+ S Pa Ni S H
+language:
+ La N ? U Ag ?
+preprocessor:
+ P Re P ? O C Es S O ?
+ P Re P ? O Ce S S O ?
+ P Re Pr O C Es S O ?
+ P Re Pr O Ce S S O ?
+ Pr ? P ? O C Es S O ?
+ Pr ? P ? O Ce S S O ?
+ Pr ? Pr O C Es S O ?
+ Pr ? Pr O Ce S S O ?
+abcdefghijklmnopqrstuvwxyz:
+ ? B C ? ? F ? H I ? K ? Mn O P ? ? S ? U V W ? Y ?
+ ? B Cd ? F ? H I ? K ? Mn O P ? ? S ? U V W ? Y ?
+xxxxx:
+ ? ? ? ? ?
+{0}:
+ ? ? ?
+:
author	Igor Pashev <pashev.igor@gmail.com>	2022-08-28 15:40:21 +0200
committer	Igor Pashev <pashev.igor@gmail.com>	2022-09-25 20:23:05 +0200
commit	f0881305402f3dff56f99fcc4cbc8b615581b780 (patch)
tree	5d475e2e95138f63ad249f4d44a453999c1c14f6
download	mendeleev-f0881305402f3dff56f99fcc4cbc8b615581b780.tar.gz