/* BEGIN software license
 *
 * MsXpertSuite - mass spectrometry software suite
 * -----------------------------------------------
 * Copyright 2009--2026 by Filippo Rusconi
 *
 * http://www.msxpertsuite.org
 *
 * This file is part of the MsXpertSuite project.
 *
 * The MsXpertSuite project is the successor of the massXpert project. This
 * project now includes various independent modules:
 *
 * - massXpert, model polymer chemistries and simulate mass spectrometric data;
 * - mineXpert, a powerful TIC chromatogram/mass spectrum viewer/miner;
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 * END software license
 */

/////////////////////// StdLib includes
#include <memory>
#include <vector>

/////////////////////// Qt includes
#include <QFile>
#include <QIODevice>


/////////////////////// Local includes
#include "MsXpS/libXpertMassCore/PkaPhPiDataParser.hpp"
#include "MsXpS/libXpertMassCore/ChemicalGroup.hpp"

namespace MsXpS
{
namespace libXpertMassCore
{


/*!
\class MsXpS::libXpertMassCore::PkaPhPiDataParser
\inmodule libXpertMassCore
\ingroup PolChemDefBuildingdBlocks
\inheaderfile PkaPhPiDataParser.hpp

\brief The PkaPhPiDataParser class provides a file reader for the pKa, pH,
pI data XML file.

The format is the following:

\code
<pkaphpidata>
  <monomers>
    <monomer>
      <code>A</code>
      <mnmchemgroup>
        <name>N-term NH2</name>
        <pka>9.6</pka>
        <acidcharged>TRUE</acidcharged>
        <polrule>left_trapped</polrule>
        <chemgrouprule>
          <entity>LE_PLM_MODIF</entity>
          <name>Acetylation</name>
          <outcome>LOST</outcome>
        </chemgrouprule>
      </mnmchemgroup>
      <mnmchemgroup>
        <name>C-term COOH</name>
        <pka>2.35</pka>
        <acidcharged>FALSE</acidcharged>
        <polrule>right_trapped</polrule>
      </mnmchemgroup>
    </monomer>
    [...]
    <monomer>
      <code>C</code>
      <mnmchemgroup>
        <name>N-term NH2</name>
        <pka>9.6</pka>
        <acidcharged>TRUE</acidcharged>
        <polrule>left_trapped</polrule>
        <chemgrouprule>
          <entity>LE_PLM_MODIF</entity>
          <name>Acetylation</name>
          <outcome>LOST</outcome>
        </chemgrouprule>
      </mnmchemgroup>
      <mnmchemgroup>
        <name>C-term COOH</name>
        <pka>2.35</pka>
        <acidcharged>FALSE</acidcharged>
        <polrule>right_trapped</polrule>
      </mnmchemgroup>
      <mnmchemgroup>
        <name>Lateral SH2</name>
        <pka>8.3</pka>
        <acidcharged>FALSE</acidcharged>
        <polrule>never_trapped</polrule>
      </mnmchemgroup>
    </monomer>
  </monomers>
  <modifs>
    <modif>
      <name>Phosphorylation</name>
      <mdfchemgroup>
        <name>none_set</name>
        <pka>1.2</pka>
        <acidcharged>FALSE</acidcharged>
      </mdfchemgroup>
      <mdfchemgroup>
        <name>none_set</name>
        <pka>6.5</pka>
        <acidcharged>FALSE</acidcharged>
      </mdfchemgroup>
    </modif>
  </modifs>
</pkaphpidata>
\endcode
*/


/*!
\variable MsXpS::libXpertMassCore::PkaPhPiDataParser::mcsp_polChemDef

\brief The polymer chemistry definition context in which this PkaPhPiDataParser
instance exists.
*/

/*!
\variable MsXpS::libXpertMassCore::PkaPhPiDataParser::m_filePath

\brief The path of the file that contains all the specifications for chemical
groups and chemical rules.
*/

/*!
\brief Constructs a PkaPhPiDataParser instance.

\list
\li \a pol_chem_def_csp: the polymer chemistry definition. It cannot be
nullptr: a fatal error is raised if it is.

\li \a file_path: the path to the specification file.
\endlist
*/
PkaPhPiDataParser::PkaPhPiDataParser(const PolChemDefCstSPtr &pol_chem_def_csp,
                                     const QString &file_path)
  : mcsp_polChemDef(pol_chem_def_csp), m_filePath(file_path)
{
  // A parser without a polymer chemistry definition is a programming error.
  if(!mcsp_polChemDef)
    qFatal() << "Programming error. Cannot be that pointer is nullptr.";
}

/*!
\brief Destructs this PkaPhPiDataParser instance.

Nothing needs to be released explicitly.
*/
PkaPhPiDataParser::~PkaPhPiDataParser() = default;

/*!
\brief Sets the \a file_path.
*/
void
PkaPhPiDataParser::setFilePath(const QString &file_path)
{
  m_filePath = file_path;
}

/*!
\brief Returns a constant reference to the path of the specification file.
*/
const QString &
PkaPhPiDataParser::filePath()
{
  return this->m_filePath;
}

/*!
\brief Parses the file and fills-in the \a monomers and the \a modifs
containers.

The two container hold specific Monomer and Modif instances as desribed in the
loaded file.

Returns true upon success, false otherwise.
*/
bool
PkaPhPiDataParser::renderXmlFile(std::vector<MonomerSPtr> &monomers,
                                 std::vector<ModifSPtr> &modifs)
{
  //    <pkaphpidata>
  //      <monomers>
  //        <monomer>
  //          <code>A</code>
  //          <mnmchemgroup>
  //            <name>N-term NH2</name>
  //            <pka>9.6</pka>
  //            <acidcharged>TRUE</acidcharged>
  //            <polrule>left_trapped</polrule>
  //            <chemgrouprule>
  //              <entity>LE_PLM_MODIF</entity>
  //              <name>Acetylation</name>
  //              <outcome>LOST</outcome>
  //            </chemgrouprule>
  //          </mnmchemgroup>
  //          <mnmchemgroup>
  //            <name>C-term COOH</name>
  //            <pka>2.35</pka>
  //            <acidcharged>FALSE</acidcharged>
  //            <polrule>right_trapped</polrule>
  //          </mnmchemgroup>
  //        </monomer>
  //        [...]
  //        <monomer>
  //          <code>C</code>
  //          <mnmchemgroup>
  //            <name>N-term NH2</name>
  //            <pka>9.6</pka>
  //            <acidcharged>TRUE</acidcharged>
  //            <polrule>left_trapped</polrule>
  //            <chemgrouprule>
  //              <entity>LE_PLM_MODIF</entity>
  //              <name>Acetylation</name>
  //              <outcome>LOST</outcome>
  //            </chemgrouprule>
  //          </mnmchemgroup>
  //          <mnmchemgroup>
  //            <name>C-term COOH</name>
  //            <pka>2.35</pka>
  //            <acidcharged>FALSE</acidcharged>
  //            <polrule>right_trapped</polrule>
  //          </mnmchemgroup>
  //          <mnmchemgroup>
  //            <name>Lateral SH2</name>
  //            <pka>8.3</pka>
  //            <acidcharged>FALSE</acidcharged>
  //            <polrule>never_trapped</polrule>
  //          </mnmchemgroup>
  //        </monomer>
  //      </monomers>
  //      <modifs>
  //        <modif>
  //          <name>Phosphorylation</name>
  //          <mdfchemgroup>
  //            <name>none_set</name>
  //            <pka>1.2</pka>
  //            <acidcharged>FALSE</acidcharged>
  //          </mdfchemgroup>
  //          <mdfchemgroup>
  //            <name>none_set</name>
  //            <pka>6.5</pka>
  //            <acidcharged>FALSE</acidcharged>
  //          </mdfchemgroup>
  //        </modif>
  //      </modifs>
  //    </pkaphpidata>
  //
  // The DTD stipulates that:
  //
  // <!ELEMENT pkaphpidata(monomers,modifs*)>
  // <!ELEMENT monomers(monomer*)>
  // <!ELEMENT modifs(modif*)>
  // <!ELEMENT monomer(code,mnmchemgroup*)>
  // <!ELEMENT modif(name,mdfchemgroup*)>

  QDomDocument doc("pkaPhPiData");
  QDomElement element;
  QDomElement child;
  QDomElement indentedChild;

  QFile file(m_filePath);

  if(!file.open(QIODevice::ReadOnly))
    return false;

  if(!doc.setContent(&file))
    {
      file.close();
      return false;
    }

  file.close();

  element = doc.documentElement();

  if(element.tagName() != "pkaphpidata")
    {
      qDebug() << __FILE__ << __LINE__ << "pKa-pH-pI data file is erroneous\n";
      return false;
    }

  // The first child element must be <monomers>.

  child = element.firstChildElement();
  if(child.tagName() != "monomers")
    {
      qDebug() << "The PkaPhPi data file could not be parsed: the "
                  "<monomers> element was not found.";
      return false;
    }

  // Parse the <monomer> elements.

  indentedChild = child.firstChildElement();
  while(!indentedChild.isNull())
    {
      if(indentedChild.tagName() != "monomer")
        return false;

      QDomElement superIndentedElement = indentedChild.firstChildElement();

      if(superIndentedElement.tagName() != "code")
        {
          qDebug() << "The PkaPhPi data file could not be parsed: the "
                      "<code> element was not found.";
          return false;
        }

      MonomerSPtr monomer_sp =
        std::make_shared<Monomer>(mcsp_polChemDef,
                                  /*name*/ "",
                                  superIndentedElement.text(),
                                  /*formula string*/ "",
                                  0.0,
                                  0.0);

      // All the <mnmchemgroup> elements, if any.

      superIndentedElement = superIndentedElement.nextSiblingElement();

      while(!superIndentedElement.isNull())
        {
          if(superIndentedElement.tagName() != "mnmchemgroup")
            {
              qDebug() << "The PkaPhPi data file could not be parsed: the "
                          "<mnmchemgroup> element was not found.";

              monomer_sp.reset();
              return false;
            }

          ChemicalGroup *chemGroup = new ChemicalGroup("NOT_SET");

          if(!chemGroup->renderXmlMnmElement(superIndentedElement))
            {
              qDebug() << "The PkaPhPi data file could not be parsed: the "
                          "<mnmchemgroup> element failed to render.";

              monomer_sp.reset();
              delete chemGroup;
              return false;
            }

          ChemicalGroupProp *prop =
            new ChemicalGroupProp("CHEMICAL_GROUP", chemGroup);

          monomer_sp->appendProp(prop);

          superIndentedElement = superIndentedElement.nextSiblingElement();
        }

      monomers.push_back(monomer_sp);

      indentedChild = indentedChild.nextSiblingElement();
    }

#if 0

    qDebug() << __FILE__ << __LINE__
	      << "Debug output of all the monomers parsed:";

    for (int iter = 0; iter < monomerList->size(); ++iter)
      {
	Monomer *monomer = monomerList->at(iter);
	qDebug() << __FILE__ << __LINE__
		  << "Monomer:" << monomer->name();

	for(int jter = 0; jter < monomer->propList()->size(); ++jter)
	  {
	    Prop *prop = monomer->propList()->at(jter);

	    if (prop->name() == "CHEMICAL_GROUP")
	      {
		const ChemicalGroup *chemGroup =
		  static_cast<const ChemicalGroup *>(prop->data());

		qDebug() << __FILE__ << __LINE__
			  << "Chemical group:"
			  << chemGroup->name() << chemGroup->pka();
	      }
	  }
      }

#endif

  // And now parse the <modifs> elements, if any, this time, as
  // this element is not compulsory.

  child = child.nextSiblingElement();
  if(child.isNull())
    return true;

  if(child.tagName() != "modifs")
    {
      qDebug() << "The PkaPhPi data file could not be parsed: the "
                  "<modifs> element was not found.";
      return false;
    }

  // Parse the <modif> elements.

  indentedChild = child.firstChildElement();
  while(!indentedChild.isNull())
    {
      if(indentedChild.tagName() != "modif")
        {
          qDebug() << "The PkaPhPi data file could not be parsed: the "
                      "<modif> element was not found.";
          return false;
        }

      QDomElement superIndentedElement = indentedChild.firstChildElement();

      if(superIndentedElement.tagName() != "name")
        {
          qDebug() << "The PkaPhPi data file could not be parsed: the "
                      "<name> element was not found.";
          return false;
        }

      ModifSPtr modif_sp = std::make_shared<Modif>(
        mcsp_polChemDef, superIndentedElement.text(), "H0");

      // All the <mdfchemgroup> elements, if any.

      superIndentedElement = superIndentedElement.nextSiblingElement();
      while(!superIndentedElement.isNull())
        {
          if(superIndentedElement.tagName() != "mdfchemgroup")
            {
              qDebug() << "The PkaPhPi data file could not be parsed: the "
                          "<mdfchemgroup> element was not found.";
              modif_sp.reset();
              return false;
            }

          ChemicalGroup *chemGroup = new ChemicalGroup("NOT_SET");

          if(!chemGroup->renderXmlMdfElement(superIndentedElement))
            {
              qDebug() << "The PkaPhPi data file could not be parsed: the "
                          "<mdfchemgroup> element failed to render.";
              modif_sp.reset();
              delete chemGroup;
              return false;
            }

          ChemicalGroupProp *prop =
            new ChemicalGroupProp("CHEMICAL_GROUP", chemGroup);

          modif_sp->appendProp(prop);

          superIndentedElement = superIndentedElement.nextSiblingElement();
        }

      modifs.push_back(modif_sp);

      indentedChild = indentedChild.nextSiblingElement();
    }

#if 0

    qDebug() << __FILE__ << __LINE__
	      << "Debug output of all the modifs parsed:";

    for (int iter = 0; iter < modifList->size(); ++iter)
      {
	Modif *modif = modifList->at(iter);

	//       qDebug() << __FILE__ << __LINE__
	// 		<< "Modif:" << modif->name();

	for(int jter = 0; jter < modif->propList()->size(); ++jter)
	  {
	    Prop *prop = modif->propList()->at(jter);

	    if (prop->name() == "CHEMICAL_GROUP")
	      {
		const ChemicalGroup *chemGroup =
		  static_cast<const ChemicalGroup *>(prop->data());

		qDebug() << __FILE__ << __LINE__
			  << "Chemical group:"
			  << chemGroup->name() << chemGroup->pka();
	      }
	  }
      }

#endif

  return true;
}

} // namespace libXpertMassCore

} // namespace MsXpS
