/* Copyright (C) 2000-2009 Lavtech.com corp. All rights reserved.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
*/

#include "udm_config.h"

#include <string.h>
#include "udm_common.h"
#include "udm_utils.h"
#include "udm_searchtool.h"
#include "udm_db.h" /* For UDM_FINDWORD_ARGS */

/*
  Reset a list-of-lists structure: every byte of *Lst is set to zero.
*/
void
UdmURLCRDListListInit(UDM_URLCRDLISTLIST *Lst)
{
  memset(Lst, 0, sizeof(*Lst));
}


/*
  Append a shallow copy of *List to the end of LstLst.

  The array is grown by one element on every call. The structure is
  copied by value, so pointer members of *List are shared with the
  stored entry, not duplicated.

  Returns UDM_OK on success, or UDM_ERROR if the array could not be
  grown. On failure LstLst is left untouched: the previously stored
  lists and the nlists counter stay consistent with each other.
*/
int
UdmURLCRDListListAdd(UDM_URLCRDLISTLIST *LstLst, UDM_URLCRDLIST *List)
{
  size_t nbytes= (LstLst->nlists + 1) * sizeof(UDM_URLCRDLIST);
  /*
    Keep the result in a temporary: overwriting LstLst->List directly
    would drop the only reference to the old array on failure and leave
    List == NULL with nlists != 0.
    NOTE(review): assumes UdmRealloc follows realloc() semantics (the old
    block stays valid when NULL is returned) - confirm.
  */
  UDM_URLCRDLIST *grown= (UDM_URLCRDLIST*) UdmRealloc(LstLst->List, nbytes);

  if (!grown)
    return UDM_ERROR;

  LstLst->List= grown;
  LstLst->List[LstLst->nlists]= *List;
  LstLst->nlists++;
  return UDM_OK;
}


/*
  Release everything held by a list of lists: the word and Coords
  members of every stored entry, then the entry array itself.
  If Lst->freeme is set, the Lst structure is released as well.
*/
void
UdmURLCRDListListFree(UDM_URLCRDLISTLIST *Lst)
{
  size_t i;

  for (i= 0; i < Lst->nlists; i++)
  {
    UDM_FREE(Lst->List[i].word);
    UDM_FREE(Lst->List[i].Coords);
  }
  UDM_FREE(Lst->List);

  /*
    The entries are gone, so the counter must not keep claiming they
    exist: a repeated Free or a later Add on the same structure would
    otherwise index into the released array.
    This must happen before Lst itself is possibly released below.
  */
  Lst->nlists= 0;

  if (Lst->freeme)
  {
    UDM_FREE(Lst);
  }
}


/*
  Comparison callback: orders coordinates by url_id, then by secno,
  then by pos, all ascending.
  Returns a negative value, zero or a positive value when s1 sorts
  before, equal to, or after s2.
*/
static int
cmpurlid(UDM_URL_CRD *s1, UDM_URL_CRD *s2)
{
  if (s1->url_id > s2->url_id) return 1;
  if (s1->url_id < s2->url_id) return -1;
  if (s1->secno > s2->secno) return 1;
  if (s1->secno < s2->secno) return -1;
  /*
    Compare positions explicitly instead of subtracting them:
    a difference cast to int can overflow or get the wrong sign
    when the values are large.
  */
  if (s1->pos > s2->pos) return 1;
  if (s1->pos < s2->pos) return -1;
  return 0;
}


/*
  Sort the coordinates of L in place using cmpurlid
  (url_id, then secno, then pos).
  Does nothing when the list has no coordinate array or is empty.
*/
void
UdmURLCRDListSortByURLThenSecnoThenPos(UDM_URLCRDLIST *L)
{
  if (!L->Coords || !L->ncoords)
    return;

  UdmSort((void*) L->Coords,
          L->ncoords,
          sizeof(L->Coords[0]),
          (udm_qsort_cmp) cmpurlid);
}


/*
  Collapse runs of coordinates that form a multi-word phrase into
  single coordinates, in place.

  A run is "nparts" adjacent array elements with the same url_id and
  secno, whose pos values and num values both increase by exactly one
  from element to element. Every such run produces one output
  coordinate with:
    url_id, secno - taken from the run,
    pos           - position of the first element of the run,
    num           - the "wordnum" argument.
  Output coordinates are written to the beginning of Phrase->Coords and
  Phrase->ncoords is set to the number of runs found.

  Phrase  - list to scan and rewrite.
            NOTE(review): the adjacency test only works if the array is
            already ordered so that phrase parts are neighbours -
            confirm with the callers.
  wordnum - value stored into "num" of every merged coordinate.
  nparts  - number of parts in the phrase. When less than 2, Phrase is
            left unchanged. When Phrase has fewer than nparts
            coordinates, Phrase->ncoords is set to 0.
*/
void
UdmURLCRDListMergeMultiWord(UDM_URLCRDLIST *Phrase, size_t wordnum, size_t nparts)
{
  UDM_URL_CRD *To, *End, *From, *Prev;

#if 0
  fprintf(stderr, "merge: wordnum=%d nparts=%d ncoords=%d\n",
          (int) wordnum, (int) nparts, (int) Phrase->ncoords);
#endif

  if (nparts < 2) /* If one part, keep Phrase unchanged */
    return;

  if (Phrase->ncoords < nparts) /* Nothing found */
  {
    Phrase->ncoords= 0;
    return;
  }

  /*
    The cursors are set up only after the checks above: with
    nparts < 2 or ncoords < nparts they would point before the start
    or past the end of the array, which is not a valid pointer
    computation.
  */
  To= Phrase->Coords;
  End= Phrase->Coords + Phrase->ncoords;
  From= Phrase->Coords + (nparts - 1); /* Candidate last part of a run */
  Prev= From - 1;                      /* Element right before From    */

  for ( ; From < End ; From++, Prev++)
  {
    if (Prev->url_id == From->url_id)
    {
      size_t pos= From->pos;
      size_t sec= From->secno;
      size_t num= From->num;
      if (pos == Prev->pos + 1 &&
          sec == Prev->secno  &&
          num == Prev->num + 1)
      {
        /* The last two parts match; check the remaining ones backwards */
        size_t i, nmatches;
        for (nmatches= 2, i= 2; i < nparts; i++)
        {
          /*
            From is at least nparts - 1 elements into the array and
            i <= nparts - 1, so From - i never leaves the array.
            Plain pointer subtraction is used because negating the
            unsigned index would rely on wrap-around.
          */
          const UDM_URL_CRD *Back= From - i;
          if (Back->url_id != From->url_id ||
              Back->secno != sec           ||
              Back->pos != (pos - i)       ||
              Back->num != (num - i))
            break;

          nmatches++;
        }
        if (nmatches == nparts)
        {
          /*
            To never gets ahead of From - (nparts - 1), so this write
            cannot clobber an element that is still to be examined.
            NOTE(review): seclen of the destination slot is not
            written here and keeps whatever value the slot had -
            confirm this is intended.
          */
          To->url_id= From->url_id;
          To->pos= pos - nparts + 1;
          To->secno= sec;
          To->num= wordnum;
          To++;
        }
      }
    }
  }

  Phrase->ncoords= To - Phrase->Coords;
}


/*
  Convert URLCRDList to SectionList.
  Only single word is supported.

  SectionList is first prepared with UdmSectionListAlloc, requesting
  room for CoordList->ncoords coordinates and as many sections.
  The coordinates are then walked in array order; every maximal run of
  neighbouring elements with equal url_id and equal secno becomes one
  section:
    Coord    - points at the first output coordinate of the run,
    ncoords  - number of elements in the run,
    url_id, secno, seclen, wordnum - taken from the first element,
    order    - args->WWList->Word[num].order for the first element's num,
    minpos   - pos of the first output coordinate of the run,
    maxpos   - pos of the last element of the run.

  Always returns UDM_OK; the result of UdmSectionListAlloc is not
  checked.
*/
int
UdmURLCRDListToSectionList(UDM_FINDWORD_ARGS *args,
                           UDM_SECTIONLIST *SectionList,
                           UDM_URLCRDLIST *CoordList)
{
  size_t total= CoordList->ncoords;
  UDM_URL_CRD *Stop= CoordList->Coords + total;
  UDM_URL_CRD *GroupStart;
  UDM_COORD2 *OutCoord;
  UDM_SECTION *OutSection;

  UdmSectionListAlloc(SectionList, total, total);

#if 0
  UdmCoordListPrint(CoordList);
#endif

  if (total == 0)
    return UDM_OK;

  OutCoord= SectionList->Coord;
  OutSection= SectionList->Section;
  GroupStart= CoordList->Coords;

  while (GroupStart < Stop)
  {
    UDM_URL_CRD *Cur= GroupStart;

    /* Section header, taken from the first coordinate of the run */
    OutSection->Coord= OutCoord;
    OutSection->secno= GroupStart->secno;
    OutSection->wordnum= GroupStart->num;
    OutSection->order= args->WWList->Word[GroupStart->num].order;

    /* Copy positions while still inside the same document and section */
    while (Cur < Stop &&
           Cur->url_id == GroupStart->url_id &&
           Cur->secno == GroupStart->secno)
    {
      OutCoord->pos= Cur->pos;
      OutSection->maxpos= Cur->pos; /* The last one written wins */
      OutCoord++;
      Cur++;
    }

    OutSection->url_id= GroupStart->url_id;
    OutSection->ncoords= Cur - GroupStart;
    OutSection->seclen= GroupStart->seclen;
    OutSection->minpos= OutSection->Coord->pos;
    OutSection++;

    GroupStart= Cur; /* The next run starts where this one stopped */
  }

  SectionList->ncoords= OutCoord - SectionList->Coord;
  SectionList->nsections= OutSection - SectionList->Section;

#if 0
  UdmSectionListPrint(SectionList);
#endif

  return UDM_OK;
}


/*
  Convert CoordList into a section list, add that section list to
  args->SectionListList, then release CoordList->Coords.

  The List parameter is not used.
  Always returns UDM_OK; the results of the conversion and of
  UdmSectionListListAdd are not checked.
*/
int
UdmURLCRDListListAddWithSort2(UDM_FINDWORD_ARGS *args,
                              UDM_URLCRDLISTLIST *List,
                              UDM_URLCRDLIST *CoordList)
{
  UDM_SECTIONLIST Sections;

  (void) List; /* Not used */

  UdmURLCRDListToSectionList(args, &Sections, CoordList);
  UdmSectionListListAdd(&args->SectionListList, &Sections);

  /* The coordinates have been copied into Sections by now */
  UDM_FREE(CoordList->Coords);

  return UDM_OK;
}
