summaryrefslogtreecommitdiffstats
path: root/debian/htdig/htdig-3.2.0b6/htnet/HtCookieMemJar.cc
blob: 25922b27d1d71a1cbd47fecbc84c35314ea409aa (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576

// HtCookieMemJar.cc
//
// HtCookieMemJar: This class stores/retrieves cookies.
//
// by Robert La Ferla.  Started 12/9/2000.
// Reviewed by G.Bartolini - since 24 Feb 2001
//
////////////////////////////////////////////////////////////
//
// The HtCookieMemJar class stores/retrieves cookies
// directly into memory. It is derived from HtCookieJar class.
//
// See "PERSISTENT CLIENT STATE HTTP COOKIES" Specification
// at http://www.netscape.com/newsref/std/cookie_spec.html
// Modified according to RFC2109 (max age and version attributes)
//
///////
//
// Part of the ht://Dig package   <http://www.htdig.org/>
// Part of the ht://Check package   <http://htcheck.sourceforge.net/>
// Copyright (c) 2001-2004 The ht://Dig Group
// For copyright details, see the file COPYING in your distribution
// or the GNU Library General Public License (LGPL) version 2 or later
// <http://www.gnu.org/copyleft/lgpl.html>
//
// $Id: HtCookieMemJar.cc,v 1.10 2004/05/28 13:15:23 lha Exp $ 
//

#ifdef HAVE_CONFIG_H
#include "htconfig.h"
#endif

#include "HtCookieMemJar.h"
#include "HtCookie.h"
#include "List.h"
#include "Dictionary.h"
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#ifdef HAVE_STD
#include <iostream>
#ifdef HAVE_NAMESPACES
using namespace std;
#endif
#else
#include <iostream.h>
#endif /* HAVE_STD */

// Constructor
// Builds an empty jar: the NextCookie() cursor members are cleared and
// a fresh, empty domain dictionary is allocated.
HtCookieMemJar::HtCookieMemJar()
   : _key(0),
     _list(0),
     _idx(0)
{
   cookieDict = new Dictionary();

   // Rewind the dictionary iterator so that a walk starts from the beginning
   cookieDict->Start_Get();
}

// Copy constructor
// Deep copy: every domain of 'rhs' gets its own new list in this jar,
// and every cookie of 'rhs' is duplicated through HtCookie's copy
// constructor. The walk uses (and therefore moves) the iterators of
// rhs' dictionary and lists.
HtCookieMemJar::HtCookieMemJar(const HtCookieMemJar& rhs)
   : _key(0),
     _list(0),
     _idx(0)
{
   // This jar always owns a dictionary, whether or not 'rhs' has one
   cookieDict = new Dictionary();

   if (rhs.cookieDict)
   {
      rhs.cookieDict->Start_Get();

      // One pass per domain stored in the source jar
      char* domain;
      while ((domain = rhs.cookieDict->Get_Next()))
      {
         List* copiedList = new List();
         cookieDict->Add(domain, copiedList);

         List* sourceList = (List*) rhs.cookieDict->Find(domain);
         if (!sourceList)
            continue;   // nothing to duplicate for this domain

         // Duplicate each cookie of the domain
         sourceList->Start_Get();

         HtCookie* original;
         while ((original = (HtCookie *) sourceList->Get_Next()))
         {
            HtCookie* duplicate = new HtCookie(*original);
            copiedList->Add((Object *) duplicate);
         }
      }
   }

   // Rewind the iterator of the new dictionary
   cookieDict->Start_Get();
}

// Destructor
// Releases the domain dictionary. When the debug level is above 4 the
// content of the jar is dumped first.
HtCookieMemJar::~HtCookieMemJar()
{
   if (debug > 4)
      printDebug();

   // 'delete' on a null pointer is a no-op, so no guard is required
   delete cookieDict;
}

// Add a cookie to the Jar
int HtCookieMemJar::AddCookie(const String &CookieString, const URL &url)
{

   // Builds a new Cookie object
   HtCookie *Cookie = new HtCookie(CookieString, url.get());

   // Interface to the insert method   
   // If the cookie has not been added, we'd better delete it
   if (!AddCookieForHost (Cookie, url.host()))
      delete Cookie;

   return true;

}


// Add a cookie to a host
// Files 'cookie' under the domain it belongs to.
//
// The dictionary key is the cookie's own domain attribute (lowercased,
// leading dots removed) when that domain passes the checks below;
// otherwise the cookie is marked as having an invalid domain and the
// lowercased 'HostName' is used instead. An empty 'HostName' is treated
// as an imported cookie and skips the host-within-domain check.
//
// Returns non-zero when the 'cookie' pointer has been appended to the
// domain's list, and zero when a cookie with the same name and path was
// already stored (only its expiration is refreshed and 'cookie' itself
// is not kept, which is why AddCookie() deletes it in that case).
int HtCookieMemJar::AddCookieForHost(HtCookie *cookie, String HostName)
{

   List *list; // pointer to the Cookie list of an exact host
   HtCookie *theCookie;
   bool inList = false;

/////////////////////////////////////////////////////////////
// That's an excerpt from the Netscape Cookies specification
/////////////////////////////////////////////////////////////
//
// When searching the cookie list for valid cookies,
// a comparison of the domain attributes of the cookie
// is made with the Internet domain name of the host from which the URL
// will be fetched. If there is a tail match, then the cookie
// will go through path matching to see if it should be sent.
//
// "Tail matching" means that domain attribute is matched against
// the tail of the fully qualified domain name of the host.
// A domain attribute of "acme.com" would match host names "anvil.acme.com"
// as well as "shipping.crate.acme.com".
//
// Only hosts within the specified domain can set a cookie
// for a domain and domains must have at least two (2)
// or three (3) periods in them to prevent domains of
// the form: ".com", ".edu", and "va.us".
//
// Any domain that falls within one of the seven special top level domains
// listed below requires only two periods.
// Any other domain requires at least three.
//
// The seven special top level domains are:
// "COM", "EDU", "NET", "ORG", "GOV", "MIL", and "INT".
//
// The default value of domain is the host name of the
// server which generated the cookie response.
//
/////////////////////////////////////////////////////////////


   // Let's get the domain of the cookie
   String Domain(cookie->GetDomain());

   // Lowercase the HostName
   HostName.lowercase();

   if (!Domain.length())
      Domain = HostName;   // no domain attribute: default to the host
   else
   {
      Domain.lowercase(); // lowercase the domain

      // The cookie's domain must have a minimum number of periods
      // inside, as stated by the excerpt cited above
      int minimum_periods = GetDomainMinNumberOfPeriods(Domain);

      if (!minimum_periods)
      {
         if (debug > 2)
            cout << "Cookie - Invalid domain "
               << "(minimum number of periods): " << Domain << endl;

         cookie->SetIsDomainValid(false);
      }
      else
      {
         // Count the periods, walking backwards from the last character.
         // Domain is not empty here, so 'r' starts inside the buffer.
         const char* s = Domain.get();
         const char* r = s + strlen(s) - 1;  // go to the last char
         int num_periods = 1;    // at minimum is one

         while (r > s && *r)
         {
            // A dot only counts when it is followed by a label
            // (neither the end of the string nor another dot)
            if (*r == '.' && *(r+1) && *(r+1) != '.')
               ++num_periods;

            --r;
         }

         if (num_periods >= minimum_periods) // here is a so-far valid domain
         {
            while (*r && *r == '.')
               ++r;    // goes beyond the leading dot(s)

            // NOTE(review): 'r' points into Domain's own buffer; this
            // relies on String::set() coping with such a source - confirm.
            if (r > s)
               Domain.set((char*) r);  // Set the new 'shorter' domain

            // Tail match, as required by the specification quoted above:
            // the host must be the domain itself or end with "." + domain.
            // A plain substring search would also accept hosts such as
            // "acme.com.evil.org" for the domain "acme.com".
            const int hostLen = HostName.length();
            const int domainLen = Domain.length();
            bool hostInDomain = false;

            if (domainLen > 0 && hostLen >= domainLen)
            {
               const char* tail = HostName.get() + (hostLen - domainLen);

               hostInDomain = (strcmp(tail, Domain.get()) == 0)
                  && (hostLen == domainLen || *(tail - 1) == '.');
            }

            if (hostInDomain)
            {
               if (debug > 2)
                  cout << "Cookie - valid domain: "
                     << Domain << endl;
            }
            else if (HostName.length() == 0)
            {
               // No host to compare against: accepted as an imported cookie
               if (debug > 2)
                  cout << "Imported cookie - valid domain: "
                     << Domain << endl;
            }
            else
            {
               cookie->SetIsDomainValid(false);
               if (debug > 2)
                  cout << "Cookie - Invalid domain "
                     << "(host not within the specified domain): " << Domain << endl;
            }
         }
         else
         {
            cookie->SetIsDomainValid(false);
            if (debug > 2)
               cout << "Cookie - Invalid domain "
                  << "(minimum number of periods): " << Domain << endl;
         }
      }
   }

   if (! cookie->getIsDomainValid())   // Not a valid domain
      Domain = HostName;  // Set the default

   // Is the host in the dictionary?
   if (cookieDict->Exists(Domain) == 0)
   {
      // No, add a list instance
      list = new List();
      cookieDict->Add(Domain, list);
   }
   else list = (List *)cookieDict->Find(Domain);

   // Is cookie already in list?
   list->Start_Get();

   // Let's start looking for it
   // The match is made on the name and the path

   if (debug > 5)
      cout << "- Let's go searching for the cookie '"
         << cookie->GetName() << "' in the list" << endl;

   while (!inList && (theCookie = (HtCookie *)list->Get_Next()))
   {
      if ( (theCookie->GetName().compare(cookie->GetName()) == 0 )
         && ( theCookie->GetPath().compare(cookie->GetPath()) == 0 ))
      {
         // The cookie has been found
         inList = true;

         // Let's update the expiration datetime
         // NOTE(review): only the expiration is refreshed; any other
         // attribute carried by the new cookie is dropped - confirm
         // that this is intended.
         if (debug > 5)
            cout << " - Found: Update cookie expire time." << endl;

         theCookie->SetExpires(cookie->GetExpires());

      }
   }

   // Well ... the cookie wasn't in the list. Until now! ;-)
   // Let's go add it!
   if (inList == false)
   {
      if (debug > 5)
         cout << " - Not Found: let's go add it." << endl;

      list->Add((Object *)cookie);
   }

   return !inList;
}


// Retrieve all cookies that are valid for a domain
// Looks 'DomainName' up in the dictionary and returns whatever is filed
// under it, cast to a cookie list (the result of Find() is returned
// as is, without any check).
List * HtCookieMemJar::cookiesForDomain(const String &DomainName)
{
   return (List *) cookieDict->Find(DomainName);
}



// Appends to 'RequestString' the cookies stored for the host of '_url'
// and for each of its parent domains that has more periods than the
// minimum returned by GetDomainMinNumberOfPeriods(). Parent domains are
// tried from the shortest to the longest, then the full host name.
// Always returns true.
int HtCookieMemJar::SetHTTPRequest_CookiesString(const URL &_url,
   String &RequestString)
{

    // Let's split the URL domain and get all of the subdomains.
    // For instance:
    // 	 - bar.com
    // 	 - foo.bar.com
    // 	 - www.foo.bar.com

    String Domain(_url.host());
    Domain.lowercase();

    const int minimum_periods = GetDomainMinNumberOfPeriods(Domain);

    if (debug > 3)
        cout << "Looking for cookies - Domain: "
            << Domain
            << " (Minimum periods: " << minimum_periods << ")" << endl;

    // Let's get the subdomains, starting from the end.
    // The scan is index based so that an empty host name never makes us
    // form a pointer in front of the buffer: 'last' is then -1 and the
    // loop simply does not run.
    const char* s = Domain.get();
    const int last = (int) strlen(s) - 1;   // index of the last char
    int num_periods = 1;    // at minimum is one

    for (int i = last; i > 0; --i)
    {
        // Only a dot followed by a label (not by the end of the string
        // or by another dot) separates two domain levels
        if (s[i] != '.' || !s[i+1] || s[i+1] == '.')
            continue;

        ++num_periods;

        if (num_periods > minimum_periods) // here is a so-far valid domain
        {
            const String SubDomain(s + i + 1);
            if (debug > 3)
                cout << "Trying to find cookies for subdomain: "
                    << SubDomain << endl;

            if (cookieDict->Exists(SubDomain))
                WriteDomainCookiesString(_url, SubDomain, RequestString);
        }
    }

    if (num_periods >= minimum_periods
        && cookieDict->Exists(Domain))
            // Let's send cookies for this domain to the Web server ...
            WriteDomainCookiesString(_url, Domain, RequestString);

    return true;
}



/////////////////////////////////////////////////////////////   
// That's an abstract from the Netscape Cookies specification
/////////////////////////////////////////////////////////////   
//
//
// When requesting a URL from an HTTP server, the browser will match
// the URL against all cookies and if any of them match,
// a line containing the name/value pairs of all matching cookies
// will be included in the HTTP request.
//
// Here is the format of that line: 
// Cookie: NAME1=OPAQUE_STRING1; NAME2=OPAQUE_STRING2 ...
//
// This method appends to a string (RequestString) the request header
// carrying the cookie settings, as defined by the Netscape standard
//
/////////////////////////////////////////////////////////////   

int HtCookieMemJar::WriteDomainCookiesString(const URL &_url,
   const String &Domain, String &RequestString)
{

   // Cookie support. We need a list of cookies and a cookie object
   List *cookieList;
   HtCookie *cookie;
   const HtDateTime now;   // Instant time, used for checking
                           // cookies expiration time

   // Let's find all the valid cookies depending on the specified domain
   cookieList = cookiesForDomain(Domain);

   if (cookieList)
   {
      // Let's store the number of cookies eventually sent
      int NumCookies = 0;

      if (debug > 5)
      	 cout << "Found a cookie list for: '" << Domain << "'" << endl;

      // Let's crawl the list for getting the 'path' matching ones
      cookieList->Start_Get();

      while ((cookie = (HtCookie *)cookieList->Get_Next()))
      {
      	 const String cookiePath = cookie->GetPath();
      	 const String urlPath = _url.path();

         //
         // Let's see if the cookie has expired
         // by checking the Expires value of it
         // If it's not empty and the datetime
         // is before now.
         //
		 // Another way of determining whether a
		 // cookie is expired is checking the
		 // max_age property that is to say:
		 // (now - issuetime <= maxage).
		 //
         const bool expired =
		    (cookie->GetExpires() && (*(cookie->GetExpires()) < now))	// Expires
			|| (HtDateTime::GetDiff(now, cookie->GetIssueTime())
			   <= cookie->GetMaxAge()); // Max-age

         if (debug > 5)
      	    cout << "Trying to match paths and expiration time: "
	       << urlPath << " in " << cookiePath;

      	 // Is the path matching
      	 if (!expired && !strncmp(cookiePath, urlPath, cookiePath.length()))
	 {

            if (debug > 5)
	       cout << " (passed)" << endl;

      	    ++NumCookies;
	    
      	    // Write the string by passing the cookie to the superclass' method
	    WriteCookieHTTPRequest(*cookie, RequestString, NumCookies);

      	 }
	 else if (debug > 5) cout << " (discarded)" << endl;

     }
     
     // Have we sent one cookie at least?
     if (NumCookies > 0)
       RequestString <<"\r\n";

   }

   // That's the end of function
   return true;
}


// Debug info
void HtCookieMemJar::printDebug()
{
   char * key;
  
   cookieDict->Start_Get();
   
   cout << "Summary of the cookies stored so far" << endl;
   
   while ((key = cookieDict->Get_Next()))
   {
      List * list;
      HtCookie * cookie;

      cout << " - View cookies for: '" << key << "'" << endl;
      list = (List *)cookieDict->Find(key);
      list->Start_Get();
      
      while ((cookie = (HtCookie *)list->Get_Next()))
      	 cookie->printDebug();
   }
}


///////
   //    Show the summary of the stored cookies
///////

// Writes to 'out' a report with one section per host stored in the jar
// (host name and number of its cookies), followed by the overall number
// of cookies and of hosts. Each cookie is also asked to describe itself
// through its own printDebug(). Returns 'out'.
// The walk rewinds and consumes the dictionary iterator.
ostream &HtCookieMemJar::ShowSummary(ostream &out)
{
   int total_cookies = 0;   // cookies over all the hosts
   int total_servers = 0;   // hosts having a cookie list

   cookieDict->Start_Get();

   out << endl;
   out << "Summary of the cookies" << endl;
   out << "======================" << endl;

   for (char *host = cookieDict->Get_Next(); host;
      host = cookieDict->Get_Next())
   {
      ++total_servers;

      out << " Host: '" << host << "'" << endl;

      List *cookies = (List *)cookieDict->Find(host);
      cookies->Start_Get();

      // Count (and show) the cookies of this host
      int host_cookies = 0;
      while (HtCookie *current = (HtCookie *)cookies->Get_Next())
      {
         ++host_cookies;
         current->printDebug();
      }

      out << "   Number of cookies: " << host_cookies << endl;
      out << endl;

      total_cookies += host_cookies;
   }

   out << "Total number of cookies: " << total_cookies << endl;
   out << "Servers with cookies: " << total_servers << endl;
   out << endl;

   return out;
}


// Get the next cookie. It is a bit tricky, but for now it is good
// Returns the cookies of the jar one per call, domain after domain, and
// 0 when there is nothing (more) to return. The cursor is made of the
// dictionary iterator plus the _key / _list / _idx members; _idx being
// zero marks the first call after construction or ResetIterator().
const HtCookie* HtCookieMemJar::NextCookie()
{
   if (!cookieDict)
      return 0;

   // First call: move to the first domain and rewind its list
   if (!_idx && (_key = cookieDict->Get_Next())
      && (_list = (List *)cookieDict->Find(_key)))
         _list->Start_Get();

   ++_idx;

   // No current domain, or no list filed under it: the walk is over
   if (!_key || !_list)
      return 0;

   const HtCookie *cookie = (const HtCookie *)_list->Get_Next();

   if (cookie)
      return cookie;

   // No more cookies for the current host: move on to the following ones.
   // Hosts whose list is empty are skipped instead of ending the walk.
   while ((_key = cookieDict->Get_Next())
      && (_list = (List *)cookieDict->Find(_key)))
   {
      _list->Start_Get();

      if ((cookie = (const HtCookie *)_list->Get_Next()))
         return cookie;
   }

   return 0;
}

// Reset the iterator
// Restarts the NextCookie() walk: the call counter goes back to zero
// (so the next call is handled as the first one) and the dictionary
// iterator is rewound.
void HtCookieMemJar::ResetIterator()
{
   _idx = 0;
   cookieDict->Start_Get();
}