The MASM Forum

General => The Laboratory => Topic started by: Siekmanski on December 01, 2013, 01:45:23 AM

Title: Getting html content of google search
Post by: Siekmanski on December 01, 2013, 01:45:23 AM
Pulling hairs out of my head...   :(

I'm trying to get the html content of google search and load it in to memory.
Then i can search for the image urls and show those images in my program.

I've written a routine to connect with the google server and send a search request.
But it redirects me to the https page and i don't know how to connect to that page.


HTTP/1.1 302 Found
Location: https://www.google.com/search?q=rammstein&tbm=isch
Cache-Control: private
Content-Type: text/html; charset=UTF-8
Set-Cookie: PREF=ID=2580d3f61611f780:FF=0:TM=1385822026:LM=1385822026:S=bScjZ7XszAVzBc_T; expires=Mon, 30-Nov-2015 14:33:46 GMT; path=/; domain=.google.com
Set-Cookie: NID=67=PS3svhZFPKcHfZgV1sfXdsBY4nSYd3cGCQgn6JbVrVc-7XmghKnx5NPmqSlsJ2Ib0MZ--IhhpCuxwbdCTtC2hiOWM4GLLwdM_qrIotBmHkzRs08GZvP0sHWMs9ExZP8j; expires=Sun, 01-Jun-2014 14:33:46 GMT; path=/; domain=.google.com; HttpOnly
P3P: CP="This is not a P3P policy! See http://www.google.com/support/accounts/bin/answer.py?hl=en&answer=151657 for more info."
Date: Sat, 30 Nov 2013 14:33:46 GMT
Server: gws
Content-Length: 251
X-XSS-Protection: 1; mode=block
X-Frame-Options: SAMEORIGIN
Alternate-Protocol: 80:quic
Connection: close

<HTML><HEAD><meta http-equiv="content-type" content="text/html;charset=utf-8">
<TITLE>302 Moved</TITLE></HEAD><BODY>


I found out that https goes via port 443 but it doesn't work.
Don't know how to make the request header...........

I can save the html content file with the "URLDownloadToFile" api but that's not what i want.
There must be a way to do it with my routine.


    .486
    .model      flat,stdcall
    option      casemap:none

    include     windows.inc
    include     user32.inc
    includelib  user32.lib
    include     kernel32.inc
    includelib  kernel32.lib

    include     wsock32.inc
    includelib  wsock32.lib
    include     urlmon.inc
    includelib  urlmon.lib

    include     Console.Inc

; Page the request below corresponds to:
;https://www.google.com/search?q=rammstein&tbm=isch

; GetImages: zero-terminated HTTP/1.1 GET request text (CRLF line endings,
; empty line ends the header block). It is passed to InternetServer as the
; string to send.
; NOTE(review): no segment directive is visible before this definition; which
; segment it is placed in depends on what the preceding includes leave open.
GetImages   db "GET /search?q=rammstein&tbm=isch HTTP/1.1",13,10
            db "User-Agent: Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko",13,10
; Alternative User-Agent header, currently disabled:
; db "User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; nl; rv:1.8.0.3)",13,10
            db "Host: www.google.com",13,10
            db "Accept: */*",13,10
            db "Connection: close",13,10
            db 13,10,0

.data?
align 4
; Receive buffer (500 KB) that InternetServer clears, fills and prints.
Internet_Buffer     db 500*1024 dup (?)
; Scratch buffer for the wsprintf-formatted "bytes received" message.
szString_buffer     db 128 dup (?)

.code

;-----------------------------------------------------------------------
; InternetServer
;   Resolves a host name, opens a TCP connection to it, sends a
;   zero-terminated request string and collects the reply in
;   Internet_Buffer, which is then printed together with a byte count.
;
; In:   Port            = TCP port in host byte order
;       Internet_page   = address of zero-terminated host name
;       Internet_String = address of zero-terminated request text
; Out:  eax = WSAStartup error code if Winsock could not be started,
;             otherwise the return value of WSACleanup
; Uses: edi is preserved (uses edi); eax, ecx, edx are scratch
;-----------------------------------------------------------------------
RECV_CHUNK  equ 256                     ; maximum bytes requested per recv call

InternetServer proc uses edi Port:dword,Internet_page:dword,Internet_String:dword

LOCAL   Internet_Socket,Bytes_received:dword
LOCAL   wsaData:WSADATA
LOCAL   Socket_adres:sockaddr_in

    ; Clear the whole receive buffer.
    lea     edi,Internet_Buffer
    xor     eax,eax
    mov     ecx,sizeof Internet_Buffer / 4
    cld
    rep     stosd

    mov     Internet_Socket,0           ; 0 = "no socket opened yet"
    invoke  WSAStartup,0202h,addr wsaData
    test    eax,eax
    jz      winsock_open
    ret                                 ; Winsock not started: nothing to clean up
winsock_open:
    invoke  socket,AF_INET,SOCK_STREAM,IPPROTO_TCP
    cmp     eax,INVALID_SOCKET
    je      Sluit_socket
    mov     Internet_Socket,eax
    mov     Socket_adres.sin_family,AF_INET
    invoke  htons,Port
    mov     Socket_adres.sin_port,ax
    invoke  gethostbyname,Internet_page
    test    eax,eax
    jz      Sluit_socket                ; host name could not be resolved
    mov     eax,[eax+12]                ; hostent.h_addr_list
    mov     eax,[eax]                   ; pointer to first address
    mov     eax,[eax]                   ; the address itself (network byte order)
    mov     Socket_adres.sin_addr,eax
    invoke  connect,Internet_Socket,addr Socket_adres,sizeof Socket_adres
    cmp     eax,SOCKET_ERROR
    je      Sluit_socket

    invoke  Print,TEXT_(13,10," Connected to Google server.",13,10,13,10)
    mov     Bytes_received,0
    invoke  lstrlen,Internet_String
    invoke  send,Internet_Socket,Internet_String,eax,0
    cmp     eax,SOCKET_ERROR
    je      Antwoord_klaar              ; request not sent: report 0 bytes

    ; Invariant: Bytes_received < sizeof Internet_Buffer - RECV_CHUNK here,
    ; so every recv stays inside the buffer and leaves room for the final 0.
Lees_socket:
    lea     eax,Internet_Buffer
    add     eax,Bytes_received
    invoke  recv,Internet_Socket,eax,RECV_CHUNK,0
    cmp     eax,SOCKET_ERROR
    je      Antwoord_klaar
    test    eax,eax
    jz      Antwoord_klaar              ; 0 = connection closed by the peer
    add     Bytes_received,eax
    cmp     Bytes_received,sizeof Internet_Buffer-RECV_CHUNK
    jb      Lees_socket
Antwoord_klaar:
    ; Terminate the text directly AFTER the last received byte. Writing at
    ; offset Bytes_received-1 would overwrite the last byte of the reply and,
    ; with 0 bytes received, store one byte in front of the buffer.
    mov     eax,Bytes_received
    lea     edx,Internet_Buffer
    mov     byte ptr[edx+eax],0
    invoke  Print,addr Internet_Buffer

    invoke  wsprintf,addr szString_buffer,TEXT_(13,10,13,10,"We are done: %d bytes received....",13,10),Bytes_received
    invoke  Print,addr szString_buffer

Sluit_socket:
    cmp     Internet_Socket,0
    jz      Sluit_socket_ok
    invoke  closesocket,Internet_Socket
    mov     Internet_Socket,0
Sluit_socket_ok:
    invoke  WSACleanup
    ret
InternetServer endp

; Program entry point: prints a banner, runs one request through
; InternetServer, waits for a key and exits.
start:
    invoke  Print,TEXT_("Internet server",13,10)
    ; Port 80 connection to the host "google.com", sending the GetImages request text.
    invoke  InternetServer,80,TEXT_("google.com"),addr GetImages

;    Goal: get into memory, with the socket code, what URLDownloadToFile saves to disk.

;    invoke  URLDownloadToFile,0,TEXT_("https://www.google.com/search?q=rammstein&tbm=isch"),TEXT_("google_html.txt"),0,0

    invoke  Wait_Key
    invoke  ExitProcess,0
end start

Title: Re: Getting html content of google search
Post by: traphunter on December 01, 2013, 09:16:37 AM
Hello,

I think you have to manage SSL/TLS yourself, or you can use the Winsock Secure Socket Extensions. Those are my ideas.

do you know this http://msdn.microsoft.com/en-us/library/windows/desktop/ms740139%28v=vs.85%29.aspx (http://msdn.microsoft.com/en-us/library/windows/desktop/ms740139%28v=vs.85%29.aspx)?

c++ sample: http://msdn.microsoft.com/en-us/library/windows/desktop/bb394814%28v=vs.85%29.aspx (http://msdn.microsoft.com/en-us/library/windows/desktop/bb394814%28v=vs.85%29.aspx)
Title: Re: Getting html content of google search
Post by: Siekmanski on December 01, 2013, 08:34:25 PM
Thank you for the hint traphunter. I'm going to try the secure socket.
Title: Re: Getting html content of google search
Post by: Antariy on December 03, 2013, 12:05:17 AM
Hi Marinus :t

Try this code:



include \masm32\include\masm32rt.inc
include \masm32\include\wininet.inc
includelib \masm32\lib\wininet.lib

.686
.mmx
.xmm

.data

.code

;-----------------------------------------------------------------------
; start
;   Fetches /search?q=rammstein&tbm=isch from www.google.com on port 443
;   through WinINet and prints the reply to the console in chunks of up
;   to 128 bytes. Every WinINet call is checked; on failure only the
;   handles that were actually opened are closed.
;   Waits for a key press, then exits with code 0.
;-----------------------------------------------------------------------
start proc
LOCAL hSession:DWORD                    ; handle from InternetOpen
LOCAL hConnect:DWORD                    ; handle from InternetConnect
LOCAL hRequest:DWORD                    ; handle from HttpOpenRequest
LOCAL cbRead:DWORD                      ; bytes delivered by the last read
LOCAL buf[129]:BYTE                     ; 128 data bytes + terminating 0

    invoke InternetOpen,CTXT("ASM example"),0,0,0,0
    test eax,eax
    jz finished                         ; no session: nothing to close
    mov hSession,eax

    invoke InternetConnect,eax,CTXT("www.google.com"),443,0,0,INTERNET_SERVICE_HTTP,0,0
    test eax,eax
    jz close_session
    mov hConnect,eax

    invoke HttpOpenRequest,eax,CTXT("GET"),CTXT("/search?q=rammstein&tbm=isch"),0,0,0,INTERNET_FLAG_NO_CACHE_WRITE or INTERNET_FLAG_SECURE,0
    test eax,eax
    jz close_connect
    mov hRequest,eax

    invoke HttpSendRequest,eax,0,0,0,0
    test eax,eax
    jz close_request                    ; request was not sent: skip reading

read_chunk:
    invoke InternetReadFile,hRequest,addr buf,sizeof buf-1,addr cbRead
    test eax,eax
    jz close_request                    ; read failed
    mov eax,cbRead
    test eax,eax
    jz close_request                    ; no more data
    mov byte ptr [buf+eax],0            ; eax <= 128, so this stays inside buf
    invoke crt_printf,CTXT("%s"),addr buf
    jmp read_chunk

close_request:
    invoke InternetCloseHandle,hRequest
close_connect:
    invoke InternetCloseHandle,hConnect
close_session:
    invoke InternetCloseHandle,hSession

finished:
    invoke crt__getch
    invoke crt_exit,0

start endp


end start


This code will connect to the HTTPS server. If you want to connect to a plain HTTP server instead, just change the port (shown in blue in the original post) from 443 to 80, and remove the part shown in red (presumably the "or INTERNET_FLAG_SECURE" flag). A specific feature of these functions is that if the request is redirected from HTTP to HTTPS, they silently follow the redirection and get the data you need from the secure HTTP server.
I'm not very experienced with these functions, so you may find something interesting you need, I just give an idea, if it is suitable for you :t
Title: Re: Getting html content of google search
Post by: dedndave on December 03, 2013, 04:42:20 AM
it dumps a rather complex HTML to the console, here, Alex   :t

built with no errors,
running XP SP3
Title: Re: Getting html content of google search
Post by: Antariy on December 03, 2013, 05:58:05 AM
Thank you for test, Dave! :biggrin:
Title: Re: Getting html content of google search
Post by: Siekmanski on December 03, 2013, 10:28:19 AM
Thank you Antariy,

I'm not very experienced with those functions too, already tried those wininet api functions
and can't get it to save the whole page, which should be +/- 480.000 bytes long.

It only saves about the first 37.000 bytes of the page and all the image urls are not included in that part.
I'm still trying the AcceptTypes and different flags for HttpOpenRequest to get the whole page.
No success so far..... but I keep on trying.

If you use "Mozilla/5.0" instead of "ASM example" it saves 71.000 bytes. ??????
Title: Re: Getting html content of google search
Post by: jj2007 on December 03, 2013, 12:45:42 PM
include \masm32\MasmBasic\MasmBasic.inc        ; download (http://masm32.com/board/index.php?topic=94.0)
  Init        ; MasmBasic start-up macro (NOTE(review): exact set-up work not visible here - see MasmBasic docs)
  ; Read the search page with FileRead$ (given a URL here) and write the
  ; result to the file Ramstein.html.
  FileWrite "Ramstein.html", FileRead$("http://www.google.com/search?q=rammstein&tbm=isch")
  Inkey "ok?"        ; show the prompt; presumably waits for a key press
  Exit
end start


68k, but it looks complete (see attachment - the page ends with "Privacy & Terms About Google", and there are many images). What's missing, and what's wrong with URLDownloadToFile?
Title: Re: Getting html content of google search
Post by: dedndave on December 03, 2013, 01:29:05 PM
when it dumps to my console, the text ends with </HTML>

now, you just have to parse through all that to get the URLs for each pic/page ? (another HTML)   :P
Title: Re: Getting html content of google search
Post by: Siekmanski on December 03, 2013, 01:37:52 PM
Hi jj2007,

There is nothing wrong with URLDownloadToFile, but i don't want to write a file to disk and then read it back to memory.
Your Ramstein.html has no urls to the images found by google search.

Look for imgurl=http://xxxxxxxxx.jpg

But i figured it all out, and now i can load the complete html result from google to memory.
Finally i can search for images and show them in my program.

Tomorrow i'll clean up my code and post it, must go to bed now.

Here's a test, Pictures? means 0 = no images found, 1 = yeahhhhh images found

Title: Re: Getting html content of google search
Post by: Siekmanski on December 03, 2013, 01:40:20 PM
Hi Dave,

That's exactly the reason why i needed the complete content and get the image urls.
Title: Re: Getting html content of google search
Post by: jj2007 on December 03, 2013, 01:48:36 PM
Quote from: dedndave on December 03, 2013, 01:29:05 PM
now, you just have to parse through all that to get the URLs for each pic/page ? (another HTML)   :P

Apparently, that file contains only URLs to thumbnails like this one:
http://t3.gstatic.com/images?q=tbn:ANd9GcTj_a-RuWed7-ZC9ab-vmS_98FFRU6Eye1qvMSiixpVsk1g0CbSYyXiXsGv
(http://t3.gstatic.com/images?q=tbn:ANd9GcTj_a-RuWed7-ZC9ab-vmS_98FFRU6Eye1qvMSiixpVsk1g0CbSYyXiXsGv)
Title: Re: Getting html content of google search
Post by: dedndave on December 03, 2013, 02:04:01 PM
i think there is also a link (or script)
the problem is that the link may be encoded
and - unless you can emulate the PHP file that is on the server - you can't break the code

now - maybe it's not that difficult
but, i doubt google wants just anyone to have the power of google without sticking a google logo and marketing data collector on there   :P

the guy didn't make 35 billion by being stupid
Title: Re: Getting html content of google search
Post by: jj2007 on December 03, 2013, 02:20:16 PM
Actually, it's not that difficult to extract the image locations. I had thought Google would protect its servers*) but nope, they load just fine. You can even grab the images, see second attachment.

include \masm32\MasmBasic\MasmBasic.inc        ; download (http://masm32.com/board/index.php?topic=94.0)
  Init        ; uses Extract$() (http://www.webalice.it/jj2006/MasmBasicQuickReference.htm#Mb1156)

  ; Save the search page to Ramstein.html, then load that file again; esi
  ; receives the result of FileRead$ (presumably a pointer to the text).
  FileWrite "Ramstein.html", FileRead$("http://www.google.com/search?q=rammstein&tbm=isch")
  Let esi=FileRead$("Ramstein.html")
  Dim ImgUrl$()        ; string array that collects the image URLs
  xor ecx, ecx        ; ecx = number of URLs stored so far
  .Repeat
        ; Take the next text between 'src="http://' and '" width="'.
        ; NOTE(review): xsIncL/xsExcR/xsLoop presumably mean "include left
        ; delimiter, exclude right delimiter, continue after the previous
        ; match" - confirm against the MasmBasic reference.
        Let edi=Extract$(esi, 'src="http://', '" width="', xsIncL or xsExcR or xsLoop)
        .Break .if byte ptr [edi]=="?"        ; stop when the result starts with "?" (presumably Extract$'s "no match" marker)
        Let ImgUrl$(ecx)=Mid$(edi, 6)        ; drop the first 5 characters (src=") so the URL starts at http://
        PrintLine Str$(ecx), Tb$, ImgUrl$(ecx)
        inc ecx
  .Until ecx>99        ; at most 100 URLs
  Inkey Str$("\n%i images found. Store URLs to file? (y)", ecx)
  .if eax=="y"
        Store "MyURLs.txt", ImgUrl$()        ; write URLs to disk ...
        ShEx "MyURLs.txt"        ; ... and open in Notepad
  .endif

  Exit
end start


When Notepad pops up, copy a URL and paste it in your browser...
P.S.: Second attachment allows to pick and see an individual image.

*) On a different machine, the page loads only when previously loaded manually; so it's probably from cache. Besides, file size is 480k, and the format is different :(
Title: Re: Getting html content of google search
Post by: Siekmanski on December 04, 2013, 04:09:43 AM
Finally reached my goal.  :biggrin:
Getting the complete html content so i can find the images-urls by google search and loading them to memory.
Hope it works on all Windows systems....

Edit: better to read .asm file in new attachment.
Title: Re: Getting html content of google search
Post by: jj2007 on December 04, 2013, 04:37:05 AM
Quote from: Siekmanski on December 04, 2013, 04:09:43 AMEdit: better to read .asm file in new attachment.

Hmmm... hidden payload?  ;)

Output:

Internet server

Satus Code: 200 OK

CONTENT_LENGTH: 527516

http://weirdestband.files.wordpress.com/2011/11/rammstein.jpg

Saving Rammstein.jpg ....


It takes a while, though... ca. 30 seconds or so.
Title: Re: Getting html content of google search
Post by: dedndave on December 04, 2013, 04:41:28 AM
works here, Marinus   :t
XP SP3
maybe 20 seconds - didn't time it - lol
but, it's a big image

don't know where Jochen got 527516
the one i got was a little over 4 MB
Title: Re: Getting html content of google search
Post by: Siekmanski on December 04, 2013, 04:48:14 AM
QuoteHmmm... hidden payload?  ;)

No, just forgot to change tabs to spaces to make the source code more readable.  :biggrin:

30 seconds, that's a long long time.
I'll rewrite the code to search and load every 1024 bytes at a time, that should speed things up.
Title: Re: Getting html content of google search
Post by: GoneFishing on December 04, 2013, 04:49:12 AM
Win8 32 bit : works OK
Quote
Internet server

Satus Code: 200 OK


CONTENT_LENGTH: 526874

http://weirdestband.files.wordpress.com/2011/11/rammstein.jpg

Saving Rammstein.jpg ....


Press any key to continue...


The image is the same as in Jochen's post but the CONTENT_LENGTH DIFFERS
Title: Re: Getting html content of google search
Post by: Siekmanski on December 04, 2013, 04:57:08 AM
Thanks guys  :biggrin:

4MB that's also part of the long time i guess, but you can search for smaller images if you like.

(TBM=isch)

When you search for images, TBM=isch, you can also use the following TBS values:

•Large images: tbs=isz:l
•Medium images: tbs=isz:m
•Icon sized images: tbs=isz:i
•Image size larger than 400×300: tbs=isz:lt,islt:qsvga
•Image size larger than 640×480: tbs=isz:lt,islt:vga
•Image size larger than 800×600: tbs=isz:lt,islt:svga
•Image size larger than 1024×768: tbs=isz:lt,islt:xga
•Image size larger than 1600×1200: tbs=isz:lt,islt:2mp
•Image size larger than 2272×1704: tbs=isz:lt,islt:4mp
•Image sized exactly 1000×1000: tbs=isz:ex,iszw:1000,iszh:1000
•Images in full color: tbs=ic:color
•Images in black and white: tbs=ic:gray
•Images that are red: tbs=ic:specific,isc:red [orange, yellow, green, teal, blue, purple, pink, white, gray, black, brown]
•Image type Face: tbs=itp:face
•Image type Photo: tbs=itp:photo
•Image type Clipart: tbs=itp:clipart
•Image type Line drawing: tbs=itp:lineart
•Group images by subject: tbs=isg:to
•Show image sizes in search results: tbs=imgo:1

Example URL: Search in images for "michael jackson" as a phrase, and limit results to 4 megapixel images or larger, color images, face images, and group the results by topic:

http://www.google.com/search?q=%22michael+jackson%22&tbm=isch&tbs=ic:color,isz:lt,islt:4mp,itp:face,isg:to


Title: Re: Getting html content of google search
Post by: Siekmanski on December 04, 2013, 05:00:39 AM
QuoteThe image is the same as in Jochen's post but the CONTENT_LENGTH DIFFERS

The content differs from time to time, maybe it's updated then with new info.?
Title: Re: Getting html content of google search
Post by: dedndave on December 04, 2013, 05:07:19 AM
well - don't know how you select which image to d/l
i didn't look at the code

but - google selects results based on location and past search history
i may get the same images as Jochen, but in a different order
Title: Re: Getting html content of google search
Post by: dedndave on December 04, 2013, 05:11:44 AM
try creating an HTML page from the first 100 available images
<a> tags are pretty easy

i.e., rather than downloading,
just see what's available to help understand the selection issues
Title: Re: Getting html content of google search
Post by: Siekmanski on December 04, 2013, 07:54:59 AM
Hi Dave,

CONTENT_LENGTH: is the total length of the html file from google.

Finding the urls by searching for imgurl=http: in the html file and check if it ends with .jpg
The image in my source code is the first one it finds in the html file, but there are many more in the html file.
Downloading the image was pure for checking if the found image-url works.
Next i'll code a routine that gathers all the urls and put them in a list from where i can choose one.
Title: Re: Getting html content of google search
Post by: dedndave on December 04, 2013, 08:26:54 AM
oh - gotcha   :t
Title: Re: Getting html content of google search
Post by: Siekmanski on December 04, 2013, 10:26:23 AM
Now it finds all jpg urls and checks the length of the url ( no longer than 259 bytes + trailing 0 )
And put them in an image list with all the addresses to the url strings.
Some of the strings look like this:

http://www.supermusic.sk/obrazky/2585635_P%252520R%252520Brown%2525202011.jpg

I'll work on a routine to convert those to plain ascii text.
At the bottom of the source is a routine to save one of the images found by image number. ( remove semicolons )

edit: new attachment, added maximum of 128 images to prevent buffer overflow and removed 2 lines of unused code.
Title: Re: Getting html content of google search
Post by: dedndave on December 04, 2013, 11:04:53 AM
when you write the conversion routine, you might want to support something like the following
%2520
that's a tricky one, because "%25" is "%"   ;)
so, "%2520" is a space - normally, you'd see it as "%20"
i have seen that in URL's, before
Title: Re: Getting html content of google search
Post by: jj2007 on December 04, 2013, 01:16:05 PM
Ever heard of/used OpenSSL (http://slproweb.com/products/Win32OpenSSL.html)?
Title: Re: Getting html content of google search
Post by: Siekmanski on December 04, 2013, 01:54:16 PM
Url decoding routine done.  :biggrin:

% == 25 hex
example:
%252520 is encoded 3 times and represent a space character ( 20 == hex 20 == 32 dec == space )
%2520 is space is encoded 2 times
%20 is 1 time encoded

Decoding routine checks for multiple % and then calculates the value that follows.

http://i1223.photobucket.com/albums/dd517/jgwicked/Rammstein%252520Dec%25252011%2525202010/Rammstein1992.jpg
decoded: http://i1223.photobucket.com/albums/dd517/jgwicked/Rammstein Dec 11 2010/Rammstein1992.jpg
Title: Re: Getting html content of google search
Post by: Siekmanski on December 04, 2013, 02:33:24 PM
found an error in line 147

    cmp     edx,260
    jz      url_to_long

change it to:

    cmp     edx,260
    je      url_to_long
Title: Re: Getting html content of google search
Post by: dedndave on December 04, 2013, 02:45:39 PM
 :redface:

JZ and JE are the same opcode
Title: Re: Getting html content of google search
Post by: Siekmanski on December 04, 2013, 03:01:08 PM
Oooh yeahhh   ::)

I really need a break it's 5 AM and i need to go to bed  :biggrin:
Title: Re: Getting html content of google search
Post by: dedndave on December 04, 2013, 03:13:53 PM
me too - see you tomorrow, Marinus   :t
Title: Re: Getting html content of google search
Post by: Siekmanski on December 05, 2013, 11:55:19 AM
Thanks guys for testing and helping out.  :t
I'm done now with the search routines.
Now i can use it for searching album covers in my program.

It's also nice to search for fixed sizes, in this case images 512 by 512 pixels.

final version,
Title: Re: Getting html content of google search
Post by: Magnum on December 05, 2013, 12:10:45 PM
Excellent job.  :t

What kind of download time is average for that file size ?

Andy
Title: Re: Getting html content of google search
Post by: Siekmanski on December 05, 2013, 12:40:03 PM
Thanks  :biggrin:

It depends on the speed of your Internet connection i think.
But you can search for different image sizes.
Title: Re: Getting html content of google search
Post by: Magnum on December 05, 2013, 01:35:57 PM
Does the code search for rammstein.jpg or just download that file if it finds it ?

Andy
Title: Re: Getting html content of google search
Post by: Siekmanski on December 05, 2013, 01:56:20 PM
if you want to search for album cover art,

artist = Rammstein
album = Sehnsucht

the search phrase is then this one, it searches for album art images with exact sizes (512 by 512)

/search?q=rammstein sehnsucht album cover&tbm=isch&tbs=isz:ex,iszw:512,iszh:512

But you can search for any image you want,

search for panda?

/search?q=panda&tbm=isch

in the source code the save name  "rammstein.jpg" is fixed and saves the first found image ( for testing only )
Title: Re: Getting html content of google search
Post by: Antariy on December 05, 2013, 08:31:00 PM
Hi Marinus :t You get the large HTTP answer from google when you used the UserAgent string that is used currently?
Title: Re: Getting html content of google search
Post by: Siekmanski on December 05, 2013, 08:44:19 PM
Yes. :biggrin:

But nothing is mentioned about it by microsoft or MSDN. I was playing with it because "Mozilla/5.0" returned 70 Kb instead of 36 Kb.
So i searched the net for useragent examples as i used it before in the winsock example.
Title: Re: Getting html content of google search
Post by: traphunter on December 05, 2013, 09:22:47 PM
maybe your own real user-agent works: http://www.viewmyuseragent.com/ (http://www.viewmyuseragent.com/)
Title: Re: Getting html content of google search
Post by: Antariy on December 05, 2013, 09:43:10 PM
Quote from: Siekmanski on December 05, 2013, 08:44:19 PM
Yes. :biggrin:

But nothing is mentioned about it by microsoft or MSDN. I was playing with it because "Mozilla/5.0" returned 70 Kb instead of 36 Kb.
So i searched the net for useragent examples as i used it before in the winsock example.

I think Google's servers try to filter automated requests by checking the User-Agent, and if it does not look similar to a real browser's string, they return an incomplete answer. I also noticed that if there are too many / too frequent requests from one IP, Google blocks the request and presents a captcha to verify that the request was made by a person, so the program should not issue searches too frequently - that does not look like a person doing the search. But you may use proxies as well - specify an external proxy here:

invoke InternetOpen,CTXT("ASM example"),INTERNET_OPEN_TYPE_PROXY,CTXT("proxyaddress:proxyport"),CTXT("<local>"),0

so your request will be routed through the external proxy, and Google will "see" the proxy's address instead of yours :t If one IP address gets blocked after frequent searches, you may change the proxy and continue searching :biggrin:
Title: Re: Getting html content of google search
Post by: Siekmanski on December 05, 2013, 09:58:12 PM
Thanks Antariy, i'll keep this "proxy address" trick in mind.  :t
Title: Re: Getting html content of google search
Post by: Evan_ on December 19, 2013, 03:32:45 PM
Weird stuff. Make a fake browser or something. Fork another; idk.
Run it so you don't even have to look at the web pages anymore.

Or just script it with your creepy requests.
Title: Re: Getting html content of google search
Post by: dedndave on December 19, 2013, 06:14:32 PM
yes.....
Marinus is very "creepy" - lol
(those creepy Nederlanders)
that's just how he rolls   8)

we've all learned something from Marinus, though   :t
Title: Re: Getting html content of google search
Post by: Siekmanski on December 20, 2013, 07:59:29 AM
 :biggrin:
Title: Re: Getting html content of google search
Post by: dedndave on December 20, 2013, 10:23:48 AM
lol
that's just creepy   :biggrin:
Title: Re: Getting html content of google search
Post by: guga on April 30, 2019, 01:54:23 PM
Hi Marinus. I know this is an old post, but did you succeed in making the proxy trick work?
Title: Re: Getting html content of google search
Post by: Siekmanski on April 30, 2019, 04:45:43 PM
No, never used the proxy trick.
Title: Re: Getting html content of google search
Post by: guga on April 30, 2019, 04:47:43 PM
But did it work without google blocking? I'm trying to use google translator, but it keeps blocking (and asks for a captcha) after a few open/close requests.
Title: Re: Getting html content of google search
Post by: Siekmanski on April 30, 2019, 04:54:50 PM
I don't know, never tested the proxy strategy.
What I do know is that google changes their page structures from time to time.