mameth
February 2nd, 2009, 03:24 AM
Hi everybody!
I wrote some code to get html from web pages. here's the code:
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <stddef.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <stdio.h>
#include <netdb.h>
#include <unistd.h>
int main()
{
char ch;
char web_address_temp[60];
char *web_address=NULL;
int i=0;
int dummy=0;
printf("Write the address you wanted to visit: ");
scanf("%c",&ch);
while(ch!='\n')
{
web_address_temp[i] = ch;
scanf("%c",&ch);
i++;
}
web_address = (char *) realloc(NULL,strlen(web_address_temp)*sizeof(char) );
strcpy(web_address,web_address_temp);
/*
char *web_host=NULL;
i=0;
dummy = strlen(web_address);
while(web_address[i]!='/')
{
if(i>=dummy)
{
break;
}
web_address_temp[i] = web_address[i];
i++;
}
web_address_temp[i] = '\0';
web_host = (char *)realloc(NULL,strlen(web_address_temp)*sizeof(cha r));
*/
int sockid=0;
int conn=0;
char msgbuff_temp[100];
strcpy(msgbuff_temp,"GET / HTTP/1.1\nHost: ");
strcat(msgbuff_temp, web_address);
strcat(msgbuff_temp, "\nUser-Agent: Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)\n\n\0");
dummy = strlen(msgbuff_temp);
char *msgbuff=NULL;
msgbuff = (char *)realloc(NULL,dummy*sizeof(char));
strcpy(msgbuff,msgbuff_temp);
struct addrinfo hints, *res;
memset(&hints,0,sizeof(hints));
hints.ai_family = AF_UNSPEC;
hints.ai_socktype = SOCK_STREAM;
getaddrinfo(web_address,"80",&hints,&res);
sockid = socket(res->ai_family, res->ai_socktype, res->ai_protocol);
if(sockid<0)
{
perror("socket");
close(sockid);
return 0;
}
conn = connect(sockid,res->ai_addr,res->ai_addrlen);
if(conn<0)
{
perror("connect");
close(sockid);
return 0;
}
dummy = send(sockid,msgbuff, strlen(msgbuff),0);
if(dummy<0)
{
perror("send");
close(sockid);
return 0;
}
char msgbuff_recv[130000];
memset(&msgbuff_recv,'\0',sizeof(msgbuff_recv));
int recv_byts;
recv_byts =recv(sockid,msgbuff_recv,sizeof(msgbuff_recv),0);
msgbuff_recv[recv_byts] ='\0';
if(recv_byts<=0)
{
perror("recv");
close(sockid);
return 0;
}
FILE * fid;
fid = fopen("try.html","w");
for(i=0; i<= recv_byts; i++)
{
printf("%c",msgbuff_recv[i]);
fprintf(fid,"%c",msgbuff_recv[i]);
}
printf("\n");
close(sockid);
free(res);
free(web_address);
return 0;
}
recv function only receives some part of the html code, not all of it. i will use this code to build up some browser, so i need the full html of a page.
by the way, i changed the buffer size alot(i mean i have already written some values like 10000000) so i believe this isn't about the size.
I wrote some code to get html from web pages. here's the code:
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <stddef.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <stdio.h>
#include <netdb.h>
#include <unistd.h>
int main()
{
char ch;
char web_address_temp[60];
char *web_address=NULL;
int i=0;
int dummy=0;
printf("Write the address you wanted to visit: ");
scanf("%c",&ch);
while(ch!='\n')
{
web_address_temp[i] = ch;
scanf("%c",&ch);
i++;
}
web_address = (char *) realloc(NULL,strlen(web_address_temp)*sizeof(char) );
strcpy(web_address,web_address_temp);
/*
char *web_host=NULL;
i=0;
dummy = strlen(web_address);
while(web_address[i]!='/')
{
if(i>=dummy)
{
break;
}
web_address_temp[i] = web_address[i];
i++;
}
web_address_temp[i] = '\0';
web_host = (char *)realloc(NULL,strlen(web_address_temp)*sizeof(cha r));
*/
int sockid=0;
int conn=0;
char msgbuff_temp[100];
strcpy(msgbuff_temp,"GET / HTTP/1.1\nHost: ");
strcat(msgbuff_temp, web_address);
strcat(msgbuff_temp, "\nUser-Agent: Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)\n\n\0");
dummy = strlen(msgbuff_temp);
char *msgbuff=NULL;
msgbuff = (char *)realloc(NULL,dummy*sizeof(char));
strcpy(msgbuff,msgbuff_temp);
struct addrinfo hints, *res;
memset(&hints,0,sizeof(hints));
hints.ai_family = AF_UNSPEC;
hints.ai_socktype = SOCK_STREAM;
getaddrinfo(web_address,"80",&hints,&res);
sockid = socket(res->ai_family, res->ai_socktype, res->ai_protocol);
if(sockid<0)
{
perror("socket");
close(sockid);
return 0;
}
conn = connect(sockid,res->ai_addr,res->ai_addrlen);
if(conn<0)
{
perror("connect");
close(sockid);
return 0;
}
dummy = send(sockid,msgbuff, strlen(msgbuff),0);
if(dummy<0)
{
perror("send");
close(sockid);
return 0;
}
char msgbuff_recv[130000];
memset(&msgbuff_recv,'\0',sizeof(msgbuff_recv));
int recv_byts;
recv_byts =recv(sockid,msgbuff_recv,sizeof(msgbuff_recv),0);
msgbuff_recv[recv_byts] ='\0';
if(recv_byts<=0)
{
perror("recv");
close(sockid);
return 0;
}
FILE * fid;
fid = fopen("try.html","w");
for(i=0; i<= recv_byts; i++)
{
printf("%c",msgbuff_recv[i]);
fprintf(fid,"%c",msgbuff_recv[i]);
}
printf("\n");
close(sockid);
free(res);
free(web_address);
return 0;
}
recv function only receives some part of the html code, not all of it. i will use this code to build up some browser, so i need the full html of a page.
by the way, i changed the buffer size alot(i mean i have already written some values like 10000000) so i believe this isn't about the size.