Apache HTTP Server Configuration For ProxyPassReverseCookiePath

Working with a SOA project, there was a requirement to keep the web UI (AngularJS and HTML files) and the APIs physically decoupled, both for development and for deployment.
The presentation layer calls microservices to authenticate users, which requires special care at the Apache server acting as the load balancer. Once authenticated, the UI project should receive the cookies generated by the web server so that it does not go into an authentication loop.

This is what I did:

ProxyPass /service ajp://localhost:8088/service

ProxyPass /projectname/service ajp://localhost:8088/service
ProxyPassReverseCookiePath /service /projectname

ProxyPass /projectname ajp://localhost:8088/ui
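
For illustration, assuming the backend issues a JSESSIONID session cookie scoped to /service (the cookie name here is just an example), ProxyPassReverseCookiePath rewrites the cookie path in the response so the browser will send the cookie back for requests under /projectname:

Set-Cookie: JSESSIONID=ABC123; Path=/service        (as sent by the backend)
Set-Cookie: JSESSIONID=ABC123; Path=/projectname    (as rewritten for the browser)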

Fastest way to update a large XML document in Java

Problem context: I had a large XML document, like the one below, in which I wanted to identify employees with a wrong department id. I needed an implementation that is not slow like DOM yet can still update the XML the way DOM can; I wanted code as fast as SAX that could still travel into the future of the document to read the Department node, the way DOM can. StAX (the streaming event API used below) fits that middle ground.

Example:

<MyTeam>
    <Employee depId="1">Samarth</Employee>
    <Employee depId="1">Het</Employee>
    <Employee depId="999">Marketing_Employee</Employee>
    <Employee depId="1">Sahil</Employee>
    <Employee depId="1">Jainil/Jaimin</Employee>
    <Employee depId="1">Freya</Employee>
					
    <Department id="192">Engineering</Department>
</MyTeam>

After Execution:

<?xml version="1.0"?>
<MyTeam>

	<Employee depId="1">Samarth</Employee>
	<Employee depId="1">Het</Employee>
	<Employee depId="999">Marketing_Employee-CHANGEDTO-192<Comment WrongValue="Marketing_Employee">DepartmentUpdated</Comment></Employee>
	<Employee depId="1">Sahil</Employee>
	<Employee depId="1">Jainil/Jaimin</Employee>
	<Employee depId="1">Freya</Employee>

	<Department id="192">Engineering</Department>

</MyTeam>

Implementation:

import java.io.ByteArrayInputStream;
import java.io.File;

import javax.xml.namespace.QName;
import javax.xml.stream.XMLEventFactory;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLEventWriter;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;

public class STAXParserMultiPointerReadAndRealtimeWrite {
	
	private static final String input=
			"<MyTeam>" +
					"<Employee depId=\"1\">Samarth</Employee>"+
					"<Employee depId=\"1\">Het</Employee>"+
					"<Employee depId=\"999\">Marketing_Employee</Employee>"+
					"<Employee depId=\"1\">Sahil</Employee>"+
					"<Employee depId=\"1\">Jainil/Jaimin</Employee>"+
					"<Employee depId=\"1\">Freya</Employee>"+
					
					"<Department id=\"192\">Engineering</Department>"+
			"</MyTeam>";
	
	private static final String EMPLOYEE = "Employee";
	private static final String DEPARTMENT = "Department";
	private static final String DEPT_ID = "id";
	private static final String EMP_DEPTID = "depId";
	
	private static class MyXMLHandler{
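		// Design note: two StAX event readers are opened over the same input.
		// "reader" drives the main pass and buffers each event in eventToWrite so it is
		// written one step late, which lets us replace it before it reaches the writer.
		// "readerDepartment" is allowed to run ahead to locate the Department element;
		// matchingPointerDepartment records how far ahead it is so both readers can be
		// brought back in step.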
		
		private XMLEventReader reader = null, readerDepartment = null;
	        private XMLEventWriter writer = null;
	        private int matchingPointerDepartment=0;
	        private XMLEvent eventToWrite=null;
//	        private FileOutputStream fos = null;
	    
		public MyXMLHandler(File file, File outputFile) throws Exception{
			reader = XMLInputFactory.newInstance().createXMLEventReader(new ByteArrayInputStream(input.getBytes())); //new FileInputStream(file));
			readerDepartment = XMLInputFactory.newInstance().createXMLEventReader(new ByteArrayInputStream(input.getBytes())); //new FileInputStream(file));
			
//			fos = new FileOutputStream(outputFile);
			writer = XMLOutputFactory.newInstance().createXMLEventWriter(System.out);
		}
		
		public XMLEvent nextEvent() throws Exception{
			if(matchingPointerDepartment==0){
				readerDepartment.next();
			}
			else{
				matchingPointerDepartment--;//To bring both pointer on same place.
			}
			
			if(eventToWrite!=null){
				writerIt(eventToWrite); //Write last event if it was not changed
			}
			
			eventToWrite=(XMLEvent)reader.next(); 
			return eventToWrite;
		}
		
		public XMLEvent futureNextForDepartment(){
			matchingPointerDepartment++;//To bring back pointer on same place first you have to measure how far you are in future.
			return (XMLEvent)readerDepartment.next();
		}
		
		public boolean hasNext(){
			return reader.hasNext();
		}
		public void writerIt(XMLEvent event) throws Exception{//Only use to write update in XML.
			eventToWrite=null;
			writer.add(event);
		}
		public void generateFile() throws Exception{
			if(eventToWrite!=null){
				writerIt(eventToWrite); //Write the last buffered event before closing.
			}
			writer.flush();
			writer.close();
//			fos.close();
		}
	}
	
    
        private static void createCommentTags(MyXMLHandler myFile, XMLEventFactory eventFactory, String oldValue) throws Exception {
    	
    	    myFile.writerIt(eventFactory.createStartElement(new QName("Comment"), null, null));
    	    myFile.writerIt(eventFactory.createAttribute("WrongValue", oldValue));
    	    myFile.writerIt(eventFactory.createCharacters("DepartmentUpdated"));
    	    myFile.writerIt(eventFactory.createEndElement(new QName("Comment"), null));
	}
	
	public static void main(String[] args) throws Exception {
		
	    XMLEventFactory eventFactory = XMLEventFactory.newInstance();
	    
	    MyXMLHandler myFile = new MyXMLHandler(new File("employee.xml"), new File("new_employee.xml"));

	    String departmentId=null;
	    boolean employeeStarted = false;
	    boolean wrongDept=false;
	    
		while(myFile.hasNext()) {
		    XMLEvent event = myFile.nextEvent();
		    
		    switch (event.getEventType()) {
		    
				case XMLStreamConstants.START_ELEMENT:
					
					 StartElement startElement = event.asStartElement();
					 String startElementName = startElement.getName().getLocalPart();
					 if(EMPLOYEE.equals(startElementName)){
						 employeeStarted = true;
						 if(departmentId==null)
							 departmentId=findDepartmentId(myFile);
						 Attribute attribute = startElement.getAttributeByName(new QName(EMP_DEPTID));
						 if(!departmentId.equals(attribute.getValue())){
							 wrongDept=true;
						 }
					 }
					 break;
					 
				case XMLStreamConstants.END_ELEMENT:
					
					 String endElementName = event.asEndElement().getName().getLocalPart();
					 if(EMPLOYEE.equals(endElementName)){
						 employeeStarted=false;
					 }
					 
					 break;
					 
				case XMLStreamConstants.CHARACTERS:
					 if(wrongDept){
						 String itValue = event.asCharacters().getData();
						 myFile.writerIt(eventFactory.createCharacters(itValue+"-CHANGEDTO-"+departmentId));
						 createCommentTags(myFile, eventFactory, itValue);
						 wrongDept=false; 
 					 }
					 break;
			}
		}
	    myFile.generateFile();
	}


	private static String findDepartmentId(MyXMLHandler myFile) throws Exception{
		
		XMLEvent event = myFile.futureNextForDepartment();
		
		while(!event.isEndDocument()){
			if(event.getEventType() == XMLStreamConstants.START_ELEMENT){
				StartElement startElement = event.asStartElement();
				String elementName = startElement.getName().getLocalPart();
				if(DEPARTMENT.equals(elementName)){
					Attribute attribute = startElement.getAttributeByName(new QName(DEPT_ID));
					if(attribute!=null){
						return attribute.getValue();
					}
				}
			}
			event = myFile.futureNextForDepartment();
		}
		throw new RuntimeException("Couldn't find department.");
	}
}

Configuring the Tomcat JDBC connection pool for high concurrency

There will be cases when you need to tune your existing connection pool configuration so that an older product can sustain more user load, or when you want to find out for the first time how many concurrent users your new product can handle.

As an architect or application designer, there is a high probability that you will start by tuning the connection pool configuration at your end.

Below are some crucial Tomcat JDBC (and Apache DBCP) configurations one can use to get high concurrency with few to no database connection issues.

//I used this in my application to hit the database server with 20 running threads updating 14 tables (average 10 fields per table).
//That worked out to 238 insertions of 0.5 MB of data (an encrypted XML string) per second.
//Encryption time is included in this figure, otherwise the raw database operation count would be higher.

poolProperties.setRemoveAbandonedTimeout(30); 
poolProperties.setMinEvictableIdleTimeMillis(5000); 
poolProperties.setTimeBetweenEvictionRunsMillis(1500); 

RemoveAbandonedTimeout – This is a timeout value. It should be at least as long as the longest-running query of your application; however, if a single connection object is used to fire multiple queries, the timeout should be the sum of all those query executions. Keep this generous to avoid the ‘connection is already closed’ issue.
The above does not apply if you use the ResetAbandonedTimer JDBC interceptor. In that case, set the timeout to the duration of the longest-running single query.

poolProperties.setJdbcInterceptors(
	      "org.apache.tomcat.jdbc.pool.interceptor.ResetAbandonedTimer");

MinEvictableIdleTimeMillis – The minimum time a connection can stay idle before it is evicted by the pool's eviction thread to free up resources.

TimeBetweenEvictionRunsMillis – The eviction thread kicks in every x milliseconds to evict idle or abandoned connection objects.
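
For context, here is a minimal sketch of how these properties might be wired into a Tomcat JDBC DataSource; the URL, credentials and pool sizes are placeholders you would adapt to your own environment.

import org.apache.tomcat.jdbc.pool.DataSource;
import org.apache.tomcat.jdbc.pool.PoolProperties;

public class PoolConfig {

    public static DataSource createDataSource() {
        PoolProperties poolProperties = new PoolProperties();

        // Placeholder connection details.
        poolProperties.setUrl("jdbc:mysql://localhost:3306/mydb");
        poolProperties.setDriverClassName("com.mysql.jdbc.Driver");
        poolProperties.setUsername("user");
        poolProperties.setPassword("password");

        // Pool sizing - tune for your concurrency target.
        poolProperties.setMaxActive(50);
        poolProperties.setInitialSize(10);

        // Abandoned/idle connection handling discussed in this post.
        poolProperties.setRemoveAbandoned(true);
        poolProperties.setRemoveAbandonedTimeout(30);
        poolProperties.setMinEvictableIdleTimeMillis(5000);
        poolProperties.setTimeBetweenEvictionRunsMillis(1500);
        poolProperties.setJdbcInterceptors(
                "org.apache.tomcat.jdbc.pool.interceptor.ResetAbandonedTimer");

        DataSource dataSource = new DataSource();
        dataSource.setPoolProperties(poolProperties);
        return dataSource;
    }
}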

Please note: performance is subject to the tuning of multiple elements. It also depends on the hardware where the database files are stored, the network adapters and the database server configuration itself. To get maximum throughput from any application, it is necessary that all of these elements are tuned/configured properly.

No Bluff Just Stuff – Role Of A Software Architect

I had an opportunity to attend three days of technical sessions at NFJS. Initially I had mixed feelings about the sessions and speakers, but once they started I found myself hooked on most of the sessions from three speakers: Neal, Kirk and Mark. I wish I could have cloned myself and attended the other sessions too. It was mostly Mark who helped me understand what an architect should be, along with an architect’s do’s and don’ts. Below is a very brief summary of one of Mark’s sessions.

An architect must have knowledge of all the current trends in technology, the industry and the market of the company for which he is going to design the system.

He should ensure that the architecture design is being followed by holding regular compliance check meetings.

He should have very good interpersonal skills. He should be able to convey his ideas to other developers and the product owner easily and without resistance.

He should know the 3 Cs – Communication with stakeholders, Collaboration with teams and, most important, Clarity on each element.

He may decide which technologies are to be used in a project, but that is not an architectural decision.

He should have breadth of technology; it’s OK if he is not an expert (depth in a technology). One can become a good architect with a good working knowledge of a wide variety of technologies.

No RDD(Resume Driven Design)

He should not try to design the solution to every problem himself. Doing so mostly leads to failure.

Enterprise Caching Techniques – Distributed Caching

Standalone caching is a good solution when requests are for a small subset of the items in the database. But what if the application is under heavy load and all the data is accessed all the time? Or what if there is a business need to store a large volume of data in the cache? In such scenarios standalone caching may not be the right solution. Furthermore, standalone caching may not be useful if the application runs in a multi-server environment. This is when the need for distributed caching arises. Distributed caching is the key factor behind many successfully deployed applications and is therefore widely used. It is now accepted as a key component of any scalable application architecture. Memcached, one of the leading and most popular distributed caching frameworks, is used as part of highly scalable architectures at Facebook, Twitter, YouTube and others.

Distributed caching is a form of caching that allows the cache to span multiple servers so that it can grow in size and in transactional capacity. It distributes data across multiple servers while still giving you a single logical view of the cache. The architecture it employs makes distributed caching highly scalable. However, like any other distributed system, distributed caching frameworks are inherently complex due to the involvement of network elements; in fact, a slow network can be the key performance and scalability barrier for distributed caching. Distributed caching is an out-of-process caching service and, from the application's standpoint, it acts as an L2 cache.

One of the widely used tools for a distributed architecture is Memcached. With Memcached one can configure a distributed architecture where two or more nodes communicate with each other to synchronize data in case of node failure. This n-to-n network communication can also be tuned into a 1-to-n topology, where one node behaves as the master and holds a copy of all data while the slaves carry only unique data. Both have their own advantages and problems. Below is a cluster on the same machine, purely for the sake of example, where two Memcached instances run on two different ports. Code can then tie them together into a cluster as shown below.

memcached -l 127.0.0.1 -p 1212
memcached -l 127.0.0.1 -p 2705

This can be used with any memcached client API. spymemcached is a Java client, and examples are given here.

MemcachedClient c=new MemcachedClient(AddrUtil.getAddresses("127.0.0.1:1212 127.0.0.1:2705"));
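
As a minimal sketch (the key name, expiry and value are placeholders), a set/get round trip with the spymemcached client against this two-node cluster might look like the following; the client hashes each key to one of the configured nodes.

import java.io.IOException;

import net.spy.memcached.AddrUtil;
import net.spy.memcached.MemcachedClient;

public class MemcachedExample {

    public static void main(String[] args) throws IOException {
        // Connect to both local Memcached instances started above.
        MemcachedClient c = new MemcachedClient(
                AddrUtil.getAddresses("127.0.0.1:1212 127.0.0.1:2705"));

        // Store a value with a one hour (3600 second) expiry - placeholder key and value.
        c.set("employee:42", 3600, "Samarth");

        // Read it back; returns null if the key is missing or expired.
        Object value = c.get("employee:42");
        System.out.println(value);

        c.shutdown();
    }
}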

Database and IO operations are costly when there are millions of application transactions. Distributed caching is one of the solutions to reduce the load on the backend and make it more scalable. The main idea behind distributed caching is to hold a large volume of the stored data in memory. Distributed caching techniques have advanced and matured to manage massive data sets completely in memory. A database server usually requires a high-end machine, whereas distributed caching performs well on lower-cost machines (like those used for web servers). This makes it easy to add more modestly configured machines and scale the application out. Such caching can even be added to a small server network as a modest memory entity, using a small share of the memory available to the application.

Synchronizing java threads on a string value

We had a situation where we wanted to synchronize all threads fetching a new value from the database. This was really a database design issue, but fixing the whole thing would have taken a month of effort, so we decided to write code that waits for the database operation to complete. This required a synchronization mechanism keyed on a value instead of on an object. Below is the code.

I wrote a mechanism to clean up the synchSemaphores map, but it is advisable to use an LRUMap or a similar API which does this automatically.

One can also remove the lockAcquiredCounter field; it was only used by the cleanup process in my custom code.

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.Semaphore;

public final class SemaphoreLocker {
	
	private ConcurrentMap<String, Semaphore> synchSemaphores = new ConcurrentHashMap<String, Semaphore>();
	private long timeToClear = 5;
    private static SemaphoreLocker locker = new SemaphoreLocker();
    
    public static SemaphoreLocker getInstance(){
    	return locker;
    }
    
    /**
     * Constructor will initialize the thread which will do the cleanup when nobody is making any request
     */
    private SemaphoreLocker(){
        // Write a code which can clean	synchSemaphores objects periodically. 
    }
    
    /*
     * Indicator if any thread under lock mode
     */
    volatile int lockAcquiredCounter = 0;
    
    public void acquireLock(String key){
    	synchronized (locker) {
    		lockAcquiredCounter++;
		}
        Semaphore tempS = new Semaphore(1, true);
        Semaphore s = synchSemaphores.putIfAbsent(key, tempS);
        if(s == null){
            s = tempS;
        }
        s.acquireUninterruptibly();
    }

    public void releaseLock(String key){
        Semaphore s = synchSemaphores.get(key);
        if(s != null){
            s.release();
        }
        synchronized (locker) {
    		lockAcquiredCounter--;
		}
    }
}
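
A usage sketch, assuming the callers agree on the key string (here a hypothetical record id): every thread must release the lock in a finally block so a failed database call cannot leave the key locked.

public class SemaphoreLockerUsage {

    public void refreshValue(String recordId) {
        SemaphoreLocker locker = SemaphoreLocker.getInstance();
        locker.acquireLock(recordId);          // only one thread per key proceeds
        try {
            // ... perform the database read/update for this recordId ...
        } finally {
            locker.releaseLock(recordId);      // always release, even on failure
        }
    }
}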

Value of PI

Programming is always fun.

Ever thought about what the possible value of PI can be? Well, there is no fixed value because it is irrational. However, I have tried to get the maximum possible number of digits. My Eclipse went mad (use the console 🙂 ) and at one point I was not able to see the result. What I could capture is in the screenshot below.

The sequence does not repeat itself, at least for the number of digits I captured.

[Screenshot: PI_ValueCode]
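
The original code only survives in that screenshot, so here is an illustrative sketch (not necessarily the approach I used) that prints PI to an arbitrary number of decimal digits using Machin's formula with BigDecimal; the digit count is a placeholder.

import java.math.BigDecimal;
import java.math.MathContext;
import java.math.RoundingMode;

public class PiDigits {

    // arctan(1/x) via the Taylor series 1/x - 1/(3x^3) + 1/(5x^5) - ...
    private static BigDecimal arctanOfReciprocal(int x, MathContext mc, BigDecimal epsilon) {
        BigDecimal result = BigDecimal.ZERO;
        BigDecimal xSquared = BigDecimal.valueOf((long) x * x);
        BigDecimal term = BigDecimal.ONE.divide(BigDecimal.valueOf(x), mc);
        int n = 1;
        boolean add = true;
        while (term.compareTo(epsilon) > 0) {
            BigDecimal contribution = term.divide(BigDecimal.valueOf(n), mc);
            result = add ? result.add(contribution) : result.subtract(contribution);
            term = term.divide(xSquared, mc);
            n += 2;
            add = !add;
        }
        return result;
    }

    public static void main(String[] args) {
        int digits = 1000; // placeholder: how many decimal digits to print
        MathContext mc = new MathContext(digits + 10, RoundingMode.HALF_EVEN);
        BigDecimal epsilon = BigDecimal.ONE.movePointLeft(digits + 10);
        // Machin's formula: pi = 16*arctan(1/5) - 4*arctan(1/239)
        BigDecimal pi = BigDecimal.valueOf(16).multiply(arctanOfReciprocal(5, mc, epsilon))
                .subtract(BigDecimal.valueOf(4).multiply(arctanOfReciprocal(239, mc, epsilon)));
        System.out.println(pi.round(new MathContext(digits)));
    }
}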

Enterprise Caching Techniques – Standalone Caching

Many application domains are fetch-centric, with very few store operations. In e-commerce, for instance, the buyer's search-versus-purchase ratio is 9:1 or sometimes even wider. Such applications require an additional layer of caching in their architecture. Caching is not something new invented recently; it has been around since the era of hardware evolution started. The L1 and L2 CPU caches we see in any hardware architecture are caching mechanisms and are still in use. L1 and L2 sit between the processor and RAM and contain system-critical information for processing. Fetching data from those caches is faster than fetching it from RAM, but their size is quite small compared to main memory. This further helps to classify the type of data and helps the CPU decide where to store it. Caching in enterprise applications derives directly from that same concept; however, here the cache may live on the same machine or on different machines/nodes connected to the parent with very fast network cards. Caching in enterprise applications is therefore mainly divided into two parts, i.e. standalone caching and distributed caching.

Standalone Caching

Sometimes referred to as embedded or in-process caching, this is a single-JVM technique for storing frequently requested data. Standalone caching acts as an L1 cache from the application's perspective and resides in RAM.
The main purpose of standalone caching is to improve the performance of business-critical operations. Standalone caching has limited main memory at its disposal, therefore only data that is frequently used and important for business-critical functions is cached. Standalone caching products are almost always used as a side-cache for an application's data access layer. Side-cache refers to an architectural pattern in which the application itself manages the caching of data from a database, filesystem or any other source; the cache is used to temporarily store objects. The application first checks the cache for an existing copy of the data and returns it if present. When the data is not present, it is retrieved from the data access layer and put into the cache for the next incoming request, as sketched below.
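
A minimal sketch of that side-cache pattern, assuming a generic in-process map as the cache and a hypothetical loadFromDatabase method (a product such as EHCache would normally play the role of the map):

import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

public class EmployeeSideCache {

    // Stand-in for an in-process cache such as EHCache; a plain map keeps the sketch self-contained.
    private final ConcurrentMap<String, Employee> cache = new ConcurrentHashMap<String, Employee>();

    public Employee getEmployee(String id) {
        Employee employee = cache.get(id);      // 1. check the cache first
        if (employee == null) {
            employee = loadFromDatabase(id);    // 2. cache miss: go to the data access layer
            cache.put(id, employee);            // 3. store for the next incoming request
        }
        return employee;
    }

    // Hypothetical data access call - replace with your real DAO/repository.
    private Employee loadFromDatabase(String id) {
        return new Employee(id);
    }

    public static class Employee {
        private final String id;
        public Employee(String id) { this.id = id; }
        public String getId() { return id; }
    }
}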
In caching, some mechanism is required to cope with invalid cached data, i.e. data that has been updated at the source but not yet refreshed in the cache. There are several techniques that can be used to deal with invalid data, or to remove unused cache entries and free up memory for other in-demand data.

Such concerns can be handled by writing an API which takes care of invalid cache entries.
Caching products like EHCache provide basic functionality for handling invalid data. The application decides at what point cached data should be invalidated. The strategy typically employed is that whenever data is updated in the store, the application invalidates the cached copy. If it is not vital to update the cached copy on the spot, we can apply other techniques which periodically refresh the cache based on some time-based configuration. We can even combine both techniques for a multi-server environment.

There are also some other ways to update and remove cached data. With TTL (time-to-live) or LRU (least recently used) configuration we can monitor individual cache entries and take action on them with the help of the API.

The problem with a standalone cache is that it is very limited and can only be used in a single-node/machine architecture. Hence the need for a distributed cache arises – next in the same series.

java.util.regex.PatternSyntaxException: Dangling meta character

When you try to split any string on ? or * with code like the following:

String sqlParts[] = sql.split("?");

You will end up with an unchecked PatternSyntaxException as shown below.

java.util.regex.PatternSyntaxException: Dangling meta character '?' near index 0

Solution:
Avoid passing dangling metacharacters like ‘?’, ‘+’ and ‘*’ directly to split(). Escape them instead, as shown below.

String sqlParts[] = sql.split("\\?");
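
Alternatively, if the delimiter comes from a variable and may contain any regex metacharacter, Pattern.quote can escape the whole string for you; a small sketch reusing the same sql variable:

import java.util.regex.Pattern;

String delimiter = "?"; // could be any literal delimiter
String sqlParts[] = sql.split(Pattern.quote(delimiter)); // treats the delimiter as a literal, not a regex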